fix: newline chunking across channels

2026-01-25 04:05:14 +00:00
parent ca78ccf74c
commit 458e731f8b
80 changed files with 580 additions and 91 deletions
--- a/src/auto-reply/chunk.test.ts
+++ b/src/auto-reply/chunk.test.ts
@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
 import {
  chunkByNewline,
  chunkMarkdownText,
+  chunkMarkdownTextWithMode,
  chunkText,
  chunkTextWithMode,
  resolveChunkMode,
@@ -246,10 +247,10 @@ describe("chunkByNewline", () => {
    expect(chunks).toEqual(["Line one", "Line two", "Line three"]);
  });

-  it("filters empty lines", () => {
+  it("preserves blank lines by folding into the next chunk", () => {
    const text = "Line one\n\n\nLine two\n\nLine three";
    const chunks = chunkByNewline(text, 1000);
-    expect(chunks).toEqual(["Line one", "Line two", "Line three"]);
+    expect(chunks).toEqual(["Line one", "\n\nLine two", "\nLine three"]);
  });

  it("trims whitespace from lines", () => {
@@ -258,6 +259,12 @@ describe("chunkByNewline", () => {
    expect(chunks).toEqual(["Line one", "Line two"]);
  });

+  it("preserves leading blank lines on the first chunk", () => {
+    const text = "\n\nLine one\nLine two";
+    const chunks = chunkByNewline(text, 1000);
+    expect(chunks).toEqual(["\n\nLine one", "Line two"]);
+  });
+
  it("falls back to length-based for long lines", () => {
    const text = "Short line\n" + "a".repeat(50) + "\nAnother short";
    const chunks = chunkByNewline(text, 20);
@@ -269,6 +276,12 @@ describe("chunkByNewline", () => {
    expect(chunks[4]).toBe("Another short");
  });

+  it("does not split long lines when splitLongLines is false", () => {
+    const text = "a".repeat(50);
+    const chunks = chunkByNewline(text, 20, { splitLongLines: false });
+    expect(chunks).toEqual([text]);
+  });
+
  it("returns empty array for empty input", () => {
    expect(chunkByNewline("", 100)).toEqual([]);
  });
@@ -276,6 +289,18 @@ describe("chunkByNewline", () => {
  it("returns empty array for whitespace-only input", () => {
    expect(chunkByNewline("   \n\n   ", 100)).toEqual([]);
  });
+
+  it("preserves trailing blank lines on the last chunk", () => {
+    const text = "Line one\n\n";
+    const chunks = chunkByNewline(text, 1000);
+    expect(chunks).toEqual(["Line one\n\n"]);
+  });
+
+  it("keeps whitespace when trimLines is false", () => {
+    const text = "  indented line  \nNext";
+    const chunks = chunkByNewline(text, 1000, { trimLines: false });
+    expect(chunks).toEqual(["  indented line  ", "Next"]);
+  });
 });

 describe("chunkTextWithMode", () => {
@@ -292,6 +317,26 @@ describe("chunkTextWithMode", () => {
  });
 });

+describe("chunkMarkdownTextWithMode", () => {
+  it("uses markdown-aware chunking for length mode", () => {
+    const text = "Line one\nLine two";
+    expect(chunkMarkdownTextWithMode(text, 1000, "length")).toEqual(chunkMarkdownText(text, 1000));
+  });
+
+  it("uses newline-based chunking for newline mode", () => {
+    const text = "Line one\nLine two";
+    expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual(["Line one", "Line two"]);
+  });
+
+  it("does not split inside code fences for newline mode", () => {
+    const text = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter";
+    expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([
+      "```js\nconst a = 1;\nconst b = 2;\n```",
+      "After",
+    ]);
+  });
+});
+
 describe("resolveChunkMode", () => {
  it("returns length as default", () => {
    expect(resolveChunkMode(undefined, "telegram")).toBe("length");
@@ -304,16 +349,16 @@ describe("resolveChunkMode", () => {
    expect(resolveChunkMode(cfg, "__internal__")).toBe("length");
  });

-  it("supports provider-level overrides for bluebubbles", () => {
-    const cfg = { channels: { bluebubbles: { chunkMode: "newline" as const } } };
-    expect(resolveChunkMode(cfg, "bluebubbles")).toBe("newline");
+  it("supports provider-level overrides for slack", () => {
+    const cfg = { channels: { slack: { chunkMode: "newline" as const } } };
+    expect(resolveChunkMode(cfg, "slack")).toBe("newline");
    expect(resolveChunkMode(cfg, "discord")).toBe("length");
  });

-  it("supports account-level overrides for bluebubbles", () => {
+  it("supports account-level overrides for slack", () => {
    const cfg = {
      channels: {
-        bluebubbles: {
+        slack: {
          chunkMode: "length" as const,
          accounts: {
            primary: { chunkMode: "newline" as const },
@@ -321,12 +366,7 @@ describe("resolveChunkMode", () => {
        },
      },
    };
-    expect(resolveChunkMode(cfg, "bluebubbles", "primary")).toBe("newline");
-    expect(resolveChunkMode(cfg, "bluebubbles", "other")).toBe("length");
-  });
-
-  it("ignores chunkMode for non-bluebubbles providers", () => {
-    const cfg = { channels: { ["telegram" as string]: { chunkMode: "newline" as const } } };
-    expect(resolveChunkMode(cfg, "telegram")).toBe("length");
+    expect(resolveChunkMode(cfg, "slack", "primary")).toBe("newline");
+    expect(resolveChunkMode(cfg, "slack", "other")).toBe("length");
  });
 });
--- a/src/auto-reply/chunk.ts
+++ b/src/auto-reply/chunk.ts
@@ -101,8 +101,6 @@ export function resolveChunkMode(
  accountId?: string | null,
 ): ChunkMode {
  if (!provider || provider === INTERNAL_MESSAGE_CHANNEL) return DEFAULT_CHUNK_MODE;
-  // Chunk mode is only supported for BlueBubbles.
-  if (provider !== "bluebubbles") return DEFAULT_CHUNK_MODE;
  const channelsConfig = cfg?.channels as Record<string, unknown> | undefined;
  const providerConfig = (channelsConfig?.[provider] ??
    (cfg as Record<string, unknown> | undefined)?.[provider]) as ProviderChunkConfig | undefined;
@@ -111,25 +109,56 @@ export function resolveChunkMode(
 }

 /**
- * Split text on newlines, filtering empty lines.
- * Lines exceeding maxLineLength are further split using length-based chunking.
+ * Split text on newlines, trimming each line's whitespace unless trimLines:false.
+ * Blank lines fold into the next non-empty line as leading "\n"; trailing ones append to the last chunk.
+ * Long lines can be split by length (default) or kept intact via splitLongLines:false.
 */
-export function chunkByNewline(text: string, maxLineLength: number): string[] {
+export function chunkByNewline(
+  text: string,
+  maxLineLength: number,
+  opts?: {
+    splitLongLines?: boolean;
+    trimLines?: boolean;
+    isSafeBreak?: (index: number) => boolean;
+  },
+): string[] {
  if (!text) return [];
-  const lines = text.split("\n");
+  if (maxLineLength <= 0) return text.trim() ? [text] : [];
+  const splitLongLines = opts?.splitLongLines !== false;
+  const trimLines = opts?.trimLines !== false;
+  const lines = splitByNewline(text, opts?.isSafeBreak);
  const chunks: string[] = [];
+  let pendingBlankLines = 0;

  for (const line of lines) {
    const trimmed = line.trim();
-    if (!trimmed) continue; // skip empty lines
-
-    if (trimmed.length <= maxLineLength) {
-      chunks.push(trimmed);
-    } else {
-      // Long line: fall back to length-based chunking
-      const subChunks = chunkText(trimmed, maxLineLength);
-      chunks.push(...subChunks);
+    if (!trimmed) {
+      pendingBlankLines += 1;
+      continue;
    }
+
+    const maxPrefix = Math.max(0, maxLineLength - 1);
+    const cappedBlankLines = pendingBlankLines > 0 ? Math.min(pendingBlankLines, maxPrefix) : 0;
+    const prefix = cappedBlankLines > 0 ? "\n".repeat(cappedBlankLines) : "";
+    pendingBlankLines = 0;
+
+    const lineValue = trimLines ? trimmed : line;
+    if (!splitLongLines || lineValue.length + prefix.length <= maxLineLength) {
+      chunks.push(prefix + lineValue);
+      continue;
+    }
+
+    const firstLimit = Math.max(1, maxLineLength - prefix.length);
+    const first = lineValue.slice(0, firstLimit);
+    chunks.push(prefix + first);
+    const remaining = lineValue.slice(firstLimit);
+    if (remaining) {
+      chunks.push(...chunkText(remaining, maxLineLength));
+    }
+  }
+
+  if (pendingBlankLines > 0 && chunks.length > 0) {
+    chunks[chunks.length - 1] += "\n".repeat(pendingBlankLines);
  }

  return chunks;
@@ -140,11 +169,59 @@ export function chunkByNewline(text: string, maxLineLength: number): string[] {
 */
 export function chunkTextWithMode(text: string, limit: number, mode: ChunkMode): string[] {
  if (mode === "newline") {
-    return chunkByNewline(text, limit);
+    const chunks: string[] = [];
+    const lineChunks = chunkByNewline(text, limit, { splitLongLines: false });
+    for (const line of lineChunks) {
+      const nested = chunkText(line, limit);
+      if (!nested.length && line) {
+        chunks.push(line);
+        continue;
+      }
+      chunks.push(...nested);
+    }
+    return chunks;
  }
  return chunkText(text, limit);
 }

+export function chunkMarkdownTextWithMode(text: string, limit: number, mode: ChunkMode): string[] {
+  if (mode === "newline") {
+    const spans = parseFenceSpans(text);
+    const chunks: string[] = [];
+    const lineChunks = chunkByNewline(text, limit, {
+      splitLongLines: false,
+      trimLines: false,
+      isSafeBreak: (index) => isSafeFenceBreak(spans, index),
+    });
+    for (const line of lineChunks) {
+      const nested = chunkMarkdownText(line, limit);
+      if (!nested.length && line) {
+        chunks.push(line);
+        continue;
+      }
+      chunks.push(...nested);
+    }
+    return chunks;
+  }
+  return chunkMarkdownText(text, limit);
+}
+
+function splitByNewline(
+  text: string,
+  isSafeBreak: (index: number) => boolean = () => true,
+): string[] {
+  const lines: string[] = [];
+  let start = 0;
+  for (let i = 0; i < text.length; i++) {
+    if (text[i] === "\n" && isSafeBreak(i)) {
+      lines.push(text.slice(start, i));
+      start = i + 1;
+    }
+  }
+  lines.push(text.slice(start));
+  return lines;
+}
+
 export function chunkText(text: string, limit: number): string[] {
  if (!text) return [];
  if (limit <= 0) return [text];
--- a/src/auto-reply/reply/block-streaming.ts
+++ b/src/auto-reply/reply/block-streaming.ts
@@ -69,7 +69,7 @@ export function resolveBlockStreamingChunking(
  });
  const chunkCfg = cfg?.agents?.defaults?.blockStreamingChunk;

-  // BlueBubbles-only: if chunkMode is "newline", use newline-based streaming
+  // When chunkMode is "newline", use newline-based streaming
  const channelChunkMode = resolveChunkMode(cfg, providerKey, accountId);
  if (channelChunkMode === "newline") {
    // For newline mode: use very low minChars to flush quickly on newlines
@@ -103,7 +103,7 @@ export function resolveBlockStreamingCoalescing(
 ): BlockStreamingCoalescing | undefined {
  const providerKey = normalizeChunkProvider(provider);

-  // BlueBubbles-only: when chunkMode is "newline", disable coalescing to send each line immediately
+  // When chunkMode is "newline", disable coalescing to send each line immediately
  const channelChunkMode = resolveChunkMode(cfg, providerKey, accountId);
  if (channelChunkMode === "newline") {
    return undefined;