diff --git a/src/auto-reply/chunk.test.ts b/src/auto-reply/chunk.test.ts index 335576b3c..ddd3478f2 100644 --- a/src/auto-reply/chunk.test.ts +++ b/src/auto-reply/chunk.test.ts @@ -25,6 +25,45 @@ function expectFencesBalanced(chunks: string[]) { } } +type ChunkCase = { + name: string; + text: string; + limit: number; + expected: string[]; +}; + +function runChunkCases( + chunker: (text: string, limit: number) => string[], + cases: ChunkCase[], +) { + for (const { name, text, limit, expected } of cases) { + it(name, () => { + expect(chunker(text, limit)).toEqual(expected); + }); + } +} + +const parentheticalCases: ChunkCase[] = [ + { + name: "keeps parenthetical phrases together", + text: "Heads up now (Though now I'm curious)ok", + limit: 35, + expected: ["Heads up now", "(Though now I'm curious)ok"], + }, + { + name: "handles nested parentheses", + text: "Hello (outer (inner) end) world", + limit: 26, + expected: ["Hello (outer (inner) end)", "world"], + }, + { + name: "ignores unmatched closing parentheses", + text: "Hello) world (ok)", + limit: 12, + expected: ["Hello)", "world (ok)"], + }, +]; + describe("chunkText", () => { it("keeps multi-line text in one chunk when under limit", () => { const text = "Line one\n\nLine two\n\nLine three"; @@ -68,11 +107,7 @@ describe("chunkText", () => { expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]); }); - it("keeps parenthetical phrases together", () => { - const text = "Heads up now (Though now I'm curious)ok"; - const chunks = chunkText(text, 35); - expect(chunks).toEqual(["Heads up now", "(Though now I'm curious)ok"]); - }); + runChunkCases(chunkText, [parentheticalCases[0]]); }); describe("resolveTextChunkLimit", () => { @@ -191,17 +226,7 @@ describe("chunkMarkdownText", () => { } }); - it("keeps parenthetical phrases together", () => { - const text = "Heads up now (Though now I'm curious)ok"; - const chunks = chunkMarkdownText(text, 35); - expect(chunks).toEqual(["Heads up now", "(Though now I'm curious)ok"]); - }); - - it("handles nested parentheses", () => { - const text = "Hello (outer (inner) end) world"; - const chunks = chunkMarkdownText(text, 26); - expect(chunks).toEqual(["Hello (outer (inner) end)", "world"]); - }); + runChunkCases(chunkMarkdownText, parentheticalCases); it("hard-breaks when a parenthetical exceeds the limit", () => { const text = `(${"a".repeat(80)})`; @@ -209,10 +234,4 @@ describe("chunkMarkdownText", () => { expect(chunks[0]?.length).toBe(20); expect(chunks.join("")).toBe(text); }); - - it("ignores unmatched closing parentheses", () => { - const text = "Hello) world (ok)"; - const chunks = chunkMarkdownText(text, 12); - expect(chunks).toEqual(["Hello)", "world (ok)"]); - }); }); diff --git a/src/auto-reply/chunk.ts b/src/auto-reply/chunk.ts index b64bbe5bf..44ab80c76 100644 --- a/src/auto-reply/chunk.ts +++ b/src/auto-reply/chunk.ts @@ -91,23 +91,7 @@ export function chunkText(text: string, limit: number): string[] { const window = remaining.slice(0, limit); // 1) Prefer a newline break inside the window (outside parentheses). - let lastNewline = -1; - let lastWhitespace = -1; - let depth = 0; - for (let i = 0; i < window.length; i++) { - const char = window[i]; - if (char === "(") { - depth += 1; - continue; - } - if (char === ")" && depth > 0) { - depth -= 1; - continue; - } - if (depth !== 0) continue; - if (char === "\n") lastNewline = i; - else if (/\s/.test(char)) lastWhitespace = i; - } + const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window); // 2) Otherwise prefer the last whitespace (word boundary) inside the window. let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace; @@ -243,12 +227,26 @@ function pickSafeBreakIndex( window: string, spans: ReturnType, ): number { + const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints( + window, + (index) => isSafeFenceBreak(spans, index), + ); + + if (lastNewline > 0) return lastNewline; + if (lastWhitespace > 0) return lastWhitespace; + return -1; +} + +function scanParenAwareBreakpoints( + window: string, + isAllowed: (index: number) => boolean = () => true, +): { lastNewline: number; lastWhitespace: number } { let lastNewline = -1; let lastWhitespace = -1; let depth = 0; for (let i = 0; i < window.length; i++) { - if (!isSafeFenceBreak(spans, i)) continue; + if (!isAllowed(i)) continue; const char = window[i]; if (char === "(") { depth += 1; @@ -263,7 +261,5 @@ function pickSafeBreakIndex( else if (/\s/.test(char)) lastWhitespace = i; } - if (lastNewline > 0) return lastNewline; - if (lastWhitespace > 0) return lastWhitespace; - return -1; + return { lastNewline, lastWhitespace }; }