diff --git a/src/auto-reply/chunk.test.ts b/src/auto-reply/chunk.test.ts index f2256c082..7007e0abc 100644 --- a/src/auto-reply/chunk.test.ts +++ b/src/auto-reply/chunk.test.ts @@ -343,6 +343,17 @@ describe("chunkMarkdownTextWithMode", () => { const text = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter"; expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([text]); }); + + it("does not split on blank lines inside a fenced code block", () => { + const text = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```"; + expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([text]); + }); + + it("splits on blank lines between a code fence and following paragraph", () => { + const fence = "```python\ndef my_function():\n x = 1\n\n y = 2\n return x + y\n```"; + const text = `${fence}\n\nAfter`; + expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([fence, "After"]); + }); }); describe("resolveChunkMode", () => { diff --git a/src/auto-reply/chunk.ts b/src/auto-reply/chunk.ts index 340886c7e..1615699b9 100644 --- a/src/auto-reply/chunk.ts +++ b/src/auto-reply/chunk.ts @@ -189,11 +189,19 @@ export function chunkByParagraph(text: string, limit: number): string[] { return normalized.length <= limit ? [normalized] : chunkText(normalized, limit); } + const spans = parseFenceSpans(normalized); + const parts: string[] = []; const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace let lastIndex = 0; for (const match of normalized.matchAll(re)) { const idx = match.index ?? 0; + + // Do not split on blank lines that occur inside fenced code blocks. + if (!isSafeFenceBreak(spans, idx)) { + continue; + } + parts.push(normalized.slice(lastIndex, idx)); lastIndex = idx + match[0].length; }