Commit 0975aa4a7c (parent 46fa1c1301), committed by Peter Steinberger:
"Fix newline chunking: split on blank lines even under limit"
@@ -177,69 +177,39 @@ export function chunkByNewline(
export function chunkByParagraph(text: string, limit: number): string[] {
|
export function chunkByParagraph(text: string, limit: number): string[] {
|
||||||
if (!text) return [];
|
if (!text) return [];
|
||||||
if (limit <= 0) return [text];
|
if (limit <= 0) return [text];
|
||||||
if (text.length <= limit) return [text];
|
|
||||||
|
|
||||||
// Normalize to \n so blank line detection is consistent.
|
// Normalize to \n so blank line detection is consistent.
|
||||||
const normalized = text.replace(/\r\n?/g, "\n");
|
const normalized = text.replace(/\r\n?/g, "\n");
|
||||||
|
|
||||||
const parts: string[] = [];
|
// Fast-path: if there are no blank-line paragraph separators, do not split.
|
||||||
const seps: string[] = [];
|
// (We *do not* early-return based on `limit` — newline mode is about paragraph
|
||||||
|
// boundaries, not only exceeding a length limit.)
|
||||||
|
const paragraphRe = /\n[\t ]*\n+/;
|
||||||
|
if (!paragraphRe.test(normalized)) {
|
||||||
|
return normalized.length <= limit ? [normalized] : chunkText(normalized, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
const parts: string[] = [];
|
||||||
const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace
|
const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace
|
||||||
let lastIndex = 0;
|
let lastIndex = 0;
|
||||||
for (const match of normalized.matchAll(re)) {
|
for (const match of normalized.matchAll(re)) {
|
||||||
const idx = match.index ?? 0;
|
const idx = match.index ?? 0;
|
||||||
parts.push(normalized.slice(lastIndex, idx));
|
parts.push(normalized.slice(lastIndex, idx));
|
||||||
seps.push(match[0]);
|
|
||||||
lastIndex = idx + match[0].length;
|
lastIndex = idx + match[0].length;
|
||||||
}
|
}
|
||||||
parts.push(normalized.slice(lastIndex));
|
parts.push(normalized.slice(lastIndex));
|
||||||
|
|
||||||
const chunks: string[] = [];
|
const chunks: string[] = [];
|
||||||
let current = "";
|
for (const part of parts) {
|
||||||
let pendingSep = "";
|
const paragraph = part.replace(/\s+$/g, "");
|
||||||
|
if (!paragraph.trim()) continue;
|
||||||
const flush = () => {
|
if (paragraph.length <= limit) {
|
||||||
const out = current.trimEnd();
|
chunks.push(paragraph);
|
||||||
if (out) chunks.push(out);
|
} else {
|
||||||
current = "";
|
chunks.push(...chunkText(paragraph, limit));
|
||||||
};
|
|
||||||
|
|
||||||
for (let i = 0; i < parts.length; i++) {
|
|
||||||
const paragraph = parts[i] ?? "";
|
|
||||||
if (!paragraph.trim() && i === parts.length - 1) break;
|
|
||||||
|
|
||||||
const prefix = pendingSep;
|
|
||||||
pendingSep = seps[i] ?? "";
|
|
||||||
|
|
||||||
const candidate = current
|
|
||||||
? `${current}${prefix}${paragraph}`
|
|
||||||
: // Cap leading blank lines so we never exceed `limit` with just prefixes.
|
|
||||||
`${prefix.slice(0, Math.max(0, limit - 1))}${paragraph}`;
|
|
||||||
|
|
||||||
if (candidate.length <= limit) {
|
|
||||||
current = candidate;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Can't fit this paragraph into the current chunk.
|
|
||||||
if (current) flush();
|
|
||||||
|
|
||||||
const paragraphWithPrefix = `${prefix}${paragraph}`;
|
|
||||||
if (paragraphWithPrefix.length <= limit) {
|
|
||||||
current = paragraphWithPrefix;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Paragraph itself is too long; split it by length (preferring newlines/whitespace).
|
|
||||||
const prefixCap = prefix.slice(0, Math.max(0, limit - 1));
|
|
||||||
const remainingLimit = Math.max(1, limit - prefixCap.length);
|
|
||||||
const [first, ...rest] = chunkText(paragraph, remainingLimit);
|
|
||||||
if (first) chunks.push(prefixCap + first);
|
|
||||||
chunks.push(...rest);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current.trim()) flush();
|
|
||||||
return chunks;
|
return chunks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user