From 0130ecd8004bfe640446d0afd78d4cfe05942bf0 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sun, 25 Jan 2026 13:24:00 +0000
Subject: [PATCH] fix: paragraph-aware newline chunking (#1726)

Thanks @tyler6204

Co-authored-by: Tyler Yust <64381258+tyler6204@users.noreply.github.com>
---
 CHANGELOG.md                       |  1 +
 docs/channels/bluebubbles.md       |  2 +-
 docs/channels/discord.md           |  4 ++--
 docs/channels/imessage.md          |  4 ++--
 docs/channels/matrix.md            |  2 +-
 docs/channels/msteams.md           |  2 +-
 docs/channels/nextcloud-talk.md    |  2 +-
 docs/channels/signal.md            |  4 ++--
 docs/channels/slack.md             |  2 +-
 docs/channels/telegram.md          |  4 ++--
 docs/channels/whatsapp.md          |  2 +-
 docs/concepts/streaming.md         |  2 +-
 docs/gateway/configuration.md      |  2 +-
 src/auto-reply/chunk.test.ts       |  5 +++++
 src/auto-reply/chunk.ts            | 16 +++++++++++++---
 src/discord/chunk.test.ts          |  2 +-
 src/infra/outbound/deliver.test.ts |  7 +++----
 17 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebff8d858..2bc3ebae3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -53,6 +53,7 @@ Docs: https://docs.clawd.bot
 - Media understanding: skip image understanding when the primary model already supports vision. (#1747) Thanks @tyler6204.
 - Models: default missing custom provider fields so minimal configs are accepted.
 - Messaging: keep newline chunking safe for fenced markdown blocks across channels.
+- Messaging: treat newline chunking as paragraph-aware (blank-line splits) to keep lists and headings together. (#1726) Thanks @tyler6204.
 - TUI: reload history after gateway reconnect to restore session state. (#1663)
 - Heartbeat: normalize target identifiers for consistent routing.
 - Exec: keep approvals for elevated ask unless full mode. (#1616) Thanks @ivancasco.
diff --git a/docs/channels/bluebubbles.md b/docs/channels/bluebubbles.md
index 1dd8e560d..a1f4a0892 100644
--- a/docs/channels/bluebubbles.md
+++ b/docs/channels/bluebubbles.md
@@ -196,7 +196,7 @@ Provider options:
 - `channels.bluebubbles.sendReadReceipts`: Send read receipts (default: `true`).
 - `channels.bluebubbles.blockStreaming`: Enable block streaming (default: `true`).
 - `channels.bluebubbles.textChunkLimit`: Outbound chunk size in chars (default: 4000).
-- `channels.bluebubbles.chunkMode`: `length` (default) splits only when exceeding `textChunkLimit`; `newline` splits on every newline and sends each line immediately during streaming.
+- `channels.bluebubbles.chunkMode`: `length` (default) splits only when exceeding `textChunkLimit`; `newline` splits on blank lines (paragraph boundaries) before length chunking.
 - `channels.bluebubbles.mediaMaxMb`: Inbound media cap in MB (default: 8).
 - `channels.bluebubbles.historyLimit`: Max group messages for context (0 disables).
 - `channels.bluebubbles.dmHistoryLimit`: DM history limit.
diff --git a/docs/channels/discord.md b/docs/channels/discord.md
index f63fd45c9..12dd28084 100644
--- a/docs/channels/discord.md
+++ b/docs/channels/discord.md
@@ -205,7 +205,7 @@ Notes:
 ## Capabilities & limits
 - DMs and guild text channels (threads are treated as separate channels; voice not supported).
 - Typing indicators sent best-effort; message chunking uses `channels.discord.textChunkLimit` (default 2000) and splits tall replies by line count (`channels.discord.maxLinesPerMessage`, default 17).
-- Optional newline chunking: set `channels.discord.chunkMode="newline"` to split on each line before length chunking.
+- Optional newline chunking: set `channels.discord.chunkMode="newline"` to split on blank lines (paragraph boundaries) before length chunking.
 - File uploads supported up to the configured `channels.discord.mediaMaxMb` (default 8 MB).
 - Mention-gated guild replies by default to avoid noisy bots.
 - Reply context is injected when a message references another message (quoted content + ids).
@@ -307,7 +307,7 @@ ack reaction after the bot replies.
 - `guilds.<id>.requireMention`: per-guild mention requirement (overridable per channel).
 - `guilds.<id>.reactionNotifications`: reaction system event mode (`off`, `own`, `all`, `allowlist`).
 - `textChunkLimit`: outbound text chunk size (chars). Default: 2000.
-- `chunkMode`: `length` (default) splits only when exceeding `textChunkLimit`; `newline` splits on every newline before length chunking.
+- `chunkMode`: `length` (default) splits only when exceeding `textChunkLimit`; `newline` splits on blank lines (paragraph boundaries) before length chunking.
 - `maxLinesPerMessage`: soft max line count per message. Default: 17.
 - `mediaMaxMb`: clamp inbound media saved to disk.
 - `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20; falls back to `messages.groupChat.historyLimit`; `0` disables).
diff --git a/docs/channels/imessage.md b/docs/channels/imessage.md
index 316822dc5..bae945e8c 100644
--- a/docs/channels/imessage.md
+++ b/docs/channels/imessage.md
@@ -219,7 +219,7 @@ This is useful when you want an isolated personality/model for a specific thread
 
 ## Limits
 - Outbound text is chunked to `channels.imessage.textChunkLimit` (default 4000).
-- Optional newline chunking: set `channels.imessage.chunkMode="newline"` to split on each line before length chunking.
+- Optional newline chunking: set `channels.imessage.chunkMode="newline"` to split on blank lines (paragraph boundaries) before length chunking.
 - Media uploads are capped by `channels.imessage.mediaMaxMb` (default 16).
 
 ## Addressing / delivery targets
@@ -254,7 +254,7 @@ Provider options:
 - `channels.imessage.includeAttachments`: ingest attachments into context.
 - `channels.imessage.mediaMaxMb`: inbound/outbound media cap (MB).
 - `channels.imessage.textChunkLimit`: outbound chunk size (chars).
-- `channels.imessage.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking.
+- `channels.imessage.chunkMode`: `length` (default) or `newline` to split on blank lines (paragraph boundaries) before length chunking.
 
 Related global options:
 - `agents.list[].groupChat.mentionPatterns` (or `messages.groupChat.mentionPatterns`).
diff --git a/docs/channels/matrix.md b/docs/channels/matrix.md
index 77a2989d5..2d9025f51 100644
--- a/docs/channels/matrix.md
+++ b/docs/channels/matrix.md
@@ -215,7 +215,7 @@ Provider options:
 - `channels.matrix.initialSyncLimit`: initial sync limit.
 - `channels.matrix.threadReplies`: `off | inbound | always` (default: inbound).
 - `channels.matrix.textChunkLimit`: outbound text chunk size (chars).
-- `channels.matrix.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking.
+- `channels.matrix.chunkMode`: `length` (default) or `newline` to split on blank lines (paragraph boundaries) before length chunking.
 - `channels.matrix.dm.policy`: `pairing | allowlist | open | disabled` (default: pairing).
 - `channels.matrix.dm.allowFrom`: DM allowlist (user IDs or display names). `open` requires `"*"`. The wizard resolves names to IDs when possible.
 - `channels.matrix.groupPolicy`: `allowlist | open | disabled` (default: allowlist).
diff --git a/docs/channels/msteams.md b/docs/channels/msteams.md
index de3b064b2..2f6ed5f83 100644
--- a/docs/channels/msteams.md
+++ b/docs/channels/msteams.md
@@ -415,7 +415,7 @@ Key settings (see `/gateway/configuration` for shared channel patterns):
 - `channels.msteams.dmPolicy`: `pairing | allowlist | open | disabled` (default: pairing)
 - `channels.msteams.allowFrom`: allowlist for DMs (AAD object IDs, UPNs, or display names). The wizard resolves names to IDs during setup when Graph access is available.
 - `channels.msteams.textChunkLimit`: outbound text chunk size.
-- `channels.msteams.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking.
+- `channels.msteams.chunkMode`: `length` (default) or `newline` to split on blank lines (paragraph boundaries) before length chunking.
 - `channels.msteams.mediaAllowHosts`: allowlist for inbound attachment hosts (defaults to Microsoft/Teams domains).
 - `channels.msteams.requireMention`: require @mention in channels/groups (default true).
 - `channels.msteams.replyStyle`: `thread | top-level` (see [Reply Style](#reply-style-threads-vs-posts)).
diff --git a/docs/channels/nextcloud-talk.md b/docs/channels/nextcloud-talk.md
index 43c1595ed..abc696444 100644
--- a/docs/channels/nextcloud-talk.md
+++ b/docs/channels/nextcloud-talk.md
@@ -114,7 +114,7 @@ Provider options:
 - `channels.nextcloud-talk.dmHistoryLimit`: DM history limit (0 disables).
 - `channels.nextcloud-talk.dms`: per-DM overrides (historyLimit).
 - `channels.nextcloud-talk.textChunkLimit`: outbound text chunk size (chars).
-- `channels.nextcloud-talk.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking.
+- `channels.nextcloud-talk.chunkMode`: `length` (default) or `newline` to split on blank lines (paragraph boundaries) before length chunking.
 - `channels.nextcloud-talk.blockStreaming`: disable block streaming for this channel.
 - `channels.nextcloud-talk.blockStreamingCoalesce`: block streaming coalesce tuning.
 - `channels.nextcloud-talk.mediaMaxMb`: inbound media cap (MB).
diff --git a/docs/channels/signal.md b/docs/channels/signal.md
index 0ba89385d..c154b0591 100644
--- a/docs/channels/signal.md
+++ b/docs/channels/signal.md
@@ -111,7 +111,7 @@ Groups:
 
 ## Media + limits
 - Outbound text is chunked to `channels.signal.textChunkLimit` (default 4000).
-- Optional newline chunking: set `channels.signal.chunkMode="newline"` to split on each line before length chunking.
+- Optional newline chunking: set `channels.signal.chunkMode="newline"` to split on blank lines (paragraph boundaries) before length chunking.
 - Attachments supported (base64 fetched from `signal-cli`).
 - Default media cap: `channels.signal.mediaMaxMb` (default 8).
 - Use `channels.signal.ignoreAttachments` to skip downloading media.
@@ -170,7 +170,7 @@ Provider options:
 - `channels.signal.historyLimit`: max group messages to include as context (0 disables).
 - `channels.signal.dmHistoryLimit`: DM history limit in user turns. Per-user overrides: `channels.signal.dms["<phone_or_uuid>"].historyLimit`.
 - `channels.signal.textChunkLimit`: outbound chunk size (chars).
-- `channels.signal.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking.
+- `channels.signal.chunkMode`: `length` (default) or `newline` to split on blank lines (paragraph boundaries) before length chunking.
 - `channels.signal.mediaMaxMb`: inbound/outbound media cap (MB).
 
 Related global options:
diff --git a/docs/channels/slack.md b/docs/channels/slack.md
index 44bc84035..5f768db0e 100644
--- a/docs/channels/slack.md
+++ b/docs/channels/slack.md
@@ -349,7 +349,7 @@ ack reaction after the bot replies.
 
 ## Limits
 - Outbound text is chunked to `channels.slack.textChunkLimit` (default 4000).
-- Optional newline chunking: set `channels.slack.chunkMode="newline"` to split on each line before length chunking.
+- Optional newline chunking: set `channels.slack.chunkMode="newline"` to split on blank lines (paragraph boundaries) before length chunking.
 - Media uploads are capped by `channels.slack.mediaMaxMb` (default 20).
 
 ## Reply threading
diff --git a/docs/channels/telegram.md b/docs/channels/telegram.md
index eb558cf74..e708e2e64 100644
--- a/docs/channels/telegram.md
+++ b/docs/channels/telegram.md
@@ -135,7 +135,7 @@ Notes:
 
 ## Limits
 - Outbound text is chunked to `channels.telegram.textChunkLimit` (default 4000).
-- Optional newline chunking: set `channels.telegram.chunkMode="newline"` to split on each line before length chunking.
+- Optional newline chunking: set `channels.telegram.chunkMode="newline"` to split on blank lines (paragraph boundaries) before length chunking.
 - Media downloads/uploads are capped by `channels.telegram.mediaMaxMb` (default 5).
 - Telegram Bot API requests time out after `channels.telegram.timeoutSeconds` (default 500 via grammY). Set lower to avoid long hangs.
 - Group history context uses `channels.telegram.historyLimit` (or `channels.telegram.accounts.*.historyLimit`), falling back to `messages.groupChat.historyLimit`. Set `0` to disable (default 50).
@@ -524,7 +524,7 @@ Provider options:
 - `channels.telegram.accounts.<account>.capabilities.inlineButtons`: per-account override.
 - `channels.telegram.replyToMode`: `off | first | all` (default: `first`).
 - `channels.telegram.textChunkLimit`: outbound chunk size (chars).
-- `channels.telegram.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking.
+- `channels.telegram.chunkMode`: `length` (default) or `newline` to split on blank lines (paragraph boundaries) before length chunking.
 - `channels.telegram.linkPreview`: toggle link previews for outbound messages (default: true).
 - `channels.telegram.streamMode`: `off | partial | block` (draft streaming).
 - `channels.telegram.mediaMaxMb`: inbound/outbound media cap (MB).
diff --git a/docs/channels/whatsapp.md b/docs/channels/whatsapp.md
index 517c71b93..4759cf4c9 100644
--- a/docs/channels/whatsapp.md
+++ b/docs/channels/whatsapp.md
@@ -271,7 +271,7 @@ WhatsApp can automatically send emoji reactions to incoming messages immediately
 
 ## Limits
 - Outbound text is chunked to `channels.whatsapp.textChunkLimit` (default 4000).
-- Optional newline chunking: set `channels.whatsapp.chunkMode="newline"` to split on each line before length chunking.
+- Optional newline chunking: set `channels.whatsapp.chunkMode="newline"` to split on blank lines (paragraph boundaries) before length chunking.
 - Inbound media saves are capped by `channels.whatsapp.mediaMaxMb` (default 50 MB).
 - Outbound media items are capped by `agents.defaults.mediaMaxMb` (default 5 MB).
 
diff --git a/docs/concepts/streaming.md b/docs/concepts/streaming.md
index 8019e4cca..6f9609ca6 100644
--- a/docs/concepts/streaming.md
+++ b/docs/concepts/streaming.md
@@ -38,7 +38,7 @@ Legend:
 - `agents.defaults.blockStreamingChunk`: `{ minChars, maxChars, breakPreference? }`.
 - `agents.defaults.blockStreamingCoalesce`: `{ minChars?, maxChars?, idleMs? }` (merge streamed blocks before send).
 - Channel hard cap: `*.textChunkLimit` (e.g., `channels.whatsapp.textChunkLimit`).
-- Channel chunk mode: `*.chunkMode` (`length` default, `newline` splits on each line before length chunking).
+- Channel chunk mode: `*.chunkMode` (`length` default; `newline` splits on blank lines, i.e. paragraph boundaries, before length chunking).
 - Discord soft cap: `channels.discord.maxLinesPerMessage` (default 17) splits tall replies to avoid UI clipping.
 
 **Boundary semantics:**
diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md
index 3b16be5b1..868126101 100644
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@@ -1131,7 +1131,7 @@ Reaction notification modes:
 - `own`: reactions on the bot's own messages (default).
 - `all`: all reactions on all messages.
 - `allowlist`: reactions from `guilds.<id>.users` on all messages (empty list disables).
-Outbound text is chunked by `channels.discord.textChunkLimit` (default 2000). Set `channels.discord.chunkMode="newline"` to split on line boundaries before length chunking. Discord clients can clip very tall messages, so `channels.discord.maxLinesPerMessage` (default 17) splits long multi-line replies even when under 2000 chars.
+Outbound text is chunked by `channels.discord.textChunkLimit` (default 2000). Set `channels.discord.chunkMode="newline"` to split on blank lines (paragraph boundaries) before length chunking. Discord clients can clip very tall messages, so `channels.discord.maxLinesPerMessage` (default 17) splits long multi-line replies even when under 2000 chars.
 Retry policy defaults and behavior are documented in [Retry policy](/concepts/retry).
 
 ### `channels.googlechat` (Chat API webhook)
diff --git a/src/auto-reply/chunk.test.ts b/src/auto-reply/chunk.test.ts
index 7007e0abc..545899843 100644
--- a/src/auto-reply/chunk.test.ts
+++ b/src/auto-reply/chunk.test.ts
@@ -344,6 +344,11 @@ describe("chunkMarkdownTextWithMode", () => {
     expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([text]);
   });
 
+  it("defers long markdown paragraphs to markdown chunking in newline mode", () => {
+    const text = `\`\`\`js\n${"const a = 1;\n".repeat(20)}\`\`\``;
+    expect(chunkMarkdownTextWithMode(text, 40, "newline")).toEqual(chunkMarkdownText(text, 40));
+  });
+
   it("does not split on blank lines inside a fenced code block", () => {
     const text = "```python\ndef my_function():\n    x = 1\n\n    y = 2\n    return x + y\n```";
     expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([text]);
diff --git a/src/auto-reply/chunk.ts b/src/auto-reply/chunk.ts
index 1615699b9..c4fd31ed8 100644
--- a/src/auto-reply/chunk.ts
+++ b/src/auto-reply/chunk.ts
@@ -173,10 +173,16 @@ export function chunkByNewline(
  * - Only breaks at paragraph separators ("\n\n" or more, allowing whitespace on blank lines)
  * - Packs multiple paragraphs into a single chunk up to `limit`
  * - Falls back to length-based splitting when a single paragraph exceeds `limit`
+ *   (unless `opts.splitLongParagraphs` is `false`; the oversized paragraph is then left unsplit)
  */
-export function chunkByParagraph(text: string, limit: number): string[] {
+export function chunkByParagraph(
+  text: string,
+  limit: number,
+  opts?: { splitLongParagraphs?: boolean },
+): string[] {
   if (!text) return [];
   if (limit <= 0) return [text];
+  const splitLongParagraphs = opts?.splitLongParagraphs !== false;
 
   // Normalize to \n so blank line detection is consistent.
   const normalized = text.replace(/\r\n?/g, "\n");
@@ -186,7 +192,9 @@ export function chunkByParagraph(text: string, limit: number): string[] {
   // boundaries, not only exceeding a length limit.)
   const paragraphRe = /\n[\t ]*\n+/;
   if (!paragraphRe.test(normalized)) {
-    return normalized.length <= limit ? [normalized] : chunkText(normalized, limit);
+    if (normalized.length <= limit) return [normalized];
+    if (!splitLongParagraphs) return [normalized];
+    return chunkText(normalized, limit);
   }
 
   const spans = parseFenceSpans(normalized);
@@ -213,6 +221,8 @@ export function chunkByParagraph(text: string, limit: number): string[] {
     if (!paragraph.trim()) continue;
     if (paragraph.length <= limit) {
       chunks.push(paragraph);
+    } else if (!splitLongParagraphs) {
+      chunks.push(paragraph);
     } else {
       chunks.push(...chunkText(paragraph, limit));
     }
@@ -235,7 +245,7 @@ export function chunkMarkdownTextWithMode(text: string, limit: number, mode: Chu
   if (mode === "newline") {
     // Paragraph chunking is fence-safe because we never split at arbitrary indices.
     // If a paragraph must be split by length, defer to the markdown-aware chunker.
-    const paragraphChunks = chunkByParagraph(text, limit);
+    const paragraphChunks = chunkByParagraph(text, limit, { splitLongParagraphs: false });
     const out: string[] = [];
     for (const chunk of paragraphChunks) {
       const nested = chunkMarkdownText(chunk, limit);
diff --git a/src/discord/chunk.test.ts b/src/discord/chunk.test.ts
index f8e18e2b4..13ec1b8e7 100644
--- a/src/discord/chunk.test.ts
+++ b/src/discord/chunk.test.ts
@@ -58,7 +58,7 @@ describe("chunkDiscordText", () => {
       maxLines: 50,
       chunkMode: "newline",
     });
-    expect(chunks).toEqual(["```js\nconst a = 1;\nconst b = 2;\n```", "After"]);
+    expect(chunks).toEqual([text]);
   });
 
   it("reserves space for closing fences when chunking", () => {
diff --git a/src/infra/outbound/deliver.test.ts b/src/infra/outbound/deliver.test.ts
index d259366b4..a80a3f482 100644
--- a/src/infra/outbound/deliver.test.ts
+++ b/src/infra/outbound/deliver.test.ts
@@ -192,7 +192,7 @@ describe("deliverOutboundPayloads", () => {
     expect(sendWhatsApp).toHaveBeenNthCalledWith(
       2,
       "+1555",
-      "\nLine two",
+      "Line two",
       expect.objectContaining({ verbose: false }),
     );
   });
@@ -241,9 +241,8 @@ describe("deliverOutboundPayloads", () => {
       payloads: [{ text }],
     });
 
-    expect(chunker).toHaveBeenCalledTimes(2);
-    expect(chunker).toHaveBeenNthCalledWith(1, "```js\nconst a = 1;\nconst b = 2;\n```", 4000);
-    expect(chunker).toHaveBeenNthCalledWith(2, "After", 4000);
+    expect(chunker).toHaveBeenCalledTimes(1);
+    expect(chunker).toHaveBeenNthCalledWith(1, text, 4000);
   });
 
   it("uses iMessage media maxBytes from agent fallback", async () => {