From 458e731f8b00909caa3989609445052df8c6d982 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 25 Jan 2026 04:05:14 +0000 Subject: [PATCH] fix: newline chunking across channels --- CHANGELOG.md | 1 + docs/channels/discord.md | 2 + docs/channels/imessage.md | 2 + docs/channels/matrix.md | 1 + docs/channels/msteams.md | 1 + docs/channels/nextcloud-talk.md | 1 + docs/channels/signal.md | 2 + docs/channels/slack.md | 1 + docs/channels/telegram.md | 2 + docs/channels/whatsapp.md | 3 +- docs/concepts/streaming.md | 1 + docs/gateway/configuration.md | 8 +- extensions/googlechat/src/channel.ts | 1 + extensions/googlechat/src/monitor.ts | 15 ++- extensions/imessage/src/channel.ts | 1 + extensions/matrix/src/config-schema.ts | 1 + .../matrix/src/matrix/monitor/replies.ts | 10 +- extensions/matrix/src/matrix/send.test.ts | 2 + extensions/matrix/src/matrix/send.ts | 7 +- extensions/matrix/src/outbound.ts | 1 + extensions/matrix/src/types.ts | 2 + extensions/mattermost/src/channel.ts | 1 + extensions/mattermost/src/config-schema.ts | 1 + .../mattermost/src/mattermost/monitor.ts | 7 +- extensions/mattermost/src/types.ts | 2 + extensions/msteams/src/messenger.test.ts | 21 ++-- extensions/msteams/src/messenger.ts | 16 ++- extensions/msteams/src/outbound.ts | 1 + extensions/msteams/src/reply-dispatcher.ts | 2 + extensions/nextcloud-talk/src/channel.ts | 1 + .../nextcloud-talk/src/config-schema.ts | 1 + extensions/nextcloud-talk/src/types.ts | 2 + extensions/signal/src/channel.ts | 1 + extensions/telegram/src/channel.ts | 1 + extensions/whatsapp/src/channel.ts | 1 + extensions/zalo/src/channel.ts | 1 + extensions/zalo/src/monitor.ts | 13 ++- extensions/zalouser/src/channel.ts | 1 + extensions/zalouser/src/monitor.ts | 14 ++- src/auto-reply/chunk.test.ts | 68 ++++++++--- src/auto-reply/chunk.ts | 107 +++++++++++++++--- src/auto-reply/reply/block-streaming.ts | 4 +- src/channels/plugins/outbound/imessage.ts | 1 + src/channels/plugins/outbound/signal.ts | 1 + src/channels/plugins/outbound/telegram.ts | 1 + src/channels/plugins/outbound/whatsapp.ts | 1 + src/channels/plugins/types.adapters.ts | 1 + src/config/types.discord.ts | 2 + src/config/types.googlechat.ts | 2 + src/config/types.imessage.ts | 2 + src/config/types.msteams.ts | 2 + src/config/types.signal.ts | 2 + src/config/types.slack.ts | 2 + src/config/types.telegram.ts | 2 + src/config/types.whatsapp.ts | 4 + src/config/zod-schema.providers-core.ts | 7 ++ src/config/zod-schema.providers-whatsapp.ts | 2 + src/discord/chunk.test.ts | 12 +- src/discord/chunk.ts | 27 +++++ .../monitor/message-handler.process.ts | 2 + src/discord/monitor/native-command.ts | 16 ++- src/discord/monitor/reply-delivery.ts | 12 +- src/discord/send.outbound.ts | 4 + src/discord/send.shared.ts | 24 +++- src/imessage/monitor/deliver.ts | 5 +- src/infra/outbound/deliver.test.ts | 78 +++++++++++++ src/infra/outbound/deliver.ts | 28 ++++- src/plugins/runtime/index.ts | 2 + src/plugins/runtime/types.ts | 3 + src/signal/monitor.ts | 11 +- src/slack/monitor/replies.ts | 16 ++- src/slack/monitor/slash.ts | 3 + src/slack/send.ts | 16 ++- src/telegram/bot-message-dispatch.ts | 3 + src/telegram/bot-native-commands.ts | 3 + src/telegram/bot/delivery.ts | 30 ++++- src/web/accounts.ts | 2 + src/web/auto-reply/deliver-reply.ts | 6 +- src/web/auto-reply/monitor.ts | 1 + src/web/auto-reply/monitor/process-message.ts | 4 +- 80 files changed, 580 insertions(+), 91 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d82acd4f..b805886a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.clawd.bot - Voice Call: return stream TwiML for outbound conversation calls on initial Twilio webhook. (#1634) - Google Chat: tighten email allowlist matching, typing cleanup, media caps, and onboarding/docs/tests. (#1635) Thanks @iHildy. - Google Chat: normalize space targets without double `spaces/` prefix. +- Messaging: keep newline chunking safe for fenced markdown blocks across channels. ## 2026.1.23-1 diff --git a/docs/channels/discord.md b/docs/channels/discord.md index ca6ff6c9c..f63fd45c9 100644 --- a/docs/channels/discord.md +++ b/docs/channels/discord.md @@ -205,6 +205,7 @@ Notes: ## Capabilities & limits - DMs and guild text channels (threads are treated as separate channels; voice not supported). - Typing indicators sent best-effort; message chunking uses `channels.discord.textChunkLimit` (default 2000) and splits tall replies by line count (`channels.discord.maxLinesPerMessage`, default 17). +- Optional newline chunking: set `channels.discord.chunkMode="newline"` to split on each line before length chunking. - File uploads supported up to the configured `channels.discord.mediaMaxMb` (default 8 MB). - Mention-gated guild replies by default to avoid noisy bots. - Reply context is injected when a message references another message (quoted content + ids). @@ -306,6 +307,7 @@ ack reaction after the bot replies. - `guilds..requireMention`: per-guild mention requirement (overridable per channel). - `guilds..reactionNotifications`: reaction system event mode (`off`, `own`, `all`, `allowlist`). - `textChunkLimit`: outbound text chunk size (chars). Default: 2000. +- `chunkMode`: `length` (default) splits only when exceeding `textChunkLimit`; `newline` splits on every newline before length chunking. - `maxLinesPerMessage`: soft max line count per message. Default: 17. - `mediaMaxMb`: clamp inbound media saved to disk. - `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20; falls back to `messages.groupChat.historyLimit`; `0` disables). diff --git a/docs/channels/imessage.md b/docs/channels/imessage.md index 83b54cf5a..316822dc5 100644 --- a/docs/channels/imessage.md +++ b/docs/channels/imessage.md @@ -219,6 +219,7 @@ This is useful when you want an isolated personality/model for a specific thread ## Limits - Outbound text is chunked to `channels.imessage.textChunkLimit` (default 4000). +- Optional newline chunking: set `channels.imessage.chunkMode="newline"` to split on each line before length chunking. - Media uploads are capped by `channels.imessage.mediaMaxMb` (default 16). ## Addressing / delivery targets @@ -253,6 +254,7 @@ Provider options: - `channels.imessage.includeAttachments`: ingest attachments into context. - `channels.imessage.mediaMaxMb`: inbound/outbound media cap (MB). - `channels.imessage.textChunkLimit`: outbound chunk size (chars). +- `channels.imessage.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking. Related global options: - `agents.list[].groupChat.mentionPatterns` (or `messages.groupChat.mentionPatterns`). diff --git a/docs/channels/matrix.md b/docs/channels/matrix.md index cafdacdf1..77a2989d5 100644 --- a/docs/channels/matrix.md +++ b/docs/channels/matrix.md @@ -215,6 +215,7 @@ Provider options: - `channels.matrix.initialSyncLimit`: initial sync limit. - `channels.matrix.threadReplies`: `off | inbound | always` (default: inbound). - `channels.matrix.textChunkLimit`: outbound text chunk size (chars). +- `channels.matrix.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking. - `channels.matrix.dm.policy`: `pairing | allowlist | open | disabled` (default: pairing). - `channels.matrix.dm.allowFrom`: DM allowlist (user IDs or display names). `open` requires `"*"`. The wizard resolves names to IDs when possible. - `channels.matrix.groupPolicy`: `allowlist | open | disabled` (default: allowlist). diff --git a/docs/channels/msteams.md b/docs/channels/msteams.md index 3315153e6..de3b064b2 100644 --- a/docs/channels/msteams.md +++ b/docs/channels/msteams.md @@ -415,6 +415,7 @@ Key settings (see `/gateway/configuration` for shared channel patterns): - `channels.msteams.dmPolicy`: `pairing | allowlist | open | disabled` (default: pairing) - `channels.msteams.allowFrom`: allowlist for DMs (AAD object IDs, UPNs, or display names). The wizard resolves names to IDs during setup when Graph access is available. - `channels.msteams.textChunkLimit`: outbound text chunk size. +- `channels.msteams.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking. - `channels.msteams.mediaAllowHosts`: allowlist for inbound attachment hosts (defaults to Microsoft/Teams domains). - `channels.msteams.requireMention`: require @mention in channels/groups (default true). - `channels.msteams.replyStyle`: `thread | top-level` (see [Reply Style](#reply-style-threads-vs-posts)). diff --git a/docs/channels/nextcloud-talk.md b/docs/channels/nextcloud-talk.md index 756b2fe30..43c1595ed 100644 --- a/docs/channels/nextcloud-talk.md +++ b/docs/channels/nextcloud-talk.md @@ -114,6 +114,7 @@ Provider options: - `channels.nextcloud-talk.dmHistoryLimit`: DM history limit (0 disables). - `channels.nextcloud-talk.dms`: per-DM overrides (historyLimit). - `channels.nextcloud-talk.textChunkLimit`: outbound text chunk size (chars). +- `channels.nextcloud-talk.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking. - `channels.nextcloud-talk.blockStreaming`: disable block streaming for this channel. - `channels.nextcloud-talk.blockStreamingCoalesce`: block streaming coalesce tuning. - `channels.nextcloud-talk.mediaMaxMb`: inbound media cap (MB). diff --git a/docs/channels/signal.md b/docs/channels/signal.md index 1456b6b2d..d1d17426b 100644 --- a/docs/channels/signal.md +++ b/docs/channels/signal.md @@ -95,6 +95,7 @@ Groups: ## Media + limits - Outbound text is chunked to `channels.signal.textChunkLimit` (default 4000). +- Optional newline chunking: set `channels.signal.chunkMode="newline"` to split on each line before length chunking. - Attachments supported (base64 fetched from `signal-cli`). - Default media cap: `channels.signal.mediaMaxMb` (default 8). - Use `channels.signal.ignoreAttachments` to skip downloading media. @@ -152,6 +153,7 @@ Provider options: - `channels.signal.historyLimit`: max group messages to include as context (0 disables). - `channels.signal.dmHistoryLimit`: DM history limit in user turns. Per-user overrides: `channels.signal.dms[""].historyLimit`. - `channels.signal.textChunkLimit`: outbound chunk size (chars). +- `channels.signal.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking. - `channels.signal.mediaMaxMb`: inbound/outbound media cap (MB). Related global options: diff --git a/docs/channels/slack.md b/docs/channels/slack.md index b112612e1..bf7bcbf49 100644 --- a/docs/channels/slack.md +++ b/docs/channels/slack.md @@ -349,6 +349,7 @@ ack reaction after the bot replies. ## Limits - Outbound text is chunked to `channels.slack.textChunkLimit` (default 4000). +- Optional newline chunking: set `channels.slack.chunkMode="newline"` to split on each line before length chunking. - Media uploads are capped by `channels.slack.mediaMaxMb` (default 20). ## Reply threading diff --git a/docs/channels/telegram.md b/docs/channels/telegram.md index da29b3c90..6251e5530 100644 --- a/docs/channels/telegram.md +++ b/docs/channels/telegram.md @@ -128,6 +128,7 @@ Notes: ## Limits - Outbound text is chunked to `channels.telegram.textChunkLimit` (default 4000). +- Optional newline chunking: set `channels.telegram.chunkMode="newline"` to split on each line before length chunking. - Media downloads/uploads are capped by `channels.telegram.mediaMaxMb` (default 5). - Telegram Bot API requests time out after `channels.telegram.timeoutSeconds` (default 500 via grammY). Set lower to avoid long hangs. - Group history context uses `channels.telegram.historyLimit` (or `channels.telegram.accounts.*.historyLimit`), falling back to `messages.groupChat.historyLimit`. Set `0` to disable (default 50). @@ -516,6 +517,7 @@ Provider options: - `channels.telegram.accounts..capabilities.inlineButtons`: per-account override. - `channels.telegram.replyToMode`: `off | first | all` (default: `first`). - `channels.telegram.textChunkLimit`: outbound chunk size (chars). +- `channels.telegram.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking. - `channels.telegram.streamMode`: `off | partial | block` (draft streaming). - `channels.telegram.mediaMaxMb`: inbound/outbound media cap (MB). - `channels.telegram.retry`: retry policy for outbound Telegram API calls (attempts, minDelayMs, maxDelayMs, jitter). diff --git a/docs/channels/whatsapp.md b/docs/channels/whatsapp.md index a496d1654..517c71b93 100644 --- a/docs/channels/whatsapp.md +++ b/docs/channels/whatsapp.md @@ -271,12 +271,13 @@ WhatsApp can automatically send emoji reactions to incoming messages immediately ## Limits - Outbound text is chunked to `channels.whatsapp.textChunkLimit` (default 4000). +- Optional newline chunking: set `channels.whatsapp.chunkMode="newline"` to split on each line before length chunking. - Inbound media saves are capped by `channels.whatsapp.mediaMaxMb` (default 50 MB). - Outbound media items are capped by `agents.defaults.mediaMaxMb` (default 5 MB). ## Outbound send (text + media) - Uses active web listener; error if gateway not running. -- Text chunking: 4k max per message (configurable via `channels.whatsapp.textChunkLimit`). +- Text chunking: 4k max per message (configurable via `channels.whatsapp.textChunkLimit`, optional `channels.whatsapp.chunkMode`). - Media: - Image/video/audio/document supported. - Audio sent as PTT; `audio/ogg` => `audio/ogg; codecs=opus`. diff --git a/docs/concepts/streaming.md b/docs/concepts/streaming.md index ae4fecc85..8019e4cca 100644 --- a/docs/concepts/streaming.md +++ b/docs/concepts/streaming.md @@ -38,6 +38,7 @@ Legend: - `agents.defaults.blockStreamingChunk`: `{ minChars, maxChars, breakPreference? }`. - `agents.defaults.blockStreamingCoalesce`: `{ minChars?, maxChars?, idleMs? }` (merge streamed blocks before send). - Channel hard cap: `*.textChunkLimit` (e.g., `channels.whatsapp.textChunkLimit`). +- Channel chunk mode: `*.chunkMode` (`length` default, `newline` splits on each line before length chunking). - Discord soft cap: `channels.discord.maxLinesPerMessage` (default 17) splits tall replies to avoid UI clipping. **Boundary semantics:** diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 1d4e95cb0..e9212e692 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -504,6 +504,7 @@ For groups, use `channels.whatsapp.groupPolicy` + `channels.whatsapp.groupAllowF dmPolicy: "pairing", // pairing | allowlist | open | disabled allowFrom: ["+15555550123", "+447700900123"], textChunkLimit: 4000, // optional outbound chunk size (chars) + chunkMode: "length", // optional chunking mode (length | newline) mediaMaxMb: 50 // optional inbound media cap (MB) } } @@ -1105,6 +1106,7 @@ Multi-account support lives under `channels.discord.accounts` (see the multi-acc }, historyLimit: 20, // include last N guild messages as context textChunkLimit: 2000, // optional outbound text chunk size (chars) + chunkMode: "length", // optional chunking mode (length | newline) maxLinesPerMessage: 17, // soft max lines per message (Discord UI clipping) retry: { // outbound retry policy attempts: 3, @@ -1125,7 +1127,7 @@ Reaction notification modes: - `own`: reactions on the bot's own messages (default). - `all`: all reactions on all messages. - `allowlist`: reactions from `guilds..users` on all messages (empty list disables). -Outbound text is chunked by `channels.discord.textChunkLimit` (default 2000). Discord clients can clip very tall messages, so `channels.discord.maxLinesPerMessage` (default 17) splits long multi-line replies even when under 2000 chars. +Outbound text is chunked by `channels.discord.textChunkLimit` (default 2000). Set `channels.discord.chunkMode="newline"` to split on line boundaries before length chunking. Discord clients can clip very tall messages, so `channels.discord.maxLinesPerMessage` (default 17) splits long multi-line replies even when under 2000 chars. Retry policy defaults and behavior are documented in [Retry policy](/concepts/retry). ### `channels.googlechat` (Chat API webhook) @@ -1218,6 +1220,7 @@ Slack runs in Socket Mode and requires both a bot token and app token: ephemeral: true }, textChunkLimit: 4000, + chunkMode: "length", mediaMaxMb: 20 } } @@ -1267,7 +1270,8 @@ Mattermost requires a bot token plus the base URL for your server: dmPolicy: "pairing", chatmode: "oncall", // oncall | onmessage | onchar oncharPrefixes: [">", "!"], - textChunkLimit: 4000 + textChunkLimit: 4000, + chunkMode: "length" } } } diff --git a/extensions/googlechat/src/channel.ts b/extensions/googlechat/src/channel.ts index dc8a27414..3abd3b264 100644 --- a/extensions/googlechat/src/channel.ts +++ b/extensions/googlechat/src/channel.ts @@ -374,6 +374,7 @@ export const googlechatPlugin: ChannelPlugin = { deliveryMode: "direct", chunker: (text, limit) => getGoogleChatRuntime().channel.text.chunkMarkdownText(text, limit), + chunkerMode: "markdown", textChunkLimit: 4000, resolveTarget: ({ to, allowFrom, mode }) => { const trimmed = to?.trim() ?? ""; diff --git a/extensions/googlechat/src/monitor.ts b/extensions/googlechat/src/monitor.ts index b94aa2e89..fee138807 100644 --- a/extensions/googlechat/src/monitor.ts +++ b/extensions/googlechat/src/monitor.ts @@ -684,6 +684,7 @@ async function processMessageWithPipeline(params: { spaceId, runtime, core, + config, statusSink, typingMessageName, }); @@ -725,10 +726,11 @@ async function deliverGoogleChatReply(params: { spaceId: string; runtime: GoogleChatRuntimeEnv; core: GoogleChatCoreRuntime; + config: ClawdbotConfig; statusSink?: (patch: { lastInboundAt?: number; lastOutboundAt?: number }) => void; typingMessageName?: string; }): Promise { - const { payload, account, spaceId, runtime, core, statusSink, typingMessageName } = params; + const { payload, account, spaceId, runtime, core, config, statusSink, typingMessageName } = params; const mediaList = payload.mediaUrls?.length ? payload.mediaUrls : payload.mediaUrl @@ -799,7 +801,16 @@ async function deliverGoogleChatReply(params: { if (payload.text) { const chunkLimit = account.config.textChunkLimit ?? 4000; - const chunks = core.channel.text.chunkMarkdownText(payload.text, chunkLimit); + const chunkMode = core.channel.text.resolveChunkMode( + config, + "googlechat", + account.accountId, + ); + const chunks = core.channel.text.chunkMarkdownTextWithMode( + payload.text, + chunkLimit, + chunkMode, + ); for (let i = 0; i < chunks.length; i++) { const chunk = chunks[i]; try { diff --git a/extensions/imessage/src/channel.ts b/extensions/imessage/src/channel.ts index d13341706..50615cd22 100644 --- a/extensions/imessage/src/channel.ts +++ b/extensions/imessage/src/channel.ts @@ -186,6 +186,7 @@ export const imessagePlugin: ChannelPlugin = { outbound: { deliveryMode: "direct", chunker: (text, limit) => getIMessageRuntime().channel.text.chunkText(text, limit), + chunkerMode: "text", textChunkLimit: 4000, sendText: async ({ cfg, to, text, accountId, deps }) => { const send = deps?.sendIMessage ?? getIMessageRuntime().channel.imessage.sendMessageIMessage; diff --git a/extensions/matrix/src/config-schema.ts b/extensions/matrix/src/config-schema.ts index b153ae40f..62b327d40 100644 --- a/extensions/matrix/src/config-schema.ts +++ b/extensions/matrix/src/config-schema.ts @@ -50,6 +50,7 @@ export const MatrixConfigSchema = z.object({ replyToMode: z.enum(["off", "first", "all"]).optional(), threadReplies: z.enum(["off", "inbound", "always"]).optional(), textChunkLimit: z.number().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), mediaMaxMb: z.number().optional(), autoJoin: z.enum(["always", "allowlist", "off"]).optional(), autoJoinAllowlist: z.array(allowFromEntry).optional(), diff --git a/extensions/matrix/src/matrix/monitor/replies.ts b/extensions/matrix/src/matrix/monitor/replies.ts index d2a6e34da..f79ef5926 100644 --- a/extensions/matrix/src/matrix/monitor/replies.ts +++ b/extensions/matrix/src/matrix/monitor/replies.ts @@ -16,10 +16,11 @@ export async function deliverMatrixReplies(params: { tableMode?: MarkdownTableMode; }): Promise { const core = getMatrixRuntime(); + const cfg = core.config.loadConfig(); const tableMode = params.tableMode ?? core.channel.text.resolveMarkdownTableMode({ - cfg: core.config.loadConfig(), + cfg, channel: "matrix", accountId: params.accountId, }); @@ -29,6 +30,7 @@ export async function deliverMatrixReplies(params: { } }; const chunkLimit = Math.min(params.textLimit, 4000); + const chunkMode = core.channel.text.resolveChunkMode(cfg, "matrix", params.accountId); let hasReplied = false; for (const reply of params.replies) { const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0; @@ -54,7 +56,11 @@ export async function deliverMatrixReplies(params: { Boolean(id) && (params.replyToMode === "all" || !hasReplied); if (mediaList.length === 0) { - for (const chunk of core.channel.text.chunkMarkdownText(text, chunkLimit)) { + for (const chunk of core.channel.text.chunkMarkdownTextWithMode( + text, + chunkLimit, + chunkMode, + )) { const trimmed = chunk.trim(); if (!trimmed) continue; await sendMessageMatrix(params.roomId, trimmed, { diff --git a/extensions/matrix/src/matrix/send.test.ts b/extensions/matrix/src/matrix/send.test.ts index 2f0053ecf..c647eedb9 100644 --- a/extensions/matrix/src/matrix/send.test.ts +++ b/extensions/matrix/src/matrix/send.test.ts @@ -42,7 +42,9 @@ const runtimeStub = { channel: { text: { resolveTextChunkLimit: () => 4000, + resolveChunkMode: () => "length", chunkMarkdownText: (text: string) => (text ? [text] : []), + chunkMarkdownTextWithMode: (text: string) => (text ? [text] : []), resolveMarkdownTableMode: () => "code", convertMarkdownTables: (text: string) => text, }, diff --git a/extensions/matrix/src/matrix/send.ts b/extensions/matrix/src/matrix/send.ts index 79d20471c..264bd6429 100644 --- a/extensions/matrix/src/matrix/send.ts +++ b/extensions/matrix/src/matrix/send.ts @@ -61,7 +61,12 @@ export async function sendMessageMatrix( ); const textLimit = getCore().channel.text.resolveTextChunkLimit(cfg, "matrix"); const chunkLimit = Math.min(textLimit, MATRIX_TEXT_LIMIT); - const chunks = getCore().channel.text.chunkMarkdownText(convertedMessage, chunkLimit); + const chunkMode = getCore().channel.text.resolveChunkMode(cfg, "matrix", opts.accountId); + const chunks = getCore().channel.text.chunkMarkdownTextWithMode( + convertedMessage, + chunkLimit, + chunkMode, + ); const threadId = normalizeThreadId(opts.threadId); const relation = threadId ? buildThreadRelation(threadId, opts.replyToId) diff --git a/extensions/matrix/src/outbound.ts b/extensions/matrix/src/outbound.ts index efcc337f2..fd30e3ded 100644 --- a/extensions/matrix/src/outbound.ts +++ b/extensions/matrix/src/outbound.ts @@ -6,6 +6,7 @@ import { sendMessageMatrix, sendPollMatrix } from "./matrix/send.js"; export const matrixOutbound: ChannelOutboundAdapter = { deliveryMode: "direct", chunker: (text, limit) => getMatrixRuntime().channel.text.chunkMarkdownText(text, limit), + chunkerMode: "markdown", textChunkLimit: 4000, sendText: async ({ to, text, deps, replyToId, threadId }) => { const send = deps?.sendMatrix ?? sendMessageMatrix; diff --git a/extensions/matrix/src/types.ts b/extensions/matrix/src/types.ts index b7ff7facd..f44f1074d 100644 --- a/extensions/matrix/src/types.ts +++ b/extensions/matrix/src/types.ts @@ -69,6 +69,8 @@ export type MatrixConfig = { threadReplies?: "off" | "inbound" | "always"; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; /** Max outbound media size in MB. */ mediaMaxMb?: number; /** Auto-join invites (always|allowlist|off). Default: always. */ diff --git a/extensions/mattermost/src/channel.ts b/extensions/mattermost/src/channel.ts index 5d0837423..e12931883 100644 --- a/extensions/mattermost/src/channel.ts +++ b/extensions/mattermost/src/channel.ts @@ -158,6 +158,7 @@ export const mattermostPlugin: ChannelPlugin = { outbound: { deliveryMode: "direct", chunker: (text, limit) => getMattermostRuntime().channel.text.chunkMarkdownText(text, limit), + chunkerMode: "markdown", textChunkLimit: 4000, resolveTarget: ({ to }) => { const trimmed = to?.trim(); diff --git a/extensions/mattermost/src/config-schema.ts b/extensions/mattermost/src/config-schema.ts index 40ae8a31a..2a1b76248 100644 --- a/extensions/mattermost/src/config-schema.ts +++ b/extensions/mattermost/src/config-schema.ts @@ -25,6 +25,7 @@ const MattermostAccountSchemaBase = z groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(), groupPolicy: GroupPolicySchema.optional().default("allowlist"), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), }) diff --git a/extensions/mattermost/src/mattermost/monitor.ts b/extensions/mattermost/src/mattermost/monitor.ts index 659ca83aa..96c5971a5 100644 --- a/extensions/mattermost/src/mattermost/monitor.ts +++ b/extensions/mattermost/src/mattermost/monitor.ts @@ -738,7 +738,12 @@ export async function monitorMattermostProvider(opts: MonitorMattermostOpts = {} const mediaUrls = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); const text = core.channel.text.convertMarkdownTables(payload.text ?? "", tableMode); if (mediaUrls.length === 0) { - const chunks = core.channel.text.chunkMarkdownText(text, textLimit); + const chunkMode = core.channel.text.resolveChunkMode( + cfg, + "mattermost", + account.accountId, + ); + const chunks = core.channel.text.chunkMarkdownTextWithMode(text, textLimit, chunkMode); for (const chunk of chunks.length > 0 ? chunks : [text]) { if (!chunk) continue; await sendMessageMattermost(to, chunk, { diff --git a/extensions/mattermost/src/types.ts b/extensions/mattermost/src/types.ts index a80196142..3348f3cca 100644 --- a/extensions/mattermost/src/types.ts +++ b/extensions/mattermost/src/types.ts @@ -36,6 +36,8 @@ export type MattermostAccountConfig = { groupPolicy?: GroupPolicy; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; /** Disable block streaming for this account. */ blockStreaming?: boolean; /** Merge streamed block replies before sending. */ diff --git a/extensions/msteams/src/messenger.test.ts b/extensions/msteams/src/messenger.test.ts index 9fbd628c5..681dade29 100644 --- a/extensions/msteams/src/messenger.test.ts +++ b/extensions/msteams/src/messenger.test.ts @@ -9,18 +9,21 @@ import { } from "./messenger.js"; import { setMSTeamsRuntime } from "./runtime.js"; +const chunkMarkdownText = (text: string, limit: number) => { + if (!text) return []; + if (limit <= 0 || text.length <= limit) return [text]; + const chunks: string[] = []; + for (let index = 0; index < text.length; index += limit) { + chunks.push(text.slice(index, index + limit)); + } + return chunks; +}; + const runtimeStub = { channel: { text: { - chunkMarkdownText: (text: string, limit: number) => { - if (!text) return []; - if (limit <= 0 || text.length <= limit) return [text]; - const chunks: string[] = []; - for (let index = 0; index < text.length; index += limit) { - chunks.push(text.slice(index, index + limit)); - } - return chunks; - }, + chunkMarkdownText, + chunkMarkdownTextWithMode: chunkMarkdownText, resolveMarkdownTableMode: () => "code", convertMarkdownTables: (text: string) => text, }, diff --git a/extensions/msteams/src/messenger.ts b/extensions/msteams/src/messenger.ts index a5eb99b73..f19cde27b 100644 --- a/extensions/msteams/src/messenger.ts +++ b/extensions/msteams/src/messenger.ts @@ -1,4 +1,5 @@ import { + type ChunkMode, isSilentReplyText, loadWebMedia, type MarkdownTableMode, @@ -63,6 +64,7 @@ export type MSTeamsReplyRenderOptions = { chunkText?: boolean; mediaMode?: "split" | "inline"; tableMode?: MarkdownTableMode; + chunkMode?: ChunkMode; }; /** @@ -129,11 +131,16 @@ function pushTextMessages( opts: { chunkText: boolean; chunkLimit: number; + chunkMode: ChunkMode; }, ) { if (!text) return; if (opts.chunkText) { - for (const chunk of getMSTeamsRuntime().channel.text.chunkMarkdownText(text, opts.chunkLimit)) { + for (const chunk of getMSTeamsRuntime().channel.text.chunkMarkdownTextWithMode( + text, + opts.chunkLimit, + opts.chunkMode, + )) { const trimmed = chunk.trim(); if (!trimmed || isSilentReplyText(trimmed, SILENT_REPLY_TOKEN)) continue; out.push({ text: trimmed }); @@ -197,6 +204,7 @@ export function renderReplyPayloadsToMessages( const out: MSTeamsRenderedMessage[] = []; const chunkLimit = Math.min(options.textChunkLimit, 4000); const chunkText = options.chunkText !== false; + const chunkMode = options.chunkMode ?? "length"; const mediaMode = options.mediaMode ?? "split"; const tableMode = options.tableMode ?? @@ -215,7 +223,7 @@ export function renderReplyPayloadsToMessages( if (!text && mediaList.length === 0) continue; if (mediaList.length === 0) { - pushTextMessages(out, text, { chunkText, chunkLimit }); + pushTextMessages(out, text, { chunkText, chunkLimit, chunkMode }); continue; } @@ -229,13 +237,13 @@ export function renderReplyPayloadsToMessages( if (mediaList[i]) out.push({ mediaUrl: mediaList[i] }); } } else { - pushTextMessages(out, text, { chunkText, chunkLimit }); + pushTextMessages(out, text, { chunkText, chunkLimit, chunkMode }); } continue; } // mediaMode === "split" - pushTextMessages(out, text, { chunkText, chunkLimit }); + pushTextMessages(out, text, { chunkText, chunkLimit, chunkMode }); for (const mediaUrl of mediaList) { if (!mediaUrl) continue; out.push({ mediaUrl }); diff --git a/extensions/msteams/src/outbound.ts b/extensions/msteams/src/outbound.ts index 16fdd5c91..c3b0d37ee 100644 --- a/extensions/msteams/src/outbound.ts +++ b/extensions/msteams/src/outbound.ts @@ -7,6 +7,7 @@ import { sendMessageMSTeams, sendPollMSTeams } from "./send.js"; export const msteamsOutbound: ChannelOutboundAdapter = { deliveryMode: "direct", chunker: (text, limit) => getMSTeamsRuntime().channel.text.chunkMarkdownText(text, limit), + chunkerMode: "markdown", textChunkLimit: 4000, pollMaxOptions: 12, sendText: async ({ cfg, to, text, deps }) => { diff --git a/extensions/msteams/src/reply-dispatcher.ts b/extensions/msteams/src/reply-dispatcher.ts index 449a14fe2..c83867a65 100644 --- a/extensions/msteams/src/reply-dispatcher.ts +++ b/extensions/msteams/src/reply-dispatcher.ts @@ -59,6 +59,7 @@ export function createMSTeamsReplyDispatcher(params: { cfg: params.cfg, agentId: params.agentId, }); + const chunkMode = core.channel.text.resolveChunkMode(params.cfg, "msteams"); const { dispatcher, replyOptions, markDispatchIdle } = core.channel.reply.createReplyDispatcherWithTyping({ @@ -75,6 +76,7 @@ export function createMSTeamsReplyDispatcher(params: { chunkText: true, mediaMode: "split", tableMode, + chunkMode, }); const mediaMaxBytes = resolveChannelMediaMaxBytes({ cfg: params.cfg, diff --git a/extensions/nextcloud-talk/src/channel.ts b/extensions/nextcloud-talk/src/channel.ts index a41b2a16f..471f2b64e 100644 --- a/extensions/nextcloud-talk/src/channel.ts +++ b/extensions/nextcloud-talk/src/channel.ts @@ -247,6 +247,7 @@ export const nextcloudTalkPlugin: ChannelPlugin = outbound: { deliveryMode: "direct", chunker: (text, limit) => getNextcloudTalkRuntime().channel.text.chunkMarkdownText(text, limit), + chunkerMode: "markdown", textChunkLimit: 4000, sendText: async ({ to, text, accountId, replyToId }) => { const result = await sendMessageNextcloudTalk(to, text, { diff --git a/extensions/nextcloud-talk/src/config-schema.ts b/extensions/nextcloud-talk/src/config-schema.ts index b047c7903..8eb5fa27b 100644 --- a/extensions/nextcloud-talk/src/config-schema.ts +++ b/extensions/nextcloud-talk/src/config-schema.ts @@ -44,6 +44,7 @@ export const NextcloudTalkAccountSchemaBase = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), mediaMaxMb: z.number().positive().optional(), diff --git a/extensions/nextcloud-talk/src/types.ts b/extensions/nextcloud-talk/src/types.ts index 18525ccab..2aa81a2cd 100644 --- a/extensions/nextcloud-talk/src/types.ts +++ b/extensions/nextcloud-talk/src/types.ts @@ -62,6 +62,8 @@ export type NextcloudTalkAccountConfig = { dms?: Record; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; /** Disable block streaming for this account. */ blockStreaming?: boolean; /** Merge streamed block replies before sending. */ diff --git a/extensions/signal/src/channel.ts b/extensions/signal/src/channel.ts index dbd628f8d..e9a3e2955 100644 --- a/extensions/signal/src/channel.ts +++ b/extensions/signal/src/channel.ts @@ -207,6 +207,7 @@ export const signalPlugin: ChannelPlugin = { outbound: { deliveryMode: "direct", chunker: (text, limit) => getSignalRuntime().channel.text.chunkText(text, limit), + chunkerMode: "text", textChunkLimit: 4000, sendText: async ({ cfg, to, text, accountId, deps }) => { const send = deps?.sendSignal ?? getSignalRuntime().channel.signal.sendMessageSignal; diff --git a/extensions/telegram/src/channel.ts b/extensions/telegram/src/channel.ts index c0d018c83..76bc37ff8 100644 --- a/extensions/telegram/src/channel.ts +++ b/extensions/telegram/src/channel.ts @@ -251,6 +251,7 @@ export const telegramPlugin: ChannelPlugin = { outbound: { deliveryMode: "direct", chunker: (text, limit) => getTelegramRuntime().channel.text.chunkMarkdownText(text, limit), + chunkerMode: "markdown", textChunkLimit: 4000, sendText: async ({ to, text, accountId, deps, replyToId, threadId }) => { const send = diff --git a/extensions/whatsapp/src/channel.ts b/extensions/whatsapp/src/channel.ts index c9dd9da28..9d37fcf2a 100644 --- a/extensions/whatsapp/src/channel.ts +++ b/extensions/whatsapp/src/channel.ts @@ -276,6 +276,7 @@ export const whatsappPlugin: ChannelPlugin = { outbound: { deliveryMode: "gateway", chunker: (text, limit) => getWhatsAppRuntime().channel.text.chunkText(text, limit), + chunkerMode: "text", textChunkLimit: 4000, pollMaxOptions: 12, resolveTarget: ({ to, allowFrom, mode }) => { diff --git a/extensions/zalo/src/channel.ts b/extensions/zalo/src/channel.ts index b9eca12c6..5ed95dddc 100644 --- a/extensions/zalo/src/channel.ts +++ b/extensions/zalo/src/channel.ts @@ -288,6 +288,7 @@ export const zaloPlugin: ChannelPlugin = { if (remaining.length) chunks.push(remaining); return chunks; }, + chunkerMode: "text", textChunkLimit: 2000, sendText: async ({ to, text, accountId, cfg }) => { const result = await sendMessageZalo(to, text, { diff --git a/extensions/zalo/src/monitor.ts b/extensions/zalo/src/monitor.ts index 44a279354..e8526562e 100644 --- a/extensions/zalo/src/monitor.ts +++ b/extensions/zalo/src/monitor.ts @@ -596,6 +596,8 @@ async function processMessageWithPipeline(params: { chatId, runtime, core, + config, + accountId: account.accountId, statusSink, fetcher, tableMode, @@ -614,11 +616,13 @@ async function deliverZaloReply(params: { chatId: string; runtime: ZaloRuntimeEnv; core: ZaloCoreRuntime; + config: ClawdbotConfig; + accountId?: string; statusSink?: (patch: { lastInboundAt?: number; lastOutboundAt?: number }) => void; fetcher?: ZaloFetch; tableMode?: MarkdownTableMode; }): Promise { - const { payload, token, chatId, runtime, core, statusSink, fetcher } = params; + const { payload, token, chatId, runtime, core, config, accountId, statusSink, fetcher } = params; const tableMode = params.tableMode ?? "code"; const text = core.channel.text.convertMarkdownTables(payload.text ?? "", tableMode); @@ -644,7 +648,12 @@ async function deliverZaloReply(params: { } if (text) { - const chunks = core.channel.text.chunkMarkdownText(text, ZALO_TEXT_LIMIT); + const chunkMode = core.channel.text.resolveChunkMode(config, "zalo", accountId); + const chunks = core.channel.text.chunkMarkdownTextWithMode( + text, + ZALO_TEXT_LIMIT, + chunkMode, + ); for (const chunk of chunks) { try { await sendMessage(token, { chat_id: chatId, text: chunk }, fetcher); diff --git a/extensions/zalouser/src/channel.ts b/extensions/zalouser/src/channel.ts index eeb7b0299..6554d7874 100644 --- a/extensions/zalouser/src/channel.ts +++ b/extensions/zalouser/src/channel.ts @@ -506,6 +506,7 @@ export const zalouserPlugin: ChannelPlugin = { if (remaining.length) chunks.push(remaining); return chunks; }, + chunkerMode: "text", textChunkLimit: 2000, sendText: async ({ to, text, accountId, cfg }) => { const account = resolveZalouserAccountSync({ cfg: cfg as ClawdbotConfig, accountId }); diff --git a/extensions/zalouser/src/monitor.ts b/extensions/zalouser/src/monitor.ts index 97e5a4be3..01f085d0d 100644 --- a/extensions/zalouser/src/monitor.ts +++ b/extensions/zalouser/src/monitor.ts @@ -332,6 +332,8 @@ async function processMessage( isGroup, runtime, core, + config, + accountId: account.accountId, statusSink, tableMode: core.channel.text.resolveMarkdownTableMode({ cfg: config, @@ -356,10 +358,13 @@ async function deliverZalouserReply(params: { isGroup: boolean; runtime: RuntimeEnv; core: ZalouserCoreRuntime; + config: ClawdbotConfig; + accountId?: string; statusSink?: (patch: { lastInboundAt?: number; lastOutboundAt?: number }) => void; tableMode?: MarkdownTableMode; }): Promise { - const { payload, profile, chatId, isGroup, runtime, core, statusSink } = params; + const { payload, profile, chatId, isGroup, runtime, core, config, accountId, statusSink } = + params; const tableMode = params.tableMode ?? "code"; const text = core.channel.text.convertMarkdownTables(payload.text ?? "", tableMode); @@ -390,7 +395,12 @@ async function deliverZalouserReply(params: { } if (text) { - const chunks = core.channel.text.chunkMarkdownText(text, ZALOUSER_TEXT_LIMIT); + const chunkMode = core.channel.text.resolveChunkMode(config, "zalouser", accountId); + const chunks = core.channel.text.chunkMarkdownTextWithMode( + text, + ZALOUSER_TEXT_LIMIT, + chunkMode, + ); logVerbose(core, runtime, `Sending ${chunks.length} text chunk(s) to ${chatId}`); for (const chunk of chunks) { try { diff --git a/src/auto-reply/chunk.test.ts b/src/auto-reply/chunk.test.ts index 7a4b41d0e..01069d852 100644 --- a/src/auto-reply/chunk.test.ts +++ b/src/auto-reply/chunk.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest"; import { chunkByNewline, chunkMarkdownText, + chunkMarkdownTextWithMode, chunkText, chunkTextWithMode, resolveChunkMode, @@ -246,10 +247,10 @@ describe("chunkByNewline", () => { expect(chunks).toEqual(["Line one", "Line two", "Line three"]); }); - it("filters empty lines", () => { + it("preserves blank lines by folding into the next chunk", () => { const text = "Line one\n\n\nLine two\n\nLine three"; const chunks = chunkByNewline(text, 1000); - expect(chunks).toEqual(["Line one", "Line two", "Line three"]); + expect(chunks).toEqual(["Line one", "\n\nLine two", "\nLine three"]); }); it("trims whitespace from lines", () => { @@ -258,6 +259,12 @@ describe("chunkByNewline", () => { expect(chunks).toEqual(["Line one", "Line two"]); }); + it("preserves leading blank lines on the first chunk", () => { + const text = "\n\nLine one\nLine two"; + const chunks = chunkByNewline(text, 1000); + expect(chunks).toEqual(["\n\nLine one", "Line two"]); + }); + it("falls back to length-based for long lines", () => { const text = "Short line\n" + "a".repeat(50) + "\nAnother short"; const chunks = chunkByNewline(text, 20); @@ -269,6 +276,12 @@ describe("chunkByNewline", () => { expect(chunks[4]).toBe("Another short"); }); + it("does not split long lines when splitLongLines is false", () => { + const text = "a".repeat(50); + const chunks = chunkByNewline(text, 20, { splitLongLines: false }); + expect(chunks).toEqual([text]); + }); + it("returns empty array for empty input", () => { expect(chunkByNewline("", 100)).toEqual([]); }); @@ -276,6 +289,18 @@ describe("chunkByNewline", () => { it("returns empty array for whitespace-only input", () => { expect(chunkByNewline(" \n\n ", 100)).toEqual([]); }); + + it("preserves trailing blank lines on the last chunk", () => { + const text = "Line one\n\n"; + const chunks = chunkByNewline(text, 1000); + expect(chunks).toEqual(["Line one\n\n"]); + }); + + it("keeps whitespace when trimLines is false", () => { + const text = " indented line \nNext"; + const chunks = chunkByNewline(text, 1000, { trimLines: false }); + expect(chunks).toEqual([" indented line ", "Next"]); + }); }); describe("chunkTextWithMode", () => { @@ -292,6 +317,26 @@ describe("chunkTextWithMode", () => { }); }); +describe("chunkMarkdownTextWithMode", () => { + it("uses markdown-aware chunking for length mode", () => { + const text = "Line one\nLine two"; + expect(chunkMarkdownTextWithMode(text, 1000, "length")).toEqual(chunkMarkdownText(text, 1000)); + }); + + it("uses newline-based chunking for newline mode", () => { + const text = "Line one\nLine two"; + expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual(["Line one", "Line two"]); + }); + + it("does not split inside code fences for newline mode", () => { + const text = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter"; + expect(chunkMarkdownTextWithMode(text, 1000, "newline")).toEqual([ + "```js\nconst a = 1;\nconst b = 2;\n```", + "After", + ]); + }); +}); + describe("resolveChunkMode", () => { it("returns length as default", () => { expect(resolveChunkMode(undefined, "telegram")).toBe("length"); @@ -304,16 +349,16 @@ describe("resolveChunkMode", () => { expect(resolveChunkMode(cfg, "__internal__")).toBe("length"); }); - it("supports provider-level overrides for bluebubbles", () => { - const cfg = { channels: { bluebubbles: { chunkMode: "newline" as const } } }; - expect(resolveChunkMode(cfg, "bluebubbles")).toBe("newline"); + it("supports provider-level overrides for slack", () => { + const cfg = { channels: { slack: { chunkMode: "newline" as const } } }; + expect(resolveChunkMode(cfg, "slack")).toBe("newline"); expect(resolveChunkMode(cfg, "discord")).toBe("length"); }); - it("supports account-level overrides for bluebubbles", () => { + it("supports account-level overrides for slack", () => { const cfg = { channels: { - bluebubbles: { + slack: { chunkMode: "length" as const, accounts: { primary: { chunkMode: "newline" as const }, @@ -321,12 +366,7 @@ describe("resolveChunkMode", () => { }, }, }; - expect(resolveChunkMode(cfg, "bluebubbles", "primary")).toBe("newline"); - expect(resolveChunkMode(cfg, "bluebubbles", "other")).toBe("length"); - }); - - it("ignores chunkMode for non-bluebubbles providers", () => { - const cfg = { channels: { ["telegram" as string]: { chunkMode: "newline" as const } } }; - expect(resolveChunkMode(cfg, "telegram")).toBe("length"); + expect(resolveChunkMode(cfg, "slack", "primary")).toBe("newline"); + expect(resolveChunkMode(cfg, "slack", "other")).toBe("length"); }); }); diff --git a/src/auto-reply/chunk.ts b/src/auto-reply/chunk.ts index 281612e37..c77c0cd9f 100644 --- a/src/auto-reply/chunk.ts +++ b/src/auto-reply/chunk.ts @@ -101,8 +101,6 @@ export function resolveChunkMode( accountId?: string | null, ): ChunkMode { if (!provider || provider === INTERNAL_MESSAGE_CHANNEL) return DEFAULT_CHUNK_MODE; - // Chunk mode is only supported for BlueBubbles. - if (provider !== "bluebubbles") return DEFAULT_CHUNK_MODE; const channelsConfig = cfg?.channels as Record | undefined; const providerConfig = (channelsConfig?.[provider] ?? (cfg as Record | undefined)?.[provider]) as ProviderChunkConfig | undefined; @@ -111,25 +109,56 @@ export function resolveChunkMode( } /** - * Split text on newlines, filtering empty lines. - * Lines exceeding maxLineLength are further split using length-based chunking. + * Split text on newlines, trimming line whitespace. + * Blank lines are folded into the next non-empty line as leading "\n" prefixes. + * Long lines can be split by length (default) or kept intact via splitLongLines:false. */ -export function chunkByNewline(text: string, maxLineLength: number): string[] { +export function chunkByNewline( + text: string, + maxLineLength: number, + opts?: { + splitLongLines?: boolean; + trimLines?: boolean; + isSafeBreak?: (index: number) => boolean; + }, +): string[] { if (!text) return []; - const lines = text.split("\n"); + if (maxLineLength <= 0) return text.trim() ? [text] : []; + const splitLongLines = opts?.splitLongLines !== false; + const trimLines = opts?.trimLines !== false; + const lines = splitByNewline(text, opts?.isSafeBreak); const chunks: string[] = []; + let pendingBlankLines = 0; for (const line of lines) { const trimmed = line.trim(); - if (!trimmed) continue; // skip empty lines - - if (trimmed.length <= maxLineLength) { - chunks.push(trimmed); - } else { - // Long line: fall back to length-based chunking - const subChunks = chunkText(trimmed, maxLineLength); - chunks.push(...subChunks); + if (!trimmed) { + pendingBlankLines += 1; + continue; } + + const maxPrefix = Math.max(0, maxLineLength - 1); + const cappedBlankLines = pendingBlankLines > 0 ? Math.min(pendingBlankLines, maxPrefix) : 0; + const prefix = cappedBlankLines > 0 ? "\n".repeat(cappedBlankLines) : ""; + pendingBlankLines = 0; + + const lineValue = trimLines ? trimmed : line; + if (!splitLongLines || lineValue.length + prefix.length <= maxLineLength) { + chunks.push(prefix + lineValue); + continue; + } + + const firstLimit = Math.max(1, maxLineLength - prefix.length); + const first = lineValue.slice(0, firstLimit); + chunks.push(prefix + first); + const remaining = lineValue.slice(firstLimit); + if (remaining) { + chunks.push(...chunkText(remaining, maxLineLength)); + } + } + + if (pendingBlankLines > 0 && chunks.length > 0) { + chunks[chunks.length - 1] += "\n".repeat(pendingBlankLines); } return chunks; @@ -140,11 +169,59 @@ export function chunkByNewline(text: string, maxLineLength: number): string[] { */ export function chunkTextWithMode(text: string, limit: number, mode: ChunkMode): string[] { if (mode === "newline") { - return chunkByNewline(text, limit); + const chunks: string[] = []; + const lineChunks = chunkByNewline(text, limit, { splitLongLines: false }); + for (const line of lineChunks) { + const nested = chunkText(line, limit); + if (!nested.length && line) { + chunks.push(line); + continue; + } + chunks.push(...nested); + } + return chunks; } return chunkText(text, limit); } +export function chunkMarkdownTextWithMode(text: string, limit: number, mode: ChunkMode): string[] { + if (mode === "newline") { + const spans = parseFenceSpans(text); + const chunks: string[] = []; + const lineChunks = chunkByNewline(text, limit, { + splitLongLines: false, + trimLines: false, + isSafeBreak: (index) => isSafeFenceBreak(spans, index), + }); + for (const line of lineChunks) { + const nested = chunkMarkdownText(line, limit); + if (!nested.length && line) { + chunks.push(line); + continue; + } + chunks.push(...nested); + } + return chunks; + } + return chunkMarkdownText(text, limit); +} + +function splitByNewline( + text: string, + isSafeBreak: (index: number) => boolean = () => true, +): string[] { + const lines: string[] = []; + let start = 0; + for (let i = 0; i < text.length; i++) { + if (text[i] === "\n" && isSafeBreak(i)) { + lines.push(text.slice(start, i)); + start = i + 1; + } + } + lines.push(text.slice(start)); + return lines; +} + export function chunkText(text: string, limit: number): string[] { if (!text) return []; if (limit <= 0) return [text]; diff --git a/src/auto-reply/reply/block-streaming.ts b/src/auto-reply/reply/block-streaming.ts index fb462b107..82fa85919 100644 --- a/src/auto-reply/reply/block-streaming.ts +++ b/src/auto-reply/reply/block-streaming.ts @@ -69,7 +69,7 @@ export function resolveBlockStreamingChunking( }); const chunkCfg = cfg?.agents?.defaults?.blockStreamingChunk; - // BlueBubbles-only: if chunkMode is "newline", use newline-based streaming + // When chunkMode is "newline", use newline-based streaming const channelChunkMode = resolveChunkMode(cfg, providerKey, accountId); if (channelChunkMode === "newline") { // For newline mode: use very low minChars to flush quickly on newlines @@ -103,7 +103,7 @@ export function resolveBlockStreamingCoalescing( ): BlockStreamingCoalescing | undefined { const providerKey = normalizeChunkProvider(provider); - // BlueBubbles-only: when chunkMode is "newline", disable coalescing to send each line immediately + // When chunkMode is "newline", disable coalescing to send each line immediately const channelChunkMode = resolveChunkMode(cfg, providerKey, accountId); if (channelChunkMode === "newline") { return undefined; diff --git a/src/channels/plugins/outbound/imessage.ts b/src/channels/plugins/outbound/imessage.ts index 3e415d6bb..03dd07222 100644 --- a/src/channels/plugins/outbound/imessage.ts +++ b/src/channels/plugins/outbound/imessage.ts @@ -6,6 +6,7 @@ import type { ChannelOutboundAdapter } from "../types.js"; export const imessageOutbound: ChannelOutboundAdapter = { deliveryMode: "direct", chunker: chunkText, + chunkerMode: "text", textChunkLimit: 4000, sendText: async ({ cfg, to, text, accountId, deps }) => { const send = deps?.sendIMessage ?? sendMessageIMessage; diff --git a/src/channels/plugins/outbound/signal.ts b/src/channels/plugins/outbound/signal.ts index 0939e7b6b..c2f0710cf 100644 --- a/src/channels/plugins/outbound/signal.ts +++ b/src/channels/plugins/outbound/signal.ts @@ -6,6 +6,7 @@ import type { ChannelOutboundAdapter } from "../types.js"; export const signalOutbound: ChannelOutboundAdapter = { deliveryMode: "direct", chunker: chunkText, + chunkerMode: "text", textChunkLimit: 4000, sendText: async ({ cfg, to, text, accountId, deps }) => { const send = deps?.sendSignal ?? sendMessageSignal; diff --git a/src/channels/plugins/outbound/telegram.ts b/src/channels/plugins/outbound/telegram.ts index 8cf4d6946..9b138705a 100644 --- a/src/channels/plugins/outbound/telegram.ts +++ b/src/channels/plugins/outbound/telegram.ts @@ -21,6 +21,7 @@ function parseThreadId(threadId?: string | number | null) { export const telegramOutbound: ChannelOutboundAdapter = { deliveryMode: "direct", chunker: markdownToTelegramHtmlChunks, + chunkerMode: "markdown", textChunkLimit: 4000, sendText: async ({ to, text, accountId, deps, replyToId, threadId }) => { const send = deps?.sendTelegram ?? sendMessageTelegram; diff --git a/src/channels/plugins/outbound/whatsapp.ts b/src/channels/plugins/outbound/whatsapp.ts index af4cb2ff1..303a015da 100644 --- a/src/channels/plugins/outbound/whatsapp.ts +++ b/src/channels/plugins/outbound/whatsapp.ts @@ -8,6 +8,7 @@ import { missingTargetError } from "../../../infra/outbound/target-errors.js"; export const whatsappOutbound: ChannelOutboundAdapter = { deliveryMode: "gateway", chunker: chunkText, + chunkerMode: "text", textChunkLimit: 4000, pollMaxOptions: 12, resolveTarget: ({ to, allowFrom, mode }) => { diff --git a/src/channels/plugins/types.adapters.ts b/src/channels/plugins/types.adapters.ts index ccd7009c5..982975d44 100644 --- a/src/channels/plugins/types.adapters.ts +++ b/src/channels/plugins/types.adapters.ts @@ -84,6 +84,7 @@ export type ChannelOutboundContext = { export type ChannelOutboundAdapter = { deliveryMode: "direct" | "gateway" | "hybrid"; chunker?: ((text: string, limit: number) => string[]) | null; + chunkerMode?: "text" | "markdown"; textChunkLimit?: number; pollMaxOptions?: number; resolveTarget?: (params: { diff --git a/src/config/types.discord.ts b/src/config/types.discord.ts index ae434dd15..071d6e6a7 100644 --- a/src/config/types.discord.ts +++ b/src/config/types.discord.ts @@ -108,6 +108,8 @@ export type DiscordAccountConfig = { groupPolicy?: GroupPolicy; /** Outbound text chunk size (chars). Default: 2000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; /** Disable block streaming for this account. */ blockStreaming?: boolean; /** Merge streamed block replies before sending. */ diff --git a/src/config/types.googlechat.ts b/src/config/types.googlechat.ts index 33233ef78..5fceff49e 100644 --- a/src/config/types.googlechat.ts +++ b/src/config/types.googlechat.ts @@ -78,6 +78,8 @@ export type GoogleChatAccountConfig = { dms?: Record; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; blockStreaming?: boolean; /** Merge streamed block replies before sending. */ blockStreamingCoalesce?: BlockStreamingCoalesceConfig; diff --git a/src/config/types.imessage.ts b/src/config/types.imessage.ts index ca83c0fe0..88ceb02c1 100644 --- a/src/config/types.imessage.ts +++ b/src/config/types.imessage.ts @@ -54,6 +54,8 @@ export type IMessageAccountConfig = { mediaMaxMb?: number; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; blockStreaming?: boolean; /** Merge streamed block replies before sending. */ blockStreamingCoalesce?: BlockStreamingCoalesceConfig; diff --git a/src/config/types.msteams.ts b/src/config/types.msteams.ts index 05e27527a..a8552c6eb 100644 --- a/src/config/types.msteams.ts +++ b/src/config/types.msteams.ts @@ -72,6 +72,8 @@ export type MSTeamsConfig = { groupPolicy?: GroupPolicy; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; /** Merge streamed block replies before sending. */ blockStreamingCoalesce?: BlockStreamingCoalesceConfig; /** diff --git a/src/config/types.signal.ts b/src/config/types.signal.ts index 7cdaf0bf7..d5614bda3 100644 --- a/src/config/types.signal.ts +++ b/src/config/types.signal.ts @@ -57,6 +57,8 @@ export type SignalAccountConfig = { dms?: Record; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; blockStreaming?: boolean; /** Merge streamed block replies before sending. */ blockStreamingCoalesce?: BlockStreamingCoalesceConfig; diff --git a/src/config/types.slack.ts b/src/config/types.slack.ts index 0662bf36f..564248503 100644 --- a/src/config/types.slack.ts +++ b/src/config/types.slack.ts @@ -116,6 +116,8 @@ export type SlackAccountConfig = { /** Per-DM config overrides keyed by user ID. */ dms?: Record; textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; blockStreaming?: boolean; /** Merge streamed block replies before sending. */ blockStreamingCoalesce?: BlockStreamingCoalesceConfig; diff --git a/src/config/types.telegram.ts b/src/config/types.telegram.ts index 1ef7e7387..9fa51be9c 100644 --- a/src/config/types.telegram.ts +++ b/src/config/types.telegram.ts @@ -80,6 +80,8 @@ export type TelegramAccountConfig = { dms?: Record; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; /** Disable block streaming for this account. */ blockStreaming?: boolean; /** Chunking config for draft streaming in `streamMode: "block"`. */ diff --git a/src/config/types.whatsapp.ts b/src/config/types.whatsapp.ts index ce1851ea0..84d7379fd 100644 --- a/src/config/types.whatsapp.ts +++ b/src/config/types.whatsapp.ts @@ -55,6 +55,8 @@ export type WhatsAppConfig = { dms?: Record; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; /** Maximum media file size in MB. Default: 50. */ mediaMaxMb?: number; /** Disable block streaming for this account. */ @@ -122,6 +124,8 @@ export type WhatsAppAccountConfig = { /** Per-DM config overrides keyed by user ID. */ dms?: Record; textChunkLimit?: number; + /** Chunking mode: "length" (default) splits by size; "newline" splits on every newline. */ + chunkMode?: "length" | "newline"; mediaMaxMb?: number; blockStreaming?: boolean; /** Merge streamed block replies before sending. */ diff --git a/src/config/zod-schema.providers-core.ts b/src/config/zod-schema.providers-core.ts index 7489ad0d9..1f5860f45 100644 --- a/src/config/zod-schema.providers-core.ts +++ b/src/config/zod-schema.providers-core.ts @@ -102,6 +102,7 @@ export const TelegramAccountSchemaBase = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), draftChunk: BlockStreamingChunkSchema.optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), @@ -212,6 +213,7 @@ export const DiscordAccountSchema = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), maxLinesPerMessage: z.number().int().positive().optional(), @@ -310,6 +312,7 @@ export const GoogleChatAccountSchema = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), mediaMaxMb: z.number().positive().optional(), @@ -401,6 +404,7 @@ export const SlackAccountSchema = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), mediaMaxMb: z.number().positive().optional(), @@ -494,6 +498,7 @@ export const SignalAccountSchemaBase = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), mediaMaxMb: z.number().int().positive().optional(), @@ -554,6 +559,7 @@ export const IMessageAccountSchemaBase = z includeAttachments: z.boolean().optional(), mediaMaxMb: z.number().int().positive().optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), groups: z @@ -712,6 +718,7 @@ export const MSTeamsConfigSchema = z groupAllowFrom: z.array(z.string()).optional(), groupPolicy: GroupPolicySchema.optional().default("allowlist"), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), mediaAllowHosts: z.array(z.string()).optional(), requireMention: z.boolean().optional(), diff --git a/src/config/zod-schema.providers-whatsapp.ts b/src/config/zod-schema.providers-whatsapp.ts index 5a0d62379..7266f8bf6 100644 --- a/src/config/zod-schema.providers-whatsapp.ts +++ b/src/config/zod-schema.providers-whatsapp.ts @@ -30,6 +30,7 @@ export const WhatsAppAccountSchema = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), mediaMaxMb: z.number().int().positive().optional(), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), @@ -85,6 +86,7 @@ export const WhatsAppConfigSchema = z dmHistoryLimit: z.number().int().min(0).optional(), dms: z.record(z.string(), DmConfigSchema.optional()).optional(), textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), mediaMaxMb: z.number().int().positive().optional().default(50), blockStreaming: z.boolean().optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), diff --git a/src/discord/chunk.test.ts b/src/discord/chunk.test.ts index 069865273..f8e18e2b4 100644 --- a/src/discord/chunk.test.ts +++ b/src/discord/chunk.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from "vitest"; -import { chunkDiscordText } from "./chunk.js"; +import { chunkDiscordText, chunkDiscordTextWithMode } from "./chunk.js"; function countLines(text: string) { return text.split("\n").length; @@ -51,6 +51,16 @@ describe("chunkDiscordText", () => { expect(chunks.at(-1)).toContain("Done."); }); + it("keeps fenced blocks intact when chunkMode is newline", () => { + const text = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter"; + const chunks = chunkDiscordTextWithMode(text, { + maxChars: 2000, + maxLines: 50, + chunkMode: "newline", + }); + expect(chunks).toEqual(["```js\nconst a = 1;\nconst b = 2;\n```", "After"]); + }); + it("reserves space for closing fences when chunking", () => { const body = "a".repeat(120); const text = `\`\`\`txt\n${body}\n\`\`\``; diff --git a/src/discord/chunk.ts b/src/discord/chunk.ts index c9bbb1f62..8f3980d0f 100644 --- a/src/discord/chunk.ts +++ b/src/discord/chunk.ts @@ -1,3 +1,5 @@ +import { chunkMarkdownTextWithMode, type ChunkMode } from "../auto-reply/chunk.js"; + export type ChunkDiscordTextOpts = { /** Max characters per Discord message. Default: 2000. */ maxChars?: number; @@ -178,6 +180,31 @@ export function chunkDiscordText(text: string, opts: ChunkDiscordTextOpts = {}): return rebalanceReasoningItalics(text, chunks); } +export function chunkDiscordTextWithMode( + text: string, + opts: ChunkDiscordTextOpts & { chunkMode?: ChunkMode }, +): string[] { + const chunkMode = opts.chunkMode ?? "length"; + if (chunkMode !== "newline") { + return chunkDiscordText(text, opts); + } + const lineChunks = chunkMarkdownTextWithMode( + text, + Math.max(1, Math.floor(opts.maxChars ?? DEFAULT_MAX_CHARS)), + "newline", + ); + const chunks: string[] = []; + for (const line of lineChunks) { + const nested = chunkDiscordText(line, opts); + if (!nested.length && line) { + chunks.push(line); + continue; + } + chunks.push(...nested); + } + return chunks; +} + // Keep italics intact for reasoning payloads that are wrapped once with `_…_`. // When Discord chunking splits the message, we close italics at the end of // each chunk and reopen at the start of the next so every chunk renders diff --git a/src/discord/monitor/message-handler.process.ts b/src/discord/monitor/message-handler.process.ts index 8ced7affc..c26718ecf 100644 --- a/src/discord/monitor/message-handler.process.ts +++ b/src/discord/monitor/message-handler.process.ts @@ -22,6 +22,7 @@ import { createReplyDispatcherWithTyping } from "../../auto-reply/reply/reply-di import type { ReplyPayload } from "../../auto-reply/types.js"; import { recordInboundSession } from "../../channels/session.js"; import { readSessionUpdatedAt, resolveStorePath } from "../../config/sessions.js"; +import { resolveChunkMode } from "../../auto-reply/chunk.js"; import { resolveMarkdownTableMode } from "../../config/markdown-tables.js"; import { danger, logVerbose, shouldLogVerbose } from "../../globals.js"; import { buildAgentSessionKey } from "../../routing/resolve-route.js"; @@ -346,6 +347,7 @@ export async function processDiscordMessage(ctx: DiscordMessagePreflightContext) textLimit, maxLinesPerMessage: discordConfig?.maxLinesPerMessage, tableMode, + chunkMode: resolveChunkMode(cfg, "discord", accountId), }); replyReference.markSent(); }, diff --git a/src/discord/monitor/native-command.ts b/src/discord/monitor/native-command.ts index 94ff20a2e..75c9b3b2b 100644 --- a/src/discord/monitor/native-command.ts +++ b/src/discord/monitor/native-command.ts @@ -12,7 +12,7 @@ import { import { ApplicationCommandOptionType, ButtonStyle } from "discord-api-types/v10"; import { resolveEffectiveMessagesConfig, resolveHumanDelayConfig } from "../../agents/identity.js"; -import { resolveTextChunkLimit } from "../../auto-reply/chunk.js"; +import { resolveChunkMode, resolveTextChunkLimit } from "../../auto-reply/chunk.js"; import { buildCommandTextFromArgs, findCommandByNativeName, @@ -40,7 +40,7 @@ import { } from "../../pairing/pairing-store.js"; import { resolveAgentRoute } from "../../routing/resolve-route.js"; import { loadWebMedia } from "../../web/media.js"; -import { chunkDiscordText } from "../chunk.js"; +import { chunkDiscordTextWithMode } from "../chunk.js"; import { resolveCommandAuthorizedFromAuthorizers } from "../../channels/command-gating.js"; import { allowListMatches, @@ -767,6 +767,7 @@ async function dispatchDiscordCommandInteraction(params: { }), maxLinesPerMessage: discordConfig?.maxLinesPerMessage, preferFollowUp: preferFollowUp || didReply, + chunkMode: resolveChunkMode(cfg, "discord", accountId), }); } catch (error) { if (isDiscordUnknownInteraction(error)) { @@ -797,8 +798,9 @@ async function deliverDiscordInteractionReply(params: { textLimit: number; maxLinesPerMessage?: number; preferFollowUp: boolean; + chunkMode: "length" | "newline"; }) { - const { interaction, payload, textLimit, maxLinesPerMessage, preferFollowUp } = params; + const { interaction, payload, textLimit, maxLinesPerMessage, preferFollowUp, chunkMode } = params; const mediaList = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); const text = payload.text ?? ""; @@ -838,10 +840,12 @@ async function deliverDiscordInteractionReply(params: { }; }), ); - const chunks = chunkDiscordText(text, { + const chunks = chunkDiscordTextWithMode(text, { maxChars: textLimit, maxLines: maxLinesPerMessage, + chunkMode, }); + if (!chunks.length && text) chunks.push(text); const caption = chunks[0] ?? ""; await sendMessage(caption, media); for (const chunk of chunks.slice(1)) { @@ -852,10 +856,12 @@ async function deliverDiscordInteractionReply(params: { } if (!text.trim()) return; - const chunks = chunkDiscordText(text, { + const chunks = chunkDiscordTextWithMode(text, { maxChars: textLimit, maxLines: maxLinesPerMessage, + chunkMode, }); + if (!chunks.length && text) chunks.push(text); for (const chunk of chunks) { if (!chunk.trim()) continue; await sendMessage(chunk); diff --git a/src/discord/monitor/reply-delivery.ts b/src/discord/monitor/reply-delivery.ts index f54efb1b9..3b63f8842 100644 --- a/src/discord/monitor/reply-delivery.ts +++ b/src/discord/monitor/reply-delivery.ts @@ -1,10 +1,11 @@ import type { RequestClient } from "@buape/carbon"; +import type { ChunkMode } from "../../auto-reply/chunk.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; import type { MarkdownTableMode } from "../../config/types.base.js"; import { convertMarkdownTables } from "../../markdown/tables.js"; import type { RuntimeEnv } from "../../runtime.js"; -import { chunkDiscordText } from "../chunk.js"; +import { chunkDiscordTextWithMode } from "../chunk.js"; import { sendMessageDiscord } from "../send.js"; export async function deliverDiscordReply(params: { @@ -18,6 +19,7 @@ export async function deliverDiscordReply(params: { maxLinesPerMessage?: number; replyToId?: string; tableMode?: MarkdownTableMode; + chunkMode?: ChunkMode; }) { const chunkLimit = Math.min(params.textLimit, 2000); for (const payload of params.replies) { @@ -30,10 +32,14 @@ export async function deliverDiscordReply(params: { if (mediaList.length === 0) { let isFirstChunk = true; - for (const chunk of chunkDiscordText(text, { + const mode = params.chunkMode ?? "length"; + const chunks = chunkDiscordTextWithMode(text, { maxChars: chunkLimit, maxLines: params.maxLinesPerMessage, - })) { + chunkMode: mode, + }); + if (!chunks.length && text) chunks.push(text); + for (const chunk of chunks) { const trimmed = chunk.trim(); if (!trimmed) continue; await sendMessageDiscord(params.target, trimmed, { diff --git a/src/discord/send.outbound.ts b/src/discord/send.outbound.ts index 3c83f7b94..a47d0f4f1 100644 --- a/src/discord/send.outbound.ts +++ b/src/discord/send.outbound.ts @@ -1,5 +1,6 @@ import type { RequestClient } from "@buape/carbon"; import { Routes } from "discord-api-types/v10"; +import { resolveChunkMode } from "../auto-reply/chunk.js"; import { loadConfig } from "../config/config.js"; import { resolveMarkdownTableMode } from "../config/markdown-tables.js"; import { recordChannelActivity } from "../infra/channel-activity.js"; @@ -45,6 +46,7 @@ export async function sendMessageDiscord( channel: "discord", accountId: accountInfo.accountId, }); + const chunkMode = resolveChunkMode(cfg, "discord", accountInfo.accountId); const textWithTables = convertMarkdownTables(text ?? "", tableMode); const { token, rest, request } = createDiscordClient(opts, cfg); const recipient = parseRecipient(to); @@ -61,6 +63,7 @@ export async function sendMessageDiscord( request, accountInfo.config.maxLinesPerMessage, opts.embeds, + chunkMode, ); } else { result = await sendDiscordText( @@ -71,6 +74,7 @@ export async function sendMessageDiscord( request, accountInfo.config.maxLinesPerMessage, opts.embeds, + chunkMode, ); } } catch (err) { diff --git a/src/discord/send.shared.ts b/src/discord/send.shared.ts index 2961375ce..4919be29d 100644 --- a/src/discord/send.shared.ts +++ b/src/discord/send.shared.ts @@ -9,7 +9,8 @@ import { createDiscordRetryRunner, type RetryRunner } from "../infra/retry-polic import { normalizePollDurationHours, normalizePollInput, type PollInput } from "../polls.js"; import { loadWebMedia } from "../web/media.js"; import { resolveDiscordAccount } from "./accounts.js"; -import { chunkDiscordText } from "./chunk.js"; +import type { ChunkMode } from "../auto-reply/chunk.js"; +import { chunkDiscordTextWithMode } from "./chunk.js"; import { fetchChannelPermissionsDiscord, isThreadChannelType } from "./send.permissions.js"; import { DiscordSendError } from "./send.types.js"; import { parseDiscordTarget } from "./targets.js"; @@ -231,15 +232,18 @@ async function sendDiscordText( request: DiscordRequest, maxLinesPerMessage?: number, embeds?: unknown[], + chunkMode?: ChunkMode, ) { if (!text.trim()) { throw new Error("Message must be non-empty for Discord sends"); } const messageReference = replyTo ? { message_id: replyTo, fail_if_not_exists: false } : undefined; - const chunks = chunkDiscordText(text, { + const chunks = chunkDiscordTextWithMode(text, { maxChars: DISCORD_TEXT_LIMIT, maxLines: maxLinesPerMessage, + chunkMode, }); + if (!chunks.length && text) chunks.push(text); if (chunks.length === 1) { const res = (await request( () => @@ -285,14 +289,17 @@ async function sendDiscordMedia( request: DiscordRequest, maxLinesPerMessage?: number, embeds?: unknown[], + chunkMode?: ChunkMode, ) { const media = await loadWebMedia(mediaUrl); const chunks = text - ? chunkDiscordText(text, { + ? chunkDiscordTextWithMode(text, { maxChars: DISCORD_TEXT_LIMIT, maxLines: maxLinesPerMessage, + chunkMode, }) : []; + if (!chunks.length && text) chunks.push(text); const caption = chunks[0] ?? ""; const messageReference = replyTo ? { message_id: replyTo, fail_if_not_exists: false } : undefined; const res = (await request( @@ -314,7 +321,16 @@ async function sendDiscordMedia( )) as { id: string; channel_id: string }; for (const chunk of chunks.slice(1)) { if (!chunk.trim()) continue; - await sendDiscordText(rest, channelId, chunk, undefined, request, maxLinesPerMessage); + await sendDiscordText( + rest, + channelId, + chunk, + undefined, + request, + maxLinesPerMessage, + undefined, + chunkMode, + ); } return res; } diff --git a/src/imessage/monitor/deliver.ts b/src/imessage/monitor/deliver.ts index aa3c6dbb1..c07bc2b08 100644 --- a/src/imessage/monitor/deliver.ts +++ b/src/imessage/monitor/deliver.ts @@ -1,4 +1,4 @@ -import { chunkText } from "../../auto-reply/chunk.js"; +import { chunkTextWithMode, resolveChunkMode } from "../../auto-reply/chunk.js"; import { loadConfig } from "../../config/config.js"; import { resolveMarkdownTableMode } from "../../config/markdown-tables.js"; import { convertMarkdownTables } from "../../markdown/tables.js"; @@ -23,13 +23,14 @@ export async function deliverReplies(params: { channel: "imessage", accountId, }); + const chunkMode = resolveChunkMode(cfg, "imessage", accountId); for (const payload of replies) { const mediaList = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); const rawText = payload.text ?? ""; const text = convertMarkdownTables(rawText, tableMode); if (!text && mediaList.length === 0) continue; if (mediaList.length === 0) { - for (const chunk of chunkText(text, textLimit)) { + for (const chunk of chunkTextWithMode(text, textLimit, chunkMode)) { await sendMessageIMessage(target, chunk, { maxBytes, client, diff --git a/src/infra/outbound/deliver.test.ts b/src/infra/outbound/deliver.test.ts index 5c38a242b..2e939dfda 100644 --- a/src/infra/outbound/deliver.test.ts +++ b/src/infra/outbound/deliver.test.ts @@ -168,6 +168,84 @@ describe("deliverOutboundPayloads", () => { expect(results.map((r) => r.messageId)).toEqual(["w1", "w2"]); }); + it("respects newline chunk mode for WhatsApp", async () => { + const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" }); + const cfg: ClawdbotConfig = { + channels: { whatsapp: { textChunkLimit: 4000, chunkMode: "newline" } }, + }; + + await deliverOutboundPayloads({ + cfg, + channel: "whatsapp", + to: "+1555", + payloads: [{ text: "Line one\n\nLine two" }], + deps: { sendWhatsApp }, + }); + + expect(sendWhatsApp).toHaveBeenCalledTimes(2); + expect(sendWhatsApp).toHaveBeenNthCalledWith( + 1, + "+1555", + "Line one", + expect.objectContaining({ verbose: false }), + ); + expect(sendWhatsApp).toHaveBeenNthCalledWith( + 2, + "+1555", + "\nLine two", + expect.objectContaining({ verbose: false }), + ); + }); + + it("preserves fenced blocks for markdown chunkers in newline mode", async () => { + const chunker = vi.fn((text: string) => (text ? [text] : [])); + const sendText = vi.fn().mockImplementation(async ({ text }: { text: string }) => ({ + channel: "matrix" as const, + messageId: text, + roomId: "r1", + })); + const sendMedia = vi.fn().mockImplementation(async ({ text }: { text: string }) => ({ + channel: "matrix" as const, + messageId: text, + roomId: "r1", + })); + setActivePluginRegistry( + createTestRegistry([ + { + pluginId: "matrix", + source: "test", + plugin: createOutboundTestPlugin({ + id: "matrix", + outbound: { + deliveryMode: "direct", + chunker, + chunkerMode: "markdown", + textChunkLimit: 4000, + sendText, + sendMedia, + }, + }), + }, + ]), + ); + + const cfg: ClawdbotConfig = { + channels: { matrix: { textChunkLimit: 4000, chunkMode: "newline" } }, + }; + const text = "```js\nconst a = 1;\nconst b = 2;\n```\nAfter"; + + await deliverOutboundPayloads({ + cfg, + channel: "matrix", + to: "!room", + payloads: [{ text }], + }); + + expect(chunker).toHaveBeenCalledTimes(2); + expect(chunker).toHaveBeenNthCalledWith(1, "```js\nconst a = 1;\nconst b = 2;\n```", 4000); + expect(chunker).toHaveBeenNthCalledWith(2, "After", 4000); + }); + it("uses iMessage media maxBytes from agent fallback", async () => { const sendIMessage = vi.fn().mockResolvedValue({ messageId: "i1" }); setActivePluginRegistry( diff --git a/src/infra/outbound/deliver.ts b/src/infra/outbound/deliver.ts index 2665e9957..6df384b52 100644 --- a/src/infra/outbound/deliver.ts +++ b/src/infra/outbound/deliver.ts @@ -1,4 +1,9 @@ -import { resolveTextChunkLimit } from "../../auto-reply/chunk.js"; +import { + chunkByNewline, + chunkMarkdownTextWithMode, + resolveChunkMode, + resolveTextChunkLimit, +} from "../../auto-reply/chunk.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; import { resolveChannelMediaMaxBytes } from "../../channels/plugins/media-limits.js"; import { loadChannelOutboundAdapter } from "../../channels/plugins/outbound/load.js"; @@ -62,6 +67,7 @@ type Chunker = (text: string, limit: number) => string[]; type ChannelHandler = { chunker: Chunker | null; + chunkerMode?: "text" | "markdown"; textChunkLimit?: number; sendText: (text: string) => Promise; sendMedia: (caption: string, mediaUrl: string) => Promise; @@ -121,8 +127,10 @@ function createPluginHandler(params: { const sendText = outbound.sendText; const sendMedia = outbound.sendMedia; const chunker = outbound.chunker ?? null; + const chunkerMode = outbound.chunkerMode; return { chunker, + chunkerMode, textChunkLimit: outbound.textChunkLimit, sendText: async (text) => sendText({ @@ -192,6 +200,7 @@ export async function deliverOutboundPayloads(params: { fallbackLimit: handler.textChunkLimit, }) : undefined; + const chunkMode = handler.chunker ? resolveChunkMode(cfg, channel, accountId) : "length"; const isSignalChannel = channel === "signal"; const signalTableMode = isSignalChannel ? resolveMarkdownTableMode({ cfg, channel: "signal", accountId }) @@ -212,6 +221,23 @@ export async function deliverOutboundPayloads(params: { results.push(await handler.sendText(text)); return; } + if (chunkMode === "newline") { + const mode = handler.chunkerMode ?? "text"; + const lineChunks = + mode === "markdown" + ? chunkMarkdownTextWithMode(text, textLimit, "newline") + : chunkByNewline(text, textLimit, { splitLongLines: false }); + if (!lineChunks.length && text) lineChunks.push(text); + for (const lineChunk of lineChunks) { + const chunks = handler.chunker(lineChunk, textLimit); + if (!chunks.length && lineChunk) chunks.push(lineChunk); + for (const chunk of chunks) { + throwIfAborted(abortSignal); + results.push(await handler.sendText(chunk)); + } + } + return; + } const chunks = handler.chunker(text, textLimit); for (const chunk of chunks) { throwIfAborted(abortSignal); diff --git a/src/plugins/runtime/index.ts b/src/plugins/runtime/index.ts index dc2c65340..a807bcacf 100644 --- a/src/plugins/runtime/index.ts +++ b/src/plugins/runtime/index.ts @@ -3,6 +3,7 @@ import { createRequire } from "node:module"; import { chunkByNewline, chunkMarkdownText, + chunkMarkdownTextWithMode, chunkText, chunkTextWithMode, resolveChunkMode, @@ -170,6 +171,7 @@ export function createPluginRuntime(): PluginRuntime { text: { chunkByNewline, chunkMarkdownText, + chunkMarkdownTextWithMode, chunkText, chunkTextWithMode, resolveChunkMode, diff --git a/src/plugins/runtime/types.ts b/src/plugins/runtime/types.ts index 7df2bec81..027558681 100644 --- a/src/plugins/runtime/types.ts +++ b/src/plugins/runtime/types.ts @@ -37,6 +37,8 @@ type ResolveCommandAuthorizedFromAuthorizers = type ResolveTextChunkLimit = typeof import("../../auto-reply/chunk.js").resolveTextChunkLimit; type ResolveChunkMode = typeof import("../../auto-reply/chunk.js").resolveChunkMode; type ChunkMarkdownText = typeof import("../../auto-reply/chunk.js").chunkMarkdownText; +type ChunkMarkdownTextWithMode = + typeof import("../../auto-reply/chunk.js").chunkMarkdownTextWithMode; type ChunkText = typeof import("../../auto-reply/chunk.js").chunkText; type ChunkTextWithMode = typeof import("../../auto-reply/chunk.js").chunkTextWithMode; type ChunkByNewline = typeof import("../../auto-reply/chunk.js").chunkByNewline; @@ -180,6 +182,7 @@ export type PluginRuntime = { text: { chunkByNewline: ChunkByNewline; chunkMarkdownText: ChunkMarkdownText; + chunkMarkdownTextWithMode: ChunkMarkdownTextWithMode; chunkText: ChunkText; chunkTextWithMode: ChunkTextWithMode; resolveChunkMode: ResolveChunkMode; diff --git a/src/signal/monitor.ts b/src/signal/monitor.ts index e8f7570ab..4215a668f 100644 --- a/src/signal/monitor.ts +++ b/src/signal/monitor.ts @@ -1,4 +1,4 @@ -import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js"; +import { chunkTextWithMode, resolveChunkMode, resolveTextChunkLimit } from "../auto-reply/chunk.js"; import { DEFAULT_GROUP_HISTORY_LIMIT, type HistoryEntry } from "../auto-reply/reply/history.js"; import type { ReplyPayload } from "../auto-reply/types.js"; import type { ClawdbotConfig } from "../config/config.js"; @@ -214,14 +214,16 @@ async function deliverReplies(params: { runtime: RuntimeEnv; maxBytes: number; textLimit: number; + chunkMode: "length" | "newline"; }) { - const { replies, target, baseUrl, account, accountId, runtime, maxBytes, textLimit } = params; + const { replies, target, baseUrl, account, accountId, runtime, maxBytes, textLimit, chunkMode } = + params; for (const payload of replies) { const mediaList = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); const text = payload.text ?? ""; if (!text && mediaList.length === 0) continue; if (mediaList.length === 0) { - for (const chunk of chunkText(text, textLimit)) { + for (const chunk of chunkTextWithMode(text, textLimit, chunkMode)) { await sendMessageSignal(target, chunk, { baseUrl, account, @@ -262,6 +264,7 @@ export async function monitorSignalProvider(opts: MonitorSignalOpts = {}): Promi ); const groupHistories = new Map(); const textLimit = resolveTextChunkLimit(cfg, "signal", accountInfo.accountId); + const chunkMode = resolveChunkMode(cfg, "signal", accountInfo.accountId); const baseUrl = opts.baseUrl?.trim() || accountInfo.baseUrl; const account = opts.account?.trim() || accountInfo.config.account?.trim(); const dmPolicy = accountInfo.config.dmPolicy ?? "pairing"; @@ -340,7 +343,7 @@ export async function monitorSignalProvider(opts: MonitorSignalOpts = {}): Promi sendReadReceipts, readReceiptsViaDaemon, fetchAttachment, - deliverReplies, + deliverReplies: (params) => deliverReplies({ ...params, chunkMode }), resolveSignalReactionTargets, isSignalReactionMessage, shouldEmitSignalReactionNotification, diff --git a/src/slack/monitor/replies.ts b/src/slack/monitor/replies.ts index ca4635123..314be285f 100644 --- a/src/slack/monitor/replies.ts +++ b/src/slack/monitor/replies.ts @@ -1,5 +1,7 @@ import { createReplyReferencePlanner } from "../../auto-reply/reply/reply-reference.js"; import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js"; +import type { ChunkMode } from "../../auto-reply/chunk.js"; +import { chunkMarkdownTextWithMode } from "../../auto-reply/chunk.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; import type { MarkdownTableMode } from "../../config/types.base.js"; import type { RuntimeEnv } from "../../runtime.js"; @@ -118,6 +120,7 @@ export async function deliverSlackSlashReplies(params: { ephemeral: boolean; textLimit: number; tableMode?: MarkdownTableMode; + chunkMode?: ChunkMode; }) { const messages: string[] = []; const chunkLimit = Math.min(params.textLimit, 4000); @@ -129,9 +132,16 @@ export async function deliverSlackSlashReplies(params: { .filter(Boolean) .join("\n"); if (!combined) continue; - for (const chunk of markdownToSlackMrkdwnChunks(combined, chunkLimit, { - tableMode: params.tableMode, - })) { + const chunkMode = params.chunkMode ?? "length"; + const markdownChunks = + chunkMode === "newline" + ? chunkMarkdownTextWithMode(combined, chunkLimit, chunkMode) + : [combined]; + const chunks = markdownChunks.flatMap((markdown) => + markdownToSlackMrkdwnChunks(markdown, chunkLimit, { tableMode: params.tableMode }), + ); + if (!chunks.length && combined) chunks.push(combined); + for (const chunk of chunks) { messages.push(chunk); } } diff --git a/src/slack/monitor/slash.ts b/src/slack/monitor/slash.ts index 32900d7a0..d1c2a00ca 100644 --- a/src/slack/monitor/slash.ts +++ b/src/slack/monitor/slash.ts @@ -1,5 +1,6 @@ import type { SlackActionMiddlewareArgs, SlackCommandMiddlewareArgs } from "@slack/bolt"; import type { ChatCommandDefinition, CommandArgs } from "../../auto-reply/commands-registry.js"; +import { resolveChunkMode } from "../../auto-reply/chunk.js"; import { resolveEffectiveMessagesConfig } from "../../agents/identity.js"; import { buildCommandTextFromArgs, @@ -429,6 +430,7 @@ export function registerSlackMonitorSlashCommands(params: { respond, ephemeral: slashCommand.ephemeral, textLimit: ctx.textLimit, + chunkMode: resolveChunkMode(cfg, "slack", route.accountId), tableMode: resolveMarkdownTableMode({ cfg, channel: "slack", @@ -448,6 +450,7 @@ export function registerSlackMonitorSlashCommands(params: { respond, ephemeral: slashCommand.ephemeral, textLimit: ctx.textLimit, + chunkMode: resolveChunkMode(cfg, "slack", route.accountId), tableMode: resolveMarkdownTableMode({ cfg, channel: "slack", diff --git a/src/slack/send.ts b/src/slack/send.ts index 3759b2826..31677278a 100644 --- a/src/slack/send.ts +++ b/src/slack/send.ts @@ -1,6 +1,10 @@ import { type FilesUploadV2Arguments, type WebClient } from "@slack/web-api"; -import { resolveTextChunkLimit } from "../auto-reply/chunk.js"; +import { + chunkMarkdownTextWithMode, + resolveChunkMode, + resolveTextChunkLimit, +} from "../auto-reply/chunk.js"; import { loadConfig } from "../config/config.js"; import { logVerbose } from "../globals.js"; import { loadWebMedia } from "../web/media.js"; @@ -149,7 +153,15 @@ export async function sendMessageSlack( channel: "slack", accountId: account.accountId, }); - const chunks = markdownToSlackMrkdwnChunks(trimmedMessage, chunkLimit, { tableMode }); + const chunkMode = resolveChunkMode(cfg, "slack", account.accountId); + const markdownChunks = + chunkMode === "newline" + ? chunkMarkdownTextWithMode(trimmedMessage, chunkLimit, chunkMode) + : [trimmedMessage]; + const chunks = markdownChunks.flatMap((markdown) => + markdownToSlackMrkdwnChunks(markdown, chunkLimit, { tableMode }), + ); + if (!chunks.length && trimmedMessage) chunks.push(trimmedMessage); const mediaMaxBytes = typeof account.config.mediaMaxMb === "number" ? account.config.mediaMaxMb * 1024 * 1024 diff --git a/src/telegram/bot-message-dispatch.ts b/src/telegram/bot-message-dispatch.ts index 98c5a6d40..474b6136c 100644 --- a/src/telegram/bot-message-dispatch.ts +++ b/src/telegram/bot-message-dispatch.ts @@ -1,5 +1,6 @@ // @ts-nocheck import { EmbeddedBlockChunker } from "../agents/pi-embedded-block-chunker.js"; +import { resolveChunkMode } from "../auto-reply/chunk.js"; import { clearHistoryEntriesIfEnabled } from "../auto-reply/reply/history.js"; import { dispatchReplyWithBufferedBlockDispatcher } from "../auto-reply/reply/provider-dispatcher.js"; import { removeAckReactionAfterReply } from "../channels/ack-reactions.js"; @@ -125,6 +126,7 @@ export const dispatchTelegramMessage = async ({ channel: "telegram", accountId: route.accountId, }); + const chunkMode = resolveChunkMode(cfg, "telegram", route.accountId); const { queuedFinal } = await dispatchReplyWithBufferedBlockDispatcher({ ctx: ctxPayload, @@ -147,6 +149,7 @@ export const dispatchTelegramMessage = async ({ textLimit, messageThreadId: resolvedThreadId, tableMode, + chunkMode, onVoiceRecording: sendRecordVoice, }); }, diff --git a/src/telegram/bot-native-commands.ts b/src/telegram/bot-native-commands.ts index 17952ac83..a983452ba 100644 --- a/src/telegram/bot-native-commands.ts +++ b/src/telegram/bot-native-commands.ts @@ -1,6 +1,7 @@ import type { Bot, Context } from "grammy"; import { resolveEffectiveMessagesConfig } from "../agents/identity.js"; +import { resolveChunkMode } from "../auto-reply/chunk.js"; import { buildCommandTextFromArgs, findCommandByNativeName, @@ -320,6 +321,7 @@ export const registerTelegramNativeCommands = ({ typeof telegramCfg.blockStreaming === "boolean" ? !telegramCfg.blockStreaming : undefined; + const chunkMode = resolveChunkMode(cfg, "telegram", route.accountId); await dispatchReplyWithBufferedBlockDispatcher({ ctx: ctxPayload, @@ -337,6 +339,7 @@ export const registerTelegramNativeCommands = ({ textLimit, messageThreadId: resolvedThreadId, tableMode, + chunkMode, }); }, onError: (err, info) => { diff --git a/src/telegram/bot/delivery.ts b/src/telegram/bot/delivery.ts index 653474d50..b0b296fa6 100644 --- a/src/telegram/bot/delivery.ts +++ b/src/telegram/bot/delivery.ts @@ -4,6 +4,7 @@ import { markdownToTelegramHtml, renderTelegramHtmlText, } from "../format.js"; +import { chunkMarkdownTextWithMode, type ChunkMode } from "../../auto-reply/chunk.js"; import { splitTelegramCaption } from "../caption.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; import type { ReplyToMode } from "../../config/config.js"; @@ -32,12 +33,33 @@ export async function deliverReplies(params: { textLimit: number; messageThreadId?: number; tableMode?: MarkdownTableMode; + chunkMode?: ChunkMode; /** Callback invoked before sending a voice message to switch typing indicator. */ onVoiceRecording?: () => Promise | void; }) { const { replies, chatId, runtime, bot, replyToMode, textLimit, messageThreadId } = params; + const chunkMode = params.chunkMode ?? "length"; const threadParams = buildTelegramThreadParams(messageThreadId); let hasReplied = false; + const chunkText = (markdown: string) => { + const markdownChunks = + chunkMode === "newline" + ? chunkMarkdownTextWithMode(markdown, textLimit, chunkMode) + : [markdown]; + const chunks: ReturnType = []; + for (const chunk of markdownChunks) { + const nested = markdownToTelegramChunks(chunk, textLimit, { tableMode: params.tableMode }); + if (!nested.length && chunk) { + chunks.push({ + html: markdownToTelegramHtml(chunk, { tableMode: params.tableMode }), + text: chunk, + }); + continue; + } + chunks.push(...nested); + } + return chunks; + }; for (const reply of replies) { const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0; if (!reply?.text && !hasMedia) { @@ -55,9 +77,7 @@ export async function deliverReplies(params: { ? [reply.mediaUrl] : []; if (mediaList.length === 0) { - const chunks = markdownToTelegramChunks(reply.text || "", textLimit, { - tableMode: params.tableMode, - }); + const chunks = chunkText(reply.text || ""); for (const chunk of chunks) { await sendTelegramText(bot, chatId, chunk.html, runtime, { replyToMessageId: @@ -151,9 +171,7 @@ export async function deliverReplies(params: { // Send deferred follow-up text right after the first media item. // Chunk it in case it's extremely long (same logic as text-only replies). if (pendingFollowUpText && isFirstMedia) { - const chunks = markdownToTelegramChunks(pendingFollowUpText, textLimit, { - tableMode: params.tableMode, - }); + const chunks = chunkText(pendingFollowUpText); for (const chunk of chunks) { const replyToMessageIdFollowup = replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined; diff --git a/src/web/accounts.ts b/src/web/accounts.ts index 7a4467390..3af151526 100644 --- a/src/web/accounts.ts +++ b/src/web/accounts.ts @@ -22,6 +22,7 @@ export type ResolvedWhatsAppAccount = { groupPolicy?: GroupPolicy; dmPolicy?: DmPolicy; textChunkLimit?: number; + chunkMode?: "length" | "newline"; mediaMaxMb?: number; blockStreaming?: boolean; ackReaction?: WhatsAppAccountConfig["ackReaction"]; @@ -150,6 +151,7 @@ export function resolveWhatsAppAccount(params: { groupAllowFrom: accountCfg?.groupAllowFrom ?? rootCfg?.groupAllowFrom, groupPolicy: accountCfg?.groupPolicy ?? rootCfg?.groupPolicy, textChunkLimit: accountCfg?.textChunkLimit ?? rootCfg?.textChunkLimit, + chunkMode: accountCfg?.chunkMode ?? rootCfg?.chunkMode, mediaMaxMb: accountCfg?.mediaMaxMb ?? rootCfg?.mediaMaxMb, blockStreaming: accountCfg?.blockStreaming ?? rootCfg?.blockStreaming, ackReaction: accountCfg?.ackReaction ?? rootCfg?.ackReaction, diff --git a/src/web/auto-reply/deliver-reply.ts b/src/web/auto-reply/deliver-reply.ts index 2204a9e8f..9b2bb8072 100644 --- a/src/web/auto-reply/deliver-reply.ts +++ b/src/web/auto-reply/deliver-reply.ts @@ -1,4 +1,4 @@ -import { chunkMarkdownText } from "../../auto-reply/chunk.js"; +import { chunkMarkdownTextWithMode, type ChunkMode } from "../../auto-reply/chunk.js"; import type { MarkdownTableMode } from "../../config/types.base.js"; import { convertMarkdownTables } from "../../markdown/tables.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; @@ -15,6 +15,7 @@ export async function deliverWebReply(params: { msg: WebInboundMsg; maxMediaBytes: number; textLimit: number; + chunkMode?: ChunkMode; replyLogger: { info: (obj: unknown, msg: string) => void; warn: (obj: unknown, msg: string) => void; @@ -26,8 +27,9 @@ export async function deliverWebReply(params: { const { replyResult, msg, maxMediaBytes, textLimit, replyLogger, connectionId, skipLog } = params; const replyStarted = Date.now(); const tableMode = params.tableMode ?? "code"; + const chunkMode = params.chunkMode ?? "length"; const convertedText = convertMarkdownTables(replyResult.text || "", tableMode); - const textChunks = chunkMarkdownText(convertedText, textLimit); + const textChunks = chunkMarkdownTextWithMode(convertedText, textLimit, chunkMode); const mediaList = replyResult.mediaUrls?.length ? replyResult.mediaUrls : replyResult.mediaUrl diff --git a/src/web/auto-reply/monitor.ts b/src/web/auto-reply/monitor.ts index bd0f8d201..f086f5c82 100644 --- a/src/web/auto-reply/monitor.ts +++ b/src/web/auto-reply/monitor.ts @@ -79,6 +79,7 @@ export async function monitorWebChannel( groupAllowFrom: account.groupAllowFrom, groupPolicy: account.groupPolicy, textChunkLimit: account.textChunkLimit, + chunkMode: account.chunkMode, mediaMaxMb: account.mediaMaxMb, blockStreaming: account.blockStreaming, groups: account.groups, diff --git a/src/web/auto-reply/monitor/process-message.ts b/src/web/auto-reply/monitor/process-message.ts index 57ad5448f..a10b07bcd 100644 --- a/src/web/auto-reply/monitor/process-message.ts +++ b/src/web/auto-reply/monitor/process-message.ts @@ -1,5 +1,5 @@ import { resolveIdentityNamePrefix } from "../../../agents/identity.js"; -import { resolveTextChunkLimit } from "../../../auto-reply/chunk.js"; +import { resolveChunkMode, resolveTextChunkLimit } from "../../../auto-reply/chunk.js"; import { formatInboundEnvelope, resolveEnvelopeFormatOptions, @@ -229,6 +229,7 @@ export async function processMessage(params: { : undefined; const textLimit = params.maxMediaTextChunkLimit ?? resolveTextChunkLimit(params.cfg, "whatsapp"); + const chunkMode = resolveChunkMode(params.cfg, "whatsapp", params.route.accountId); const tableMode = resolveMarkdownTableMode({ cfg: params.cfg, channel: "whatsapp", @@ -338,6 +339,7 @@ export async function processMessage(params: { msg: params.msg, maxMediaBytes: params.maxMediaBytes, textLimit, + chunkMode, replyLogger: params.replyLogger, connectionId: params.connectionId, // Tool + block updates are noisy; skip their log lines.