// Utilities for splitting outbound text into platform-sized chunks without // unintentionally breaking on newlines. Using [\s\S] keeps newlines inside // the chunk so messages are only split when they truly exceed the limit. import type { ClawdbotConfig } from "../config/config.js"; import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans, } from "../markdown/fences.js"; export type TextChunkProvider = | "whatsapp" | "telegram" | "discord" | "slack" | "signal" | "imessage" | "webchat"; const DEFAULT_CHUNK_LIMIT_BY_PROVIDER: Record = { whatsapp: 4000, telegram: 4000, discord: 2000, slack: 4000, signal: 4000, imessage: 4000, webchat: 4000, }; export function resolveTextChunkLimit( cfg: ClawdbotConfig | undefined, provider?: TextChunkProvider, ): number { const providerOverride = (() => { if (!provider) return undefined; if (provider === "whatsapp") return cfg?.whatsapp?.textChunkLimit; if (provider === "telegram") return cfg?.telegram?.textChunkLimit; if (provider === "discord") return cfg?.discord?.textChunkLimit; if (provider === "slack") return cfg?.slack?.textChunkLimit; if (provider === "signal") return cfg?.signal?.textChunkLimit; if (provider === "imessage") return cfg?.imessage?.textChunkLimit; return undefined; })(); if (typeof providerOverride === "number" && providerOverride > 0) { return providerOverride; } if (provider) return DEFAULT_CHUNK_LIMIT_BY_PROVIDER[provider]; return 4000; } export function chunkText(text: string, limit: number): string[] { if (!text) return []; if (limit <= 0) return [text]; if (text.length <= limit) return [text]; const chunks: string[] = []; let remaining = text; while (remaining.length > limit) { const window = remaining.slice(0, limit); // 1) Prefer a newline break inside the window. let breakIdx = window.lastIndexOf("\n"); // 2) Otherwise prefer the last whitespace (word boundary) inside the window. if (breakIdx <= 0) { for (let i = window.length - 1; i >= 0; i--) { if (/\s/.test(window[i])) { breakIdx = i; break; } } } // 3) Fallback: hard break exactly at the limit. if (breakIdx <= 0) breakIdx = limit; const rawChunk = remaining.slice(0, breakIdx); const chunk = rawChunk.trimEnd(); if (chunk.length > 0) { chunks.push(chunk); } // If we broke on whitespace/newline, skip that separator; for hard breaks keep it. const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]); const nextStart = Math.min( remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0), ); remaining = remaining.slice(nextStart).trimStart(); } if (remaining.length) chunks.push(remaining); return chunks; } export function chunkMarkdownText(text: string, limit: number): string[] { if (!text) return []; if (limit <= 0) return [text]; if (text.length <= limit) return [text]; const chunks: string[] = []; let remaining = text; while (remaining.length > limit) { const spans = parseFenceSpans(remaining); const window = remaining.slice(0, limit); const softBreak = pickSafeBreakIndex(window, spans); let breakIdx = softBreak > 0 ? softBreak : limit; const initialFence = isSafeFenceBreak(spans, breakIdx) ? undefined : findFenceSpanAt(spans, breakIdx); let fenceToSplit = initialFence; if (initialFence) { const closeLine = `${initialFence.indent}${initialFence.marker}`; const maxIdxIfNeedNewline = limit - (closeLine.length + 1); if (maxIdxIfNeedNewline <= 0) { fenceToSplit = undefined; breakIdx = limit; } else { const minProgressIdx = Math.min( remaining.length, initialFence.start + initialFence.openLine.length + 2, ); const maxIdxIfAlreadyNewline = limit - closeLine.length; let pickedNewline = false; let lastNewline = remaining.lastIndexOf( "\n", Math.max(0, maxIdxIfAlreadyNewline - 1), ); while (lastNewline !== -1) { const candidateBreak = lastNewline + 1; if (candidateBreak < minProgressIdx) break; const candidateFence = findFenceSpanAt(spans, candidateBreak); if (candidateFence && candidateFence.start === initialFence.start) { breakIdx = Math.max(1, candidateBreak); pickedNewline = true; break; } lastNewline = remaining.lastIndexOf("\n", lastNewline - 1); } if (!pickedNewline) { if (minProgressIdx > maxIdxIfAlreadyNewline) { fenceToSplit = undefined; breakIdx = limit; } else { breakIdx = Math.max(minProgressIdx, maxIdxIfNeedNewline); } } } const fenceAtBreak = findFenceSpanAt(spans, breakIdx); fenceToSplit = fenceAtBreak && fenceAtBreak.start === initialFence.start ? fenceAtBreak : undefined; } let rawChunk = remaining.slice(0, breakIdx); if (!rawChunk) break; const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]); const nextStart = Math.min( remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0), ); let next = remaining.slice(nextStart); if (fenceToSplit) { const closeLine = `${fenceToSplit.indent}${fenceToSplit.marker}`; rawChunk = rawChunk.endsWith("\n") ? `${rawChunk}${closeLine}` : `${rawChunk}\n${closeLine}`; next = `${fenceToSplit.openLine}\n${next}`; } else { next = stripLeadingNewlines(next); } chunks.push(rawChunk); remaining = next; } if (remaining.length) chunks.push(remaining); return chunks; } function stripLeadingNewlines(value: string): string { let i = 0; while (i < value.length && value[i] === "\n") i++; return i > 0 ? value.slice(i) : value; } function pickSafeBreakIndex( window: string, spans: ReturnType, ): number { let newlineIdx = window.lastIndexOf("\n"); while (newlineIdx > 0) { if (isSafeFenceBreak(spans, newlineIdx)) return newlineIdx; newlineIdx = window.lastIndexOf("\n", newlineIdx - 1); } for (let i = window.length - 1; i > 0; i--) { if (/\s/.test(window[i]) && isSafeFenceBreak(spans, i)) return i; } return -1; }