diff --git a/CHANGELOG.md b/CHANGELOG.md index ff77bb300..8d63ff67c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,6 +71,8 @@ - Auto-reply: preserve spacing when stripping inline directives. (#539) — thanks @joshp123 - Auto-reply: relax reply tag parsing to allow whitespace. (#560) — thanks @mcinteerj - Auto-reply: add per-provider block streaming toggles and coalesce streamed blocks to reduce line spam. (#536) — thanks @mcinteerj +- Auto-reply: default block streaming off for non-Telegram providers unless explicitly enabled, and avoid splitting on forced flushes below max. +- Auto-reply: raise default coalesce minChars for Signal/Slack/Discord and clarify streaming vs draft streaming in docs. - Auto-reply: default block streaming coalesce idle to 1s to reduce tiny chunks. — thanks @steipete - Auto-reply: fix /status usage summary filtering for the active provider. - Auto-reply: deduplicate followup queue entries using message id/routing to avoid duplicate replies. (#600) — thanks @samratjha96 diff --git a/docs/concepts/agent.md b/docs/concepts/agent.md index 33774740d..1323609fc 100644 --- a/docs/concepts/agent.md +++ b/docs/concepts/agent.md @@ -83,13 +83,14 @@ When queue mode is `followup` or `collect`, inbound messages are held until the current turn ends, then a new agent turn starts with the queued payloads. See [`docs/queue.md`](/concepts/queue) for mode + debounce/cap behavior. -Block streaming sends completed assistant blocks as soon as they finish; disable -via `agents.defaults.blockStreamingDefault: "off"` if you only want the final response. +Block streaming sends completed assistant blocks as soon as they finish; it is +**off by default** (`agents.defaults.blockStreamingDefault: "off"`). Tune the boundary via `agents.defaults.blockStreamingBreak` (`text_end` vs `message_end`; defaults to text_end). 
Control soft block chunking with `agents.defaults.blockStreamingChunk` (defaults to 800–1200 chars; prefers paragraph breaks, then newlines; sentences last). Coalesce streamed chunks with `agents.defaults.blockStreamingCoalesce` to reduce -single-line spam (idle-based merging before send). +single-line spam (idle-based merging before send). Non-Telegram providers require +explicit `*.blockStreaming: true` to enable block replies. Verbose tool summaries are emitted at tool start (no debounce); Control UI streams tool output via agent events when available. More details: [Streaming + chunking](/concepts/streaming). diff --git a/docs/concepts/messages.md b/docs/concepts/messages.md index 707edf13c..531e5b635 100644 --- a/docs/concepts/messages.md +++ b/docs/concepts/messages.md @@ -57,11 +57,11 @@ Block streaming sends partial replies as the model produces text blocks. Chunking respects provider text limits and avoids splitting fenced code. Key settings: -- `agents.defaults.blockStreamingDefault` (`on|off`) +- `agents.defaults.blockStreamingDefault` (`on|off`, default off) - `agents.defaults.blockStreamingBreak` (`text_end|message_end`) - `agents.defaults.blockStreamingChunk` (`minChars|maxChars|breakPreference`) - `agents.defaults.blockStreamingCoalesce` (idle-based batching) -- Provider overrides: `*.blockStreaming` and `*.blockStreamingCoalesce` +- Provider overrides: `*.blockStreaming` and `*.blockStreamingCoalesce` (non-Telegram providers require explicit `*.blockStreaming: true`) Details: [Streaming + chunking](/concepts/streaming). diff --git a/docs/concepts/streaming.md b/docs/concepts/streaming.md index ff6a385c2..1093140bd 100644 --- a/docs/concepts/streaming.md +++ b/docs/concepts/streaming.md @@ -32,7 +32,7 @@ Legend: - `provider send`: actual outbound messages (block replies). **Controls:** -- `agents.defaults.blockStreamingDefault`: `"on"`/`"off"` (default on). +- `agents.defaults.blockStreamingDefault`: `"on"`/`"off"` (default off). 
- Provider overrides: `*.blockStreaming` (and per-account variants) to force `"on"`/`"off"` per provider. - `agents.defaults.blockStreamingBreak`: `"text_end"` or `"message_end"`. - `agents.defaults.blockStreamingChunk`: `{ minChars, maxChars, breakPreference? }`. @@ -69,14 +69,19 @@ progressive output. - Joiner is derived from `blockStreamingChunk.breakPreference` (`paragraph` → `\n\n`, `newline` → `\n`, `sentence` → space). - Provider overrides are available via `*.blockStreamingCoalesce` (including per-account configs). +- Default coalesce `minChars` is bumped to 1500 for Signal/Slack/Discord unless overridden. ## “Stream chunks or everything” This maps to: -- **Stream chunks:** `blockStreamingDefault: "on"` + `blockStreamingBreak: "text_end"` (emit as you go). +- **Stream chunks:** `blockStreamingDefault: "on"` + `blockStreamingBreak: "text_end"` (emit as you go). Non-Telegram providers also need `*.blockStreaming: true`. - **Stream everything at end:** `blockStreamingBreak: "message_end"` (flush once, possibly multiple chunks if very long). - **No block streaming:** `blockStreamingDefault: "off"` (only final reply). +**Provider note:** For non-Telegram providers, block streaming is **off unless** +`*.blockStreaming` is explicitly set to `true`. Telegram can stream drafts +(`telegram.streamMode`) without block replies. + ## Telegram draft streaming (token-ish) Telegram is the only provider with draft streaming: @@ -85,6 +90,7 @@ Telegram is the only provider with draft streaming: - `partial`: draft updates with the latest stream text. - `block`: draft updates in chunked blocks (same chunker rules). - `off`: no draft streaming. +- Draft streaming is separate from block streaming; block replies are off by default, and non-Telegram providers enable them only with an explicit `*.blockStreaming: true`. - Final reply is still a normal message. - `/reasoning stream` writes reasoning into the draft bubble (Telegram only). 
diff --git a/docs/gateway/configuration-examples.md b/docs/gateway/configuration-examples.md index 5a874120e..a0020af69 100644 --- a/docs/gateway/configuration-examples.md +++ b/docs/gateway/configuration-examples.md @@ -208,7 +208,7 @@ Save to `~/.clawdbot/clawdbot.json` and you can DM the bot from that number. thinkingDefault: "low", verboseDefault: "off", elevatedDefault: "on", - blockStreamingDefault: "on", + blockStreamingDefault: "off", blockStreamingBreak: "text_end", blockStreamingChunk: { minChars: 800, diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 8121c50de..282d3adfd 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -675,7 +675,7 @@ Multi-account support lives under `telegram.accounts` (see the multi-account sec } }, replyToMode: "first", // off | first | all - streamMode: "partial", // off | partial | block (draft streaming) + streamMode: "partial", // off | partial | block (draft streaming; separate from block streaming) actions: { reactions: true, sendMessage: true }, // tool action gates (false disables) mediaMaxMb: 5, retry: { // outbound retry policy @@ -1143,8 +1143,9 @@ Example (adaptive tuned): See [/concepts/session-pruning](/concepts/session-pruning) for behavior details. Block streaming: -- `agents.defaults.blockStreamingDefault`: `"on"`/`"off"` (default on). +- `agents.defaults.blockStreamingDefault`: `"on"`/`"off"` (default off). - Provider overrides: `*.blockStreaming` (and per-account variants) to force block streaming on/off. + Non-Telegram providers require an explicit `*.blockStreaming: true` to enable block replies. - `agents.defaults.blockStreamingBreak`: `"text_end"` or `"message_end"` (default: text_end). - `agents.defaults.blockStreamingChunk`: soft chunking for streamed blocks. Defaults to 800–1200 chars, prefers paragraph breaks (`\n\n`), then newlines, then sentences. 
@@ -1156,7 +1157,8 @@ Block streaming: ``` - `agents.defaults.blockStreamingCoalesce`: merge streamed blocks before sending. Defaults to `{ idleMs: 1000 }` and inherits `minChars` from `blockStreamingChunk` - with `maxChars` capped to the provider text limit. + with `maxChars` capped to the provider text limit. Signal/Slack/Discord default + to `minChars: 1500` unless overridden. Provider overrides: `whatsapp.blockStreamingCoalesce`, `telegram.blockStreamingCoalesce`, `discord.blockStreamingCoalesce`, `slack.blockStreamingCoalesce`, `signal.blockStreamingCoalesce`, `imessage.blockStreamingCoalesce`, `msteams.blockStreamingCoalesce` (and per-account variants). diff --git a/docs/providers/grammy.md b/docs/providers/grammy.md index e01bcedbd..9fa3c30fb 100644 --- a/docs/providers/grammy.md +++ b/docs/providers/grammy.md @@ -18,7 +18,7 @@ read_when: - **Webhook support:** `webhook-set.ts` wraps `setWebhook/deleteWebhook`; `webhook.ts` hosts the callback with health + graceful shutdown. Gateway enables webhook mode when `telegram.webhookUrl` is set (otherwise it long-polls). - **Sessions:** direct chats collapse into the agent main session (`agent::`); groups use `agent::telegram:group:`; replies route back to the same provider. - **Config knobs:** `telegram.botToken`, `telegram.dmPolicy`, `telegram.groups` (allowlist + mention defaults), `telegram.allowFrom`, `telegram.groupAllowFrom`, `telegram.groupPolicy`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`. -- **Draft streaming:** optional `telegram.streamMode` uses `sendMessageDraft` in private topic chats (Bot API 9.3+). +- **Draft streaming:** optional `telegram.streamMode` uses `sendMessageDraft` in private topic chats (Bot API 9.3+). This is separate from provider block streaming. - **Tests:** grammy mocks cover DM + group mention gating and outbound send; more media/webhook fixtures still welcome. 
Open questions diff --git a/docs/providers/telegram.md b/docs/providers/telegram.md index 6485b09dd..fce881f1a 100644 --- a/docs/providers/telegram.md +++ b/docs/providers/telegram.md @@ -206,6 +206,10 @@ Config: - `block`: update the draft bubble in larger blocks (chunked). - `off`: disable draft streaming. +Note: draft streaming is separate from **block streaming** (provider messages). +Block streaming is off by default and requires `telegram.blockStreaming: true` +if you want early Telegram messages instead of draft updates. + Reasoning stream (Telegram only): - `/reasoning stream` streams reasoning into the draft bubble while the reply is generating, then sends the final answer without reasoning. diff --git a/src/agents/pi-embedded-block-chunker.ts b/src/agents/pi-embedded-block-chunker.ts index f5473c182..53e933273 100644 --- a/src/agents/pi-embedded-block-chunker.ts +++ b/src/agents/pi-embedded-block-chunker.ts @@ -53,6 +53,14 @@ export class EmbeddedBlockChunker { const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars)); if (this.#buffer.length < minChars && !force) return; + if (force && this.#buffer.length <= maxChars) { + if (this.#buffer.trim().length > 0) { + emit(this.#buffer); + } + this.#buffer = ""; + return; + } + while ( this.#buffer.length >= minChars || (force && this.#buffer.length > 0) diff --git a/src/agents/pi-embedded-subscribe.test.ts b/src/agents/pi-embedded-subscribe.test.ts index a49311f17..40949d799 100644 --- a/src/agents/pi-embedded-subscribe.test.ts +++ b/src/agents/pi-embedded-subscribe.test.ts @@ -784,7 +784,7 @@ describe("subscribeEmbeddedPiSession", () => { blockReplyBreak: "message_end", blockReplyChunking: { minChars: 5, - maxChars: 40, + maxChars: 25, breakPreference: "paragraph", }, }); @@ -836,7 +836,7 @@ describe("subscribeEmbeddedPiSession", () => { blockReplyBreak: "message_end", blockReplyChunking: { minChars: 5, - maxChars: 50, + maxChars: 25, breakPreference: "paragraph", }, }); @@ -939,7 +939,7 @@ 
describe("subscribeEmbeddedPiSession", () => { blockReplyBreak: "message_end", blockReplyChunking: { minChars: 5, - maxChars: 40, + maxChars: 25, breakPreference: "paragraph", }, }); @@ -986,7 +986,7 @@ describe("subscribeEmbeddedPiSession", () => { blockReplyBreak: "message_end", blockReplyChunking: { minChars: 5, - maxChars: 45, + maxChars: 30, breakPreference: "paragraph", }, }); @@ -1035,7 +1035,7 @@ describe("subscribeEmbeddedPiSession", () => { blockReplyBreak: "message_end", blockReplyChunking: { minChars: 10, - maxChars: 50, + maxChars: 30, breakPreference: "paragraph", }, }); diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index b5118f1db..9370dfc58 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -470,20 +470,26 @@ export async function getReplyFromConfig( (agentCfg?.elevatedDefault as ElevatedLevel | undefined) ?? "on") : "off"; + const providerKey = sessionCtx.Provider?.trim().toLowerCase(); + const explicitBlockStreamingEnable = opts?.disableBlockStreaming === false; const resolvedBlockStreaming = opts?.disableBlockStreaming === true ? "off" : opts?.disableBlockStreaming === false ? "on" - : agentCfg?.blockStreamingDefault === "off" - ? "off" - : "on"; + : agentCfg?.blockStreamingDefault === "on" + ? "on" + : "off"; const resolvedBlockStreamingBreak: "text_end" | "message_end" = agentCfg?.blockStreamingBreak === "message_end" ? "message_end" : "text_end"; + const allowBlockStreaming = + providerKey === "telegram" || explicitBlockStreamingEnable; const blockStreamingEnabled = - resolvedBlockStreaming === "on" && opts?.disableBlockStreaming !== true; + resolvedBlockStreaming === "on" && + opts?.disableBlockStreaming !== true && + allowBlockStreaming; const blockReplyChunking = blockStreamingEnabled ? 
resolveBlockStreamingChunking( cfg, diff --git a/src/auto-reply/reply/block-streaming.ts b/src/auto-reply/reply/block-streaming.ts index f5f305ee2..17f2cda43 100644 --- a/src/auto-reply/reply/block-streaming.ts +++ b/src/auto-reply/reply/block-streaming.ts @@ -5,6 +5,13 @@ import { resolveTextChunkLimit, type TextChunkProvider } from "../chunk.js"; const DEFAULT_BLOCK_STREAM_MIN = 800; const DEFAULT_BLOCK_STREAM_MAX = 1200; const DEFAULT_BLOCK_STREAM_COALESCE_IDLE_MS = 1000; +const PROVIDER_COALESCE_DEFAULTS: Partial< + Record +> = { + signal: { minChars: 1500, idleMs: 1000 }, + slack: { minChars: 1500, idleMs: 1000 }, + discord: { minChars: 1500, idleMs: 1000 }, +}; const BLOCK_CHUNK_PROVIDERS = new Set([ "whatsapp", @@ -77,6 +84,9 @@ export function resolveBlockStreamingCoalescing( const providerKey = normalizeChunkProvider(provider); const textLimit = resolveTextChunkLimit(cfg, providerKey, accountId); const normalizedAccountId = normalizeAccountId(accountId); + const providerDefaults = providerKey + ? PROVIDER_COALESCE_DEFAULTS[providerKey] + : undefined; const providerCfg = (() => { if (!cfg || !providerKey) return undefined; if (providerKey === "whatsapp") { @@ -125,7 +135,10 @@ export function resolveBlockStreamingCoalescing( const minRequested = Math.max( 1, Math.floor( - coalesceCfg?.minChars ?? chunking?.minChars ?? DEFAULT_BLOCK_STREAM_MIN, + coalesceCfg?.minChars ?? + providerDefaults?.minChars ?? + chunking?.minChars ?? + DEFAULT_BLOCK_STREAM_MIN, ), ); const maxRequested = Math.max( @@ -136,7 +149,11 @@ export function resolveBlockStreamingCoalescing( const minChars = Math.min(minRequested, maxChars); const idleMs = Math.max( 0, - Math.floor(coalesceCfg?.idleMs ?? DEFAULT_BLOCK_STREAM_COALESCE_IDLE_MS), + Math.floor( + coalesceCfg?.idleMs ?? + providerDefaults?.idleMs ?? + DEFAULT_BLOCK_STREAM_COALESCE_IDLE_MS, + ), ); const preference = chunking?.breakPreference ?? 
"paragraph"; const joiner = diff --git a/src/config/schema.ts b/src/config/schema.ts index 0c4b7f4bb..ec31f91c8 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -116,7 +116,7 @@ const FIELD_LABELS: Record = { "talk.apiKey": "Talk API Key", "telegram.botToken": "Telegram Bot Token", "telegram.dmPolicy": "Telegram DM Policy", - "telegram.streamMode": "Telegram Stream Mode", + "telegram.streamMode": "Telegram Draft Stream Mode", "telegram.retry.attempts": "Telegram Retry Attempts", "telegram.retry.minDelayMs": "Telegram Retry Min Delay (ms)", "telegram.retry.maxDelayMs": "Telegram Retry Max Delay (ms)", @@ -193,7 +193,7 @@ const FIELD_HELP: Record = { "telegram.dmPolicy": 'Direct message access control ("pairing" recommended). "open" requires telegram.allowFrom=["*"].', "telegram.streamMode": - "Draft streaming mode for Telegram replies (off | partial | block). Requires private topics + sendMessageDraft.", + "Draft streaming mode for Telegram replies (off | partial | block). Separate from block streaming; requires private topics + sendMessageDraft.", "telegram.retry.attempts": "Max retry attempts for outbound Telegram API calls (default: 3).", "telegram.retry.minDelayMs":