fix(auto-reply): coalesce block replies and document streaming toggles (#536) (thanks @mcinteerj)

2026-01-09 18:19:55 +00:00
parent a05916bee8
commit fd15704c77
18 changed files with 714 additions and 99 deletions
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -1045,6 +1045,15 @@ export function subscribeEmbeddedPiSession(params: {
          stream: "lifecycle",
          data: { phase: "end" },
        });
+        if (params.onBlockReply) {
+          if (blockChunker?.hasBuffered()) {
+            blockChunker.drain({ force: true, emit: emitBlockChunk });
+            blockChunker.reset();
+          } else if (blockBuffer.length > 0) {
+            emitBlockChunk(blockBuffer);
+            blockBuffer = "";
+          }
+        }
        if (pendingCompactionRetry > 0) {
          resolveCompactionRetry();
        } else {
--- a/src/auto-reply/reply.block-streaming.test.ts
+++ b/src/auto-reply/reply.block-streaming.test.ts
@@ -149,7 +149,7 @@ describe("block streaming", () => {

      const res = await replyPromise;
      expect(res).toBeUndefined();
-      expect(seen).toEqual(["first", "second"]);
+      expect(seen).toEqual(["first\n\nsecond"]);
    });
  });

--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@@ -454,7 +454,11 @@ export async function getReplyFromConfig(
  const blockStreamingEnabled =
    resolvedBlockStreaming === "on" && opts?.disableBlockStreaming !== true;
  const blockReplyChunking = blockStreamingEnabled
-    ? resolveBlockStreamingChunking(cfg, sessionCtx.Provider)
+    ? resolveBlockStreamingChunking(
+        cfg,
+        sessionCtx.Provider,
+        sessionCtx.AccountId,
+      )
    : undefined;

  const modelState = await createModelSelectionState({
--- a/src/auto-reply/reply/agent-runner.block-streaming.test.ts
+++ b/src/auto-reply/reply/agent-runner.block-streaming.test.ts
@@ -0,0 +1,132 @@
+import { describe, expect, it, vi } from "vitest";
+
+import type { TemplateContext } from "../templating.js";
+import type { FollowupRun, QueueSettings } from "./queue.js";
+import { createMockTypingController } from "./test-helpers.js";
+
+const runEmbeddedPiAgentMock = vi.fn();
+
+vi.mock("../../agents/model-fallback.js", () => ({
+  runWithModelFallback: async ({
+    provider,
+    model,
+    run,
+  }: {
+    provider: string;
+    model: string;
+    run: (provider: string, model: string) => Promise<unknown>;
+  }) => ({
+    result: await run(provider, model),
+    provider,
+    model,
+  }),
+}));
+
+vi.mock("../../agents/pi-embedded.js", () => ({
+  queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
+  runEmbeddedPiAgent: (params: unknown) => runEmbeddedPiAgentMock(params),
+}));
+
+vi.mock("./queue.js", async () => {
+  const actual =
+    await vi.importActual<typeof import("./queue.js")>("./queue.js");
+  return {
+    ...actual,
+    enqueueFollowupRun: vi.fn(),
+    scheduleFollowupDrain: vi.fn(),
+  };
+});
+
+import { runReplyAgent } from "./agent-runner.js";
+
+describe("runReplyAgent block streaming", () => {
+  it("coalesces duplicate text_end block replies", async () => {
+    const onBlockReply = vi.fn();
+    runEmbeddedPiAgentMock.mockImplementationOnce(async (params) => {
+      const block = params.onBlockReply as
+        | ((payload: { text?: string }) => void)
+        | undefined;
+      block?.({ text: "Hello" });
+      block?.({ text: "Hello" });
+      return {
+        payloads: [{ text: "Final message" }],
+        meta: {},
+      };
+    });
+
+    const typing = createMockTypingController();
+    const sessionCtx = {
+      Provider: "discord",
+      OriginatingTo: "channel:C1",
+      AccountId: "primary",
+      MessageSid: "msg",
+    } as unknown as TemplateContext;
+    const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings;
+    const followupRun = {
+      prompt: "hello",
+      summaryLine: "hello",
+      enqueuedAt: Date.now(),
+      run: {
+        sessionId: "session",
+        sessionKey: "main",
+        messageProvider: "discord",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: "/tmp",
+        config: {
+          agents: {
+            defaults: {
+              blockStreamingCoalesce: {
+                minChars: 1,
+                maxChars: 200,
+                idleMs: 0,
+              },
+            },
+          },
+        },
+        skillsSnapshot: {},
+        provider: "anthropic",
+        model: "claude",
+        thinkLevel: "low",
+        verboseLevel: "off",
+        elevatedLevel: "off",
+        bashElevated: {
+          enabled: false,
+          allowed: false,
+          defaultLevel: "off",
+        },
+        timeoutMs: 1_000,
+        blockReplyBreak: "text_end",
+      },
+    } as unknown as FollowupRun;
+
+    const result = await runReplyAgent({
+      commandBody: "hello",
+      followupRun,
+      queueKey: "main",
+      resolvedQueue,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      opts: { onBlockReply },
+      typing,
+      sessionCtx,
+      defaultModel: "anthropic/claude-opus-4-5",
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: true,
+      blockReplyChunking: {
+        minChars: 1,
+        maxChars: 200,
+        breakPreference: "paragraph",
+      },
+      resolvedBlockStreamingBreak: "text_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+
+    expect(onBlockReply).toHaveBeenCalledTimes(1);
+    expect(onBlockReply.mock.calls[0][0].text).toBe("Hello");
+    expect(result).toBeUndefined();
+  });
+});
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -35,6 +35,8 @@ import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
 import { SILENT_REPLY_TOKEN } from "../tokens.js";
 import type { GetReplyOptions, ReplyPayload } from "../types.js";
 import { extractAudioTag } from "./audio-tags.js";
+import { createBlockReplyPipeline } from "./block-reply-pipeline.js";
+import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
 import { createFollowupRunner } from "./followup-runner.js";
 import {
  enqueueFollowupRun,
@@ -132,23 +134,6 @@ const appendUsageLine = (
  return updated;
 };

-const withTimeout = async <T>(
-  promise: Promise<T>,
-  timeoutMs: number,
-  timeoutError: Error,
-): Promise<T> => {
-  if (!timeoutMs || timeoutMs <= 0) return promise;
-  let timer: NodeJS.Timeout | undefined;
-  const timeoutPromise = new Promise<never>((_, reject) => {
-    timer = setTimeout(() => reject(timeoutError), timeoutMs);
-  });
-  try {
-    return await Promise.race([promise, timeoutPromise]);
-  } finally {
-    if (timer) clearTimeout(timer);
-  }
-};
-
 export async function runReplyAgent(params: {
  commandBody: string;
  followupRun: FollowupRun;
@@ -228,29 +213,9 @@ export async function runReplyAgent(params: {
    return resolvedVerboseLevel === "on";
  };

-  const streamedPayloadKeys = new Set<string>();
-  const pendingStreamedPayloadKeys = new Set<string>();
-  const pendingBlockTasks = new Set<Promise<void>>();
  const pendingToolTasks = new Set<Promise<void>>();
-  let blockReplyChain: Promise<void> = Promise.resolve();
-  let blockReplyAborted = false;
-  let didLogBlockReplyAbort = false;
-  let didStreamBlockReply = false;
  const blockReplyTimeoutMs =
    opts?.blockReplyTimeoutMs ?? BLOCK_REPLY_SEND_TIMEOUT_MS;
-  const buildPayloadKey = (payload: ReplyPayload) => {
-    const text = payload.text?.trim() ?? "";
-    const mediaList = payload.mediaUrls?.length
-      ? payload.mediaUrls
-      : payload.mediaUrl
-        ? [payload.mediaUrl]
-        : [];
-    return JSON.stringify({
-      text,
-      mediaList,
-      replyToId: payload.replyToId ?? null,
-    });
-  };
  const replyToChannel =
    sessionCtx.OriginatingChannel ??
    ((sessionCtx.Surface ?? sessionCtx.Provider)?.toLowerCase() as
@@ -265,6 +230,23 @@ export async function runReplyAgent(params: {
    replyToChannel,
  );
  const cfg = followupRun.run.config;
+  const blockReplyCoalescing =
+    blockStreamingEnabled && opts?.onBlockReply
+      ? resolveBlockStreamingCoalescing(
+          cfg,
+          sessionCtx.Provider,
+          sessionCtx.AccountId,
+          blockReplyChunking,
+        )
+      : undefined;
+  const blockReplyPipeline =
+    blockStreamingEnabled && opts?.onBlockReply
+      ? createBlockReplyPipeline({
+          onBlockReply: opts.onBlockReply,
+          timeoutMs: blockReplyTimeoutMs,
+          coalescing: blockReplyCoalescing,
+        })
+      : null;

  if (shouldSteer && isStreaming) {
    const steered = queueEmbeddedPiMessage(
@@ -511,15 +493,6 @@ export async function runReplyAgent(params: {
                      text: cleaned,
                      audioAsVoice: audioTagResult.audioAsVoice,
                    });
-                    const payloadKey = buildPayloadKey(blockPayload);
-                    if (
-                      streamedPayloadKeys.has(payloadKey) ||
-                      pendingStreamedPayloadKeys.has(payloadKey)
-                    ) {
-                      return;
-                    }
-                    if (blockReplyAborted) return;
-                    pendingStreamedPayloadKeys.add(payloadKey);
                    void typingSignals
                      .signalTextDelta(taggedPayload.text)
                      .catch((err) => {
@@ -527,50 +500,7 @@ export async function runReplyAgent(params: {
                          `block reply typing signal failed: ${String(err)}`,
                        );
                      });
-                    const timeoutError = new Error(
-                      `block reply delivery timed out after ${blockReplyTimeoutMs}ms`,
-                    );
-                    const abortController = new AbortController();
-                    blockReplyChain = blockReplyChain
-                      .then(async () => {
-                        if (blockReplyAborted) return false;
-                        await withTimeout(
-                          opts.onBlockReply?.(blockPayload, {
-                            abortSignal: abortController.signal,
-                            timeoutMs: blockReplyTimeoutMs,
-                          }) ?? Promise.resolve(),
-                          blockReplyTimeoutMs,
-                          timeoutError,
-                        );
-                        return true;
-                      })
-                      .then((didSend) => {
-                        if (!didSend) return;
-                        streamedPayloadKeys.add(payloadKey);
-                        didStreamBlockReply = true;
-                      })
-                      .catch((err) => {
-                        if (err === timeoutError) {
-                          abortController.abort();
-                          blockReplyAborted = true;
-                          if (!didLogBlockReplyAbort) {
-                            didLogBlockReplyAbort = true;
-                            logVerbose(
-                              `block reply delivery timed out after ${blockReplyTimeoutMs}ms; skipping remaining block replies to preserve ordering`,
-                            );
-                          }
-                          return;
-                        }
-                        logVerbose(
-                          `block reply delivery failed: ${String(err)}`,
-                        );
-                      })
-                      .finally(() => {
-                        pendingStreamedPayloadKeys.delete(payloadKey);
-                      });
-                    const task = blockReplyChain;
-                    pendingBlockTasks.add(task);
-                    void task.finally(() => pendingBlockTasks.delete(task));
+                    blockReplyPipeline?.enqueue(blockPayload);
                  }
                : undefined,
            shouldEmitToolResult,
@@ -684,8 +614,9 @@ export async function runReplyAgent(params: {
    }

    const payloadArray = runResult.payloads ?? [];
-    if (pendingBlockTasks.size > 0) {
-      await Promise.allSettled(pendingBlockTasks);
+    if (blockReplyPipeline) {
+      await blockReplyPipeline.flush({ force: true });
+      blockReplyPipeline.stop();
    }
    if (pendingToolTasks.size > 0) {
      await Promise.allSettled(pendingToolTasks);
@@ -736,7 +667,9 @@ export async function runReplyAgent(params: {
    // Drop final payloads only when block streaming succeeded end-to-end.
    // If streaming aborted (e.g., timeout), fall back to final payloads.
    const shouldDropFinalPayloads =
-      blockStreamingEnabled && didStreamBlockReply && !blockReplyAborted;
+      blockStreamingEnabled &&
+      Boolean(blockReplyPipeline?.didStream()) &&
+      !blockReplyPipeline?.isAborted();
    const messagingToolSentTexts = runResult.messagingToolSentTexts ?? [];
    const messagingToolSentTargets = runResult.messagingToolSentTargets ?? [];
    const suppressMessagingToolReplies = shouldSuppressMessagingToolReplies({
@@ -753,7 +686,7 @@ export async function runReplyAgent(params: {
      ? []
      : blockStreamingEnabled
        ? dedupedPayloads.filter(
-            (payload) => !streamedPayloadKeys.has(buildPayloadKey(payload)),
+            (payload) => !blockReplyPipeline?.hasSentPayload(payload),
          )
        : dedupedPayloads;
    const replyPayloads = suppressMessagingToolReplies ? [] : filteredPayloads;
@@ -886,6 +819,7 @@ export async function runReplyAgent(params: {
      finalPayloads.length === 1 ? finalPayloads[0] : finalPayloads,
    );
  } finally {
+    blockReplyPipeline?.stop();
    typing.markRunComplete();
  }
 }
--- a/src/auto-reply/reply/block-reply-coalescer.test.ts
+++ b/src/auto-reply/reply/block-reply-coalescer.test.ts
@@ -0,0 +1,71 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { createBlockReplyCoalescer } from "./block-reply-coalescer.js";
+
+describe("block reply coalescer", () => {
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it("coalesces chunks within the idle window", async () => {
+    vi.useFakeTimers();
+    const flushes: string[] = [];
+    const coalescer = createBlockReplyCoalescer({
+      config: { minChars: 1, maxChars: 200, idleMs: 100, joiner: " " },
+      shouldAbort: () => false,
+      onFlush: (payload) => {
+        flushes.push(payload.text ?? "");
+      },
+    });
+
+    coalescer.enqueue({ text: "Hello" });
+    coalescer.enqueue({ text: "world" });
+
+    await vi.advanceTimersByTimeAsync(100);
+    expect(flushes).toEqual(["Hello world"]);
+    coalescer.stop();
+  });
+
+  it("waits until minChars before idle flush", async () => {
+    vi.useFakeTimers();
+    const flushes: string[] = [];
+    const coalescer = createBlockReplyCoalescer({
+      config: { minChars: 10, maxChars: 200, idleMs: 50, joiner: " " },
+      shouldAbort: () => false,
+      onFlush: (payload) => {
+        flushes.push(payload.text ?? "");
+      },
+    });
+
+    coalescer.enqueue({ text: "short" });
+    await vi.advanceTimersByTimeAsync(50);
+    expect(flushes).toEqual([]);
+
+    coalescer.enqueue({ text: "message" });
+    await vi.advanceTimersByTimeAsync(50);
+    expect(flushes).toEqual(["short message"]);
+    coalescer.stop();
+  });
+
+  it("flushes buffered text before media payloads", () => {
+    const flushes: Array<{ text?: string; mediaUrls?: string[] }> = [];
+    const coalescer = createBlockReplyCoalescer({
+      config: { minChars: 1, maxChars: 200, idleMs: 0, joiner: " " },
+      shouldAbort: () => false,
+      onFlush: (payload) => {
+        flushes.push({
+          text: payload.text,
+          mediaUrls: payload.mediaUrls,
+        });
+      },
+    });
+
+    coalescer.enqueue({ text: "Hello" });
+    coalescer.enqueue({ text: "world" });
+    coalescer.enqueue({ mediaUrls: ["https://example.com/a.png"] });
+    void coalescer.flush({ force: true });
+
+    expect(flushes[0].text).toBe("Hello world");
+    expect(flushes[1].mediaUrls).toEqual(["https://example.com/a.png"]);
+    coalescer.stop();
+  });
+});
--- a/src/auto-reply/reply/block-reply-coalescer.ts
+++ b/src/auto-reply/reply/block-reply-coalescer.ts
@@ -0,0 +1,125 @@
+import type { ReplyPayload } from "../types.js";
+import type { BlockStreamingCoalescing } from "./block-streaming.js";
+
+export type BlockReplyCoalescer = {
+  enqueue: (payload: ReplyPayload) => void;
+  flush: (options?: { force?: boolean }) => Promise<void>;
+  hasBuffered: () => boolean;
+  stop: () => void;
+};
+
+export function createBlockReplyCoalescer(params: {
+  config: BlockStreamingCoalescing;
+  shouldAbort: () => boolean;
+  onFlush: (payload: ReplyPayload) => Promise<void> | void;
+}): BlockReplyCoalescer {
+  const { config, shouldAbort, onFlush } = params;
+  const minChars = Math.max(1, Math.floor(config.minChars));
+  const maxChars = Math.max(minChars, Math.floor(config.maxChars));
+  const idleMs = Math.max(0, Math.floor(config.idleMs));
+  const joiner = config.joiner ?? "";
+
+  let bufferText = "";
+  let bufferReplyToId: ReplyPayload["replyToId"];
+  let bufferAudioAsVoice: ReplyPayload["audioAsVoice"];
+  let idleTimer: NodeJS.Timeout | undefined;
+
+  const clearIdleTimer = () => {
+    if (!idleTimer) return;
+    clearTimeout(idleTimer);
+    idleTimer = undefined;
+  };
+
+  const resetBuffer = () => {
+    bufferText = "";
+    bufferReplyToId = undefined;
+    bufferAudioAsVoice = undefined;
+  };
+
+  const scheduleIdleFlush = () => {
+    if (idleMs <= 0) return;
+    clearIdleTimer();
+    idleTimer = setTimeout(() => {
+      void flush({ force: false });
+    }, idleMs);
+  };
+
+  const flush = async (options?: { force?: boolean }) => {
+    clearIdleTimer();
+    if (shouldAbort()) {
+      resetBuffer();
+      return;
+    }
+    if (!bufferText) return;
+    if (!options?.force && bufferText.length < minChars) {
+      scheduleIdleFlush();
+      return;
+    }
+    const payload: ReplyPayload = {
+      text: bufferText,
+      replyToId: bufferReplyToId,
+      audioAsVoice: bufferAudioAsVoice,
+    };
+    resetBuffer();
+    await onFlush(payload);
+  };
+
+  const enqueue = (payload: ReplyPayload) => {
+    if (shouldAbort()) return;
+    const hasMedia =
+      Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
+    const text = payload.text ?? "";
+    const hasText = text.trim().length > 0;
+    if (hasMedia) {
+      void flush({ force: true });
+      void onFlush(payload);
+      return;
+    }
+    if (!hasText) return;
+
+    if (
+      bufferText &&
+      (bufferReplyToId !== payload.replyToId ||
+        bufferAudioAsVoice !== payload.audioAsVoice)
+    ) {
+      void flush({ force: true });
+    }
+
+    if (!bufferText) {
+      bufferReplyToId = payload.replyToId;
+      bufferAudioAsVoice = payload.audioAsVoice;
+    }
+
+    const nextText = bufferText ? `${bufferText}${joiner}${text}` : text;
+    if (nextText.length > maxChars) {
+      if (bufferText) {
+        void flush({ force: true });
+        bufferReplyToId = payload.replyToId;
+        bufferAudioAsVoice = payload.audioAsVoice;
+        if (text.length >= maxChars) {
+          void onFlush(payload);
+          return;
+        }
+        bufferText = text;
+        scheduleIdleFlush();
+        return;
+      }
+      void onFlush(payload);
+      return;
+    }
+
+    bufferText = nextText;
+    if (bufferText.length >= maxChars) {
+      void flush({ force: true });
+      return;
+    }
+    scheduleIdleFlush();
+  };
+
+  return {
+    enqueue,
+    flush,
+    hasBuffered: () => Boolean(bufferText),
+    stop: () => clearIdleTimer(),
+  };
+}
--- a/src/auto-reply/reply/block-reply-pipeline.ts
+++ b/src/auto-reply/reply/block-reply-pipeline.ts
@@ -0,0 +1,173 @@
+import { logVerbose } from "../../globals.js";
+import type { ReplyPayload } from "../types.js";
+import { createBlockReplyCoalescer } from "./block-reply-coalescer.js";
+import type { BlockStreamingCoalescing } from "./block-streaming.js";
+
+export type BlockReplyPipeline = {
+  enqueue: (payload: ReplyPayload) => void;
+  flush: (options?: { force?: boolean }) => Promise<void>;
+  stop: () => void;
+  hasBuffered: () => boolean;
+  didStream: () => boolean;
+  isAborted: () => boolean;
+  hasSentPayload: (payload: ReplyPayload) => boolean;
+};
+
+export function createBlockReplyPayloadKey(payload: ReplyPayload): string {
+  const text = payload.text?.trim() ?? "";
+  const mediaList = payload.mediaUrls?.length
+    ? payload.mediaUrls
+    : payload.mediaUrl
+      ? [payload.mediaUrl]
+      : [];
+  return JSON.stringify({
+    text,
+    mediaList,
+    replyToId: payload.replyToId ?? null,
+  });
+}
+
+const withTimeout = async <T>(
+  promise: Promise<T>,
+  timeoutMs: number,
+  timeoutError: Error,
+): Promise<T> => {
+  if (!timeoutMs || timeoutMs <= 0) return promise;
+  let timer: NodeJS.Timeout | undefined;
+  const timeoutPromise = new Promise<never>((_, reject) => {
+    timer = setTimeout(() => reject(timeoutError), timeoutMs);
+  });
+  try {
+    return await Promise.race([promise, timeoutPromise]);
+  } finally {
+    if (timer) clearTimeout(timer);
+  }
+};
+
+export function createBlockReplyPipeline(params: {
+  onBlockReply: (
+    payload: ReplyPayload,
+    options?: { abortSignal?: AbortSignal; timeoutMs?: number },
+  ) => Promise<void> | void;
+  timeoutMs: number;
+  coalescing?: BlockStreamingCoalescing;
+}): BlockReplyPipeline {
+  const { onBlockReply, timeoutMs, coalescing } = params;
+  const sentKeys = new Set<string>();
+  const pendingKeys = new Set<string>();
+  const seenKeys = new Set<string>();
+  const bufferedKeys = new Set<string>();
+  let sendChain: Promise<void> = Promise.resolve();
+  let aborted = false;
+  let didStream = false;
+  let didLogTimeout = false;
+
+  const sendPayload = (payload: ReplyPayload, skipSeen?: boolean) => {
+    if (aborted) return;
+    const payloadKey = createBlockReplyPayloadKey(payload);
+    if (!skipSeen) {
+      if (seenKeys.has(payloadKey)) return;
+      seenKeys.add(payloadKey);
+    }
+    if (sentKeys.has(payloadKey) || pendingKeys.has(payloadKey)) return;
+    pendingKeys.add(payloadKey);
+
+    const timeoutError = new Error(
+      `block reply delivery timed out after ${timeoutMs}ms`,
+    );
+    const abortController = new AbortController();
+    sendChain = sendChain
+      .then(async () => {
+        if (aborted) return false;
+        await withTimeout(
+          onBlockReply(payload, {
+            abortSignal: abortController.signal,
+            timeoutMs,
+          }) ?? Promise.resolve(),
+          timeoutMs,
+          timeoutError,
+        );
+        return true;
+      })
+      .then((didSend) => {
+        if (!didSend) return;
+        sentKeys.add(payloadKey);
+        didStream = true;
+      })
+      .catch((err) => {
+        if (err === timeoutError) {
+          abortController.abort();
+          aborted = true;
+          if (!didLogTimeout) {
+            didLogTimeout = true;
+            logVerbose(
+              `block reply delivery timed out after ${timeoutMs}ms; skipping remaining block replies to preserve ordering`,
+            );
+          }
+          return;
+        }
+        logVerbose(`block reply delivery failed: ${String(err)}`);
+      })
+      .finally(() => {
+        pendingKeys.delete(payloadKey);
+      });
+  };
+
+  const coalescer = coalescing
+    ? createBlockReplyCoalescer({
+        config: coalescing,
+        shouldAbort: () => aborted,
+        onFlush: (payload) => {
+          bufferedKeys.clear();
+          sendPayload(payload);
+        },
+      })
+    : null;
+
+  const enqueue = (payload: ReplyPayload) => {
+    if (aborted) return;
+    const hasMedia =
+      Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
+    if (hasMedia) {
+      void coalescer?.flush({ force: true });
+      sendPayload(payload);
+      return;
+    }
+    if (coalescer) {
+      const payloadKey = createBlockReplyPayloadKey(payload);
+      if (
+        seenKeys.has(payloadKey) ||
+        pendingKeys.has(payloadKey) ||
+        bufferedKeys.has(payloadKey)
+      ) {
+        return;
+      }
+      bufferedKeys.add(payloadKey);
+      coalescer.enqueue(payload);
+      return;
+    }
+    sendPayload(payload);
+  };
+
+  const flush = async (options?: { force?: boolean }) => {
+    await coalescer?.flush(options);
+    await sendChain;
+  };
+
+  const stop = () => {
+    coalescer?.stop();
+  };
+
+  return {
+    enqueue,
+    flush,
+    stop,
+    hasBuffered: () => Boolean(coalescer?.hasBuffered()),
+    didStream: () => didStream,
+    isAborted: () => aborted,
+    hasSentPayload: (payload) => {
+      const payloadKey = createBlockReplyPayloadKey(payload);
+      return sentKeys.has(payloadKey);
+    },
+  };
+}
--- a/src/auto-reply/reply/block-streaming.ts
+++ b/src/auto-reply/reply/block-streaming.ts
@@ -1,8 +1,10 @@
 import type { ClawdbotConfig } from "../../config/config.js";
+import { normalizeAccountId } from "../../routing/session-key.js";
 import { resolveTextChunkLimit, type TextChunkProvider } from "../chunk.js";

 const DEFAULT_BLOCK_STREAM_MIN = 800;
 const DEFAULT_BLOCK_STREAM_MAX = 1200;
+const DEFAULT_BLOCK_STREAM_COALESCE_IDLE_MS = 400;

 const BLOCK_CHUNK_PROVIDERS = new Set<TextChunkProvider>([
  "whatsapp",
@@ -12,6 +14,7 @@ const BLOCK_CHUNK_PROVIDERS = new Set<TextChunkProvider>([
  "signal",
  "imessage",
  "webchat",
+  "msteams",
 ]);

 function normalizeChunkProvider(
@@ -24,16 +27,24 @@ function normalizeChunkProvider(
    : undefined;
 }

+export type BlockStreamingCoalescing = {
+  minChars: number;
+  maxChars: number;
+  idleMs: number;
+  joiner: string;
+};
+
 export function resolveBlockStreamingChunking(
  cfg: ClawdbotConfig | undefined,
  provider?: string,
+  accountId?: string | null,
 ): {
  minChars: number;
  maxChars: number;
  breakPreference: "paragraph" | "newline" | "sentence";
 } {
  const providerKey = normalizeChunkProvider(provider);
-  const textLimit = resolveTextChunkLimit(cfg, providerKey);
+  const textLimit = resolveTextChunkLimit(cfg, providerKey, accountId);
  const chunkCfg = cfg?.agents?.defaults?.blockStreamingChunk;
  const maxRequested = Math.max(
    1,
@@ -52,3 +63,88 @@ export function resolveBlockStreamingChunking(
      : "paragraph";
  return { minChars, maxChars, breakPreference };
 }
+
+export function resolveBlockStreamingCoalescing(
+  cfg: ClawdbotConfig | undefined,
+  provider?: string,
+  accountId?: string | null,
+  chunking?: {
+    minChars: number;
+    maxChars: number;
+    breakPreference: "paragraph" | "newline" | "sentence";
+  },
+): BlockStreamingCoalescing {
+  const providerKey = normalizeChunkProvider(provider);
+  const textLimit = resolveTextChunkLimit(cfg, providerKey, accountId);
+  const normalizedAccountId = normalizeAccountId(accountId);
+  const providerCfg = (() => {
+    if (!cfg || !providerKey) return undefined;
+    if (providerKey === "whatsapp") {
+      return (
+        cfg.whatsapp?.accounts?.[normalizedAccountId]?.blockStreamingCoalesce ??
+        cfg.whatsapp?.blockStreamingCoalesce
+      );
+    }
+    if (providerKey === "telegram") {
+      return (
+        cfg.telegram?.accounts?.[normalizedAccountId]?.blockStreamingCoalesce ??
+        cfg.telegram?.blockStreamingCoalesce
+      );
+    }
+    if (providerKey === "discord") {
+      return (
+        cfg.discord?.accounts?.[normalizedAccountId]?.blockStreamingCoalesce ??
+        cfg.discord?.blockStreamingCoalesce
+      );
+    }
+    if (providerKey === "slack") {
+      return (
+        cfg.slack?.accounts?.[normalizedAccountId]?.blockStreamingCoalesce ??
+        cfg.slack?.blockStreamingCoalesce
+      );
+    }
+    if (providerKey === "signal") {
+      return (
+        cfg.signal?.accounts?.[normalizedAccountId]?.blockStreamingCoalesce ??
+        cfg.signal?.blockStreamingCoalesce
+      );
+    }
+    if (providerKey === "imessage") {
+      return (
+        cfg.imessage?.accounts?.[normalizedAccountId]?.blockStreamingCoalesce ??
+        cfg.imessage?.blockStreamingCoalesce
+      );
+    }
+    if (providerKey === "msteams") {
+      return cfg.msteams?.blockStreamingCoalesce;
+    }
+    return undefined;
+  })();
+  const coalesceCfg =
+    providerCfg ?? cfg?.agents?.defaults?.blockStreamingCoalesce;
+  const minRequested = Math.max(
+    1,
+    Math.floor(
+      coalesceCfg?.minChars ?? chunking?.minChars ?? DEFAULT_BLOCK_STREAM_MIN,
+    ),
+  );
+  const maxRequested = Math.max(
+    1,
+    Math.floor(coalesceCfg?.maxChars ?? textLimit),
+  );
+  const maxChars = Math.max(1, Math.min(maxRequested, textLimit));
+  const minChars = Math.min(minRequested, maxChars);
+  const idleMs = Math.max(
+    0,
+    Math.floor(coalesceCfg?.idleMs ?? DEFAULT_BLOCK_STREAM_COALESCE_IDLE_MS),
+  );
+  const preference = chunking?.breakPreference ?? "paragraph";
+  const joiner =
+    preference === "sentence" ? " " : preference === "newline" ? "\n" : "\n\n";
+  return {
+    minChars,
+    maxChars,
+    idleMs,
+    joiner,
+  };
+}
--- a/src/config/types.ts
+++ b/src/config/types.ts
@@ -16,6 +16,12 @@ export type OutboundRetryConfig = {
  jitter?: number;
 };

+export type BlockStreamingCoalesceConfig = {
+  minChars?: number;
+  maxChars?: number;
+  idleMs?: number;
+};
+
 export type SessionSendPolicyAction = "allow" | "deny";
 export type SessionSendPolicyMatch = {
  provider?: string;
@@ -127,6 +133,8 @@ export type WhatsAppConfig = {
  textChunkLimit?: number;
  /** Disable block streaming for this account. */
  blockStreaming?: boolean;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  /** Per-action tool gating (default: true for all). */
  actions?: WhatsAppActionConfig;
  groups?: Record<
@@ -153,6 +161,8 @@ export type WhatsAppAccountConfig = {
  groupPolicy?: GroupPolicy;
  textChunkLimit?: number;
  blockStreaming?: boolean;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  groups?: Record<
    string,
    {
@@ -306,6 +316,8 @@ export type TelegramAccountConfig = {
  textChunkLimit?: number;
  /** Disable block streaming for this account. */
  blockStreaming?: boolean;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  /** Draft streaming mode for Telegram (off|partial|block). Default: partial. */
  streamMode?: "off" | "partial" | "block";
  mediaMaxMb?: number;
@@ -429,6 +441,8 @@ export type DiscordAccountConfig = {
  textChunkLimit?: number;
  /** Disable block streaming for this account. */
  blockStreaming?: boolean;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  /**
   * Soft max line count per Discord message.
   * Discord clients can clip/collapse very tall messages; splitting by lines
@@ -525,6 +539,8 @@ export type SlackAccountConfig = {
  groupPolicy?: GroupPolicy;
  textChunkLimit?: number;
  blockStreaming?: boolean;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  mediaMaxMb?: number;
  /** Reaction notification mode (off|own|all|allowlist). Default: own. */
  reactionNotifications?: SlackReactionNotificationMode;
@@ -579,6 +595,8 @@ export type SignalAccountConfig = {
  /** Outbound text chunk size (chars). Default: 4000. */
  textChunkLimit?: number;
  blockStreaming?: boolean;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  mediaMaxMb?: number;
 };

@@ -632,6 +650,8 @@ export type MSTeamsConfig = {
  allowFrom?: Array<string>;
  /** Outbound text chunk size (chars). Default: 4000. */
  textChunkLimit?: number;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  /**
   * Allowed host suffixes for inbound attachment downloads.
   * Use ["*"] to allow any host (not recommended).
@@ -678,6 +698,8 @@ export type IMessageAccountConfig = {
  /** Outbound text chunk size (chars). Default: 4000. */
  textChunkLimit?: number;
  blockStreaming?: boolean;
+  /** Merge streamed block replies before sending. */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  groups?: Record<
    string,
    {
@@ -1201,6 +1223,11 @@ export type AgentDefaultsConfig = {
    maxChars?: number;
    breakPreference?: "paragraph" | "newline" | "sentence";
  };
+  /**
+   * Block reply coalescing (merge streamed chunks before send).
+   * idleMs: wait time before flushing when idle.
+   */
+  blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
  timeoutSeconds?: number;
  /** Max inbound media size in MB for agent-visible attachments (text note or future image attach). */
  mediaMaxMb?: number;
--- a/src/config/zod-schema.ts
+++ b/src/config/zod-schema.ts
@@ -97,6 +97,12 @@ const GroupPolicySchema = z.enum(["open", "disabled", "allowlist"]);

 const DmPolicySchema = z.enum(["pairing", "allowlist", "open", "disabled"]);

+const BlockStreamingCoalesceSchema = z.object({
+  minChars: z.number().int().positive().optional(),
+  maxChars: z.number().int().positive().optional(),
+  idleMs: z.number().int().nonnegative().optional(),
+});
+
 const normalizeAllowFrom = (values?: Array<string | number>): string[] =>
  (values ?? []).map((v) => String(v).trim()).filter(Boolean);

@@ -192,6 +198,7 @@ const TelegramAccountSchemaBase = z.object({
  groupPolicy: GroupPolicySchema.optional().default("open"),
  textChunkLimit: z.number().int().positive().optional(),
  blockStreaming: z.boolean().optional(),
+  blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
  streamMode: z.enum(["off", "partial", "block"]).optional().default("partial"),
  mediaMaxMb: z.number().positive().optional(),
  retry: RetryConfigSchema,
@@ -277,6 +284,7 @@ const DiscordAccountSchema = z.object({
  groupPolicy: GroupPolicySchema.optional().default("open"),
  textChunkLimit: z.number().int().positive().optional(),
  blockStreaming: z.boolean().optional(),
+  blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
  maxLinesPerMessage: z.number().int().positive().optional(),
  mediaMaxMb: z.number().positive().optional(),
  historyLimit: z.number().int().min(0).optional(),
@@ -347,6 +355,7 @@ const SlackAccountSchema = z.object({
  groupPolicy: GroupPolicySchema.optional().default("open"),
  textChunkLimit: z.number().int().positive().optional(),
  blockStreaming: z.boolean().optional(),
+  blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
  mediaMaxMb: z.number().positive().optional(),
  reactionNotifications: z.enum(["off", "own", "all", "allowlist"]).optional(),
  reactionAllowlist: z.array(z.union([z.string(), z.number()])).optional(),
@@ -398,6 +407,7 @@ const SignalAccountSchemaBase = z.object({
  groupPolicy: GroupPolicySchema.optional().default("open"),
  textChunkLimit: z.number().int().positive().optional(),
  blockStreaming: z.boolean().optional(),
+  blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
  mediaMaxMb: z.number().int().positive().optional(),
 });

@@ -443,6 +453,7 @@ const IMessageAccountSchemaBase = z.object({
  mediaMaxMb: z.number().int().positive().optional(),
  textChunkLimit: z.number().int().positive().optional(),
  blockStreaming: z.boolean().optional(),
+  blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
  groups: z
    .record(
      z.string(),
@@ -507,6 +518,7 @@ const MSTeamsConfigSchema = z
    dmPolicy: DmPolicySchema.optional().default("pairing"),
    allowFrom: z.array(z.string()).optional(),
    textChunkLimit: z.number().int().positive().optional(),
+    blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
    mediaAllowHosts: z.array(z.string()).optional(),
    requireMention: z.boolean().optional(),
    replyStyle: MSTeamsReplyStyleSchema.optional(),
@@ -994,6 +1006,7 @@ const AgentDefaultsSchema = z
          .optional(),
      })
      .optional(),
+    blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
    timeoutSeconds: z.number().int().positive().optional(),
    mediaMaxMb: z.number().positive().optional(),
    typingIntervalSeconds: z.number().int().positive().optional(),
@@ -1215,6 +1228,7 @@ export const ClawdbotSchema = z.object({
              groupPolicy: GroupPolicySchema.optional().default("open"),
              textChunkLimit: z.number().int().positive().optional(),
              blockStreaming: z.boolean().optional(),
+              blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
              groups: z
                .record(
                  z.string(),
@@ -1249,6 +1263,7 @@ export const ClawdbotSchema = z.object({
      groupPolicy: GroupPolicySchema.optional().default("open"),
      textChunkLimit: z.number().int().positive().optional(),
      blockStreaming: z.boolean().optional(),
+      blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
      actions: z
        .object({
          reactions: z.boolean().optional(),
--- a/src/telegram/bot.ts
+++ b/src/telegram/bot.ts
@@ -673,7 +673,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
      : undefined;
    const draftChunking =
      draftStream && streamMode === "block"
-        ? resolveBlockStreamingChunking(cfg, "telegram")
+        ? resolveBlockStreamingChunking(cfg, "telegram", route.accountId)
        : undefined;
    const draftChunker = draftChunking
      ? new EmbeddedBlockChunker(draftChunking)