From 875b018ea1829569ab6623c776c64a2d496826c1 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sun, 25 Jan 2026 11:54:20 +0000
Subject: [PATCH] fix: stop sending tool summaries to channels

---
 src/agents/pi-embedded-runner/run.ts          |  2 +-
 .../reply/dispatch-from-config.test.ts        | 32 +++++++++++++++++++
 src/auto-reply/reply/dispatch-from-config.ts  | 22 ++-----------
 src/auto-reply/reply/route-reply.test.ts      | 17 ++++++++++
 src/auto-reply/reply/route-reply.ts           | 19 ++++++-----
 src/discord/monitor.slash.test.ts             | 11 +++----
 ...essages-without-mention-by-default.test.ts | 12 +++----
 ...ends-tool-summaries-responseprefix.test.ts | 12 +++----
 ...ends-tool-summaries-responseprefix.test.ts | 22 ++++---------
 ...topic-skill-filters-system-prompts.test.ts |  7 ++--
 ...ies-without-native-reply-threading.test.ts | 12 +++----
 src/telegram/bot.test.ts                      | 19 ++++-------
 ...mmaries-immediately-responseprefix.test.ts | 14 ++------
 13 files changed, 100 insertions(+), 101 deletions(-)
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index ea2488a1c..adbe6ad49 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -599,7 +599,7 @@ export async function runEmbeddedPiAgent(
             verboseLevel: params.verboseLevel,
             reasoningLevel: params.reasoningLevel,
             toolResultFormat: resolvedToolResultFormat,
-            inlineToolResultsAllowed: !params.onPartialReply && !params.onToolResult,
+            inlineToolResultsAllowed: false,
           });
 
           log.debug(
diff --git a/src/auto-reply/reply/dispatch-from-config.test.ts b/src/auto-reply/reply/dispatch-from-config.test.ts
index 4e1982674..0504694bf 100644
--- a/src/auto-reply/reply/dispatch-from-config.test.ts
+++ b/src/auto-reply/reply/dispatch-from-config.test.ts
@@ -138,6 +138,38 @@ describe("dispatchReplyFromConfig", () => {
     );
   });
 
+  it("does not provide onToolResult when routing cross-provider", async () => {
+    mocks.tryFastAbortFromMessage.mockResolvedValue({
+      handled: false,
+      aborted: false,
+    });
+    mocks.routeReply.mockClear();
+    const cfg = {} as ClawdbotConfig;
+    const dispatcher = createDispatcher();
+    const ctx = buildTestCtx({
+      Provider: "slack",
+      OriginatingChannel: "telegram",
+      OriginatingTo: "telegram:999",
+    });
+
+    const replyResolver = async (
+      _ctx: MsgContext,
+      opts: GetReplyOptions | undefined,
+      _cfg: ClawdbotConfig,
+    ) => {
+      expect(opts?.onToolResult).toBeUndefined();
+      return { text: "hi" } satisfies ReplyPayload;
+    };
+
+    await dispatchReplyFromConfig({ ctx, cfg, dispatcher, replyResolver });
+
+    expect(mocks.routeReply).toHaveBeenCalledWith(
+      expect.objectContaining({
+        payload: expect.objectContaining({ text: "hi" }),
+      }),
+    );
+  });
+
   it("fast-aborts without calling the reply resolver", async () => {
     mocks.tryFastAbortFromMessage.mockResolvedValue({
       handled: true,
diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts
index 16c83bf30..f946c05f9 100644
--- a/src/auto-reply/reply/dispatch-from-config.ts
+++ b/src/auto-reply/reply/dispatch-from-config.ts
@@ -206,6 +206,7 @@ export async function dispatchReplyFromConfig(params: {
   const sendPayloadAsync = async (
     payload: ReplyPayload,
     abortSignal?: AbortSignal,
+    mirror?: boolean,
   ): Promise<void> => {
     // TypeScript doesn't narrow these from the shouldRouteToOriginating check,
     // but they're guaranteed non-null when this function is called.
@@ -220,6 +221,7 @@ export async function dispatchReplyFromConfig(params: {
       threadId: ctx.MessageThreadId,
       cfg,
       abortSignal,
+      mirror,
     });
     if (!result.ok) {
       logVerbose(`dispatch-from-config: route-reply failed: ${result.error ?? "unknown error"}`);
@@ -268,24 +270,6 @@ export async function dispatchReplyFromConfig(params: {
       ctx,
       {
         ...params.replyOptions,
-        onToolResult: (payload: ReplyPayload) => {
-          const run = async () => {
-            const ttsPayload = await maybeApplyTtsToPayload({
-              payload,
-              cfg,
-              channel: ttsChannel,
-              kind: "tool",
-              inboundAudio,
-              ttsAuto: sessionTtsAuto,
-            });
-            if (shouldRouteToOriginating) {
-              await sendPayloadAsync(ttsPayload);
-            } else {
-              dispatcher.sendToolResult(ttsPayload);
-            }
-          };
-          return run();
-        },
         onBlockReply: (payload: ReplyPayload, context) => {
           const run = async () => {
             const ttsPayload = await maybeApplyTtsToPayload({
@@ -297,7 +281,7 @@ export async function dispatchReplyFromConfig(params: {
               ttsAuto: sessionTtsAuto,
             });
             if (shouldRouteToOriginating) {
-              await sendPayloadAsync(ttsPayload, context?.abortSignal);
+              await sendPayloadAsync(ttsPayload, context?.abortSignal, false);
             } else {
               dispatcher.sendBlockReply(ttsPayload);
             }
diff --git a/src/auto-reply/reply/route-reply.test.ts b/src/auto-reply/reply/route-reply.test.ts
index f587ec481..0e6e0a721 100644
--- a/src/auto-reply/reply/route-reply.test.ts
+++ b/src/auto-reply/reply/route-reply.test.ts
@@ -379,6 +379,23 @@ describe("routeReply", () => {
       }),
     );
   });
+
+  it("skips mirror data when mirror is false", async () => {
+    mocks.deliverOutboundPayloads.mockResolvedValue([]);
+    await routeReply({
+      payload: { text: "hi" },
+      channel: "slack",
+      to: "channel:C123",
+      sessionKey: "agent:main:main",
+      mirror: false,
+      cfg: {} as never,
+    });
+    expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith(
+      expect.objectContaining({
+        mirror: undefined,
+      }),
+    );
+  });
 });
 
 const emptyRegistry = createRegistry([]);
diff --git a/src/auto-reply/reply/route-reply.ts b/src/auto-reply/reply/route-reply.ts
index bbc7efa7d..7db417794 100644
--- a/src/auto-reply/reply/route-reply.ts
+++ b/src/auto-reply/reply/route-reply.ts
@@ -33,6 +33,8 @@ export type RouteReplyParams = {
   cfg: ClawdbotConfig;
   /** Optional abort signal for cooperative cancellation. */
   abortSignal?: AbortSignal;
+  /** Mirror reply into session transcript (default: true when sessionKey is set). */
+  mirror?: boolean;
 };
 
 export type RouteReplyResult = {
@@ -118,14 +120,15 @@ export async function routeReply(params: RouteReplyParams): Promise<RouteReplyRe
       replyToId: resolvedReplyToId ?? null,
       threadId: resolvedThreadId,
       abortSignal,
-      mirror: params.sessionKey
-        ? {
-            sessionKey: params.sessionKey,
-            agentId: resolveSessionAgentId({ sessionKey: params.sessionKey, config: cfg }),
-            text,
-            mediaUrls,
-          }
-        : undefined,
+      mirror:
+        params.mirror !== false && params.sessionKey
+          ? {
+              sessionKey: params.sessionKey,
+              agentId: resolveSessionAgentId({ sessionKey: params.sessionKey, config: cfg }),
+              text,
+              mediaUrls,
+            }
+          : undefined,
     });
 
     const last = results.at(-1);
diff --git a/src/discord/monitor.slash.test.ts b/src/discord/monitor.slash.test.ts
index af098eb96..a6c43087d 100644
--- a/src/discord/monitor.slash.test.ts
+++ b/src/discord/monitor.slash.test.ts
@@ -34,15 +34,13 @@ vi.mock("../auto-reply/dispatch.js", async (importOriginal) => {
 beforeEach(() => {
   dispatchMock.mockReset().mockImplementation(async (params) => {
     if ("dispatcher" in params && params.dispatcher) {
-      params.dispatcher.sendToolResult({ text: "tool update" });
       params.dispatcher.sendFinalReply({ text: "final reply" });
-      return { queuedFinal: true, counts: { tool: 1, block: 0, final: 1 } };
+      return { queuedFinal: true, counts: { tool: 0, block: 0, final: 1 } };
     }
     if ("dispatcherOptions" in params && params.dispatcherOptions) {
       const { dispatcher, markDispatchIdle } = createReplyDispatcherWithTyping(
         params.dispatcherOptions,
       );
-      dispatcher.sendToolResult({ text: "tool update" });
       dispatcher.sendFinalReply({ text: "final reply" });
       await dispatcher.waitForIdle();
       markDispatchIdle();
@@ -53,7 +51,7 @@ beforeEach(() => {
 });
 
 describe("discord native commands", () => {
-  it("streams tool results for native slash commands", { timeout: 60_000 }, async () => {
+  it("skips tool results for native slash commands", { timeout: 60_000 }, async () => {
     const { ChannelType } = await import("@buape/carbon");
     const { createDiscordNativeCommand } = await import("./monitor.js");
 
@@ -97,8 +95,7 @@ describe("discord native commands", () => {
 
     expect(dispatchMock).toHaveBeenCalledTimes(1);
     expect(reply).toHaveBeenCalledTimes(1);
-    expect(followUp).toHaveBeenCalledTimes(1);
-    expect(reply.mock.calls[0]?.[0]?.content).toContain("tool");
-    expect(followUp.mock.calls[0]?.[0]?.content).toContain("final");
+    expect(followUp).toHaveBeenCalledTimes(0);
+    expect(reply.mock.calls[0]?.[0]?.content).toContain("final");
   });
 });
diff --git a/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts b/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts
index a0665044e..1c7330ab5 100644
--- a/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts
+++ b/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts
@@ -277,15 +277,12 @@ describe("monitorIMessageProvider", () => {
     expect(ctx.SessionKey).toBe("agent:main:imessage:group:2");
   });
 
-  it("prefixes tool and final replies with responsePrefix", async () => {
+  it("prefixes final replies with responsePrefix", async () => {
     config = {
       ...config,
       messages: { responsePrefix: "PFX" },
     };
-    replyMock.mockImplementation(async (_ctx, opts) => {
-      await opts?.onToolResult?.({ text: "tool update" });
-      return { text: "final reply" };
-    });
+    replyMock.mockResolvedValue({ text: "final reply" });
     const run = monitorIMessageProvider();
     await waitForSubscribe();
 
@@ -307,9 +304,8 @@ describe("monitorIMessageProvider", () => {
     closeResolve?.();
     await run;
 
-    expect(sendMock).toHaveBeenCalledTimes(2);
-    expect(sendMock.mock.calls[0][1]).toBe("PFX tool update");
-    expect(sendMock.mock.calls[1][1]).toBe("PFX final reply");
+    expect(sendMock).toHaveBeenCalledTimes(1);
+    expect(sendMock.mock.calls[0][1]).toBe("PFX final reply");
   });
 
   it("defaults to dmPolicy=pairing behavior when allowFrom is empty", async () => {
diff --git a/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts b/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts
index c3709890e..2d4e9c540 100644
--- a/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts
+++ b/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts
@@ -210,12 +210,9 @@ describe("monitorSignalProvider tool results", () => {
     );
   });
 
-  it("sends tool summaries with responsePrefix", async () => {
+  it("skips tool summaries with responsePrefix", async () => {
     const abortController = new AbortController();
-    replyMock.mockImplementation(async (_ctx, opts) => {
-      await opts?.onToolResult?.({ text: "tool update" });
-      return { text: "final reply" };
-    });
+    replyMock.mockResolvedValue({ text: "final reply" });
 
     streamMock.mockImplementation(async ({ onEvent }) => {
       const payload = {
@@ -243,9 +240,8 @@ describe("monitorSignalProvider tool results", () => {
 
     await flush();
 
-    expect(sendMock).toHaveBeenCalledTimes(2);
-    expect(sendMock.mock.calls[0][1]).toBe("PFX tool update");
-    expect(sendMock.mock.calls[1][1]).toBe("PFX final reply");
+    expect(sendMock).toHaveBeenCalledTimes(1);
+    expect(sendMock.mock.calls[0][1]).toBe("PFX final reply");
   });
 
   it("replies with pairing code when dmPolicy is pairing and no allowFrom is set", async () => {
diff --git a/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts b/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts
index bcd797793..ce7015399 100644
--- a/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts
+++ b/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts
@@ -23,11 +23,8 @@ beforeEach(() => {
 });
 
 describe("monitorSlackProvider tool results", () => {
-  it("sends tool summaries with responsePrefix", async () => {
-    replyMock.mockImplementation(async (_ctx, opts) => {
-      await opts?.onToolResult?.({ text: "tool update" });
-      return { text: "final reply" };
-    });
+  it("skips tool summaries with responsePrefix", async () => {
+    replyMock.mockResolvedValue({ text: "final reply" });
 
     const controller = new AbortController();
     const run = monitorSlackProvider({
@@ -55,9 +52,8 @@ describe("monitorSlackProvider tool results", () => {
     controller.abort();
     await run;
 
-    expect(sendMock).toHaveBeenCalledTimes(2);
-    expect(sendMock.mock.calls[0][1]).toBe("PFX tool update");
-    expect(sendMock.mock.calls[1][1]).toBe("PFX final reply");
+    expect(sendMock).toHaveBeenCalledTimes(1);
+    expect(sendMock.mock.calls[0][1]).toBe("PFX final reply");
   });
 
   it("drops events with mismatched api_app_id", async () => {
@@ -130,10 +126,7 @@ describe("monitorSlackProvider tool results", () => {
       },
     };
 
-    replyMock.mockImplementation(async (_ctx, opts) => {
-      await opts?.onToolResult?.({ text: "tool update" });
-      return { text: "final reply" };
-    });
+    replyMock.mockResolvedValue({ text: "final reply" });
 
     const controller = new AbortController();
     const run = monitorSlackProvider({
@@ -161,9 +154,8 @@ describe("monitorSlackProvider tool results", () => {
     controller.abort();
     await run;
 
-    expect(sendMock).toHaveBeenCalledTimes(2);
-    expect(sendMock.mock.calls[0][1]).toBe("tool update");
-    expect(sendMock.mock.calls[1][1]).toBe("final reply");
+    expect(sendMock).toHaveBeenCalledTimes(1);
+    expect(sendMock.mock.calls[0][1]).toBe("final reply");
   });
 
   it("preserves RawBody without injecting processed room history", async () => {
diff --git a/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts b/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts
index 0cda853be..1a7a9d40c 100644
--- a/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts
+++ b/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts
@@ -300,7 +300,7 @@ describe("createTelegramBot", () => {
       expect.objectContaining({ message_thread_id: 99 }),
     );
   });
-  it("streams tool summaries for native slash commands", async () => {
+  it("skips tool summaries for native slash commands", async () => {
     onSpy.mockReset();
     sendMessageSpy.mockReset();
     commandSpy.mockReset();
@@ -338,9 +338,8 @@ describe("createTelegramBot", () => {
       match: "on",
     });
 
-    expect(sendMessageSpy).toHaveBeenCalledTimes(2);
-    expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("tool update");
-    expect(sendMessageSpy.mock.calls[1]?.[1]).toContain("final reply");
+    expect(sendMessageSpy).toHaveBeenCalledTimes(1);
+    expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("final reply");
   });
   it("dedupes duplicate message updates by update_id", async () => {
     onSpy.mockReset();
diff --git a/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts b/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts
index 80c880b79..dffe8ee88 100644
--- a/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts
+++ b/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts
@@ -217,15 +217,12 @@ describe("createTelegramBot", () => {
       expect(call[2]?.reply_to_message_id).toBeUndefined();
     }
   });
-  it("prefixes tool and final replies with responsePrefix", async () => {
+  it("prefixes final replies with responsePrefix", async () => {
     onSpy.mockReset();
     sendMessageSpy.mockReset();
     const replySpy = replyModule.__replySpy as unknown as ReturnType<typeof vi.fn>;
     replySpy.mockReset();
-    replySpy.mockImplementation(async (_ctx, opts) => {
-      await opts?.onToolResult?.({ text: "tool result" });
-      return { text: "final reply" };
-    });
+    replySpy.mockResolvedValue({ text: "final reply" });
     loadConfig.mockReturnValue({
       channels: {
         telegram: { dmPolicy: "open", allowFrom: ["*"] },
@@ -245,9 +242,8 @@ describe("createTelegramBot", () => {
       getFile: async () => ({ download: async () => new Uint8Array() }),
     });
 
-    expect(sendMessageSpy).toHaveBeenCalledTimes(2);
-    expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX tool result");
-    expect(sendMessageSpy.mock.calls[1][1]).toBe("PFX final reply");
+    expect(sendMessageSpy).toHaveBeenCalledTimes(1);
+    expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX final reply");
   });
   it("honors replyToMode=all for threaded replies", async () => {
     onSpy.mockReset();
diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts
index 486741f53..8dc52ab57 100644
--- a/src/telegram/bot.test.ts
+++ b/src/telegram/bot.test.ts
@@ -865,15 +865,12 @@ describe("createTelegramBot", () => {
     }
   });
 
-  it("prefixes tool and final replies with responsePrefix", async () => {
+  it("prefixes final replies with responsePrefix", async () => {
     onSpy.mockReset();
     sendMessageSpy.mockReset();
     const replySpy = replyModule.__replySpy as unknown as ReturnType<typeof vi.fn>;
     replySpy.mockReset();
-    replySpy.mockImplementation(async (_ctx, opts) => {
-      await opts?.onToolResult?.({ text: "tool result" });
-      return { text: "final reply" };
-    });
+    replySpy.mockResolvedValue({ text: "final reply" });
     loadConfig.mockReturnValue({
       channels: {
         telegram: { dmPolicy: "open", allowFrom: ["*"] },
@@ -893,9 +890,8 @@ describe("createTelegramBot", () => {
       getFile: async () => ({ download: async () => new Uint8Array() }),
     });
 
-    expect(sendMessageSpy).toHaveBeenCalledTimes(2);
-    expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX tool result");
-    expect(sendMessageSpy.mock.calls[1][1]).toBe("PFX final reply");
+    expect(sendMessageSpy).toHaveBeenCalledTimes(1);
+    expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX final reply");
   });
 
   it("honors replyToMode=all for threaded replies", async () => {
@@ -2288,7 +2284,7 @@ describe("createTelegramBot", () => {
     );
   });
 
-  it("streams tool summaries for native slash commands", async () => {
+  it("skips tool summaries for native slash commands", async () => {
     onSpy.mockReset();
     sendMessageSpy.mockReset();
     commandSpy.mockReset();
@@ -2326,9 +2322,8 @@ describe("createTelegramBot", () => {
       match: "on",
     });
 
-    expect(sendMessageSpy).toHaveBeenCalledTimes(2);
-    expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("tool update");
-    expect(sendMessageSpy.mock.calls[1]?.[1]).toContain("final reply");
+    expect(sendMessageSpy).toHaveBeenCalledTimes(1);
+    expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("final reply");
   });
 
   it("dedupes duplicate message updates by update_id", async () => {
diff --git a/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts b/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts
index 41a4dfc2a..19572b0dd 100644
--- a/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts
+++ b/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts
@@ -105,7 +105,7 @@ describe("web auto-reply", () => {
     vi.useRealTimers();
   });
 
-  it("sends tool summaries immediately with responsePrefix", async () => {
+  it("skips tool summaries and sends final reply with responsePrefix", async () => {
     setLoadConfigMock(() => ({
       channels: { whatsapp: { allowFrom: ["*"] } },
       messages: {
@@ -125,15 +125,7 @@ describe("web auto-reply", () => {
       return { close: vi.fn() };
     };
 
-    const resolver = vi
-      .fn()
-      .mockImplementation(
-        async (_ctx, opts?: { onToolResult?: (r: { text: string }) => Promise<void> }) => {
-          await opts?.onToolResult?.({ text: "🧩 tool1" });
-          await opts?.onToolResult?.({ text: "🧩 tool2" });
-          return { text: "final" };
-        },
-      );
+    const resolver = vi.fn().mockResolvedValue({ text: "final" });
 
     await monitorWebChannel(false, listenerFactory, false, resolver);
     expect(capturedOnMessage).toBeDefined();
@@ -149,7 +141,7 @@ describe("web auto-reply", () => {
     });
 
     const replies = reply.mock.calls.map((call) => call[0]);
-    expect(replies).toEqual(["🦞 🧩 tool1", "🦞 🧩 tool2", "🦞 final"]);
+    expect(replies).toEqual(["🦞 final"]);
     resetLoadConfigMock();
   });
   it("uses identity.name for messagePrefix when set", async () => {