From 875b018ea1829569ab6623c776c64a2d496826c1 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 25 Jan 2026 11:54:20 +0000 Subject: [PATCH] fix: stop sending tool summaries to channels --- src/agents/pi-embedded-runner/run.ts | 2 +- .../reply/dispatch-from-config.test.ts | 32 +++++++++++++++++++ src/auto-reply/reply/dispatch-from-config.ts | 22 ++----------- src/auto-reply/reply/route-reply.test.ts | 17 ++++++++++ src/auto-reply/reply/route-reply.ts | 19 ++++++----- src/discord/monitor.slash.test.ts | 11 +++---- ...essages-without-mention-by-default.test.ts | 12 +++---- ...ends-tool-summaries-responseprefix.test.ts | 12 +++---- ...ends-tool-summaries-responseprefix.test.ts | 22 ++++--------- ...topic-skill-filters-system-prompts.test.ts | 7 ++-- ...ies-without-native-reply-threading.test.ts | 12 +++---- src/telegram/bot.test.ts | 19 ++++------- ...mmaries-immediately-responseprefix.test.ts | 14 ++------ 13 files changed, 100 insertions(+), 101 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index ea2488a1c..adbe6ad49 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -599,7 +599,7 @@ export async function runEmbeddedPiAgent( verboseLevel: params.verboseLevel, reasoningLevel: params.reasoningLevel, toolResultFormat: resolvedToolResultFormat, - inlineToolResultsAllowed: !params.onPartialReply && !params.onToolResult, + inlineToolResultsAllowed: false, }); log.debug( diff --git a/src/auto-reply/reply/dispatch-from-config.test.ts b/src/auto-reply/reply/dispatch-from-config.test.ts index 4e1982674..0504694bf 100644 --- a/src/auto-reply/reply/dispatch-from-config.test.ts +++ b/src/auto-reply/reply/dispatch-from-config.test.ts @@ -138,6 +138,38 @@ describe("dispatchReplyFromConfig", () => { ); }); + it("does not provide onToolResult when routing cross-provider", async () => { + mocks.tryFastAbortFromMessage.mockResolvedValue({ + handled: false, + aborted: false, + }); + mocks.routeReply.mockClear(); + const cfg = {} as ClawdbotConfig; + const dispatcher = createDispatcher(); + const ctx = buildTestCtx({ + Provider: "slack", + OriginatingChannel: "telegram", + OriginatingTo: "telegram:999", + }); + + const replyResolver = async ( + _ctx: MsgContext, + opts: GetReplyOptions | undefined, + _cfg: ClawdbotConfig, + ) => { + expect(opts?.onToolResult).toBeUndefined(); + return { text: "hi" } satisfies ReplyPayload; + }; + + await dispatchReplyFromConfig({ ctx, cfg, dispatcher, replyResolver }); + + expect(mocks.routeReply).toHaveBeenCalledWith( + expect.objectContaining({ + payload: expect.objectContaining({ text: "hi" }), + }), + ); + }); + it("fast-aborts without calling the reply resolver", async () => { mocks.tryFastAbortFromMessage.mockResolvedValue({ handled: true, diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts index 16c83bf30..f946c05f9 100644 --- a/src/auto-reply/reply/dispatch-from-config.ts +++ b/src/auto-reply/reply/dispatch-from-config.ts @@ -206,6 +206,7 @@ export async function dispatchReplyFromConfig(params: { const sendPayloadAsync = async ( payload: ReplyPayload, abortSignal?: AbortSignal, + mirror?: boolean, ): Promise => { // TypeScript doesn't narrow these from the shouldRouteToOriginating check, // but they're guaranteed non-null when this function is called. @@ -220,6 +221,7 @@ export async function dispatchReplyFromConfig(params: { threadId: ctx.MessageThreadId, cfg, abortSignal, + mirror, }); if (!result.ok) { logVerbose(`dispatch-from-config: route-reply failed: ${result.error ?? "unknown error"}`); @@ -268,24 +270,6 @@ export async function dispatchReplyFromConfig(params: { ctx, { ...params.replyOptions, - onToolResult: (payload: ReplyPayload) => { - const run = async () => { - const ttsPayload = await maybeApplyTtsToPayload({ - payload, - cfg, - channel: ttsChannel, - kind: "tool", - inboundAudio, - ttsAuto: sessionTtsAuto, - }); - if (shouldRouteToOriginating) { - await sendPayloadAsync(ttsPayload); - } else { - dispatcher.sendToolResult(ttsPayload); - } - }; - return run(); - }, onBlockReply: (payload: ReplyPayload, context) => { const run = async () => { const ttsPayload = await maybeApplyTtsToPayload({ @@ -297,7 +281,7 @@ export async function dispatchReplyFromConfig(params: { ttsAuto: sessionTtsAuto, }); if (shouldRouteToOriginating) { - await sendPayloadAsync(ttsPayload, context?.abortSignal); + await sendPayloadAsync(ttsPayload, context?.abortSignal, false); } else { dispatcher.sendBlockReply(ttsPayload); } diff --git a/src/auto-reply/reply/route-reply.test.ts b/src/auto-reply/reply/route-reply.test.ts index f587ec481..0e6e0a721 100644 --- a/src/auto-reply/reply/route-reply.test.ts +++ b/src/auto-reply/reply/route-reply.test.ts @@ -379,6 +379,23 @@ describe("routeReply", () => { }), ); }); + + it("skips mirror data when mirror is false", async () => { + mocks.deliverOutboundPayloads.mockResolvedValue([]); + await routeReply({ + payload: { text: "hi" }, + channel: "slack", + to: "channel:C123", + sessionKey: "agent:main:main", + mirror: false, + cfg: {} as never, + }); + expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( + expect.objectContaining({ + mirror: undefined, + }), + ); + }); }); const emptyRegistry = createRegistry([]); diff --git a/src/auto-reply/reply/route-reply.ts b/src/auto-reply/reply/route-reply.ts index bbc7efa7d..7db417794 100644 --- a/src/auto-reply/reply/route-reply.ts +++ b/src/auto-reply/reply/route-reply.ts @@ -33,6 +33,8 @@ export type RouteReplyParams = { cfg: ClawdbotConfig; /** Optional abort signal for cooperative cancellation. */ abortSignal?: AbortSignal; + /** Mirror reply into session transcript (default: true when sessionKey is set). */ + mirror?: boolean; }; export type RouteReplyResult = { @@ -118,14 +120,15 @@ export async function routeReply(params: RouteReplyParams): Promise { beforeEach(() => { dispatchMock.mockReset().mockImplementation(async (params) => { if ("dispatcher" in params && params.dispatcher) { - params.dispatcher.sendToolResult({ text: "tool update" }); params.dispatcher.sendFinalReply({ text: "final reply" }); - return { queuedFinal: true, counts: { tool: 1, block: 0, final: 1 } }; + return { queuedFinal: true, counts: { tool: 0, block: 0, final: 1 } }; } if ("dispatcherOptions" in params && params.dispatcherOptions) { const { dispatcher, markDispatchIdle } = createReplyDispatcherWithTyping( params.dispatcherOptions, ); - dispatcher.sendToolResult({ text: "tool update" }); dispatcher.sendFinalReply({ text: "final reply" }); await dispatcher.waitForIdle(); markDispatchIdle(); @@ -53,7 +51,7 @@ beforeEach(() => { }); describe("discord native commands", () => { - it("streams tool results for native slash commands", { timeout: 60_000 }, async () => { + it("skips tool results for native slash commands", { timeout: 60_000 }, async () => { const { ChannelType } = await import("@buape/carbon"); const { createDiscordNativeCommand } = await import("./monitor.js"); @@ -97,8 +95,7 @@ describe("discord native commands", () => { expect(dispatchMock).toHaveBeenCalledTimes(1); expect(reply).toHaveBeenCalledTimes(1); - expect(followUp).toHaveBeenCalledTimes(1); - expect(reply.mock.calls[0]?.[0]?.content).toContain("tool"); - expect(followUp.mock.calls[0]?.[0]?.content).toContain("final"); + expect(followUp).toHaveBeenCalledTimes(0); + expect(reply.mock.calls[0]?.[0]?.content).toContain("final"); }); }); diff --git a/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts b/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts index a0665044e..1c7330ab5 100644 --- a/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts +++ b/src/imessage/monitor.skips-group-messages-without-mention-by-default.test.ts @@ -277,15 +277,12 @@ describe("monitorIMessageProvider", () => { expect(ctx.SessionKey).toBe("agent:main:imessage:group:2"); }); - it("prefixes tool and final replies with responsePrefix", async () => { + it("prefixes final replies with responsePrefix", async () => { config = { ...config, messages: { responsePrefix: "PFX" }, }; - replyMock.mockImplementation(async (_ctx, opts) => { - await opts?.onToolResult?.({ text: "tool update" }); - return { text: "final reply" }; - }); + replyMock.mockResolvedValue({ text: "final reply" }); const run = monitorIMessageProvider(); await waitForSubscribe(); @@ -307,9 +304,8 @@ describe("monitorIMessageProvider", () => { closeResolve?.(); await run; - expect(sendMock).toHaveBeenCalledTimes(2); - expect(sendMock.mock.calls[0][1]).toBe("PFX tool update"); - expect(sendMock.mock.calls[1][1]).toBe("PFX final reply"); + expect(sendMock).toHaveBeenCalledTimes(1); + expect(sendMock.mock.calls[0][1]).toBe("PFX final reply"); }); it("defaults to dmPolicy=pairing behavior when allowFrom is empty", async () => { diff --git a/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts b/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts index c3709890e..2d4e9c540 100644 --- a/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts +++ b/src/signal/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts @@ -210,12 +210,9 @@ describe("monitorSignalProvider tool results", () => { ); }); - it("sends tool summaries with responsePrefix", async () => { + it("skips tool summaries with responsePrefix", async () => { const abortController = new AbortController(); - replyMock.mockImplementation(async (_ctx, opts) => { - await opts?.onToolResult?.({ text: "tool update" }); - return { text: "final reply" }; - }); + replyMock.mockResolvedValue({ text: "final reply" }); streamMock.mockImplementation(async ({ onEvent }) => { const payload = { @@ -243,9 +240,8 @@ describe("monitorSignalProvider tool results", () => { await flush(); - expect(sendMock).toHaveBeenCalledTimes(2); - expect(sendMock.mock.calls[0][1]).toBe("PFX tool update"); - expect(sendMock.mock.calls[1][1]).toBe("PFX final reply"); + expect(sendMock).toHaveBeenCalledTimes(1); + expect(sendMock.mock.calls[0][1]).toBe("PFX final reply"); }); it("replies with pairing code when dmPolicy is pairing and no allowFrom is set", async () => { diff --git a/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts b/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts index bcd797793..ce7015399 100644 --- a/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts +++ b/src/slack/monitor.tool-result.sends-tool-summaries-responseprefix.test.ts @@ -23,11 +23,8 @@ beforeEach(() => { }); describe("monitorSlackProvider tool results", () => { - it("sends tool summaries with responsePrefix", async () => { - replyMock.mockImplementation(async (_ctx, opts) => { - await opts?.onToolResult?.({ text: "tool update" }); - return { text: "final reply" }; - }); + it("skips tool summaries with responsePrefix", async () => { + replyMock.mockResolvedValue({ text: "final reply" }); const controller = new AbortController(); const run = monitorSlackProvider({ @@ -55,9 +52,8 @@ describe("monitorSlackProvider tool results", () => { controller.abort(); await run; - expect(sendMock).toHaveBeenCalledTimes(2); - expect(sendMock.mock.calls[0][1]).toBe("PFX tool update"); - expect(sendMock.mock.calls[1][1]).toBe("PFX final reply"); + expect(sendMock).toHaveBeenCalledTimes(1); + expect(sendMock.mock.calls[0][1]).toBe("PFX final reply"); }); it("drops events with mismatched api_app_id", async () => { @@ -130,10 +126,7 @@ describe("monitorSlackProvider tool results", () => { }, }; - replyMock.mockImplementation(async (_ctx, opts) => { - await opts?.onToolResult?.({ text: "tool update" }); - return { text: "final reply" }; - }); + replyMock.mockResolvedValue({ text: "final reply" }); const controller = new AbortController(); const run = monitorSlackProvider({ @@ -161,9 +154,8 @@ describe("monitorSlackProvider tool results", () => { controller.abort(); await run; - expect(sendMock).toHaveBeenCalledTimes(2); - expect(sendMock.mock.calls[0][1]).toBe("tool update"); - expect(sendMock.mock.calls[1][1]).toBe("final reply"); + expect(sendMock).toHaveBeenCalledTimes(1); + expect(sendMock.mock.calls[0][1]).toBe("final reply"); }); it("preserves RawBody without injecting processed room history", async () => { diff --git a/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts b/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts index 0cda853be..1a7a9d40c 100644 --- a/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts +++ b/src/telegram/bot.create-telegram-bot.applies-topic-skill-filters-system-prompts.test.ts @@ -300,7 +300,7 @@ describe("createTelegramBot", () => { expect.objectContaining({ message_thread_id: 99 }), ); }); - it("streams tool summaries for native slash commands", async () => { + it("skips tool summaries for native slash commands", async () => { onSpy.mockReset(); sendMessageSpy.mockReset(); commandSpy.mockReset(); @@ -338,9 +338,8 @@ describe("createTelegramBot", () => { match: "on", }); - expect(sendMessageSpy).toHaveBeenCalledTimes(2); - expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("tool update"); - expect(sendMessageSpy.mock.calls[1]?.[1]).toContain("final reply"); + expect(sendMessageSpy).toHaveBeenCalledTimes(1); + expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("final reply"); }); it("dedupes duplicate message updates by update_id", async () => { onSpy.mockReset(); diff --git a/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts b/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts index 80c880b79..dffe8ee88 100644 --- a/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts +++ b/src/telegram/bot.create-telegram-bot.sends-replies-without-native-reply-threading.test.ts @@ -217,15 +217,12 @@ describe("createTelegramBot", () => { expect(call[2]?.reply_to_message_id).toBeUndefined(); } }); - it("prefixes tool and final replies with responsePrefix", async () => { + it("prefixes final replies with responsePrefix", async () => { onSpy.mockReset(); sendMessageSpy.mockReset(); const replySpy = replyModule.__replySpy as unknown as ReturnType; replySpy.mockReset(); - replySpy.mockImplementation(async (_ctx, opts) => { - await opts?.onToolResult?.({ text: "tool result" }); - return { text: "final reply" }; - }); + replySpy.mockResolvedValue({ text: "final reply" }); loadConfig.mockReturnValue({ channels: { telegram: { dmPolicy: "open", allowFrom: ["*"] }, @@ -245,9 +242,8 @@ describe("createTelegramBot", () => { getFile: async () => ({ download: async () => new Uint8Array() }), }); - expect(sendMessageSpy).toHaveBeenCalledTimes(2); - expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX tool result"); - expect(sendMessageSpy.mock.calls[1][1]).toBe("PFX final reply"); + expect(sendMessageSpy).toHaveBeenCalledTimes(1); + expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX final reply"); }); it("honors replyToMode=all for threaded replies", async () => { onSpy.mockReset(); diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts index 486741f53..8dc52ab57 100644 --- a/src/telegram/bot.test.ts +++ b/src/telegram/bot.test.ts @@ -865,15 +865,12 @@ describe("createTelegramBot", () => { } }); - it("prefixes tool and final replies with responsePrefix", async () => { + it("prefixes final replies with responsePrefix", async () => { onSpy.mockReset(); sendMessageSpy.mockReset(); const replySpy = replyModule.__replySpy as unknown as ReturnType; replySpy.mockReset(); - replySpy.mockImplementation(async (_ctx, opts) => { - await opts?.onToolResult?.({ text: "tool result" }); - return { text: "final reply" }; - }); + replySpy.mockResolvedValue({ text: "final reply" }); loadConfig.mockReturnValue({ channels: { telegram: { dmPolicy: "open", allowFrom: ["*"] }, @@ -893,9 +890,8 @@ describe("createTelegramBot", () => { getFile: async () => ({ download: async () => new Uint8Array() }), }); - expect(sendMessageSpy).toHaveBeenCalledTimes(2); - expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX tool result"); - expect(sendMessageSpy.mock.calls[1][1]).toBe("PFX final reply"); + expect(sendMessageSpy).toHaveBeenCalledTimes(1); + expect(sendMessageSpy.mock.calls[0][1]).toBe("PFX final reply"); }); it("honors replyToMode=all for threaded replies", async () => { @@ -2288,7 +2284,7 @@ describe("createTelegramBot", () => { ); }); - it("streams tool summaries for native slash commands", async () => { + it("skips tool summaries for native slash commands", async () => { onSpy.mockReset(); sendMessageSpy.mockReset(); commandSpy.mockReset(); @@ -2326,9 +2322,8 @@ describe("createTelegramBot", () => { match: "on", }); - expect(sendMessageSpy).toHaveBeenCalledTimes(2); - expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("tool update"); - expect(sendMessageSpy.mock.calls[1]?.[1]).toContain("final reply"); + expect(sendMessageSpy).toHaveBeenCalledTimes(1); + expect(sendMessageSpy.mock.calls[0]?.[1]).toContain("final reply"); }); it("dedupes duplicate message updates by update_id", async () => { diff --git a/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts b/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts index 41a4dfc2a..19572b0dd 100644 --- a/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts +++ b/src/web/auto-reply.web-auto-reply.sends-tool-summaries-immediately-responseprefix.test.ts @@ -105,7 +105,7 @@ describe("web auto-reply", () => { vi.useRealTimers(); }); - it("sends tool summaries immediately with responsePrefix", async () => { + it("skips tool summaries and sends final reply with responsePrefix", async () => { setLoadConfigMock(() => ({ channels: { whatsapp: { allowFrom: ["*"] } }, messages: { @@ -125,15 +125,7 @@ describe("web auto-reply", () => { return { close: vi.fn() }; }; - const resolver = vi - .fn() - .mockImplementation( - async (_ctx, opts?: { onToolResult?: (r: { text: string }) => Promise }) => { - await opts?.onToolResult?.({ text: "🧩 tool1" }); - await opts?.onToolResult?.({ text: "🧩 tool2" }); - return { text: "final" }; - }, - ); + const resolver = vi.fn().mockResolvedValue({ text: "final" }); await monitorWebChannel(false, listenerFactory, false, resolver); expect(capturedOnMessage).toBeDefined(); @@ -149,7 +141,7 @@ describe("web auto-reply", () => { }); const replies = reply.mock.calls.map((call) => call[0]); - expect(replies).toEqual(["🦞 🧩 tool1", "🦞 🧩 tool2", "🦞 final"]); + expect(replies).toEqual(["🦞 final"]); resetLoadConfigMock(); }); it("uses identity.name for messagePrefix when set", async () => {