From 38a03ff2c82f135d8dd9c49ba7f5b0449aa852c1 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 3 Dec 2025 10:11:41 +0000
Subject: [PATCH] Verbose: batch rapid tool results

---
 CHANGELOG.md                    |  1 +
 src/auto-reply/command-reply.ts | 71 ++++++++++++++++++++++++++++-----
 src/index.core.test.ts          |  2 +-
 3 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 15c5b91c8..e4c5fe096 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### Highlights
 - **Thinking directives & state:** `/t|/think|/thinking <level>` (aliases off|minimal|low|medium|high|max/highest). Inline applies to that message; directive-only message pins the level for the session; `/think:off` clears. Resolution: inline > session override > `inbound.reply.thinkingDefault` > off. Pi/Tau get `--thinking <level>` (except off); other agents append cue words (`think` → `think hard` → `think harder` → `ultrathink`). Heartbeat probe uses `HEARTBEAT /think:high`.
 - **Verbose directives + session hints:** `/v|/verbose on|full|off` mirrors thinking: inline > session > config default. Directive-only replies with an acknowledgement; invalid levels return a hint. When enabled, tool results from JSON-emitting agents (Pi/Tau, etc.) are forwarded as metadata-only `[🛠️ <tool-name> <arg>]` messages (now streamed as they happen), and new sessions surface a `🧭 New session: <id>` hint.
+- **Verbose tool coalescing:** Successive results from the same tool that arrive within ~250ms of each other are batched into one `[🛠️ tool] arg1, arg2` message to reduce WhatsApp noise.
 - **Directive confirmations:** Directive-only messages now reply with an acknowledgement (`Thinking level set to high.` / `Thinking disabled.`) and reject unknown levels with a helpful hint (state is unchanged).
 - **Pi/Tau stability:** RPC replies buffered until the assistant turn finishes; parsers return consistent `texts[]`; web auto-replies keep a warm Tau RPC process to avoid cold starts.
 - **Claude prompt flow:** One-time `sessionIntro` with per-message `/think:high` bodyPrefix; system prompt always sent on first turn even with `sendSystemOnce`.
diff --git a/src/auto-reply/command-reply.ts b/src/auto-reply/command-reply.ts
index 919d73fc6..d3d5a2360 100644
--- a/src/auto-reply/command-reply.ts
+++ b/src/auto-reply/command-reply.ts
@@ -53,6 +53,9 @@ export type CommandReplyResult = {
   meta: CommandReplyMeta;
 };
 
+// Quiet period (ms) after the last tool_result before the batch is flushed.
+const TOOL_RESULT_DEBOUNCE_MS = 250;
+
 type ToolMessageLike = {
   name?: string;
   toolName?: string;
@@ -120,6 +123,12 @@ function formatToolPrefix(toolName?: string, meta?: string) {
   return extra ? `[🛠️ ${label} ${extra}]` : `[🛠️ ${label}]`;
 }
 
+function formatToolAggregate(toolName?: string, metas?: string[]) {
+  const prefix = formatToolPrefix(toolName); // bare "[🛠️ <tool>]" label
+  const extras = metas?.filter((entry) => Boolean(entry)) ?? [];
+  return extras.length > 0 ? `${prefix} ${extras.join(", ")}` : prefix;
+}
+
 export function summarizeClaudeMetadata(payload: unknown): string | undefined {
   if (!payload || typeof payload !== "object") return undefined;
   const obj = payload as Record<string, unknown>;
@@ -321,6 +330,27 @@ export async function runCommandReply(
   let queuedMs: number | undefined;
   let queuedAhead: number | undefined;
   try {
+    let pendingToolName: string | undefined;
+    let pendingMetas: string[] = [];
+    let pendingTimer: NodeJS.Timeout | null = null;
+    const flushPendingTool = () => {
+      // Send the batched tool results as one partial reply, then reset.
+      const nothingPending = !pendingToolName && pendingMetas.length === 0;
+      if (!onPartialReply || nothingPending) return;
+      const summary = formatToolAggregate(pendingToolName, pendingMetas);
+      const split = splitMediaFromOutput(summary);
+      const media = split.mediaUrls?.length ? split.mediaUrls : undefined;
+      const payload = { text: split.text, mediaUrls: media } as ReplyPayload;
+      void onPartialReply(payload);
+      pendingToolName = undefined;
+      pendingMetas = [];
+      // Cancel any debounce timer still armed for the batch just sent.
+      if (pendingTimer) {
+        clearTimeout(pendingTimer);
+        pendingTimer = null;
+      }
+    };
+
     const run = async () => {
       // Prefer long-lived tau RPC for pi agent to avoid cold starts.
       if (agentKind === "pi") {
@@ -338,7 +368,7 @@ export async function runCommandReply(
           }
           return copy;
         })();
-        return await runPiRpc({
+        const rpcResult = await runPiRpc({
           argv: rpcArgv,
           cwd: reply.cwd,
           prompt: body,
@@ -363,13 +393,20 @@ export async function runCommandReply(
                     ) {
                       const toolName = inferToolName(ev.message);
                       const meta = inferToolMeta(ev.message);
-                      const prefix = formatToolPrefix(toolName, meta);
-                      const { text: cleanedText, mediaUrls: mediaFound } =
-                        splitMediaFromOutput(prefix);
-                      void onPartialReply({
-                        text: cleanedText,
-                        mediaUrls: mediaFound?.length ? mediaFound : undefined,
-                      } as ReplyPayload);
+                      if (
+                        pendingToolName &&
+                        toolName &&
+                        toolName !== pendingToolName
+                      ) {
+                        flushPendingTool();
+                      }
+                      if (!pendingToolName) pendingToolName = toolName;
+                      if (meta) pendingMetas.push(meta);
+                      if (pendingTimer) clearTimeout(pendingTimer);
+                      pendingTimer = setTimeout(
+                        flushPendingTool,
+                        TOOL_RESULT_DEBOUNCE_MS,
+                      );
                     }
                   } catch {
                     // ignore malformed lines
@@ -377,6 +414,8 @@ export async function runCommandReply(
                 }
               : undefined,
         });
+        flushPendingTool();
+        return rpcResult;
       }
       return await commandRunner(finalArgv, { timeoutMs, cwd: reply.cwd });
     };
@@ -414,8 +453,20 @@ export async function runCommandReply(
       verboseLevel === "on" && !onPartialReply && parsedToolResults.length > 0;
 
     if (includeToolResultsInline) {
-      for (const tr of parsedToolResults) {
-        const prefixed = formatToolPrefix(tr.toolName, tr.meta);
+      const aggregated = parsedToolResults.reduce<
+        { toolName?: string; metas: string[] }[]
+      >((acc, tr) => {
+        const last = acc.at(-1);
+        if (last && last.toolName === tr.toolName) {
+          if (tr.meta) last.metas.push(tr.meta);
+        } else {
+          acc.push({ toolName: tr.toolName, metas: tr.meta ? [tr.meta] : [] });
+        }
+        return acc;
+      }, []);
+
+      for (const tr of aggregated) {
+        const prefixed = formatToolAggregate(tr.toolName, tr.metas);
         const { text: cleanedText, mediaUrls: mediaFound } =
           splitMediaFromOutput(prefixed);
         replyItems.push({
diff --git a/src/index.core.test.ts b/src/index.core.test.ts
index 5683eff1e..3a633324e 100644
--- a/src/index.core.test.ts
+++ b/src/index.core.test.ts
@@ -744,7 +744,7 @@ describe("config and templating", () => {
     expect(rpcSpy).toHaveBeenCalled();
     const payloads = Array.isArray(res) ? res : res ? [res] : [];
     expect(payloads.length).toBeGreaterThanOrEqual(2);
-    expect(payloads[0]?.text).toBe("[🛠️ bash ls]");
+    expect(payloads[0]?.text).toBe("[🛠️ bash] ls");
     expect(payloads[1]?.text).toContain("summary");
   });