fix: drop unsigned gemini tool calls from history

2026-01-16 22:42:32 +00:00
parent a4e99ecdaf
commit a7bec3340f
4 changed files with 89 additions and 62 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,7 +27,6 @@
 - TUI: show provider/model labels for the active session and default model.
 - Heartbeat: add per-agent heartbeat configuration and multi-agent docs example.
 - UI: show gateway auth guidance + doc link on unauthorized Control UI connections.
- UI: add session deletion action in Control UI sessions list. (#1017) — thanks @Szpadel.
 - Security: warn on weak model tiers (Haiku, below GPT-5, below Claude 4.5) in `clawdbot security audit`.
 - Apps: store node auth tokens encrypted (Keychain/SecurePrefs).
 - Cron: isolated cron jobs now start a fresh session id on every run to prevent context buildup.
@@ -59,7 +58,6 @@
 ### Fixes
 - Messages: make `/stop` clear queued followups and pending session lane work for a hard abort.
 - Messages: make `/stop` abort active sub-agent runs spawned from the requester session and report how many were stopped.
- Sessions: ensure `sessions.delete` clears queues, aborts embedded runs, and stops sub-agents before deletion.
 - WhatsApp: default response prefix only for self-chat, using identity name when set.
 - Signal/iMessage: bound transport readiness waits to 30s with periodic logging. (#1014) — thanks @Szpadel.
 - Auth: merge main auth profiles into per-agent stores for sub-agents and document inheritance. (#1013) — thanks @marcmarg.
@@ -71,7 +69,7 @@
 - Daemon: fix profile-aware service label resolution (env-driven) and add coverage for launchd/systemd/schtasks. (#969) — thanks @bjesuiter.
 - Agents: avoid false positives when logging unsupported Google tool schema keywords.
 - Agents: skip Gemini history downgrades for google-antigravity to preserve tool calls. (#894) — thanks @mukhtharcm.
- Agents: map Z.AI thinking to on/off in UI/TUI and drop the pi-agent-core patch now that upstream handles binary thinking.
+- Agents: drop unsigned Gemini tool calls (and matching results) from context to prevent tool-call markup leakage across models.
 - Status: restore usage summary line for current provider when no OAuth profiles exist.
 - Fix: guard model fallback against undefined provider/model values. (#954) — thanks @roshanasingh4.
 - Fix: refactor session store updates, add chat.inject, and harden subagent cleanup flow. (#944) — thanks @tyler6204.
--- a/src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts
+++ b/src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts
@@ -0,0 +1,71 @@
+import { describe, expect, it } from "vitest";
+import { downgradeGeminiHistory } from "./pi-embedded-helpers.js";
+
+describe("downgradeGeminiHistory", () => {
+  it("drops unsigned tool calls and matching tool results", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "hello" },
+          { type: "toolCall", id: "call_1", name: "read", arguments: { path: "/tmp" } },
+        ],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_1",
+        content: [{ type: "text", text: "ok" }],
+      },
+      { role: "user", content: "next" },
+    ];
+
+    expect(downgradeGeminiHistory(input)).toEqual([
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "hello" }],
+      },
+      { role: "user", content: "next" },
+    ]);
+  });
+
+  it("keeps signed tool calls and results", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "toolCall",
+            id: "call_2",
+            name: "read",
+            arguments: { path: "/tmp" },
+            thought_signature: "sig_123",
+          },
+        ],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_2",
+        content: [{ type: "text", text: "ok" }],
+      },
+    ];
+
+    expect(downgradeGeminiHistory(input)).toEqual(input);
+  });
+
+  it("drops assistant messages that only contain unsigned tool calls", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "call_3", name: "read", arguments: {} }],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_3",
+        content: [{ type: "text", text: "ok" }],
+      },
+      { role: "user", content: "after" },
+    ];
+
+    expect(downgradeGeminiHistory(input)).toEqual([{ role: "user", content: "after" }]);
+  });
+});
--- a/src/agents/pi-embedded-helpers/google.ts
+++ b/src/agents/pi-embedded-helpers/google.ts
@@ -16,9 +16,9 @@ export function isAntigravityClaude(api?: string | null, modelId?: string): bool
 export { sanitizeGoogleTurnOrdering };

 /**
- * Downgrades tool calls that are missing `thought_signature` (required by Gemini)
- * into text representations, to prevent 400 INVALID_ARGUMENT errors.
- * Also converts corresponding tool results into user messages.
+ * Drops tool calls that are missing `thought_signature` (required by Gemini)
+ * to prevent 400 INVALID_ARGUMENT errors. Matching tool results are dropped
+ * so they don't become orphaned in the transcript.
 */
 type GeminiToolCallBlock = {
  type?: unknown;
@@ -86,7 +86,7 @@ export function downgradeGeminiThinkingBlocks(messages: AgentMessage[]): AgentMe
 }

 export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[] {
-  const downgradedIds = new Set<string>();
+  const droppedToolCallIds = new Set<string>();
  const out: AgentMessage[] = [];

  const resolveToolResultId = (
@@ -113,9 +113,9 @@ export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[]
        continue;
      }

-      let hasDowngraded = false;
-      const newContent = assistantMsg.content.map((block) => {
-        if (!block || typeof block !== "object") return block;
+      let dropped = false;
+      const nextContent = assistantMsg.content.filter((block) => {
+        if (!block || typeof block !== "object") return true;
        const blockRecord = block as GeminiToolCallBlock;
        const type = blockRecord.type;
        if (type === "toolCall" || type === "functionCall" || type === "toolUse") {
@@ -128,64 +128,26 @@ export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[]
                : typeof blockRecord.toolCallId === "string"
                  ? blockRecord.toolCallId
                  : undefined;
-            const name =
-              typeof blockRecord.name === "string"
-                ? blockRecord.name
-                : typeof blockRecord.toolName === "string"
-                  ? blockRecord.toolName
-                  : undefined;
-            const args =
-              blockRecord.arguments !== undefined ? blockRecord.arguments : blockRecord.input;
-
-            if (id) downgradedIds.add(id);
-            hasDowngraded = true;
-
-            const argsText = typeof args === "string" ? args : JSON.stringify(args, null, 2);
-
-            return {
-              type: "text",
-              text: `[Tool Call: ${name ?? "unknown"}${
-                id ? ` (ID: ${id})` : ""
-              }]\nArguments: ${argsText}`,
-            };
+            if (id) droppedToolCallIds.add(id);
+            dropped = true;
+            return false;
          }
        }
-        return block;
+        return true;
      });

-      out.push(hasDowngraded ? ({ ...assistantMsg, content: newContent } as AgentMessage) : msg);
+      if (dropped && nextContent.length === 0) {
+        continue;
+      }
+
+      out.push(dropped ? ({ ...assistantMsg, content: nextContent } as AgentMessage) : msg);
      continue;
    }

    if (role === "toolResult") {
      const toolMsg = msg as Extract<AgentMessage, { role: "toolResult" }>;
      const toolResultId = resolveToolResultId(toolMsg);
-      if (toolResultId && downgradedIds.has(toolResultId)) {
-        let textContent = "";
-        if (Array.isArray(toolMsg.content)) {
-          textContent = toolMsg.content
-            .map((entry) => {
-              if (entry && typeof entry === "object") {
-                const text = (entry as { text?: unknown }).text;
-                if (typeof text === "string") return text;
-              }
-              return JSON.stringify(entry);
-            })
-            .join("\n");
-        } else {
-          textContent = JSON.stringify(toolMsg.content);
-        }
-
-        out.push({
-          role: "user",
-          content: [
-            {
-              type: "text",
-              text: `[Tool Result for ID ${toolResultId}]\n${textContent}`,
-            },
-          ],
-        } as AgentMessage);
-
+      if (toolResultId && droppedToolCallIds.has(toolResultId)) {
        continue;
      }
    }
--- a/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts
+++ b/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts
@@ -190,10 +190,6 @@ describe("sanitizeSessionHistory (google thinking)", () => {
    expect(assistant.content).toEqual([
      { type: "text", text: "hello" },
      { type: "text", text: "ok" },
-      {
-        type: "text",
-        text: '[Tool Call: read (ID: call_1)]\nArguments: {\n  "path": "/tmp/foo"\n}',
-      },
      {
        type: "toolCall",
        id: "call_2",