fix: drop unsigned gemini tool calls from history

2026-01-16 22:42:32 +00:00
parent a4e99ecdaf
commit a7bec3340f
4 changed files with 89 additions and 62 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,7 +27,6 @@
 - TUI: show provider/model labels for the active session and default model.
 - Heartbeat: add per-agent heartbeat configuration and multi-agent docs example.
 - UI: show gateway auth guidance + doc link on unauthorized Control UI connections.
 - UI: add session deletion action in Control UI sessions list. (#1017) — thanks @Szpadel.
 - Security: warn on weak model tiers (Haiku, below GPT-5, below Claude 4.5) in `clawdbot security audit`.
 - Apps: store node auth tokens encrypted (Keychain/SecurePrefs).
 - Cron: isolated cron jobs now start a fresh session id on every run to prevent context buildup.
@@ -59,7 +58,6 @@
 ### Fixes
 - Messages: make `/stop` clear queued followups and pending session lane work for a hard abort.
 - Messages: make `/stop` abort active sub-agent runs spawned from the requester session and report how many were stopped.
 - Sessions: ensure `sessions.delete` clears queues, aborts embedded runs, and stops sub-agents before deletion.
 - WhatsApp: default response prefix only for self-chat, using identity name when set.
 - Signal/iMessage: bound transport readiness waits to 30s with periodic logging. (#1014) — thanks @Szpadel.
 - Auth: merge main auth profiles into per-agent stores for sub-agents and document inheritance. (#1013) — thanks @marcmarg.
@@ -71,7 +69,7 @@
 - Daemon: fix profile-aware service label resolution (env-driven) and add coverage for launchd/systemd/schtasks. (#969) — thanks @bjesuiter.
 - Agents: avoid false positives when logging unsupported Google tool schema keywords.
 - Agents: skip Gemini history downgrades for google-antigravity to preserve tool calls. (#894) — thanks @mukhtharcm.
- Agents: map Z.AI thinking to on/off in UI/TUI and drop the pi-agent-core patch now that upstream handles binary thinking.
+- Agents: drop unsigned Gemini tool calls (and matching results) from context to prevent tool-call markup leakage across models.
 - Status: restore usage summary line for current provider when no OAuth profiles exist.
 - Fix: guard model fallback against undefined provider/model values. (#954) — thanks @roshanasingh4.
 - Fix: refactor session store updates, add chat.inject, and harden subagent cleanup flow. (#944) — thanks @tyler6204.
--- a/src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts
+++ b/src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts
@@ -0,0 +1,71 @@
 import { describe, expect, it } from "vitest";
 import { downgradeGeminiHistory } from "./pi-embedded-helpers.js";
 describe("downgradeGeminiHistory", () => {
  it("drops unsigned tool calls and matching tool results", () => {
    const input = [
      {
        role: "assistant",
        content: [
          { type: "text", text: "hello" },
          { type: "toolCall", id: "call_1", name: "read", arguments: { path: "/tmp" } },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_1",
        content: [{ type: "text", text: "ok" }],
      },
      { role: "user", content: "next" },
    ];
    expect(downgradeGeminiHistory(input)).toEqual([
      {
        role: "assistant",
        content: [{ type: "text", text: "hello" }],
      },
      { role: "user", content: "next" },
    ]);
  });
  it("keeps signed tool calls and results", () => {
    const input = [
      {
        role: "assistant",
        content: [
          {
            type: "toolCall",
            id: "call_2",
            name: "read",
            arguments: { path: "/tmp" },
            thought_signature: "sig_123",
          },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_2",
        content: [{ type: "text", text: "ok" }],
      },
    ];
    expect(downgradeGeminiHistory(input)).toEqual(input);
  });
  it("drops assistant messages that only contain unsigned tool calls", () => {
    const input = [
      {
        role: "assistant",
        content: [{ type: "toolCall", id: "call_3", name: "read", arguments: {} }],
      },
      {
        role: "toolResult",
        toolCallId: "call_3",
        content: [{ type: "text", text: "ok" }],
      },
      { role: "user", content: "after" },
    ];
    expect(downgradeGeminiHistory(input)).toEqual([{ role: "user", content: "after" }]);
  });
 });
--- a/src/agents/pi-embedded-helpers/google.ts
+++ b/src/agents/pi-embedded-helpers/google.ts
@@ -16,9 +16,9 @@ export function isAntigravityClaude(api?: string | null, modelId?: string): bool
 export { sanitizeGoogleTurnOrdering };
 /**
- * Downgrades tool calls that are missing `thought_signature` (required by Gemini)
+ * Drops tool calls that are missing `thought_signature` (required by Gemini)
- * into text representations, to prevent 400 INVALID_ARGUMENT errors.
+ * to prevent 400 INVALID_ARGUMENT errors. Matching tool results are dropped
- * Also converts corresponding tool results into user messages.
+ * so they don't become orphaned in the transcript.
 */
 type GeminiToolCallBlock = {
  type?: unknown;
@@ -86,7 +86,7 @@ export function downgradeGeminiThinkingBlocks(messages: AgentMessage[]): AgentMe
 }
 export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[] {
-  const downgradedIds = new Set<string>();
+  const droppedToolCallIds = new Set<string>();
  const out: AgentMessage[] = [];
  const resolveToolResultId = (
@@ -113,9 +113,9 @@ export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[]
        continue;
      }
-      let hasDowngraded = false;
+      let dropped = false;
-      const newContent = assistantMsg.content.map((block) => {
+      const nextContent = assistantMsg.content.filter((block) => {
-        if (!block || typeof block !== "object") return block;
+        if (!block || typeof block !== "object") return true;
        const blockRecord = block as GeminiToolCallBlock;
        const type = blockRecord.type;
        if (type === "toolCall" || type === "functionCall" || type === "toolUse") {
@@ -128,64 +128,26 @@ export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[]
                : typeof blockRecord.toolCallId === "string"
                  ? blockRecord.toolCallId
                  : undefined;
-            const name =
+            if (id) droppedToolCallIds.add(id);
-              typeof blockRecord.name === "string"
+            dropped = true;
-                ? blockRecord.name
+            return false;
                : typeof blockRecord.toolName === "string"
                  ? blockRecord.toolName
                  : undefined;
            const args =
              blockRecord.arguments !== undefined ? blockRecord.arguments : blockRecord.input;
            if (id) downgradedIds.add(id);
            hasDowngraded = true;
            const argsText = typeof args === "string" ? args : JSON.stringify(args, null, 2);
            return {
              type: "text",
              text: `[Tool Call: ${name ?? "unknown"}${
                id ? ` (ID: ${id})` : ""
              }]\nArguments: ${argsText}`,
            };
          }
        }
-        return block;
+        return true;
      });
-      out.push(hasDowngraded ? ({ ...assistantMsg, content: newContent } as AgentMessage) : msg);
+      if (dropped && nextContent.length === 0) {
        continue;
      }
      out.push(dropped ? ({ ...assistantMsg, content: nextContent } as AgentMessage) : msg);
      continue;
    }
    if (role === "toolResult") {
      const toolMsg = msg as Extract<AgentMessage, { role: "toolResult" }>;
      const toolResultId = resolveToolResultId(toolMsg);
-      if (toolResultId && downgradedIds.has(toolResultId)) {
+      if (toolResultId && droppedToolCallIds.has(toolResultId)) {
        let textContent = "";
        if (Array.isArray(toolMsg.content)) {
          textContent = toolMsg.content
            .map((entry) => {
              if (entry && typeof entry === "object") {
                const text = (entry as { text?: unknown }).text;
                if (typeof text === "string") return text;
              }
              return JSON.stringify(entry);
            })
            .join("\n");
        } else {
          textContent = JSON.stringify(toolMsg.content);
        }
        out.push({
          role: "user",
          content: [
            {
              type: "text",
              text: `[Tool Result for ID ${toolResultId}]\n${textContent}`,
            },
          ],
        } as AgentMessage);
        continue;
      }
    }
--- a/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts
+++ b/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts
@@ -190,10 +190,6 @@ describe("sanitizeSessionHistory (google thinking)", () => {
    expect(assistant.content).toEqual([
      { type: "text", text: "hello" },
      { type: "text", text: "ok" },
      {
        type: "text",
        text: '[Tool Call: read (ID: call_1)]\nArguments: {\n  "path": "/tmp/foo"\n}',
      },
      {
        type: "toolCall",
        id: "call_2",