From a7bec3340f5fbd0e0ef754f56c4903d3c7d581db Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 16 Jan 2026 22:42:32 +0000 Subject: [PATCH] fix: drop unsigned gemini tool calls from history --- CHANGELOG.md | 4 +- ...ded-helpers.downgradegeminihistory.test.ts | 71 ++++++++++++++++++ src/agents/pi-embedded-helpers/google.ts | 72 +++++-------------- ...ed-runner.google-sanitize-thinking.test.ts | 4 -- 4 files changed, 89 insertions(+), 62 deletions(-) create mode 100644 src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 53942f148..2e4dbc1bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,6 @@ - TUI: show provider/model labels for the active session and default model. - Heartbeat: add per-agent heartbeat configuration and multi-agent docs example. - UI: show gateway auth guidance + doc link on unauthorized Control UI connections. -- UI: add session deletion action in Control UI sessions list. (#1017) — thanks @Szpadel. - Security: warn on weak model tiers (Haiku, below GPT-5, below Claude 4.5) in `clawdbot security audit`. - Apps: store node auth tokens encrypted (Keychain/SecurePrefs). - Cron: isolated cron jobs now start a fresh session id on every run to prevent context buildup. @@ -59,7 +58,6 @@ ### Fixes - Messages: make `/stop` clear queued followups and pending session lane work for a hard abort. - Messages: make `/stop` abort active sub-agent runs spawned from the requester session and report how many were stopped. -- Sessions: ensure `sessions.delete` clears queues, aborts embedded runs, and stops sub-agents before deletion. - WhatsApp: default response prefix only for self-chat, using identity name when set. - Signal/iMessage: bound transport readiness waits to 30s with periodic logging. (#1014) — thanks @Szpadel. - Auth: merge main auth profiles into per-agent stores for sub-agents and document inheritance. (#1013) — thanks @marcmarg. @@ -71,7 +69,7 @@ - Daemon: fix profile-aware service label resolution (env-driven) and add coverage for launchd/systemd/schtasks. (#969) — thanks @bjesuiter. - Agents: avoid false positives when logging unsupported Google tool schema keywords. - Agents: skip Gemini history downgrades for google-antigravity to preserve tool calls. (#894) — thanks @mukhtharcm. -- Agents: map Z.AI thinking to on/off in UI/TUI and drop the pi-agent-core patch now that upstream handles binary thinking. +- Agents: drop unsigned Gemini tool calls (and matching results) from context to prevent tool-call markup leakage across models. - Status: restore usage summary line for current provider when no OAuth profiles exist. - Fix: guard model fallback against undefined provider/model values. (#954) — thanks @roshanasingh4. - Fix: refactor session store updates, add chat.inject, and harden subagent cleanup flow. (#944) — thanks @tyler6204. diff --git a/src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts b/src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts new file mode 100644 index 000000000..b48d5d4e1 --- /dev/null +++ b/src/agents/pi-embedded-helpers.downgradegeminihistory.test.ts @@ -0,0 +1,71 @@ +import { describe, expect, it } from "vitest"; +import { downgradeGeminiHistory } from "./pi-embedded-helpers.js"; + +describe("downgradeGeminiHistory", () => { + it("drops unsigned tool calls and matching tool results", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "text", text: "hello" }, + { type: "toolCall", id: "call_1", name: "read", arguments: { path: "/tmp" } }, + ], + }, + { + role: "toolResult", + toolCallId: "call_1", + content: [{ type: "text", text: "ok" }], + }, + { role: "user", content: "next" }, + ]; + + expect(downgradeGeminiHistory(input)).toEqual([ + { + role: "assistant", + content: [{ type: "text", text: "hello" }], + }, + { role: "user", content: "next" }, + ]); + }); + + it("keeps signed tool calls and results", () => { + const input = [ + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_2", + name: "read", + arguments: { path: "/tmp" }, + thought_signature: "sig_123", + }, + ], + }, + { + role: "toolResult", + toolCallId: "call_2", + content: [{ type: "text", text: "ok" }], + }, + ]; + + expect(downgradeGeminiHistory(input)).toEqual(input); + }); + + it("drops assistant messages that only contain unsigned tool calls", () => { + const input = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_3", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_3", + content: [{ type: "text", text: "ok" }], + }, + { role: "user", content: "after" }, + ]; + + expect(downgradeGeminiHistory(input)).toEqual([{ role: "user", content: "after" }]); + }); +}); diff --git a/src/agents/pi-embedded-helpers/google.ts b/src/agents/pi-embedded-helpers/google.ts index a5fdee75a..dc21da947 100644 --- a/src/agents/pi-embedded-helpers/google.ts +++ b/src/agents/pi-embedded-helpers/google.ts @@ -16,9 +16,9 @@ export function isAntigravityClaude(api?: string | null, modelId?: string): bool export { sanitizeGoogleTurnOrdering }; /** - * Downgrades tool calls that are missing `thought_signature` (required by Gemini) - * into text representations, to prevent 400 INVALID_ARGUMENT errors. - * Also converts corresponding tool results into user messages. + * Drops tool calls that are missing `thought_signature` (required by Gemini) + * to prevent 400 INVALID_ARGUMENT errors. Matching tool results are dropped + * so they don't become orphaned in the transcript. */ type GeminiToolCallBlock = { type?: unknown; @@ -86,7 +86,7 @@ export function downgradeGeminiThinkingBlocks(messages: AgentMessage[]): AgentMe } export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[] { - const downgradedIds = new Set(); + const droppedToolCallIds = new Set(); const out: AgentMessage[] = []; const resolveToolResultId = ( @@ -113,9 +113,9 @@ export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[] continue; } - let hasDowngraded = false; - const newContent = assistantMsg.content.map((block) => { - if (!block || typeof block !== "object") return block; + let dropped = false; + const nextContent = assistantMsg.content.filter((block) => { + if (!block || typeof block !== "object") return true; const blockRecord = block as GeminiToolCallBlock; const type = blockRecord.type; if (type === "toolCall" || type === "functionCall" || type === "toolUse") { @@ -128,64 +128,26 @@ export function downgradeGeminiHistory(messages: AgentMessage[]): AgentMessage[] : typeof blockRecord.toolCallId === "string" ? blockRecord.toolCallId : undefined; - const name = - typeof blockRecord.name === "string" - ? blockRecord.name - : typeof blockRecord.toolName === "string" - ? blockRecord.toolName - : undefined; - const args = - blockRecord.arguments !== undefined ? blockRecord.arguments : blockRecord.input; - - if (id) downgradedIds.add(id); - hasDowngraded = true; - - const argsText = typeof args === "string" ? args : JSON.stringify(args, null, 2); - - return { - type: "text", - text: `[Tool Call: ${name ?? "unknown"}${ - id ? ` (ID: ${id})` : "" - }]\nArguments: ${argsText}`, - }; + if (id) droppedToolCallIds.add(id); + dropped = true; + return false; } } - return block; + return true; }); - out.push(hasDowngraded ? ({ ...assistantMsg, content: newContent } as AgentMessage) : msg); + if (dropped && nextContent.length === 0) { + continue; + } + + out.push(dropped ? ({ ...assistantMsg, content: nextContent } as AgentMessage) : msg); continue; } if (role === "toolResult") { const toolMsg = msg as Extract; const toolResultId = resolveToolResultId(toolMsg); - if (toolResultId && downgradedIds.has(toolResultId)) { - let textContent = ""; - if (Array.isArray(toolMsg.content)) { - textContent = toolMsg.content - .map((entry) => { - if (entry && typeof entry === "object") { - const text = (entry as { text?: unknown }).text; - if (typeof text === "string") return text; - } - return JSON.stringify(entry); - }) - .join("\n"); - } else { - textContent = JSON.stringify(toolMsg.content); - } - - out.push({ - role: "user", - content: [ - { - type: "text", - text: `[Tool Result for ID ${toolResultId}]\n${textContent}`, - }, - ], - } as AgentMessage); - + if (toolResultId && droppedToolCallIds.has(toolResultId)) { continue; } } diff --git a/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts b/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts index 12b6db850..1b34c732e 100644 --- a/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts +++ b/src/agents/pi-embedded-runner.google-sanitize-thinking.test.ts @@ -190,10 +190,6 @@ describe("sanitizeSessionHistory (google thinking)", () => { expect(assistant.content).toEqual([ { type: "text", text: "hello" }, { type: "text", text: "ok" }, - { - type: "text", - text: '[Tool Call: read (ID: call_1)]\nArguments: {\n "path": "/tmp/foo"\n}', - }, { type: "toolCall", id: "call_2",