From 98377c7c6b1a24eede134ad28ae7a59feac13af2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 10 Jan 2026 21:45:10 +0000 Subject: [PATCH] fix(agents): harden tool transcript repair --- CHANGELOG.md | 2 ++ src/agents/pi-embedded-helpers.test.ts | 34 ++++++++++++++++++ src/agents/pi-embedded-helpers.ts | 36 ++++++++++++++++--- .../gateway-models.profiles.live.test.ts | 5 +++ 4 files changed, 73 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9f30f7a3..bf3a55eb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,8 @@ - Telegram: serialize media-group processing to avoid missed albums under load. - Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist. - Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3. +- Agents: repair session transcripts by dropping duplicate tool results across the whole history (unblocks Anthropic-compatible APIs after retries). +- Tests/Live: reset the gateway session between model runs to avoid cross-provider transcript incompatibilities (notably OpenAI Responses reasoning replay rules). ## 2026.1.9 ### Highlights diff --git a/src/agents/pi-embedded-helpers.test.ts b/src/agents/pi-embedded-helpers.test.ts index 6b6274262..7a0aaa8a9 100644 --- a/src/agents/pi-embedded-helpers.test.ts +++ b/src/agents/pi-embedded-helpers.test.ts @@ -642,6 +642,40 @@ describe("sanitizeToolUseResultPairing", () => { const out = sanitizeToolUseResultPairing(input); expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1); }); + + it("drops duplicate tool results for the same id across the transcript", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "first" }], + isError: false, + }, + { role: "assistant", content: [{ type: "text", text: "ok" }] }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "second (duplicate)" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolUseResultPairing(input); + const results = out.filter((m) => m.role === "toolResult") as Array<{ + toolCallId?: string; + content?: unknown; + }>; + expect(results).toHaveLength(1); + expect(results[0]?.toolCallId).toBe("call_1"); + }); }); describe("normalizeTextForComparison", () => { diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 832a2d0cb..173fd5137 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -286,8 +286,18 @@ export function sanitizeToolUseResultPairing( // displaced (e.g. after user turns) or duplicated. Repair by: // - moving matching toolResult messages directly after their assistant toolCall turn // - inserting synthetic error toolResults for missing ids - // - dropping duplicate toolResults for the same id within the span + // - dropping duplicate toolResults for the same id (anywhere in the transcript) const out: AgentMessage[] = []; + const seenToolResultIds = new Set(); + + const pushToolResult = ( + msg: Extract, + ) => { + const id = extractToolResultId(msg); + if (id && seenToolResultIds.has(id)) return; + if (id) seenToolResultIds.add(id); + out.push(msg); + }; for (let i = 0; i < messages.length; i += 1) { const msg = messages[i] as AgentMessage; @@ -298,7 +308,11 @@ export function sanitizeToolUseResultPairing( const role = (msg as { role?: unknown }).role; if (role !== "assistant") { - out.push(msg); + if (role === "toolResult") { + pushToolResult(msg as Extract); + } else { + out.push(msg); + } continue; } @@ -335,6 +349,9 @@ export function sanitizeToolUseResultPairing( >; const id = extractToolResultId(toolResult); if (id && toolCallIds.has(id)) { + if (seenToolResultIds.has(id)) { + continue; + } if (!spanResultsById.has(id)) { spanResultsById.set(id, toolResult); } @@ -349,13 +366,24 @@ export function sanitizeToolUseResultPairing( for (const call of toolCalls) { const existing = spanResultsById.get(call.id); - out.push( + pushToolResult( existing ?? makeMissingToolResult({ toolCallId: call.id, toolName: call.name }), ); } - out.push(...remainder); + for (const rem of remainder) { + if (!rem || typeof rem !== "object") { + out.push(rem); + continue; + } + const remRole = (rem as { role?: unknown }).role; + if (remRole === "toolResult") { + pushToolResult(rem as Extract); + continue; + } + out.push(rem); + } i = j - 1; } diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index 6bb4cc740..a4d76704a 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -338,6 +338,11 @@ describeLive("gateway live (dev agent, profile keys)", () => { key: sessionKey, model: modelKey, }); + // Reset between models: avoids cross-provider transcript incompatibilities + // (notably OpenAI Responses requiring reasoning replay for function_call items). + await client.request>("sessions.reset", { + key: sessionKey, + }); // “Meaningful” direct prompt (no tools). const runId = randomUUID();