From 98377c7c6b1a24eede134ad28ae7a59feac13af2 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sat, 10 Jan 2026 21:45:10 +0000
Subject: [PATCH] fix(agents): harden tool transcript repair

---
 CHANGELOG.md                                  |  2 ++
 src/agents/pi-embedded-helpers.test.ts        | 34 ++++++++++++++++++
 src/agents/pi-embedded-helpers.ts             | 36 ++++++++++++++++---
 .../gateway-models.profiles.live.test.ts      |  5 +++
 4 files changed, 73 insertions(+), 4 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9f30f7a3..bf3a55eb3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,8 @@
 - Telegram: serialize media-group processing to avoid missed albums under load.
 - Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist.
 - Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3.
+- Agents: repair session transcripts by dropping duplicate tool results across the whole history (unblocks Anthropic-compatible APIs after retries).
+- Tests/Live: reset the gateway session between model runs to avoid cross-provider transcript incompatibilities (notably OpenAI Responses reasoning replay rules).
 ## 2026.1.9
 
 ### Highlights
diff --git a/src/agents/pi-embedded-helpers.test.ts b/src/agents/pi-embedded-helpers.test.ts
index 6b6274262..7a0aaa8a9 100644
--- a/src/agents/pi-embedded-helpers.test.ts
+++ b/src/agents/pi-embedded-helpers.test.ts
@@ -642,6 +642,40 @@ describe("sanitizeToolUseResultPairing", () => {
     const out = sanitizeToolUseResultPairing(input);
     expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1);
   });
+
+  it("drops duplicate tool results for the same id across the transcript", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+        ],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "first" }],
+        isError: false,
+      },
+      { role: "assistant", content: [{ type: "text", text: "ok" }] },
+      {
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "second (duplicate)" }],
+        isError: false,
+      },
+    ] satisfies AgentMessage[];
+
+    const out = sanitizeToolUseResultPairing(input);
+    const results = out.filter((m) => m.role === "toolResult") as Array<{
+      toolCallId?: string;
+      content?: unknown;
+    }>;
+    expect(results).toHaveLength(1);
+    expect(results[0]?.toolCallId).toBe("call_1");
+  });
 });
 
 describe("normalizeTextForComparison", () => {
diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts
index 832a2d0cb..173fd5137 100644
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -286,8 +286,18 @@ export function sanitizeToolUseResultPairing(
   // displaced (e.g. after user turns) or duplicated. Repair by:
   // - moving matching toolResult messages directly after their assistant toolCall turn
   // - inserting synthetic error toolResults for missing ids
-  // - dropping duplicate toolResults for the same id within the span
+  // - dropping duplicate toolResults for the same id anywhere in the transcript (first one encountered wins)
   const out: AgentMessage[] = [];
+  const seenToolResultIds = new Set<string>();
+
+  const pushToolResult = (
+    msg: Extract<AgentMessage, { role: "toolResult" }>,
+  ) => {
+    const id = extractToolResultId(msg);
+    if (id && seenToolResultIds.has(id)) return;
+    if (id) seenToolResultIds.add(id);
+    out.push(msg);
+  };
 
   for (let i = 0; i < messages.length; i += 1) {
     const msg = messages[i] as AgentMessage;
@@ -298,7 +308,11 @@ export function sanitizeToolUseResultPairing(
 
     const role = (msg as { role?: unknown }).role;
     if (role !== "assistant") {
-      out.push(msg);
+      if (role === "toolResult") {
+        pushToolResult(msg as Extract<AgentMessage, { role: "toolResult" }>);
+      } else {
+        out.push(msg);
+      }
       continue;
     }
 
@@ -335,6 +349,9 @@ export function sanitizeToolUseResultPairing(
         >;
         const id = extractToolResultId(toolResult);
         if (id && toolCallIds.has(id)) {
+          if (seenToolResultIds.has(id)) {
+            continue;
+          }
           if (!spanResultsById.has(id)) {
             spanResultsById.set(id, toolResult);
           }
@@ -349,13 +366,24 @@ export function sanitizeToolUseResultPairing(
 
     for (const call of toolCalls) {
       const existing = spanResultsById.get(call.id);
-      out.push(
+      pushToolResult(
         existing ??
           makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
       );
     }
 
-    out.push(...remainder);
+    for (const rem of remainder) {
+      if (!rem || typeof rem !== "object") {
+        out.push(rem);
+        continue;
+      }
+      const remRole = (rem as { role?: unknown }).role;
+      if (remRole === "toolResult") {
+        pushToolResult(rem as Extract<AgentMessage, { role: "toolResult" }>);
+        continue;
+      }
+      out.push(rem);
+    }
     i = j - 1;
   }
 
diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts
index 6bb4cc740..a4d76704a 100644
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -338,6 +338,11 @@ describeLive("gateway live (dev agent, profile keys)", () => {
               key: sessionKey,
               model: modelKey,
             });
+            // Reset between models: avoids cross-provider transcript incompatibilities
+            // (notably OpenAI Responses requiring reasoning replay for function_call items).
+            await client.request<Record<string, unknown>>("sessions.reset", {
+              key: sessionKey,
+            });
 
             // “Meaningful” direct prompt (no tools).
             const runId = randomUUID();