fix(agents): harden tool transcript repair

2026-01-10 21:45:10 +00:00
parent 805a29252e
commit 98377c7c6b
4 changed files with 73 additions and 4 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,8 @@
 - Telegram: serialize media-group processing to avoid missed albums under load.
 - Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist.
 - Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3.
+- Agents: repair session transcripts by dropping duplicate tool results across the whole history (unblocks Anthropic-compatible APIs after retries).
+- Tests/Live: reset the gateway session between model runs to avoid cross-provider transcript incompatibilities (notably OpenAI Responses reasoning replay rules).
 ## 2026.1.9

 ### Highlights
--- a/src/agents/pi-embedded-helpers.test.ts
+++ b/src/agents/pi-embedded-helpers.test.ts
@@ -642,6 +642,40 @@ describe("sanitizeToolUseResultPairing", () => {
    const out = sanitizeToolUseResultPairing(input);
    expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1);
  });
+
+  it("drops duplicate tool results for the same id across the transcript", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+        ],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "first" }],
+        isError: false,
+      },
+      { role: "assistant", content: [{ type: "text", text: "ok" }] },
+      {
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "second (duplicate)" }],
+        isError: false,
+      },
+    ] satisfies AgentMessage[];
+
+    const out = sanitizeToolUseResultPairing(input);
+    const results = out.filter((m) => m.role === "toolResult") as Array<{
+      toolCallId?: string;
+      content?: unknown;
+    }>;
+    expect(results).toHaveLength(1);
+    expect(results[0]?.toolCallId).toBe("call_1");
+  });
 });

 describe("normalizeTextForComparison", () => {
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -286,8 +286,18 @@ export function sanitizeToolUseResultPairing(
  // displaced (e.g. after user turns) or duplicated. Repair by:
  // - moving matching toolResult messages directly after their assistant toolCall turn
  // - inserting synthetic error toolResults for missing ids
-  // - dropping duplicate toolResults for the same id within the span
+  // - dropping duplicate toolResults for the same id anywhere in the transcript (the first one encountered is kept)
  const out: AgentMessage[] = [];
+  const seenToolResultIds = new Set<string>();
+
+  const pushToolResult = (
+    msg: Extract<AgentMessage, { role: "toolResult" }>,
+  ) => {
+    const id = extractToolResultId(msg);
+    if (id && seenToolResultIds.has(id)) return;
+    if (id) seenToolResultIds.add(id);
+    out.push(msg);
+  };

  for (let i = 0; i < messages.length; i += 1) {
    const msg = messages[i] as AgentMessage;
@@ -298,7 +308,11 @@ export function sanitizeToolUseResultPairing(

    const role = (msg as { role?: unknown }).role;
    if (role !== "assistant") {
-      out.push(msg);
+      if (role === "toolResult") {
+        pushToolResult(msg as Extract<AgentMessage, { role: "toolResult" }>);
+      } else {
+        out.push(msg);
+      }
      continue;
    }

@@ -335,6 +349,9 @@ export function sanitizeToolUseResultPairing(
        >;
        const id = extractToolResultId(toolResult);
        if (id && toolCallIds.has(id)) {
+          if (seenToolResultIds.has(id)) {
+            continue;
+          }
          if (!spanResultsById.has(id)) {
            spanResultsById.set(id, toolResult);
          }
@@ -349,13 +366,24 @@ export function sanitizeToolUseResultPairing(

    for (const call of toolCalls) {
      const existing = spanResultsById.get(call.id);
-      out.push(
+      pushToolResult(
        existing ??
          makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
      );
    }

-    out.push(...remainder);
+    for (const rem of remainder) {
+      if (!rem || typeof rem !== "object") {
+        out.push(rem);
+        continue;
+      }
+      const remRole = (rem as { role?: unknown }).role;
+      if (remRole === "toolResult") {
+        pushToolResult(rem as Extract<AgentMessage, { role: "toolResult" }>);
+        continue;
+      }
+      out.push(rem);
+    }
    i = j - 1;
  }

--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -338,6 +338,11 @@ describeLive("gateway live (dev agent, profile keys)", () => {
              key: sessionKey,
              model: modelKey,
            });
+            // Reset between models: avoids cross-provider transcript incompatibilities
+            // (notably OpenAI Responses requiring reasoning replay for function_call items).
+            await client.request<Record<string, unknown>>("sessions.reset", {
+              key: sessionKey,
+            });

            // “Meaningful” direct prompt (no tools).
            const runId = randomUUID();