fix(agents): harden tool transcript repair

2026-01-10 21:45:10 +00:00
parent 805a29252e
commit 98377c7c6b
4 changed files with 73 additions and 4 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,8 @@
 - Telegram: serialize media-group processing to avoid missed albums under load.
 - Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist.
 - Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3.
 - Agents: repair session transcripts by dropping duplicate tool results across the whole history (unblocks Anthropic-compatible APIs after retries).
 - Tests/Live: reset the gateway session between model runs to avoid cross-provider transcript incompatibilities (notably OpenAI Responses reasoning replay rules).
 ## 2026.1.9
 ### Highlights
--- a/src/agents/pi-embedded-helpers.test.ts
+++ b/src/agents/pi-embedded-helpers.test.ts
@@ -642,6 +642,40 @@ describe("sanitizeToolUseResultPairing", () => {
    const out = sanitizeToolUseResultPairing(input);
    expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1);
  });
  // Regression test: a toolResult that re-appears later in the transcript
  // (outside the span of its originating assistant toolCall turn) must be
  // dropped, and the result that was paired with the call must be the one kept.
  it("drops duplicate tool results for the same id across the transcript", () => {
    const input = [
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "first" }],
        isError: false,
      },
      // A later assistant turn ends the span of the first tool call, so the
      // duplicate below sits outside that span.
      { role: "assistant", content: [{ type: "text", text: "ok" }] },
      {
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "second (duplicate)" }],
        isError: false,
      },
    ] satisfies AgentMessage[];
    const out = sanitizeToolUseResultPairing(input);
    const results = out.filter((m) => m.role === "toolResult") as Array<{
      toolCallId?: string;
      content?: unknown;
    }>;
    expect(results).toHaveLength(1);
    expect(results[0]?.toolCallId).toBe("call_1");
    // Pin WHICH result survives: the original one, not the late duplicate.
    expect(results[0]?.content).toEqual([{ type: "text", text: "first" }]);
  });
 });
 describe("normalizeTextForComparison", () => {
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -286,8 +286,18 @@ export function sanitizeToolUseResultPairing(
  // displaced (e.g. after user turns) or duplicated. Repair by:
  // - moving matching toolResult messages directly after their assistant toolCall turn
  // - inserting synthetic error toolResults for missing ids
-  // - dropping duplicate toolResults for the same id within the span
+  // - dropping duplicate toolResults for the same id (anywhere in the transcript)
  const out: AgentMessage[] = [];
  const seenToolResultIds = new Set<string>();
  // Appends a toolResult to `out` unless a result carrying the same id has
  // already been appended. Results for which no id can be extracted are
  // always appended, since there is nothing to de-duplicate them against.
  const pushToolResult = (
    msg: Extract<AgentMessage, { role: "toolResult" }>,
  ) => {
    const resultId = extractToolResultId(msg);
    if (resultId) {
      // Already emitted a result for this id: drop the duplicate.
      if (seenToolResultIds.has(resultId)) {
        return;
      }
      seenToolResultIds.add(resultId);
    }
    out.push(msg);
  };
  for (let i = 0; i < messages.length; i += 1) {
    const msg = messages[i] as AgentMessage;
@@ -298,7 +308,11 @@ export function sanitizeToolUseResultPairing(
    const role = (msg as { role?: unknown }).role;
    if (role !== "assistant") {
      if (role === "toolResult") {
        pushToolResult(msg as Extract<AgentMessage, { role: "toolResult" }>);
      } else {
        out.push(msg);
      }
      continue;
    }
@@ -335,6 +349,9 @@ export function sanitizeToolUseResultPairing(
        >;
        const id = extractToolResultId(toolResult);
        if (id && toolCallIds.has(id)) {
          if (seenToolResultIds.has(id)) {
            continue;
          }
          if (!spanResultsById.has(id)) {
            spanResultsById.set(id, toolResult);
          }
@@ -349,13 +366,24 @@ export function sanitizeToolUseResultPairing(
    for (const call of toolCalls) {
      const existing = spanResultsById.get(call.id);
-      out.push(
+      pushToolResult(
        existing ??
          makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
      );
    }
-    out.push(...remainder);
+    for (const rem of remainder) {
      if (!rem || typeof rem !== "object") {
        out.push(rem);
        continue;
      }
      const remRole = (rem as { role?: unknown }).role;
      if (remRole === "toolResult") {
        pushToolResult(rem as Extract<AgentMessage, { role: "toolResult" }>);
        continue;
      }
      out.push(rem);
    }
    i = j - 1;
  }
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -338,6 +338,11 @@ describeLive("gateway live (dev agent, profile keys)", () => {
              key: sessionKey,
              model: modelKey,
            });
            // Reset between models: avoids cross-provider transcript incompatibilities
            // (notably OpenAI Responses requiring reasoning replay for function_call items).
            await client.request<Record<string, unknown>>("sessions.reset", {
              key: sessionKey,
            });
            // “Meaningful” direct prompt (no tools).
            const runId = randomUUID();