fix: repair tool-use history for anthropic

2026-01-10 19:15:52 +00:00
parent c409edd3fa
commit 212b13b099
4 changed files with 277 additions and 8 deletions
--- a/src/agents/models.profiles.live.test.ts
+++ b/src/agents/models.profiles.live.test.ts
@@ -96,7 +96,6 @@ describeLive("live models (profile keys)", () => {
                apiKey: apiKeyInfo.apiKey,
                reasoning: model.reasoning ? "low" : undefined,
                maxTokens: 128,
                temperature: 0,
              },
            );
@@ -136,7 +135,6 @@ describeLive("live models (profile keys)", () => {
                apiKey: apiKeyInfo.apiKey,
                reasoning: model.reasoning ? "low" : undefined,
                maxTokens: 64,
                temperature: 0,
              },
            );
@@ -163,7 +161,6 @@ describeLive("live models (profile keys)", () => {
              apiKey: apiKeyInfo.apiKey,
              reasoning: model.reasoning ? "low" : undefined,
              maxTokens: 64,
              temperature: 0,
            },
          );
--- a/src/agents/pi-embedded-helpers.test.ts
+++ b/src/agents/pi-embedded-helpers.test.ts
@@ -14,6 +14,7 @@ import {
  sanitizeGoogleTurnOrdering,
  sanitizeSessionMessagesImages,
  sanitizeToolCallId,
  sanitizeToolUseResultPairing,
  validateGeminiTurns,
 } from "./pi-embedded-helpers.js";
 import {
@@ -539,6 +540,108 @@ describe("sanitizeSessionMessagesImages", () => {
    expect(toolResult.role).toBe("toolResult");
    expect(toolResult.toolCallId).toBe("call_123_fc_456");
  });
  it("drops assistant blocks after a tool call when enforceToolCallLast is enabled", async () => {
    // A single assistant turn whose tool call is followed by a thinking block
    // and a text block; with the option on, only blocks up to the call survive.
    const history = [
      {
        role: "assistant",
        content: [
          { type: "text", text: "before" },
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
          { type: "thinking", thinking: "after", thinkingSignature: "sig" },
          { type: "text", text: "after text" },
        ],
      },
    ] satisfies AgentMessage[];

    const [sanitizedTurn] = await sanitizeSessionMessagesImages(
      history,
      "test",
      { enforceToolCallLast: true },
    );

    const blockTypes = (
      sanitizedTurn as { content?: Array<{ type?: string }> }
    ).content?.map((block) => block.type);
    expect(blockTypes).toEqual(["text", "toolCall"]);
  });
  it("keeps assistant blocks after a tool call when enforceToolCallLast is disabled", async () => {
    // Same transcript as the enabled case, but no options are passed, so the
    // blocks trailing the tool call must be left in place.
    const history = [
      {
        role: "assistant",
        content: [
          { type: "text", text: "before" },
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
          { type: "thinking", thinking: "after", thinkingSignature: "sig" },
          { type: "text", text: "after text" },
        ],
      },
    ] satisfies AgentMessage[];

    const [sanitizedTurn] = await sanitizeSessionMessagesImages(
      history,
      "test",
    );

    const blockTypes = (
      sanitizedTurn as { content?: Array<{ type?: string }> }
    ).content?.map((block) => block.type);
    const expectedTypes = ["text", "toolCall", "thinking", "text"];
    expect(blockTypes).toEqual(expectedTypes);
  });
 });
 describe("sanitizeToolUseResultPairing", () => {
  it("moves tool results directly after tool calls and inserts missing results", () => {
    // Two tool calls; only call_2 has a result, and it sits after a user turn.
    const history = [
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
          { type: "toolCall", id: "call_2", name: "bash", arguments: {} },
        ],
      },
      { role: "user", content: "user message that should come after tool use" },
      {
        role: "toolResult",
        toolCallId: "call_2",
        toolName: "bash",
        content: [{ type: "text", text: "ok" }],
        isError: false,
      },
    ] satisfies AgentMessage[];

    const repaired = sanitizeToolUseResultPairing(history);
    const [first, second, third, fourth] = repaired;

    // Expected order: assistant, result for call_1 (synthetic), result for
    // call_2 (moved forward), then the user turn.
    expect(first?.role).toBe("assistant");
    expect(second?.role).toBe("toolResult");
    expect((second as { toolCallId?: string }).toolCallId).toBe("call_1");
    expect(third?.role).toBe("toolResult");
    expect((third as { toolCallId?: string }).toolCallId).toBe("call_2");
    expect(fourth?.role).toBe("user");
  });

  it("drops duplicate tool results for the same id within a span", () => {
    // The same tool call id is answered twice before the next user turn.
    const history = [
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "first" }],
        isError: false,
      },
      {
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "second" }],
        isError: false,
      },
      { role: "user", content: "ok" },
    ] satisfies AgentMessage[];

    const repaired = sanitizeToolUseResultPairing(history);
    const toolResults = repaired.filter(
      (message) => message.role === "toolResult",
    );

    expect(toolResults).toHaveLength(1);
  });
 });
 describe("normalizeTextForComparison", () => {
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -85,7 +85,7 @@ function isEmptyAssistantErrorMessage(
 export async function sanitizeSessionMessagesImages(
  messages: AgentMessage[],
  label: string,
-  options?: { sanitizeToolCallIds?: boolean },
+  options?: { sanitizeToolCallIds?: boolean; enforceToolCallLast?: boolean },
 ): Promise<AgentMessage[]> {
  // We sanitize historical session messages because Anthropic can reject a request
  // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
@@ -155,9 +155,29 @@ export async function sanitizeSessionMessagesImages(
          if (rec.type !== "text" || typeof rec.text !== "string") return true;
          return rec.text.trim().length > 0;
        });
        const normalizedContent = options?.enforceToolCallLast
          ? (() => {
              let lastToolIndex = -1;
              for (let i = filteredContent.length - 1; i >= 0; i -= 1) {
                const block = filteredContent[i];
                if (!block || typeof block !== "object") continue;
                const type = (block as { type?: unknown }).type;
                if (
                  type === "functionCall" ||
                  type === "toolUse" ||
                  type === "toolCall"
                ) {
                  lastToolIndex = i;
                  break;
                }
              }
              if (lastToolIndex === -1) return filteredContent;
              return filteredContent.slice(0, lastToolIndex + 1);
            })()
          : filteredContent;
        const sanitizedContent = options?.sanitizeToolCallIds
          ? await Promise.all(
-              filteredContent.map(async (block) => {
+              normalizedContent.map(async (block) => {
                if (!block || typeof block !== "object") return block;
                const type = (block as { type?: unknown }).type;
@@ -179,7 +199,7 @@ export async function sanitizeSessionMessagesImages(
                return block;
              }),
            )
-          : filteredContent;
+          : normalizedContent;
        const finalContent = (await sanitizeContentBlocksImages(
          sanitizedContent as unknown as ContentBlock[],
          label,
@@ -197,6 +217,150 @@ export async function sanitizeSessionMessagesImages(
  return out;
 }
 /**
  * Minimal view of an assistant tool-call block, as produced by
  * `extractToolCallsFromAssistant`.
  */
 type ToolCallLike = {
  /** Tool call id; the extractor only emits entries whose id is a non-empty string. */
  id: string;
  /** Tool name when the block carried a string `name`; otherwise undefined. */
  name?: string;
 };
 /**
  * Collects the tool-call blocks of an assistant message, in content order.
  *
  * A block is kept when it is an object with a non-empty string `id` and a
  * `type` of `toolCall`, `toolUse`, or `functionCall`. A non-string `name` is
  * reported as `undefined`.
  *
  * @param msg - Assistant message whose `content` is scanned.
  * @returns One entry per matching block; empty when `content` is not an array.
  */
 function extractToolCallsFromAssistant(
  msg: Extract<AgentMessage, { role: "assistant" }>,
 ): ToolCallLike[] {
  const blocks = msg.content;
  if (!Array.isArray(blocks)) return [];
  return blocks.flatMap((entry): ToolCallLike[] => {
    if (!entry || typeof entry !== "object") return [];
    const candidate = entry as {
      type?: unknown;
      id?: unknown;
      name?: unknown;
    };
    // Blocks without a usable id can never be paired with a result.
    const id = candidate.id;
    if (typeof id !== "string" || id.length === 0) return [];
    switch (candidate.type) {
      case "toolCall":
      case "toolUse":
      case "functionCall":
        return [
          {
            id,
            name:
              typeof candidate.name === "string" ? candidate.name : undefined,
          },
        ];
      default:
        return [];
    }
  });
 }
 /**
  * Reads the tool call id a `toolResult` message refers to.
  *
  * `toolCallId` is preferred; `toolUseId` is consulted only when `toolCallId`
  * is not a non-empty string.
  *
  * @param msg - Tool result message to inspect.
  * @returns The first non-empty string id found, or `null` when neither field
  *   holds one.
  */
 function extractToolResultId(
  msg: Extract<AgentMessage, { role: "toolResult" }>,
 ): string | null {
  const fields = msg as { toolCallId?: unknown; toolUseId?: unknown };
  // Order matters: the first key with a usable value wins.
  for (const key of ["toolCallId", "toolUseId"] as const) {
    const value = fields[key];
    if (typeof value === "string" && value.length > 0) return value;
  }
  return null;
 }
 /**
  * Builds a placeholder error `toolResult` for a tool call that has no result
  * in the session history.
  *
  * @param params.toolCallId - Id of the tool call the placeholder answers.
  * @param params.toolName - Tool name to record; falls back to `"unknown"`.
  * @returns A `toolResult` message flagged `isError: true`, carrying a fixed
  *   explanatory text block and a `timestamp` taken from `Date.now()`.
  */
 function makeMissingToolResult(params: {
  toolCallId: string;
  toolName?: string;
 }): Extract<AgentMessage, { role: "toolResult" }> {
  const notice =
    "[clawdbot] missing tool result in session history; inserted synthetic error result for transcript repair.";
  const placeholder = {
    role: "toolResult",
    toolCallId: params.toolCallId,
    toolName: params.toolName ?? "unknown",
    content: [{ type: "text", text: notice }],
    isError: true,
    timestamp: Date.now(),
  };
  return placeholder as Extract<AgentMessage, { role: "toolResult" }>;
 }
 /**
  * Repairs the pairing between assistant tool calls and `toolResult` messages.
  *
  * Anthropic (and Cloud Code Assist) reject transcripts where assistant tool
  * calls are not immediately followed by matching tool results. Session files
  * can end up with results displaced (e.g. after user turns), duplicated, or
  * left without any matching call. For every assistant turn that contains tool
  * calls, the span up to the next assistant turn is rewritten as:
  *
  * 1. the assistant message;
  * 2. one `toolResult` per tool call, in call order: the first matching result
  *    found in the span, or a synthetic error result when there is none;
  * 3. every non-`toolResult` message of the span, in its original order.
  *
  * `toolResult` messages that cannot be paired are dropped: duplicates for an
  * id already answered, results whose id is missing or matches no call of the
  * span, and results that appear outside any tool-call span. Keeping them would
  * leave a tool result without a directly preceding matching tool call, which
  * breaks the same pairing rule this repair exists to restore.
  *
  * The input array is not modified; retained messages are reused by reference.
  *
  * @param messages - Session history to repair.
  * @returns A new array containing the repaired history.
  */
 export function sanitizeToolUseResultPairing(
  messages: AgentMessage[],
 ): AgentMessage[] {
  const out: AgentMessage[] = [];
  for (let i = 0; i < messages.length; i += 1) {
    const msg = messages[i] as AgentMessage;
    if (!msg || typeof msg !== "object") {
      out.push(msg);
      continue;
    }
    const role = (msg as { role?: unknown }).role;
    if (role === "toolResult") {
      // Results belonging to a tool-call span are consumed by the inner scan
      // below, so a toolResult reaching this point has no assistant tool call
      // directly in front of it. Drop the orphan.
      continue;
    }
    if (role !== "assistant") {
      out.push(msg);
      continue;
    }
    const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
    const toolCalls = extractToolCallsFromAssistant(assistant);
    if (toolCalls.length === 0) {
      out.push(msg);
      continue;
    }
    const toolCallIds = new Set(toolCalls.map((t) => t.id));
    const spanResultsById = new Map<
      string,
      Extract<AgentMessage, { role: "toolResult" }>
    >();
    const remainder: AgentMessage[] = [];
    // Scan the span that runs from this assistant turn up to (not including)
    // the next assistant turn.
    let j = i + 1;
    for (; j < messages.length; j += 1) {
      const next = messages[j] as AgentMessage;
      if (!next || typeof next !== "object") {
        remainder.push(next);
        continue;
      }
      const nextRole = (next as { role?: unknown }).role;
      if (nextRole === "assistant") break;
      if (nextRole !== "toolResult") {
        remainder.push(next);
        continue;
      }
      const toolResult = next as Extract<AgentMessage, { role: "toolResult" }>;
      const id = extractToolResultId(toolResult);
      // Keep only the first result for each id this turn actually called.
      // Duplicates and results for unknown or missing ids are dropped rather
      // than re-emitted after the span.
      if (id && toolCallIds.has(id) && !spanResultsById.has(id)) {
        spanResultsById.set(id, toolResult);
      }
    }
    out.push(msg);
    for (const call of toolCalls) {
      const existing = spanResultsById.get(call.id);
      out.push(
        existing ??
          makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
      );
    }
    out.push(...remainder);
    // Resume the outer loop at the assistant turn that ended the span (the
    // loop increment moves i back to j).
    i = j - 1;
  }
  return out;
 }
 const GOOGLE_TURN_ORDER_BOOTSTRAP_TEXT = "(session bootstrap)";
 export function isGoogleModelApi(api?: string | null): boolean {
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -88,6 +88,7 @@ import {
  pickFallbackThinkingLevel,
  sanitizeGoogleTurnOrdering,
  sanitizeSessionMessagesImages,
  sanitizeToolUseResultPairing,
  validateGeminiTurns,
 } from "./pi-embedded-helpers.js";
 import {
@@ -379,10 +380,14 @@ async function sanitizeSessionHistory(params: {
  const sanitizedImages = await sanitizeSessionMessagesImages(
    params.messages,
    "session:history",
-    { sanitizeToolCallIds: isGoogleModelApi(params.modelApi) },
+    {
      sanitizeToolCallIds: isGoogleModelApi(params.modelApi),
      enforceToolCallLast: params.modelApi === "anthropic-messages",
    },
  );
  const repairedTools = sanitizeToolUseResultPairing(sanitizedImages);
  return applyGoogleTurnOrderingFix({
-    messages: sanitizedImages,
+    messages: repairedTools,
    modelApi: params.modelApi,
    sessionManager: params.sessionManager,
    sessionId: params.sessionId,