diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index d75c34bc9..9e5ec9668 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -96,7 +96,6 @@ describeLive("live models (profile keys)", () => { apiKey: apiKeyInfo.apiKey, reasoning: model.reasoning ? "low" : undefined, maxTokens: 128, - temperature: 0, }, ); @@ -136,7 +135,6 @@ describeLive("live models (profile keys)", () => { apiKey: apiKeyInfo.apiKey, reasoning: model.reasoning ? "low" : undefined, maxTokens: 64, - temperature: 0, }, ); @@ -163,7 +161,6 @@ describeLive("live models (profile keys)", () => { apiKey: apiKeyInfo.apiKey, reasoning: model.reasoning ? "low" : undefined, maxTokens: 64, - temperature: 0, }, ); diff --git a/src/agents/pi-embedded-helpers.test.ts b/src/agents/pi-embedded-helpers.test.ts index 42afca9e6..6b6274262 100644 --- a/src/agents/pi-embedded-helpers.test.ts +++ b/src/agents/pi-embedded-helpers.test.ts @@ -14,6 +14,7 @@ import { sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, sanitizeToolCallId, + sanitizeToolUseResultPairing, validateGeminiTurns, } from "./pi-embedded-helpers.js"; import { @@ -539,6 +540,108 @@ describe("sanitizeSessionMessagesImages", () => { expect(toolResult.role).toBe("toolResult"); expect(toolResult.toolCallId).toBe("call_123_fc_456"); }); + + it("drops assistant blocks after a tool call when enforceToolCallLast is enabled", async () => { + const input = [ + { + role: "assistant", + content: [ + { type: "text", text: "before" }, + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "thinking", thinking: "after", thinkingSignature: "sig" }, + { type: "text", text: "after text" }, + ], + }, + ] satisfies AgentMessage[]; + + const out = await sanitizeSessionMessagesImages(input, "test", { + enforceToolCallLast: true, + }); + const assistant = out[0] as { content?: Array<{ type?: string }> }; + expect(assistant.content?.map((b) => 
b.type)).toEqual(["text", "toolCall"]); + }); + + it("keeps assistant blocks after a tool call when enforceToolCallLast is disabled", async () => { + const input = [ + { + role: "assistant", + content: [ + { type: "text", text: "before" }, + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "thinking", thinking: "after", thinkingSignature: "sig" }, + { type: "text", text: "after text" }, + ], + }, + ] satisfies AgentMessage[]; + + const out = await sanitizeSessionMessagesImages(input, "test"); + const assistant = out[0] as { content?: Array<{ type?: string }> }; + expect(assistant.content?.map((b) => b.type)).toEqual([ + "text", + "toolCall", + "thinking", + "text", + ]); + }); +}); + /* Covers re-ordering of displaced tool results, synthetic results for unanswered calls, and removal of duplicate results. */ +describe("sanitizeToolUseResultPairing", () => { + it("moves tool results directly after tool calls and inserts missing results", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "toolCall", id: "call_2", name: "bash", arguments: {} }, + ], + }, + { role: "user", content: "user message that should come after tool use" }, + { + role: "toolResult", + toolCallId: "call_2", + toolName: "bash", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolUseResultPairing(input); + expect(out[0]?.role).toBe("assistant"); + expect(out[1]?.role).toBe("toolResult"); + expect((out[1] as { toolCallId?: string }).toolCallId).toBe("call_1"); + expect(out[2]?.role).toBe("toolResult"); + expect((out[2] as { toolCallId?: string }).toolCallId).toBe("call_2"); + expect(out[3]?.role).toBe("user"); + }); + + it("drops duplicate tool results for the same id within a span", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "first" }], + 
isError: false, + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "second" }], + isError: false, + }, + { role: "user", content: "ok" }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolUseResultPairing(input); + expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1); + }); }); describe("normalizeTextForComparison", () => { diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 850e3cac1..a9e7710d5 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -85,7 +85,7 @@ function isEmptyAssistantErrorMessage( export async function sanitizeSessionMessagesImages( messages: AgentMessage[], label: string, - options?: { sanitizeToolCallIds?: boolean }, + options?: { sanitizeToolCallIds?: boolean; enforceToolCallLast?: boolean }, ): Promise { // We sanitize historical session messages because Anthropic can reject a request // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX). @@ -155,9 +155,29 @@ export async function sanitizeSessionMessagesImages( if (rec.type !== "text" || typeof rec.text !== "string") return true; return rec.text.trim().length > 0; }); + /* With enforceToolCallLast set, keep content only up to and including the last functionCall, toolUse or toolCall block; content without any such block passes through unchanged. */ const normalizedContent = options?.enforceToolCallLast + ? (() => { + let lastToolIndex = -1; + for (let i = filteredContent.length - 1; i >= 0; i -= 1) { + const block = filteredContent[i]; + if (!block || typeof block !== "object") continue; + const type = (block as { type?: unknown }).type; + if ( + type === "functionCall" || + type === "toolUse" || + type === "toolCall" + ) { + lastToolIndex = i; + break; + } + } + if (lastToolIndex === -1) return filteredContent; + return filteredContent.slice(0, lastToolIndex + 1); + })() + : filteredContent; const sanitizedContent = options?.sanitizeToolCallIds ? 
await Promise.all( - filteredContent.map(async (block) => { + normalizedContent.map(async (block) => { if (!block || typeof block !== "object") return block; const type = (block as { type?: unknown }).type; @@ -179,7 +199,7 @@ export async function sanitizeSessionMessagesImages( return block; }), ) - : filteredContent; + : normalizedContent; const finalContent = (await sanitizeContentBlocksImages( sanitizedContent as unknown as ContentBlock[], label, @@ -197,6 +217,150 @@ export async function sanitizeSessionMessagesImages( return out; } +/** Id and optional name of one tool-call block taken from an assistant message. */ type ToolCallLike = { + id: string; + name?: string; +}; + /** Lists the toolCall, toolUse and functionCall blocks of an assistant message, in content order. Blocks without a non-empty string id are skipped, and non-array content yields an empty list. NOTE(review): Extract is written without type arguments in this signature, unlike the toolResult cast inside sanitizeToolUseResultPairing; confirm the annotation was not truncated. */ +function extractToolCallsFromAssistant( + msg: Extract, +): ToolCallLike[] { + const content = msg.content; + if (!Array.isArray(content)) return []; + + const toolCalls: ToolCallLike[] = []; + for (const block of content) { + if (!block || typeof block !== "object") continue; + const rec = block as { type?: unknown; id?: unknown; name?: unknown }; + if (typeof rec.id !== "string" || !rec.id) continue; + + if ( + rec.type === "toolCall" || + rec.type === "toolUse" || + rec.type === "functionCall" + ) { + toolCalls.push({ + id: rec.id, + name: typeof rec.name === "string" ? rec.name : undefined, + }); + } + } + return toolCalls; +} + /** Returns the non-empty string toolCallId of a tool result, falling back to toolUseId, or null when neither is set. */ +function extractToolResultId( + msg: Extract, +): string | null { + const toolCallId = (msg as { toolCallId?: unknown }).toolCallId; + if (typeof toolCallId === "string" && toolCallId) return toolCallId; + const toolUseId = (msg as { toolUseId?: unknown }).toolUseId; + if (typeof toolUseId === "string" && toolUseId) return toolUseId; + return null; +} + /** Builds a placeholder toolResult, flagged isError and stamped with Date.now(), for a tool call that has no recorded result; toolName falls back to unknown. */ +function makeMissingToolResult(params: { + toolCallId: string; + toolName?: string; +}): Extract { + return { + role: "toolResult", + toolCallId: params.toolCallId, + toolName: params.toolName ?? 
"unknown", + content: [ + { + type: "text", + text: "[clawdbot] missing tool result in session history; inserted synthetic error result for transcript repair.", + }, + ], + isError: true, + timestamp: Date.now(), + } as Extract; +} + /** Returns a new message list in which each assistant message that contains tool calls is followed directly by one toolResult per call, in call order. Only the messages up to the next assistant message are searched for results; the input array is not modified. */ +export function sanitizeToolUseResultPairing( + messages: AgentMessage[], +): AgentMessage[] { + // Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not + // immediately followed by matching tool results. Session files can end up with results + // displaced (e.g. after user turns) or duplicated. Repair by: + // - moving matching toolResult messages directly after their assistant toolCall turn + // - inserting synthetic error toolResults for missing ids + // - dropping duplicate toolResults for the same id within the span + const out: AgentMessage[] = []; + + for (let i = 0; i < messages.length; i += 1) { + const msg = messages[i] as AgentMessage; + if (!msg || typeof msg !== "object") { + out.push(msg); + continue; + } + + const role = (msg as { role?: unknown }).role; + if (role !== "assistant") { + out.push(msg); + continue; + } + + const assistant = msg as Extract; + const toolCalls = extractToolCallsFromAssistant(assistant); + if (toolCalls.length === 0) { + out.push(msg); + continue; + } + + const toolCallIds = new Set(toolCalls.map((t) => t.id)); + + const spanResultsById = new Map< + string, + Extract + >(); + const remainder: AgentMessage[] = []; + + let j = i + 1; + for (; j < messages.length; j += 1) { + const next = messages[j] as AgentMessage; + if (!next || typeof next !== "object") { + remainder.push(next); + continue; + } + + const nextRole = (next as { role?: unknown }).role; + if (nextRole === "assistant") break; /* the search span ends at the next assistant message */ + + if (nextRole === "toolResult") { + const toolResult = next as Extract< + AgentMessage, + { role: "toolResult" } + >; + const id = extractToolResultId(toolResult); + if (id && toolCallIds.has(id)) { + /* first result per id wins; later duplicates are dropped by the continue below */ if (!spanResultsById.has(id)) { + spanResultsById.set(id, toolResult); 
+ } + continue; + } + } + + /* anything else, including tool results whose id matches no call of this turn, keeps its relative order */ remainder.push(next); + } + + /* emit the assistant turn, then one recorded or synthetic result per call, then the rest of the span */ out.push(msg); + + for (const call of toolCalls) { + const existing = spanResultsById.get(call.id); + out.push( + existing ?? + makeMissingToolResult({ toolCallId: call.id, toolName: call.name }), + ); + } + + out.push(...remainder); + /* resume the outer loop at index j, the message that ended the scan */ i = j - 1; + } + + return out; +} + const GOOGLE_TURN_ORDER_BOOTSTRAP_TEXT = "(session bootstrap)"; export function isGoogleModelApi(api?: string | null): boolean { diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 55c9219b2..6c38b4eba 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -88,6 +88,7 @@ import { pickFallbackThinkingLevel, sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, + sanitizeToolUseResultPairing, validateGeminiTurns, } from "./pi-embedded-helpers.js"; import { @@ -379,10 +380,14 @@ async function sanitizeSessionHistory(params: { const sanitizedImages = await sanitizeSessionMessagesImages( params.messages, "session:history", - { sanitizeToolCallIds: isGoogleModelApi(params.modelApi) }, + { + sanitizeToolCallIds: isGoogleModelApi(params.modelApi), + enforceToolCallLast: params.modelApi === "anthropic-messages", + }, ); + /* repair tool call and result pairing before the Google turn-ordering fix is applied */ const repairedTools = sanitizeToolUseResultPairing(sanitizedImages); return applyGoogleTurnOrderingFix({ - messages: sanitizedImages, + messages: repairedTools, modelApi: params.modelApi, sessionManager: params.sessionManager, sessionId: params.sessionId,