diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index d1e28b7fe..755d6290f 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -12,6 +12,11 @@ import { } from "../auto-reply/thinking.js"; import type { ClawdbotConfig } from "../config/config.js"; import { formatSandboxToolPolicyBlockedMessage } from "./sandbox.js"; +import { + isValidCloudCodeAssistToolId, + sanitizeToolCallId, + sanitizeToolCallIdsForCloudCodeAssist, +} from "./tool-call-id.js"; import { sanitizeContentBlocksImages } from "./tool-images.js"; import type { WorkspaceBootstrapFile } from "./workspace.js"; @@ -90,8 +95,11 @@ export async function sanitizeSessionMessagesImages( ): Promise { // We sanitize historical session messages because Anthropic can reject a request // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX). + const sanitizedIds = options?.sanitizeToolCallIds + ? sanitizeToolCallIdsForCloudCodeAssist(messages) + : messages; const out: AgentMessage[] = []; - for (const msg of messages) { + for (const msg of sanitizedIds) { if (!msg || typeof msg !== "object") { out.push(msg); continue; @@ -105,28 +113,7 @@ export async function sanitizeSessionMessagesImages( content as ContentBlock[], label, )) as unknown as typeof toolMsg.content; - const sanitizedToolCallId = - options?.sanitizeToolCallIds && toolMsg.toolCallId - ? sanitizeToolCallId(toolMsg.toolCallId) - : undefined; - const toolUseId = (toolMsg as { toolUseId?: unknown }).toolUseId; - const sanitizedToolUseId = - options?.sanitizeToolCallIds && - typeof toolUseId === "string" && - toolUseId - ? sanitizeToolCallId(toolUseId) - : undefined; - const sanitizedMsg = { - ...toolMsg, - content: nextContent, - ...(sanitizedToolCallId && { - toolCallId: sanitizedToolCallId, - }), - ...(sanitizedToolUseId && { - toolUseId: sanitizedToolUseId, - }), - }; - out.push(sanitizedMsg); + out.push({ ...toolMsg, content: nextContent }); continue; } @@ -176,33 +163,8 @@ export async function sanitizeSessionMessagesImages( return filteredContent.slice(0, lastToolIndex + 1); })() : filteredContent; - const sanitizedContent = options?.sanitizeToolCallIds - ? await Promise.all( - normalizedContent.map(async (block) => { - if (!block || typeof block !== "object") return block; - - const type = (block as { type?: unknown }).type; - const id = (block as { id?: unknown }).id; - if (typeof id !== "string" || !id) return block; - - // Cloud Code Assist tool blocks require ids matching ^[a-zA-Z0-9_-]+$. - if ( - type === "functionCall" || - type === "toolUse" || - type === "toolCall" - ) { - return { - ...(block as unknown as Record), - id: sanitizeToolCallId(id), - }; - } - - return block; - }), - ) - : normalizedContent; const finalContent = (await sanitizeContentBlocksImages( - sanitizedContent as unknown as ContentBlock[], + normalizedContent as unknown as ContentBlock[], label, )) as unknown as typeof assistantMsg.content; if (finalContent.length === 0) { @@ -621,25 +583,7 @@ export function isMessagingToolDuplicateNormalized( // OpenAI Codex generates IDs like "call_abc123|item_456" with pipe characters, // but Google requires IDs matching ^[a-zA-Z0-9_-]+$ pattern. // This function sanitizes tool call IDs by replacing invalid characters with underscores. - -export function sanitizeToolCallId(id: string): string { - if (!id || typeof id !== "string") return "default_tool_id"; - - const cloudCodeAssistPatternReplacement = id.replace(/[^a-zA-Z0-9_-]/g, "_"); - const trimmedInvalidStartChars = cloudCodeAssistPatternReplacement.replace( - /^[^a-zA-Z0-9_-]+/, - "", - ); - - return trimmedInvalidStartChars.length > 0 - ? trimmedInvalidStartChars - : "sanitized_tool_id"; -} - -export function isValidCloudCodeAssistToolId(id: string): boolean { - if (!id || typeof id !== "string") return false; - return /^[a-zA-Z0-9_-]+$/.test(id); -} +export { sanitizeToolCallId, isValidCloudCodeAssistToolId }; export function isMessagingToolDuplicate( text: string, diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 69b788f8d..67a63ca9c 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -236,6 +236,21 @@ function buildContextPruningExtension(params: { }; } +function buildEmbeddedExtensionPaths(params: { + cfg: ClawdbotConfig | undefined; + sessionManager: SessionManager; + provider: string; + modelId: string; + model: Model | undefined; +}): string[] { + const paths = [resolvePiExtensionPath("transcript-sanitize")]; + const pruning = buildContextPruningExtension(params); + if (pruning.additionalExtensionPaths) { + paths.push(...pruning.additionalExtensionPaths); + } + return paths; +} + export type EmbeddedPiAgentMeta = { sessionId: string; provider: string; @@ -966,14 +981,13 @@ export async function compactEmbeddedPiSession(params: { effectiveWorkspace, agentDir, ); - const pruning = buildContextPruningExtension({ + const additionalExtensionPaths = buildEmbeddedExtensionPaths({ cfg: params.config, sessionManager, provider, modelId, model, }); - const additionalExtensionPaths = pruning.additionalExtensionPaths; const { builtInTools, customTools } = splitSdkTools({ tools, @@ -1355,14 +1369,13 @@ export async function runEmbeddedPiAgent(params: { effectiveWorkspace, agentDir, ); - const pruning = buildContextPruningExtension({ + const additionalExtensionPaths = buildEmbeddedExtensionPaths({ cfg: params.config, sessionManager, provider, modelId, model, }); - const additionalExtensionPaths = pruning.additionalExtensionPaths; const { builtInTools, customTools } = splitSdkTools({ tools, diff --git a/src/agents/pi-extensions/transcript-sanitize.ts b/src/agents/pi-extensions/transcript-sanitize.ts new file mode 100644 index 000000000..fd819bfe7 --- /dev/null +++ b/src/agents/pi-extensions/transcript-sanitize.ts @@ -0,0 +1,35 @@ +/** + * Transcript repair/sanitization extension. + * + * Runs on every context build to prevent strict provider request rejections: + * - duplicate or displaced tool results (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) + * - Cloud Code Assist tool call ID constraints + collision-safe sanitization + */ + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { + ContextEvent, + ExtensionAPI, + ExtensionContext, +} from "@mariozechner/pi-coding-agent"; + +import { isGoogleModelApi } from "../pi-embedded-helpers.js"; +import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js"; +import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js"; + +export default function transcriptSanitizeExtension(api: ExtensionAPI): void { + api.on("context", (event: ContextEvent, ctx: ExtensionContext) => { + let next = event.messages as AgentMessage[]; + + const repairedTools = sanitizeToolUseResultPairing(next); + if (repairedTools !== next) next = repairedTools; + + if (isGoogleModelApi(ctx.model?.api)) { + const repairedIds = sanitizeToolCallIdsForCloudCodeAssist(next); + if (repairedIds !== next) next = repairedIds; + } + + if (next === event.messages) return undefined; + return { messages: next }; + }); +} diff --git a/src/agents/tool-call-id.test.ts b/src/agents/tool-call-id.test.ts new file mode 100644 index 000000000..9215652e8 --- /dev/null +++ b/src/agents/tool-call-id.test.ts @@ -0,0 +1,70 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { describe, expect, it } from "vitest"; + +import { + isValidCloudCodeAssistToolId, + sanitizeToolCallIdsForCloudCodeAssist, +} from "./tool-call-id.js"; + +describe("sanitizeToolCallIdsForCloudCodeAssist", () => { + it("is a no-op for already-valid non-colliding IDs", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "ok" }], + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolCallIdsForCloudCodeAssist(input); + expect(out).toBe(input); + }); + + it("avoids collisions when sanitization would produce duplicate IDs", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_a|b", name: "read", arguments: {} }, + { type: "toolCall", id: "call_a:b", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_a|b", + toolName: "read", + content: [{ type: "text", text: "one" }], + }, + { + role: "toolResult", + toolCallId: "call_a:b", + toolName: "read", + content: [{ type: "text", text: "two" }], + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolCallIdsForCloudCodeAssist(input); + expect(out).not.toBe(input); + + const assistant = out[0] as Extract; + const a = assistant.content?.[0] as { id?: string }; + const b = assistant.content?.[1] as { id?: string }; + expect(typeof a.id).toBe("string"); + expect(typeof b.id).toBe("string"); + expect(a.id).not.toBe(b.id); + expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true); + expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true); + + const r1 = out[1] as Extract; + const r2 = out[2] as Extract; + expect(r1.toolCallId).toBe(a.id); + expect(r2.toolCallId).toBe(b.id); + }); +}); diff --git a/src/agents/tool-call-id.ts b/src/agents/tool-call-id.ts new file mode 100644 index 000000000..e20863f68 --- /dev/null +++ b/src/agents/tool-call-id.ts @@ -0,0 +1,145 @@ +import { createHash } from "node:crypto"; + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +export function sanitizeToolCallId(id: string): string { + if (!id || typeof id !== "string") return "default_tool_id"; + + const cloudCodeAssistPatternReplacement = id.replace(/[^a-zA-Z0-9_-]/g, "_"); + const trimmedInvalidStartChars = cloudCodeAssistPatternReplacement.replace( + /^[^a-zA-Z0-9_-]+/, + "", + ); + + return trimmedInvalidStartChars.length > 0 + ? trimmedInvalidStartChars + : "sanitized_tool_id"; +} + +export function isValidCloudCodeAssistToolId(id: string): boolean { + if (!id || typeof id !== "string") return false; + return /^[a-zA-Z0-9_-]+$/.test(id); +} + +function shortHash(text: string): string { + return createHash("sha1").update(text).digest("hex").slice(0, 8); +} + +function makeUniqueToolId(params: { id: string; used: Set }): string { + const base = sanitizeToolCallId(params.id); + if (!params.used.has(base)) return base; + + const hash = shortHash(params.id); + const maxBaseLen = 64 - 1 - hash.length; + const clippedBase = + base.length > maxBaseLen ? base.slice(0, maxBaseLen) : base; + const candidate = `${clippedBase}_${hash}`; + if (!params.used.has(candidate)) return candidate; + + for (let i = 2; i < 1000; i += 1) { + const next = `${candidate}_${i}`; + if (!params.used.has(next)) return next; + } + + return `${candidate}_${Date.now()}`; +} + +function rewriteAssistantToolCallIds(params: { + message: Extract; + resolve: (id: string) => string; +}): Extract { + const content = params.message.content; + if (!Array.isArray(content)) return params.message; + + let changed = false; + const next = content.map((block) => { + if (!block || typeof block !== "object") return block; + const rec = block as { type?: unknown; id?: unknown }; + const type = rec.type; + const id = rec.id; + if ( + (type !== "functionCall" && type !== "toolUse" && type !== "toolCall") || + typeof id !== "string" || + !id + ) { + return block; + } + const nextId = params.resolve(id); + if (nextId === id) return block; + changed = true; + return { ...(block as unknown as Record), id: nextId }; + }); + + if (!changed) return params.message; + return { ...params.message, content: next as typeof params.message.content }; +} + +function rewriteToolResultIds(params: { + message: Extract; + resolve: (id: string) => string; +}): Extract { + const toolCallId = + typeof params.message.toolCallId === "string" && params.message.toolCallId + ? params.message.toolCallId + : undefined; + const toolUseId = (params.message as { toolUseId?: unknown }).toolUseId; + const toolUseIdStr = + typeof toolUseId === "string" && toolUseId ? toolUseId : undefined; + + const nextToolCallId = toolCallId ? params.resolve(toolCallId) : undefined; + const nextToolUseId = toolUseIdStr ? params.resolve(toolUseIdStr) : undefined; + + if (nextToolCallId === toolCallId && nextToolUseId === toolUseIdStr) { + return params.message; + } + + return { + ...params.message, + ...(nextToolCallId && { toolCallId: nextToolCallId }), + ...(nextToolUseId && { toolUseId: nextToolUseId }), + } as Extract; +} + +export function sanitizeToolCallIdsForCloudCodeAssist( + messages: AgentMessage[], +): AgentMessage[] { + // Cloud Code Assist requires tool IDs matching ^[a-zA-Z0-9_-]+$. + // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `a_b`). + // Fix by applying a stable, transcript-wide mapping and de-duping via suffix. + const map = new Map(); + const used = new Set(); + + const resolve = (id: string) => { + const existing = map.get(id); + if (existing) return existing; + const next = makeUniqueToolId({ id, used }); + map.set(id, next); + used.add(next); + return next; + }; + + let changed = false; + const out = messages.map((msg) => { + if (!msg || typeof msg !== "object") return msg; + const role = (msg as { role?: unknown }).role; + if (role === "assistant") { + const next = rewriteAssistantToolCallIds({ + message: msg as Extract, + resolve, + }); + if (next !== msg) changed = true; + return next; + } + if (role === "toolResult") { + const next = rewriteToolResultIds({ + message: msg as Extract, + resolve, + }); + if (next !== msg) changed = true; + return next; + } + return msg; + }); + + return changed ? out : messages; +}