From db0235a26a2eae4d20af94137d0a28bb6abd558c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 23 Jan 2026 00:28:41 +0000 Subject: [PATCH] fix: gate transcript sanitization by provider --- CHANGELOG.md | 2 +- ...ool-call-tool-result-ids-unchanged.test.ts | 43 ------- ...ssistant-text-blocks-but-preserves.test.ts | 34 ----- ...mbedded-helpers.sanitizetoolcallid.test.ts | 14 +-- src/agents/pi-embedded-helpers/images.ts | 21 +++- ...ed-runner.sanitize-session-history.test.ts | 38 ++++-- src/agents/pi-embedded-runner/compact.ts | 17 ++- src/agents/pi-embedded-runner/google.ts | 70 ++++------- src/agents/pi-embedded-runner/run/attempt.ts | 18 ++- .../pi-extensions/transcript-sanitize.ts | 21 +++- .../session-tool-result-guard-wrapper.ts | 9 +- src/agents/session-tool-result-guard.ts | 51 +++++--- src/agents/tool-call-id.test.ts | 22 ++-- src/agents/tool-call-id.ts | 42 ++----- src/agents/transcript-policy.ts | 117 ++++++++++++++++++ 15 files changed, 307 insertions(+), 212 deletions(-) create mode 100644 src/agents/transcript-policy.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ef0efa6..6240b42b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ Docs: https://docs.clawd.bot - Control UI: resolve local avatar URLs with basePath across injection + identity RPC. (#1457) Thanks @dlauer. - Agents: surface concrete API error details instead of generic AI service errors. - Exec approvals: allow per-segment allowlists for chained shell commands on gateway + node hosts. (#1458) Thanks @czekaj. -- Agents: avoid sanitizing tool call IDs for OpenAI responses to preserve Pi pairing. +- Agents: make OpenAI sessions image-sanitize-only; gate tool-id/repair sanitization by provider. - Docs: fix gog auth services example to include docs scope. (#1454) Thanks @zerone0x. - macOS: prefer linked channels in gateway summary to avoid false “not linked” status. diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts index 377bc53fe..29bb6244c 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts @@ -43,49 +43,6 @@ describe("sanitizeSessionMessagesImages", () => { expect(toolResult.toolCallId).toBe("call_123|fc_456"); }); - it("sanitizes tool call + tool result IDs in standard mode (preserves underscores)", async () => { - const input = [ - { - role: "assistant", - content: [ - { - type: "toolCall", - id: "call_123|fc_456", - name: "read", - arguments: { path: "package.json" }, - }, - ], - }, - { - role: "toolResult", - toolCallId: "call_123|fc_456", - toolName: "read", - content: [{ type: "text", text: "ok" }], - isError: false, - }, - ] satisfies AgentMessage[]; - - const out = await sanitizeSessionMessagesImages(input, "test", { - sanitizeToolCallIds: true, - }); - - const assistant = out[0] as unknown as { role?: string; content?: unknown }; - expect(assistant.role).toBe("assistant"); - expect(Array.isArray(assistant.content)).toBe(true); - const toolCall = (assistant.content as Array<{ type?: string; id?: string }>).find( - (b) => b.type === "toolCall", - ); - // Standard mode preserves underscores for readability, replaces invalid chars - expect(toolCall?.id).toBe("call_123_fc_456"); - - const toolResult = out[1] as unknown as { - role?: string; - toolCallId?: string; - }; - expect(toolResult.role).toBe("toolResult"); - expect(toolResult.toolCallId).toBe("call_123_fc_456"); - }); - it("sanitizes tool call + tool result IDs in strict mode (alphanumeric only)", async () => { const input = [ { diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts index 6c12dfacf..25e6e94e9 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts @@ -23,40 +23,6 @@ describe("sanitizeSessionMessagesImages", () => { expect((content as Array<{ type?: string }>)[0]?.type).toBe("toolCall"); }); - it("sanitizes tool ids in standard mode (preserves underscores)", async () => { - const input = [ - { - role: "assistant", - content: [ - { type: "toolUse", id: "call_abc|item:123", name: "test", input: {} }, - { - type: "toolCall", - id: "call_abc|item:456", - name: "exec", - arguments: {}, - }, - ], - }, - { - role: "toolResult", - toolUseId: "call_abc|item:123", - content: [{ type: "text", text: "ok" }], - }, - ] satisfies AgentMessage[]; - - const out = await sanitizeSessionMessagesImages(input, "test", { - sanitizeToolCallIds: true, - }); - - // Standard mode preserves underscores for readability - const assistant = out[0] as { content?: Array<{ id?: string }> }; - expect(assistant.content?.[0]?.id).toBe("call_abc_item_123"); - expect(assistant.content?.[1]?.id).toBe("call_abc_item_456"); - - const toolResult = out[1] as { toolUseId?: string }; - expect(toolResult.toolUseId).toBe("call_abc_item_123"); - }); - it("sanitizes tool ids in strict mode (alphanumeric only)", async () => { const input = [ { diff --git a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts index 9569afbf3..71256a71d 100644 --- a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts @@ -2,19 +2,19 @@ import { describe, expect, it } from "vitest"; import { sanitizeToolCallId } from "./pi-embedded-helpers.js"; describe("sanitizeToolCallId", () => { - describe("standard mode (default)", () => { + describe("strict mode (default)", () => { it("keeps valid alphanumeric tool call IDs", () => { expect(sanitizeToolCallId("callabc123")).toBe("callabc123"); }); - it("keeps underscores and hyphens for readability", () => { - expect(sanitizeToolCallId("call_abc-123")).toBe("call_abc-123"); - expect(sanitizeToolCallId("call_abc_def")).toBe("call_abc_def"); + it("strips underscores and hyphens", () => { + expect(sanitizeToolCallId("call_abc-123")).toBe("callabc123"); + expect(sanitizeToolCallId("call_abc_def")).toBe("callabcdef"); }); - it("replaces invalid characters with underscores", () => { - expect(sanitizeToolCallId("call_abc|item:456")).toBe("call_abc_item_456"); + it("strips invalid characters", () => { + expect(sanitizeToolCallId("call_abc|item:456")).toBe("callabcitem456"); }); it("returns default for empty IDs", () => { - expect(sanitizeToolCallId("")).toBe("default_tool_id"); + expect(sanitizeToolCallId("")).toBe("defaulttoolid"); }); }); diff --git a/src/agents/pi-embedded-helpers/images.ts b/src/agents/pi-embedded-helpers/images.ts index 13376e68c..6711621d6 100644 --- a/src/agents/pi-embedded-helpers/images.ts +++ b/src/agents/pi-embedded-helpers/images.ts @@ -32,10 +32,10 @@ export async function sanitizeSessionMessagesImages( messages: AgentMessage[], label: string, options?: { + sanitizeMode?: "full" | "images-only"; sanitizeToolCallIds?: boolean; /** * Mode for tool call ID sanitization: - * - "standard" (default, preserves _-) * - "strict" (alphanumeric only) * - "strict9" (alphanumeric only, length 9) */ @@ -48,11 +48,14 @@ export async function sanitizeSessionMessagesImages( }; }, ): Promise { + const sanitizeMode = options?.sanitizeMode ?? "full"; + const allowNonImageSanitization = sanitizeMode === "full"; // We sanitize historical session messages because Anthropic can reject a request // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX). - const sanitizedIds = options?.sanitizeToolCallIds - ? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode) - : messages; + const sanitizedIds = + allowNonImageSanitization && options?.sanitizeToolCallIds + ? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode) + : messages; const out: AgentMessage[] = []; for (const msg of sanitizedIds) { if (!msg || typeof msg !== "object") { @@ -87,11 +90,19 @@ export async function sanitizeSessionMessagesImages( if (role === "assistant") { const assistantMsg = msg as Extract; - if (isEmptyAssistantErrorMessage(assistantMsg)) { + if (allowNonImageSanitization && isEmptyAssistantErrorMessage(assistantMsg)) { continue; } const content = assistantMsg.content; if (Array.isArray(content)) { + if (!allowNonImageSanitization) { + const nextContent = (await sanitizeContentBlocksImages( + content as unknown as ContentBlock[], + label, + )) as unknown as typeof assistantMsg.content; + out.push({ ...assistantMsg, content: nextContent }); + continue; + } const strippedContent = options?.preserveSignatures ? content // Keep signatures for Antigravity Claude : stripThoughtSignatures(content, options?.sanitizeThoughtSignatures); // Strip for Gemini diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 7d812985b..26bc4ef96 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -40,17 +40,16 @@ describe("sanitizeSessionHistory", () => { await sanitizeSessionHistory({ messages: mockMessages, - modelApi: "google-gemini", + modelApi: "google-generative-ai", provider: "google-vertex", sessionManager: mockSessionManager, sessionId: "test-session", }); - expect(helpers.isGoogleModelApi).toHaveBeenCalledWith("google-gemini"); expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith( mockMessages, "session:history", - expect.objectContaining({ sanitizeToolCallIds: true }), + expect.objectContaining({ sanitizeMode: "full", sanitizeToolCallIds: true }), ); }); @@ -69,7 +68,11 @@ describe("sanitizeSessionHistory", () => { expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith( mockMessages, "session:history", - expect.objectContaining({ sanitizeToolCallIds: true, toolCallIdMode: "strict9" }), + expect.objectContaining({ + sanitizeMode: "full", + sanitizeToolCallIds: true, + toolCallIdMode: "strict9", + }), ); }); @@ -84,11 +87,10 @@ describe("sanitizeSessionHistory", () => { sessionId: "test-session", }); - expect(helpers.isGoogleModelApi).toHaveBeenCalledWith("anthropic-messages"); expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith( mockMessages, "session:history", - expect.objectContaining({ sanitizeToolCallIds: false }), + expect.objectContaining({ sanitizeMode: "full", sanitizeToolCallIds: false }), ); }); @@ -103,11 +105,10 @@ describe("sanitizeSessionHistory", () => { sessionId: "test-session", }); - expect(helpers.isGoogleModelApi).toHaveBeenCalledWith("openai-responses"); expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith( mockMessages, "session:history", - expect.objectContaining({ sanitizeToolCallIds: false }), + expect.objectContaining({ sanitizeMode: "images-only", sanitizeToolCallIds: false }), ); }); @@ -137,8 +138,27 @@ describe("sanitizeSessionHistory", () => { sessionId: "test-session", }); - expect(helpers.isGoogleModelApi).toHaveBeenCalledWith("openai-responses"); expect(result).toHaveLength(2); expect(result[1]?.role).toBe("assistant"); }); + + it("does not synthesize tool results for openai-responses", async () => { + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + }, + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + sessionManager: mockSessionManager, + sessionId: "test-session", + }); + + expect(result).toHaveLength(1); + expect(result[0]?.role).toBe("assistant"); + }); }); diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 53b0ae8c0..95e484db3 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -40,6 +40,7 @@ import { import { createClawdbotCodingTools } from "../pi-tools.js"; import { resolveSandboxContext } from "../sandbox.js"; import { guardSessionManager } from "../session-tool-result-guard-wrapper.js"; +import { resolveTranscriptPolicy } from "../transcript-policy.js"; import { acquireSessionWriteLock } from "../session-write-lock.js"; import { applySkillEnvOverrides, @@ -315,9 +316,16 @@ export async function compactEmbeddedPiSession(params: { }); try { await prewarmSessionFile(params.sessionFile); + const transcriptPolicy = resolveTranscriptPolicy({ + modelApi: model.api, + provider, + modelId, + }); const sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), { agentId: sessionAgentId, sessionKey: params.sessionKey, + allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, + stripFinalTags: transcriptPolicy.stripFinalTags, }); trackSessionManagerAccess(params.sessionFile); const settingsManager = SettingsManager.create(effectiveWorkspace, agentDir); @@ -364,9 +372,14 @@ export async function compactEmbeddedPiSession(params: { provider, sessionManager, sessionId: params.sessionId, + policy: transcriptPolicy, }); - const validatedGemini = validateGeminiTurns(prior); - const validated = validateAnthropicTurns(validatedGemini); + const validatedGemini = transcriptPolicy.validateGeminiTurns + ? validateGeminiTurns(prior) + : prior; + const validated = transcriptPolicy.validateAnthropicTurns + ? validateAnthropicTurns(validatedGemini) + : validatedGemini; const limited = limitHistoryTurns( validated, getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 41f776568..f18f97e0a 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -12,10 +12,9 @@ import { import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js"; import { log } from "./logger.js"; import { describeUnknownError } from "./utils.js"; -import { isAntigravityClaude } from "../pi-embedded-helpers/google.js"; import { cleanToolSchemaForGemini } from "../pi-tools.schema.js"; -import { normalizeProviderId } from "../model-selection.js"; -import type { ToolCallIdMode } from "../tool-call-id.js"; +import type { TranscriptPolicy } from "../transcript-policy.js"; +import { resolveTranscriptPolicy } from "../transcript-policy.js"; const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap"; const GOOGLE_SCHEMA_UNSUPPORTED_KEYWORDS = new Set([ @@ -40,15 +39,6 @@ const GOOGLE_SCHEMA_UNSUPPORTED_KEYWORDS = new Set([ "minProperties", "maxProperties", ]); -const MISTRAL_MODEL_HINTS = [ - "mistral", - "mixtral", - "codestral", - "pixtral", - "devstral", - "ministral", - "mistralai", -]; const ANTIGRAVITY_SIGNATURE_RE = /^[A-Za-z0-9+/]+={0,2}$/; function isValidAntigravitySignature(value: unknown): value is string { @@ -59,19 +49,6 @@ function isValidAntigravitySignature(value: unknown): value is string { return ANTIGRAVITY_SIGNATURE_RE.test(trimmed); } -function shouldSanitizeToolCallIds(modelApi?: string | null): boolean { - if (!modelApi) return false; - return isGoogleModelApi(modelApi); -} - -function isMistralModel(params: { provider?: string | null; modelId?: string | null }): boolean { - const provider = normalizeProviderId(params.provider ?? ""); - if (provider === "mistral") return true; - const modelId = (params.modelId ?? "").toLowerCase(); - if (!modelId) return false; - return MISTRAL_MODEL_HINTS.some((hint) => modelId.includes(hint)); -} - function sanitizeAntigravityThinkingBlocks(messages: AgentMessage[]): AgentMessage[] { let touched = false; const out: AgentMessage[] = []; @@ -271,32 +248,33 @@ export async function sanitizeSessionHistory(params: { provider?: string; sessionManager: SessionManager; sessionId: string; + policy?: TranscriptPolicy; }): Promise { - const isAntigravityClaudeModel = isAntigravityClaude({ - api: params.modelApi, - provider: params.provider, - modelId: params.modelId, - }); - const provider = normalizeProviderId(params.provider ?? ""); - const modelId = (params.modelId ?? "").toLowerCase(); - const isOpenRouterGemini = - (provider === "openrouter" || provider === "opencode") && modelId.includes("gemini"); - const isMistral = isMistralModel({ provider, modelId }); - const toolCallIdMode: ToolCallIdMode | undefined = isMistral ? "strict9" : undefined; - const sanitizeToolCallIds = shouldSanitizeToolCallIds(params.modelApi) || isMistral; + const policy = + params.policy ?? + resolveTranscriptPolicy({ + modelApi: params.modelApi, + provider: params.provider, + modelId: params.modelId, + }); const sanitizedImages = await sanitizeSessionMessagesImages(params.messages, "session:history", { - sanitizeToolCallIds, - toolCallIdMode, - enforceToolCallLast: params.modelApi === "anthropic-messages", - preserveSignatures: isAntigravityClaudeModel, - sanitizeThoughtSignatures: isOpenRouterGemini - ? { allowBase64Only: true, includeCamelCase: true } - : undefined, + sanitizeMode: policy.sanitizeMode, + sanitizeToolCallIds: policy.sanitizeToolCallIds, + toolCallIdMode: policy.toolCallIdMode, + enforceToolCallLast: policy.enforceToolCallLast, + preserveSignatures: policy.preserveSignatures, + sanitizeThoughtSignatures: policy.sanitizeThoughtSignatures, }); - const sanitizedThinking = isAntigravityClaudeModel + const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks ? sanitizeAntigravityThinkingBlocks(sanitizedImages) : sanitizedImages; - const repairedTools = sanitizeToolUseResultPairing(sanitizedThinking); + const repairedTools = policy.repairToolUseResultPairing + ? sanitizeToolUseResultPairing(sanitizedThinking) + : sanitizedThinking; + + if (!policy.applyGoogleTurnOrdering) { + return repairedTools; + } return applyGoogleTurnOrderingFix({ messages: repairedTools, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index c770b241a..b8c99ec11 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -39,6 +39,7 @@ import { import { createClawdbotCodingTools } from "../../pi-tools.js"; import { resolveSandboxContext } from "../../sandbox.js"; import { guardSessionManager } from "../../session-tool-result-guard-wrapper.js"; +import { resolveTranscriptPolicy } from "../../transcript-policy.js"; import { acquireSessionWriteLock } from "../../session-write-lock.js"; import { applySkillEnvOverrides, @@ -369,10 +370,18 @@ export async function runEmbeddedAttempt( .then(() => true) .catch(() => false); + const transcriptPolicy = resolveTranscriptPolicy({ + modelApi: params.model?.api, + provider: params.provider, + modelId: params.modelId, + }); + await prewarmSessionFile(params.sessionFile); sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), { agentId: sessionAgentId, sessionKey: params.sessionKey, + allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, + stripFinalTags: transcriptPolicy.stripFinalTags, }); trackSessionManagerAccess(params.sessionFile); @@ -473,10 +482,15 @@ export async function runEmbeddedAttempt( provider: params.provider, sessionManager, sessionId: params.sessionId, + policy: transcriptPolicy, }); cacheTrace?.recordStage("session:sanitized", { messages: prior }); - const validatedGemini = validateGeminiTurns(prior); - const validated = validateAnthropicTurns(validatedGemini); + const validatedGemini = transcriptPolicy.validateGeminiTurns + ? validateGeminiTurns(prior) + : prior; + const validated = transcriptPolicy.validateAnthropicTurns + ? validateAnthropicTurns(validatedGemini) + : validatedGemini; const limited = limitHistoryTurns( validated, getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), diff --git a/src/agents/pi-extensions/transcript-sanitize.ts b/src/agents/pi-extensions/transcript-sanitize.ts index da57e9f28..2c5ee4c00 100644 --- a/src/agents/pi-extensions/transcript-sanitize.ts +++ b/src/agents/pi-extensions/transcript-sanitize.ts @@ -9,19 +9,30 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { ContextEvent, ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; -import { isGoogleModelApi } from "../pi-embedded-helpers.js"; import { repairToolUseResultPairing } from "../session-transcript-repair.js"; import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js"; +import { resolveTranscriptPolicy } from "../transcript-policy.js"; export default function transcriptSanitizeExtension(api: ExtensionAPI): void { api.on("context", (event: ContextEvent, ctx: ExtensionContext) => { let next = event.messages as AgentMessage[]; - const repaired = repairToolUseResultPairing(next); - if (repaired.messages !== next) next = repaired.messages; + const policy = resolveTranscriptPolicy({ + modelApi: ctx.model?.api, + provider: ctx.model?.provider, + modelId: ctx.model?.id, + }); - if (isGoogleModelApi(ctx.model?.api)) { - const repairedIds = sanitizeToolCallIdsForCloudCodeAssist(next); + if (policy.repairToolUseResultPairing) { + const repaired = repairToolUseResultPairing(next); + if (repaired.messages !== next) next = repaired.messages; + } + + if (policy.sanitizeToolCallIds) { + const repairedIds = sanitizeToolCallIdsForCloudCodeAssist( + next, + policy.toolCallIdMode ?? "strict", + ); if (repairedIds !== next) next = repairedIds; } diff --git a/src/agents/session-tool-result-guard-wrapper.ts b/src/agents/session-tool-result-guard-wrapper.ts index d83d14829..f1728cb5c 100644 --- a/src/agents/session-tool-result-guard-wrapper.ts +++ b/src/agents/session-tool-result-guard-wrapper.ts @@ -14,7 +14,12 @@ export type GuardedSessionManager = SessionManager & { */ export function guardSessionManager( sessionManager: SessionManager, - opts?: { agentId?: string; sessionKey?: string }, + opts?: { + agentId?: string; + sessionKey?: string; + allowSyntheticToolResults?: boolean; + stripFinalTags?: boolean; + }, ): GuardedSessionManager { if (typeof (sessionManager as GuardedSessionManager).flushPendingToolResults === "function") { return sessionManager as GuardedSessionManager; @@ -43,6 +48,8 @@ export function guardSessionManager( const guard = installSessionToolResultGuard(sessionManager, { transformToolResultForPersistence: transform, + allowSyntheticToolResults: opts?.allowSyntheticToolResults, + stripFinalTags: opts?.stripFinalTags, }); (sessionManager as GuardedSessionManager).flushPendingToolResults = guard.flushPendingToolResults; return sessionManager as GuardedSessionManager; diff --git a/src/agents/session-tool-result-guard.ts b/src/agents/session-tool-result-guard.ts index 35a00e0e3..6e8fff5eb 100644 --- a/src/agents/session-tool-result-guard.ts +++ b/src/agents/session-tool-result-guard.ts @@ -79,6 +79,16 @@ export function installSessionToolResultGuard( message: AgentMessage, meta: { toolCallId?: string; toolName?: string; isSynthetic?: boolean }, ) => AgentMessage; + /** + * Whether to strip tags from assistant text before persistence. + * Defaults to true. + */ + stripFinalTags?: boolean; + /** + * Whether to synthesize missing tool results to satisfy strict providers. + * Defaults to true. + */ + allowSyntheticToolResults?: boolean; }, ): { flushPendingToolResults: () => void; @@ -95,17 +105,22 @@ export function installSessionToolResultGuard( return transformer ? transformer(message, meta) : message; }; + const allowSyntheticToolResults = opts?.allowSyntheticToolResults ?? true; + const stripFinalTags = opts?.stripFinalTags ?? true; + const flushPendingToolResults = () => { if (pending.size === 0) return; - for (const [id, name] of pending.entries()) { - const synthetic = makeMissingToolResult({ toolCallId: id, toolName: name }); - originalAppend( - persistToolResult(synthetic, { - toolCallId: id, - toolName: name, - isSynthetic: true, - }) as never, - ); + if (allowSyntheticToolResults) { + for (const [id, name] of pending.entries()) { + const synthetic = makeMissingToolResult({ toolCallId: id, toolName: name }); + originalAppend( + persistToolResult(synthetic, { + toolCallId: id, + toolName: name, + isSynthetic: true, + }) as never, + ); + } } pending.clear(); }; @@ -127,7 +142,7 @@ export function installSessionToolResultGuard( } const sanitized = - role === "assistant" + role === "assistant" && stripFinalTags ? stripFinalTagsFromAssistant(message as Extract) : message; const toolCalls = @@ -135,13 +150,15 @@ export function installSessionToolResultGuard( ? extractAssistantToolCalls(sanitized as Extract) : []; - // If previous tool calls are still pending, flush before non-tool results. - if (pending.size > 0 && (toolCalls.length === 0 || role !== "assistant")) { - flushPendingToolResults(); - } - // If new tool calls arrive while older ones are pending, flush the old ones first. - if (pending.size > 0 && toolCalls.length > 0) { - flushPendingToolResults(); + if (allowSyntheticToolResults) { + // If previous tool calls are still pending, flush before non-tool results. + if (pending.size > 0 && (toolCalls.length === 0 || role !== "assistant")) { + flushPendingToolResults(); + } + // If new tool calls arrive while older ones are pending, flush the old ones first. + if (pending.size > 0 && toolCalls.length > 0) { + flushPendingToolResults(); + } } const result = originalAppend(sanitized as never); diff --git a/src/agents/tool-call-id.test.ts b/src/agents/tool-call-id.test.ts index 897bdf512..5ce554e42 100644 --- a/src/agents/tool-call-id.test.ts +++ b/src/agents/tool-call-id.test.ts @@ -7,16 +7,16 @@ import { } from "./tool-call-id.js"; describe("sanitizeToolCallIdsForCloudCodeAssist", () => { - describe("standard mode (default)", () => { + describe("strict mode (default)", () => { it("is a no-op for already-valid non-colliding IDs", () => { const input = [ { role: "assistant", - content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + content: [{ type: "toolCall", id: "call1", name: "read", arguments: {} }], }, { role: "toolResult", - toolCallId: "call_1", + toolCallId: "call1", toolName: "read", content: [{ type: "text", text: "ok" }], }, @@ -26,7 +26,7 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { expect(out).toBe(input); }); - it("replaces invalid characters with underscores (preserves readability)", () => { + it("strips non-alphanumeric characters from tool call IDs", () => { const input = [ { role: "assistant", @@ -45,9 +45,9 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { const assistant = out[0] as Extract; const toolCall = assistant.content?.[0] as { id?: string }; - // Standard mode preserves underscores for readability - expect(toolCall.id).toBe("call_item_123"); - expect(isValidCloudCodeAssistToolId(toolCall.id as string)).toBe(true); + // Strict mode strips all non-alphanumeric characters + expect(toolCall.id).toBe("callitem123"); + expect(isValidCloudCodeAssistToolId(toolCall.id as string, "strict")).toBe(true); const result = out[1] as Extract; expect(result.toolCallId).toBe(toolCall.id); @@ -85,8 +85,8 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { expect(typeof a.id).toBe("string"); expect(typeof b.id).toBe("string"); expect(a.id).not.toBe(b.id); - expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true); - expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true); + expect(isValidCloudCodeAssistToolId(a.id as string, "strict")).toBe(true); + expect(isValidCloudCodeAssistToolId(b.id as string, "strict")).toBe(true); const r1 = out[1] as Extract; const r2 = out[2] as Extract; @@ -129,8 +129,8 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { expect(a.id).not.toBe(b.id); expect(a.id?.length).toBeLessThanOrEqual(40); expect(b.id?.length).toBeLessThanOrEqual(40); - expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true); - expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true); + expect(isValidCloudCodeAssistToolId(a.id as string, "strict")).toBe(true); + expect(isValidCloudCodeAssistToolId(b.id as string, "strict")).toBe(true); const r1 = out[1] as Extract; const r2 = out[2] as Extract; diff --git a/src/agents/tool-call-id.ts b/src/agents/tool-call-id.ts index d5d3cf078..8b7e05417 100644 --- a/src/agents/tool-call-id.ts +++ b/src/agents/tool-call-id.ts @@ -2,27 +2,20 @@ import { createHash } from "node:crypto"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; -export type ToolCallIdMode = "standard" | "strict" | "strict9"; +export type ToolCallIdMode = "strict" | "strict9"; const STRICT9_LEN = 9; /** * Sanitize a tool call ID to be compatible with various providers. * - * - "standard" mode: allows [a-zA-Z0-9_-], better readability (default) * - "strict" mode: only [a-zA-Z0-9] * - "strict9" mode: only [a-zA-Z0-9], length 9 (Mistral tool call requirement) */ -export function sanitizeToolCallId(id: string, mode: ToolCallIdMode = "standard"): string { +export function sanitizeToolCallId(id: string, mode: ToolCallIdMode = "strict"): string { if (!id || typeof id !== "string") { if (mode === "strict9") return "defaultid"; - return mode === "strict" ? "defaulttoolid" : "default_tool_id"; - } - - if (mode === "strict") { - // Some providers require strictly alphanumeric tool call IDs. - const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, ""); - return alphanumericOnly.length > 0 ? alphanumericOnly : "sanitizedtoolid"; + return "defaulttoolid"; } if (mode === "strict9") { @@ -32,26 +25,18 @@ export function sanitizeToolCallId(id: string, mode: ToolCallIdMode = "standard" return shortHash("sanitized", STRICT9_LEN); } - // Standard mode: allow underscores and hyphens for better readability in logs - const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_"); - const trimmed = sanitized.replace(/^[^a-zA-Z0-9_-]+/, ""); - return trimmed.length > 0 ? trimmed : "sanitized_tool_id"; + // Some providers require strictly alphanumeric tool call IDs. + const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, ""); + return alphanumericOnly.length > 0 ? alphanumericOnly : "sanitizedtoolid"; } -export function isValidCloudCodeAssistToolId( - id: string, - mode: ToolCallIdMode = "standard", -): boolean { +export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "strict"): boolean { if (!id || typeof id !== "string") return false; - if (mode === "strict") { - // Strictly alphanumeric for providers with tighter tool ID constraints - return /^[a-zA-Z0-9]+$/.test(id); - } if (mode === "strict9") { return /^[a-zA-Z0-9]{9}$/.test(id); } - // Standard mode allows underscores and hyphens - return /^[a-zA-Z0-9_-]+$/.test(id); + // Strictly alphanumeric for providers with tighter tool ID constraints + return /^[a-zA-Z0-9]+$/.test(id); } function shortHash(text: string, length = 8): string { @@ -78,7 +63,7 @@ function makeUniqueToolId(params: { id: string; used: Set; mode: ToolCal if (!params.used.has(base)) return base; const hash = shortHash(params.id); - // Use separator based on mode: underscore for standard (readable), none for strict + // Use separator based on mode: none for strict, underscore for non-strict variants const separator = params.mode === "strict" ? "" : "_"; const maxBaseLen = MAX_LEN - separator.length - hash.length; const clippedBase = base.length > maxBaseLen ? base.slice(0, maxBaseLen) : base; @@ -154,16 +139,15 @@ function rewriteToolResultIds(params: { * Sanitize tool call IDs for provider compatibility. * * @param messages - The messages to sanitize - * @param mode - "standard" (default, allows _-), "strict" (alphanumeric only), or "strict9" (alphanumeric length 9) + * @param mode - "strict" (alphanumeric only) or "strict9" (alphanumeric length 9) */ export function sanitizeToolCallIdsForCloudCodeAssist( messages: AgentMessage[], - mode: ToolCallIdMode = "standard", + mode: ToolCallIdMode = "strict", ): AgentMessage[] { - // Standard mode: allows [a-zA-Z0-9_-] for better readability in session logs // Strict mode: only [a-zA-Z0-9] // Strict9 mode: only [a-zA-Z0-9], length 9 (Mistral tool call requirement) - // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `a_b` or `ab`). + // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `ab`). // Fix by applying a stable, transcript-wide mapping and de-duping via suffix. const map = new Map(); const used = new Set(); diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts new file mode 100644 index 000000000..58d33dbcc --- /dev/null +++ b/src/agents/transcript-policy.ts @@ -0,0 +1,117 @@ +import { isAntigravityClaude, isGoogleModelApi } from "./pi-embedded-helpers/google.js"; +import { normalizeProviderId } from "./model-selection.js"; +import type { ToolCallIdMode } from "./tool-call-id.js"; + +export type TranscriptSanitizeMode = "full" | "images-only"; + +export type TranscriptPolicy = { + sanitizeMode: TranscriptSanitizeMode; + sanitizeToolCallIds: boolean; + toolCallIdMode?: ToolCallIdMode; + repairToolUseResultPairing: boolean; + enforceToolCallLast: boolean; + preserveSignatures: boolean; + sanitizeThoughtSignatures?: { + allowBase64Only?: boolean; + includeCamelCase?: boolean; + }; + normalizeAntigravityThinkingBlocks: boolean; + applyGoogleTurnOrdering: boolean; + validateGeminiTurns: boolean; + validateAnthropicTurns: boolean; + stripFinalTags: boolean; + allowSyntheticToolResults: boolean; +}; + +const MISTRAL_MODEL_HINTS = [ + "mistral", + "mixtral", + "codestral", + "pixtral", + "devstral", + "ministral", + "mistralai", +]; +const OPENAI_MODEL_APIS = new Set([ + "openai", + "openai-completions", + "openai-responses", + "openai-codex-responses", +]); +const OPENAI_PROVIDERS = new Set(["openai", "openai-codex"]); + +function isOpenAiApi(modelApi?: string | null): boolean { + if (!modelApi) return false; + return OPENAI_MODEL_APIS.has(modelApi); +} + +function isOpenAiProvider(provider?: string | null): boolean { + if (!provider) return false; + return OPENAI_PROVIDERS.has(normalizeProviderId(provider)); +} + +function isAnthropicApi(modelApi?: string | null, provider?: string | null): boolean { + if (modelApi === "anthropic-messages") return true; + const normalized = normalizeProviderId(provider ?? ""); + return normalized === "anthropic" || normalized === "minimax"; +} + +function isMistralModel(params: { provider?: string | null; modelId?: string | null }): boolean { + const provider = normalizeProviderId(params.provider ?? ""); + if (provider === "mistral") return true; + const modelId = (params.modelId ?? "").toLowerCase(); + if (!modelId) return false; + return MISTRAL_MODEL_HINTS.some((hint) => modelId.includes(hint)); +} + +export function resolveTranscriptPolicy(params: { + modelApi?: string | null; + provider?: string | null; + modelId?: string | null; +}): TranscriptPolicy { + const provider = normalizeProviderId(params.provider ?? ""); + const modelId = params.modelId ?? ""; + const isGoogle = isGoogleModelApi(params.modelApi); + const isAnthropic = isAnthropicApi(params.modelApi, provider); + const isOpenAi = isOpenAiProvider(provider) || (!provider && isOpenAiApi(params.modelApi)); + const isMistral = isMistralModel({ provider, modelId }); + const isOpenRouterGemini = + (provider === "openrouter" || provider === "opencode") && + modelId.toLowerCase().includes("gemini"); + const isAntigravityClaudeModel = isAntigravityClaude({ + api: params.modelApi, + provider, + modelId, + }); + + const needsNonImageSanitize = isGoogle || isAnthropic || isMistral || isOpenRouterGemini; + + const sanitizeToolCallIds = isGoogle || isMistral; + const toolCallIdMode: ToolCallIdMode | undefined = isMistral + ? "strict9" + : sanitizeToolCallIds + ? "strict" + : undefined; + const repairToolUseResultPairing = isGoogle || isAnthropic; + const enforceToolCallLast = isAnthropic; + const sanitizeThoughtSignatures = isOpenRouterGemini + ? { allowBase64Only: true, includeCamelCase: true } + : undefined; + const normalizeAntigravityThinkingBlocks = isAntigravityClaudeModel; + + return { + sanitizeMode: isOpenAi ? "images-only" : needsNonImageSanitize ? "full" : "images-only", + sanitizeToolCallIds: !isOpenAi && sanitizeToolCallIds, + toolCallIdMode, + repairToolUseResultPairing: !isOpenAi && repairToolUseResultPairing, + enforceToolCallLast: !isOpenAi && enforceToolCallLast, + preserveSignatures: isAntigravityClaudeModel, + sanitizeThoughtSignatures: isOpenAi ? undefined : sanitizeThoughtSignatures, + normalizeAntigravityThinkingBlocks, + applyGoogleTurnOrdering: !isOpenAi && isGoogle, + validateGeminiTurns: !isOpenAi && isGoogle, + validateAnthropicTurns: !isOpenAi && isAnthropic, + stripFinalTags: !isOpenAi && (isGoogle || isAnthropic), + allowSyntheticToolResults: !isOpenAi && (isGoogle || isAnthropic), + }; +}