From d0f9e22a4b0e693da2680ed7cf9a5da4c8eaad00 Mon Sep 17 00:00:00 2001 From: zerone0x Date: Wed, 21 Jan 2026 17:49:10 +0800 Subject: [PATCH 1/4] fix(agents): use alphanumeric-only tool call IDs for OpenRouter compatibility Some providers like Mistral via OpenRouter require strictly alphanumeric tool call IDs. The error message indicates: "Tool call id was whatsapp_login_1768799841527_1 but must be a-z, A-Z, 0-9, with a length of 9." Changes: - Update sanitizeToolCallId to strip all non-alphanumeric characters (previously allowed underscores and hyphens) - Update makeUniqueToolId to use alphanumeric suffixes (x2, x3, etc.) instead of underscores - Update isValidCloudCodeAssistToolId to validate alphanumeric-only IDs - Update tests to reflect stricter sanitization Fixes #1359 Co-Authored-By: Claude --- ...ool-call-tool-result-ids-unchanged.test.ts | 7 ++-- ...ssistant-text-blocks-but-preserves.test.ts | 9 ++--- ...mbedded-helpers.sanitizetoolcallid.test.ts | 12 ++++--- src/agents/tool-call-id.test.ts | 35 +++++++++++++++++-- src/agents/tool-call-id.ts | 31 ++++++++-------- 5 files changed, 64 insertions(+), 30 deletions(-) diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts index f8822feea..22a885e9f 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts @@ -50,7 +50,7 @@ describe("sanitizeSessionMessagesImages", () => { expect(toolResult.role).toBe("toolResult"); expect(toolResult.toolCallId).toBe("call_123|fc_456"); }); - it("sanitizes tool call + tool result IDs when enabled", async () => { + it("sanitizes tool call + tool result IDs when enabled (alphanumeric only)", async () => { const input = [ { role: "assistant", @@ -82,14 +82,15 @@ describe("sanitizeSessionMessagesImages", () => { const toolCall = (assistant.content as Array<{ type?: string; id?: string }>).find( (b) => b.type === "toolCall", ); - expect(toolCall?.id).toBe("call_123_fc_456"); + // Sanitization strips all non-alphanumeric characters for Mistral/OpenRouter compatibility + expect(toolCall?.id).toBe("call123fc456"); const toolResult = out[1] as unknown as { role?: string; toolCallId?: string; }; expect(toolResult.role).toBe("toolResult"); - expect(toolResult.toolCallId).toBe("call_123_fc_456"); + expect(toolResult.toolCallId).toBe("call123fc456"); }); it("drops assistant blocks after a tool call when enforceToolCallLast is enabled", async () => { const input = [ diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts index bf2be6ef5..ff14577bf 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts @@ -30,7 +30,7 @@ describe("sanitizeSessionMessagesImages", () => { expect(content).toHaveLength(1); expect((content as Array<{ type?: string }>)[0]?.type).toBe("toolCall"); }); - it("sanitizes tool ids for assistant blocks and tool results when enabled", async () => { + it("sanitizes tool ids for assistant blocks and tool results when enabled (alphanumeric only)", async () => { const input = [ { role: "assistant", @@ -55,12 +55,13 @@ describe("sanitizeSessionMessagesImages", () => { sanitizeToolCallIds: true, }); + // Sanitization strips all non-alphanumeric characters for Mistral/OpenRouter compatibility const assistant = out[0] as { content?: Array<{ id?: string }> }; - expect(assistant.content?.[0]?.id).toBe("call_abc_item_123"); - expect(assistant.content?.[1]?.id).toBe("call_abc_item_456"); + expect(assistant.content?.[0]?.id).toBe("callabcitem123"); + expect(assistant.content?.[1]?.id).toBe("callabcitem456"); const toolResult = out[1] as { toolUseId?: string }; - expect(toolResult.toolUseId).toBe("call_abc_item_123"); + expect(toolResult.toolUseId).toBe("callabcitem123"); }); it("filters whitespace-only assistant text blocks", async () => { const input = [ diff --git a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts index 19fec5aaa..b7fe91210 100644 --- a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts @@ -10,13 +10,15 @@ const _makeFile = (overrides: Partial): WorkspaceBootstr ...overrides, }); describe("sanitizeToolCallId", () => { - it("keeps valid tool call IDs", () => { - expect(sanitizeToolCallId("call_abc-123")).toBe("call_abc-123"); + it("keeps valid alphanumeric tool call IDs", () => { + expect(sanitizeToolCallId("callabc123")).toBe("callabc123"); }); - it("replaces invalid characters with underscores", () => { - expect(sanitizeToolCallId("call_abc|item:456")).toBe("call_abc_item_456"); + it("strips non-alphanumeric characters (Mistral/OpenRouter compatibility)", () => { + expect(sanitizeToolCallId("call_abc-123")).toBe("callabc123"); + expect(sanitizeToolCallId("call_abc|item:456")).toBe("callabcitem456"); + expect(sanitizeToolCallId("whatsapp_login_1768799841527_1")).toBe("whatsapplogin17687998415271"); }); it("returns default for empty IDs", () => { - expect(sanitizeToolCallId("")).toBe("default_tool_id"); + expect(sanitizeToolCallId("")).toBe("defaulttoolid"); }); }); diff --git a/src/agents/tool-call-id.test.ts b/src/agents/tool-call-id.test.ts index 0d9fb1341..a6e85d327 100644 --- a/src/agents/tool-call-id.test.ts +++ b/src/agents/tool-call-id.test.ts @@ -7,15 +7,15 @@ import { } from "./tool-call-id.js"; describe("sanitizeToolCallIdsForCloudCodeAssist", () => { - it("is a no-op for already-valid non-colliding IDs", () => { + it("is a no-op for already-valid non-colliding alphanumeric IDs", () => { const input = [ { role: "assistant", - content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + content: [{ type: "toolCall", id: "call1", name: "read", arguments: {} }], }, { role: "toolResult", - toolCallId: "call_1", + toolCallId: "call1", toolName: "read", content: [{ type: "text", text: "ok" }], }, @@ -25,6 +25,35 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { expect(out).toBe(input); }); + it("strips underscores from tool call IDs (Mistral/OpenRouter compatibility)", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "whatsapp_login_1768799841527_1", name: "login", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "whatsapp_login_1768799841527_1", + toolName: "login", + content: [{ type: "text", text: "ok" }], + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolCallIdsForCloudCodeAssist(input); + expect(out).not.toBe(input); + + const assistant = out[0] as Extract; + const toolCall = assistant.content?.[0] as { id?: string }; + // ID should be alphanumeric only, no underscores + expect(toolCall.id).toBe("whatsapplogin17687998415271"); + expect(isValidCloudCodeAssistToolId(toolCall.id as string)).toBe(true); + + const result = out[1] as Extract; + expect(result.toolCallId).toBe(toolCall.id); + }); + it("avoids collisions when sanitization would produce duplicate IDs", () => { const input = [ { diff --git a/src/agents/tool-call-id.ts b/src/agents/tool-call-id.ts index c4aec7e5b..736454536 100644 --- a/src/agents/tool-call-id.ts +++ b/src/agents/tool-call-id.ts @@ -3,20 +3,19 @@ import { createHash } from "node:crypto"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; export function sanitizeToolCallId(id: string): string { - if (!id || typeof id !== "string") return "default_tool_id"; + if (!id || typeof id !== "string") return "defaulttoolid"; - const cloudCodeAssistPatternReplacement = id.replace(/[^a-zA-Z0-9_-]/g, "_"); - const trimmedInvalidStartChars = cloudCodeAssistPatternReplacement.replace( - /^[^a-zA-Z0-9_-]+/, - "", - ); + // Some providers (e.g. Mistral via OpenRouter) require strictly alphanumeric tool call IDs. + // Strip all non-alphanumeric characters to ensure maximum compatibility. + const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, ""); - return trimmedInvalidStartChars.length > 0 ? trimmedInvalidStartChars : "sanitized_tool_id"; + return alphanumericOnly.length > 0 ? alphanumericOnly : "sanitizedtoolid"; } export function isValidCloudCodeAssistToolId(id: string): boolean { if (!id || typeof id !== "string") return false; - return /^[a-zA-Z0-9_-]+$/.test(id); + // Strictly alphanumeric for maximum provider compatibility (e.g. Mistral via OpenRouter). + return /^[a-zA-Z0-9]+$/.test(id); } function shortHash(text: string): string { @@ -29,19 +28,20 @@ function makeUniqueToolId(params: { id: string; used: Set }): string { const base = sanitizeToolCallId(params.id).slice(0, MAX_LEN); if (!params.used.has(base)) return base; + // Use alphanumeric-only suffixes to maintain strict compatibility. const hash = shortHash(params.id); - const maxBaseLen = MAX_LEN - 1 - hash.length; + const maxBaseLen = MAX_LEN - hash.length; const clippedBase = base.length > maxBaseLen ? base.slice(0, maxBaseLen) : base; - const candidate = `${clippedBase}_${hash}`; + const candidate = `${clippedBase}${hash}`; if (!params.used.has(candidate)) return candidate; for (let i = 2; i < 1000; i += 1) { - const suffix = `_${i}`; + const suffix = `x${i}`; const next = `${candidate.slice(0, MAX_LEN - suffix.length)}${suffix}`; if (!params.used.has(next)) return next; } - const ts = `_${Date.now()}`; + const ts = `t${Date.now()}`; return `${candidate.slice(0, MAX_LEN - ts.length)}${ts}`; } @@ -101,9 +101,10 @@ function rewriteToolResultIds(params: { } export function sanitizeToolCallIdsForCloudCodeAssist(messages: AgentMessage[]): AgentMessage[] { - // Cloud Code Assist requires tool IDs matching ^[a-zA-Z0-9_-]+$. - // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `a_b`). - // Fix by applying a stable, transcript-wide mapping and de-duping via suffix. + // Some providers (e.g. Mistral via OpenRouter) require strictly alphanumeric tool IDs. + // Use ^[a-zA-Z0-9]+$ pattern for maximum compatibility across all providers. + // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `ab`). + // Fix by applying a stable, transcript-wide mapping and de-duping via hash suffix. const map = new Map(); const used = new Set(); From d51eca64ccc6c54160c8dee6e7a26a1c467bf9a1 Mon Sep 17 00:00:00 2001 From: zerone0x Date: Wed, 21 Jan 2026 21:32:53 +0800 Subject: [PATCH 2/4] fix(agents): make tool call ID sanitization conditional with standard/strict modes - Add ToolCallIdMode type ('standard' | 'strict') for provider compatibility - Standard mode (default): allows [a-zA-Z0-9_-] for readable session logs - Strict mode: only [a-zA-Z0-9] for Mistral via OpenRouter - Update sanitizeSessionMessagesImages to accept toolCallIdMode option - Export ToolCallIdMode from pi-embedded-helpers barrel Addresses review feedback on PR #1372 about readability. --- ...ool-call-tool-result-ids-unchanged.test.ts | 57 +++- ...ssistant-text-blocks-but-preserves.test.ts | 48 ++- ...mbedded-helpers.sanitizetoolcallid.test.ts | 43 ++- src/agents/pi-embedded-helpers.ts | 1 + src/agents/pi-embedded-helpers/images.ts | 5 +- src/agents/tool-call-id.test.ts | 318 +++++++++++------- src/agents/tool-call-id.ts | 77 +++-- 7 files changed, 369 insertions(+), 180 deletions(-) diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts index 22a885e9f..a715dacfb 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts @@ -1,15 +1,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { describe, expect, it } from "vitest"; import { sanitizeSessionMessagesImages } from "./pi-embedded-helpers.js"; -import { DEFAULT_AGENTS_FILENAME } from "./workspace.js"; -const _makeFile = (overrides: Partial): WorkspaceBootstrapFile => ({ - name: DEFAULT_AGENTS_FILENAME, - path: "/tmp/AGENTS.md", - content: "", - missing: false, - ...overrides, -}); describe("sanitizeSessionMessagesImages", () => { it("keeps tool call + tool result IDs unchanged by default", async () => { const input = [ @@ -50,7 +42,8 @@ describe("sanitizeSessionMessagesImages", () => { expect(toolResult.role).toBe("toolResult"); expect(toolResult.toolCallId).toBe("call_123|fc_456"); }); - it("sanitizes tool call + tool result IDs when enabled (alphanumeric only)", async () => { + + it("sanitizes tool call + tool result IDs in standard mode (preserves underscores)", async () => { const input = [ { role: "assistant", @@ -82,7 +75,51 @@ describe("sanitizeSessionMessagesImages", () => { const toolCall = (assistant.content as Array<{ type?: string; id?: string }>).find( (b) => b.type === "toolCall", ); - // Sanitization strips all non-alphanumeric characters for Mistral/OpenRouter compatibility + // Standard mode preserves underscores for readability, replaces invalid chars + expect(toolCall?.id).toBe("call_123_fc_456"); + + const toolResult = out[1] as unknown as { + role?: string; + toolCallId?: string; + }; + expect(toolResult.role).toBe("toolResult"); + expect(toolResult.toolCallId).toBe("call_123_fc_456"); + }); + + it("sanitizes tool call + tool result IDs in strict mode (alphanumeric only)", async () => { + const input = [ + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_123|fc_456", + name: "read", + arguments: { path: "package.json" }, + }, + ], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_456", + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const out = await sanitizeSessionMessagesImages(input, "test", { + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + }); + + const assistant = out[0] as unknown as { role?: string; content?: unknown }; + expect(assistant.role).toBe("assistant"); + expect(Array.isArray(assistant.content)).toBe(true); + const toolCall = (assistant.content as Array<{ type?: string; id?: string }>).find( + (b) => b.type === "toolCall", + ); + // Strict mode strips all non-alphanumeric characters for Mistral/OpenRouter compatibility expect(toolCall?.id).toBe("call123fc456"); const toolResult = out[1] as unknown as { diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts index ff14577bf..6ce18c3fc 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts @@ -1,15 +1,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { describe, expect, it } from "vitest"; import { sanitizeSessionMessagesImages } from "./pi-embedded-helpers.js"; -import { DEFAULT_AGENTS_FILENAME } from "./workspace.js"; -const _makeFile = (overrides: Partial): WorkspaceBootstrapFile => ({ - name: DEFAULT_AGENTS_FILENAME, - path: "/tmp/AGENTS.md", - content: "", - missing: false, - ...overrides, -}); describe("sanitizeSessionMessagesImages", () => { it("removes empty assistant text blocks but preserves tool calls", async () => { const input = [ @@ -30,7 +22,8 @@ describe("sanitizeSessionMessagesImages", () => { expect(content).toHaveLength(1); expect((content as Array<{ type?: string }>)[0]?.type).toBe("toolCall"); }); - it("sanitizes tool ids for assistant blocks and tool results when enabled (alphanumeric only)", async () => { + + it("sanitizes tool ids in standard mode (preserves underscores)", async () => { const input = [ { role: "assistant", @@ -55,7 +48,42 @@ describe("sanitizeSessionMessagesImages", () => { sanitizeToolCallIds: true, }); - // Sanitization strips all non-alphanumeric characters for Mistral/OpenRouter compatibility + // Standard mode preserves underscores for readability + const assistant = out[0] as { content?: Array<{ id?: string }> }; + expect(assistant.content?.[0]?.id).toBe("call_abc_item_123"); + expect(assistant.content?.[1]?.id).toBe("call_abc_item_456"); + + const toolResult = out[1] as { toolUseId?: string }; + expect(toolResult.toolUseId).toBe("call_abc_item_123"); + }); + + it("sanitizes tool ids in strict mode (alphanumeric only)", async () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolUse", id: "call_abc|item:123", name: "test", input: {} }, + { + type: "toolCall", + id: "call_abc|item:456", + name: "exec", + arguments: {}, + }, + ], + }, + { + role: "toolResult", + toolUseId: "call_abc|item:123", + content: [{ type: "text", text: "ok" }], + }, + ] satisfies AgentMessage[]; + + const out = await sanitizeSessionMessagesImages(input, "test", { + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + }); + + // Strict mode strips all non-alphanumeric characters for Mistral/OpenRouter compatibility const assistant = out[0] as { content?: Array<{ id?: string }> }; expect(assistant.content?.[0]?.id).toBe("callabcitem123"); expect(assistant.content?.[1]?.id).toBe("callabcitem456"); diff --git a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts index b7fe91210..b4c6e9396 100644 --- a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts @@ -1,24 +1,33 @@ import { describe, expect, it } from "vitest"; import { sanitizeToolCallId } from "./pi-embedded-helpers.js"; -import { DEFAULT_AGENTS_FILENAME } from "./workspace.js"; -const _makeFile = (overrides: Partial): WorkspaceBootstrapFile => ({ - name: DEFAULT_AGENTS_FILENAME, - path: "/tmp/AGENTS.md", - content: "", - missing: false, - ...overrides, -}); describe("sanitizeToolCallId", () => { - it("keeps valid alphanumeric tool call IDs", () => { - expect(sanitizeToolCallId("callabc123")).toBe("callabc123"); + describe("standard mode (default)", () => { + it("keeps valid alphanumeric tool call IDs", () => { + expect(sanitizeToolCallId("callabc123")).toBe("callabc123"); + }); + it("keeps underscores and hyphens for readability", () => { + expect(sanitizeToolCallId("call_abc-123")).toBe("call_abc-123"); + expect(sanitizeToolCallId("call_abc_def")).toBe("call_abc_def"); + }); + it("replaces invalid characters with underscores", () => { + expect(sanitizeToolCallId("call_abc|item:456")).toBe("call_abc_item_456"); + }); + it("returns default for empty IDs", () => { + expect(sanitizeToolCallId("")).toBe("default_tool_id"); + }); }); - it("strips non-alphanumeric characters (Mistral/OpenRouter compatibility)", () => { - expect(sanitizeToolCallId("call_abc-123")).toBe("callabc123"); - expect(sanitizeToolCallId("call_abc|item:456")).toBe("callabcitem456"); - expect(sanitizeToolCallId("whatsapp_login_1768799841527_1")).toBe("whatsapplogin17687998415271"); - }); - it("returns default for empty IDs", () => { - expect(sanitizeToolCallId("")).toBe("defaulttoolid"); + + describe("strict mode (for Mistral/OpenRouter)", () => { + it("strips all non-alphanumeric characters", () => { + expect(sanitizeToolCallId("call_abc-123", "strict")).toBe("callabc123"); + expect(sanitizeToolCallId("call_abc|item:456", "strict")).toBe("callabcitem456"); + expect(sanitizeToolCallId("whatsapp_login_1768799841527_1", "strict")).toBe( + "whatsapplogin17687998415271", + ); + }); + it("returns default for empty IDs", () => { + expect(sanitizeToolCallId("", "strict")).toBe("defaulttoolid"); + }); }); }); diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 85d16af12..01cb90ef7 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -50,4 +50,5 @@ export { } from "./pi-embedded-helpers/turns.js"; export type { EmbeddedContextFile, FailoverReason } from "./pi-embedded-helpers/types.js"; +export type { ToolCallIdMode } from "./tool-call-id.js"; export { isValidCloudCodeAssistToolId, sanitizeToolCallId } from "./tool-call-id.js"; diff --git a/src/agents/pi-embedded-helpers/images.ts b/src/agents/pi-embedded-helpers/images.ts index a53c27a00..862067b26 100644 --- a/src/agents/pi-embedded-helpers/images.ts +++ b/src/agents/pi-embedded-helpers/images.ts @@ -1,5 +1,6 @@ import type { AgentMessage, AgentToolResult } from "@mariozechner/pi-agent-core"; +import type { ToolCallIdMode } from "../tool-call-id.js"; import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js"; import { sanitizeContentBlocksImages } from "../tool-images.js"; import { stripThoughtSignatures } from "./bootstrap.js"; @@ -32,6 +33,8 @@ export async function sanitizeSessionMessagesImages( label: string, options?: { sanitizeToolCallIds?: boolean; + /** Mode for tool call ID sanitization: "standard" (default, preserves _-) or "strict" (alphanumeric only) */ + toolCallIdMode?: ToolCallIdMode; enforceToolCallLast?: boolean; preserveSignatures?: boolean; sanitizeThoughtSignatures?: { @@ -43,7 +46,7 @@ export async function sanitizeSessionMessagesImages( // We sanitize historical session messages because Anthropic can reject a request // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX). const sanitizedIds = options?.sanitizeToolCallIds - ? sanitizeToolCallIdsForCloudCodeAssist(messages) + ? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode) : messages; const out: AgentMessage[] = []; for (const msg of sanitizedIds) { diff --git a/src/agents/tool-call-id.test.ts b/src/agents/tool-call-id.test.ts index a6e85d327..a76fa93d7 100644 --- a/src/agents/tool-call-id.test.ts +++ b/src/agents/tool-call-id.test.ts @@ -7,135 +7,213 @@ import { } from "./tool-call-id.js"; describe("sanitizeToolCallIdsForCloudCodeAssist", () => { - it("is a no-op for already-valid non-colliding alphanumeric IDs", () => { - const input = [ - { - role: "assistant", - content: [{ type: "toolCall", id: "call1", name: "read", arguments: {} }], - }, - { - role: "toolResult", - toolCallId: "call1", - toolName: "read", - content: [{ type: "text", text: "ok" }], - }, - ] satisfies AgentMessage[]; + describe("standard mode (default)", () => { + it("is a no-op for already-valid non-colliding IDs", () => { + const input = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "ok" }], + }, + ] satisfies AgentMessage[]; - const out = sanitizeToolCallIdsForCloudCodeAssist(input); - expect(out).toBe(input); + const out = sanitizeToolCallIdsForCloudCodeAssist(input); + expect(out).toBe(input); + }); + + it("replaces invalid characters with underscores (preserves readability)", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call|item:123", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call|item:123", + toolName: "read", + content: [{ type: "text", text: "ok" }], + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolCallIdsForCloudCodeAssist(input); + expect(out).not.toBe(input); + + const assistant = out[0] as Extract; + const toolCall = assistant.content?.[0] as { id?: string }; + // Standard mode preserves underscores for readability + expect(toolCall.id).toBe("call_item_123"); + expect(isValidCloudCodeAssistToolId(toolCall.id as string)).toBe(true); + + const result = out[1] as Extract; + expect(result.toolCallId).toBe(toolCall.id); + }); + + it("avoids collisions when sanitization would produce duplicate IDs", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_a|b", name: "read", arguments: {} }, + { type: "toolCall", id: "call_a:b", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_a|b", + toolName: "read", + content: [{ type: "text", text: "one" }], + }, + { + role: "toolResult", + toolCallId: "call_a:b", + toolName: "read", + content: [{ type: "text", text: "two" }], + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolCallIdsForCloudCodeAssist(input); + expect(out).not.toBe(input); + + const assistant = out[0] as Extract; + const a = assistant.content?.[0] as { id?: string }; + const b = assistant.content?.[1] as { id?: string }; + expect(typeof a.id).toBe("string"); + expect(typeof b.id).toBe("string"); + expect(a.id).not.toBe(b.id); + expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true); + expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true); + + const r1 = out[1] as Extract; + const r2 = out[2] as Extract; + expect(r1.toolCallId).toBe(a.id); + expect(r2.toolCallId).toBe(b.id); + }); + + it("caps tool call IDs at 40 chars while preserving uniqueness", () => { + const longA = `call_${"a".repeat(60)}`; + const longB = `call_${"a".repeat(59)}b`; + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: longA, name: "read", arguments: {} }, + { type: "toolCall", id: longB, name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: longA, + toolName: "read", + content: [{ type: "text", text: "one" }], + }, + { + role: "toolResult", + toolCallId: longB, + toolName: "read", + content: [{ type: "text", text: "two" }], + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolCallIdsForCloudCodeAssist(input); + const assistant = out[0] as Extract; + const a = assistant.content?.[0] as { id?: string }; + const b = assistant.content?.[1] as { id?: string }; + + expect(typeof a.id).toBe("string"); + expect(typeof b.id).toBe("string"); + expect(a.id).not.toBe(b.id); + expect(a.id?.length).toBeLessThanOrEqual(40); + expect(b.id?.length).toBeLessThanOrEqual(40); + expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true); + expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true); + + const r1 = out[1] as Extract; + const r2 = out[2] as Extract; + expect(r1.toolCallId).toBe(a.id); + expect(r2.toolCallId).toBe(b.id); + }); }); - it("strips underscores from tool call IDs (Mistral/OpenRouter compatibility)", () => { - const input = [ - { - role: "assistant", - content: [ - { type: "toolCall", id: "whatsapp_login_1768799841527_1", name: "login", arguments: {} }, - ], - }, - { - role: "toolResult", - toolCallId: "whatsapp_login_1768799841527_1", - toolName: "login", - content: [{ type: "text", text: "ok" }], - }, - ] satisfies AgentMessage[]; + describe("strict mode (for Mistral/OpenRouter)", () => { + it("strips underscores and hyphens from tool call IDs", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "whatsapp_login_1768799841527_1", name: "login", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "whatsapp_login_1768799841527_1", + toolName: "login", + content: [{ type: "text", text: "ok" }], + }, + ] satisfies AgentMessage[]; - const out = sanitizeToolCallIdsForCloudCodeAssist(input); - expect(out).not.toBe(input); + const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict"); + expect(out).not.toBe(input); - const assistant = out[0] as Extract; - const toolCall = assistant.content?.[0] as { id?: string }; - // ID should be alphanumeric only, no underscores - expect(toolCall.id).toBe("whatsapplogin17687998415271"); - expect(isValidCloudCodeAssistToolId(toolCall.id as string)).toBe(true); + const assistant = out[0] as Extract; + const toolCall = assistant.content?.[0] as { id?: string }; + // Strict mode strips all non-alphanumeric characters + expect(toolCall.id).toBe("whatsapplogin17687998415271"); + expect(isValidCloudCodeAssistToolId(toolCall.id as string, "strict")).toBe(true); - const result = out[1] as Extract; - expect(result.toolCallId).toBe(toolCall.id); - }); + const result = out[1] as Extract; + expect(result.toolCallId).toBe(toolCall.id); + }); - it("avoids collisions when sanitization would produce duplicate IDs", () => { - const input = [ - { - role: "assistant", - content: [ - { type: "toolCall", id: "call_a|b", name: "read", arguments: {} }, - { type: "toolCall", id: "call_a:b", name: "read", arguments: {} }, - ], - }, - { - role: "toolResult", - toolCallId: "call_a|b", - toolName: "read", - content: [{ type: "text", text: "one" }], - }, - { - role: "toolResult", - toolCallId: "call_a:b", - toolName: "read", - content: [{ type: "text", text: "two" }], - }, - ] satisfies AgentMessage[]; + it("avoids collisions with alphanumeric-only suffixes", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_a|b", name: "read", arguments: {} }, + { type: "toolCall", id: "call_a:b", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_a|b", + toolName: "read", + content: [{ type: "text", text: "one" }], + }, + { + role: "toolResult", + toolCallId: "call_a:b", + toolName: "read", + content: [{ type: "text", text: "two" }], + }, + ] satisfies AgentMessage[]; - const out = sanitizeToolCallIdsForCloudCodeAssist(input); - expect(out).not.toBe(input); + const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict"); + expect(out).not.toBe(input); - const assistant = out[0] as Extract; - const a = assistant.content?.[0] as { id?: string }; - const b = assistant.content?.[1] as { id?: string }; - expect(typeof a.id).toBe("string"); - expect(typeof b.id).toBe("string"); - expect(a.id).not.toBe(b.id); - expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true); - expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true); + const assistant = out[0] as Extract; + const a = assistant.content?.[0] as { id?: string }; + const b = assistant.content?.[1] as { id?: string }; + expect(typeof a.id).toBe("string"); + expect(typeof b.id).toBe("string"); + expect(a.id).not.toBe(b.id); + // Both should be strictly alphanumeric + expect(isValidCloudCodeAssistToolId(a.id as string, "strict")).toBe(true); + expect(isValidCloudCodeAssistToolId(b.id as string, "strict")).toBe(true); + // Should not contain underscores or hyphens + expect(a.id).not.toMatch(/[_-]/); + expect(b.id).not.toMatch(/[_-]/); - const r1 = out[1] as Extract; - const r2 = out[2] as Extract; - expect(r1.toolCallId).toBe(a.id); - expect(r2.toolCallId).toBe(b.id); - }); - - it("caps tool call IDs at 40 chars while preserving uniqueness", () => { - const longA = `call_${"a".repeat(60)}`; - const longB = `call_${"a".repeat(59)}b`; - const input = [ - { - role: "assistant", - content: [ - { type: "toolCall", id: longA, name: "read", arguments: {} }, - { type: "toolCall", id: longB, name: "read", arguments: {} }, - ], - }, - { - role: "toolResult", - toolCallId: longA, - toolName: "read", - content: [{ type: "text", text: "one" }], - }, - { - role: "toolResult", - toolCallId: longB, - toolName: "read", - content: [{ type: "text", text: "two" }], - }, - ] satisfies AgentMessage[]; - - const out = sanitizeToolCallIdsForCloudCodeAssist(input); - const assistant = out[0] as Extract; - const a = assistant.content?.[0] as { id?: string }; - const b = assistant.content?.[1] as { id?: string }; - - expect(typeof a.id).toBe("string"); - expect(typeof b.id).toBe("string"); - expect(a.id).not.toBe(b.id); - expect(a.id?.length).toBeLessThanOrEqual(40); - expect(b.id?.length).toBeLessThanOrEqual(40); - expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true); - expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true); - - const r1 = out[1] as Extract; - const r2 = out[2] as Extract; - expect(r1.toolCallId).toBe(a.id); - expect(r2.toolCallId).toBe(b.id); + const r1 = out[1] as Extract; + const r2 = out[2] as Extract; + expect(r1.toolCallId).toBe(a.id); + expect(r2.toolCallId).toBe(b.id); + }); }); }); diff --git a/src/agents/tool-call-id.ts b/src/agents/tool-call-id.ts index 736454536..c2cd892cc 100644 --- a/src/agents/tool-call-id.ts +++ b/src/agents/tool-call-id.ts @@ -2,46 +2,70 @@ import { createHash } from "node:crypto"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; -export function sanitizeToolCallId(id: string): string { - if (!id || typeof id !== "string") return "defaulttoolid"; +export type ToolCallIdMode = "standard" | "strict"; - // Some providers (e.g. Mistral via OpenRouter) require strictly alphanumeric tool call IDs. - // Strip all non-alphanumeric characters to ensure maximum compatibility. - const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, ""); +/** + * Sanitize a tool call ID to be compatible with various providers. + * + * - "standard" mode: allows [a-zA-Z0-9_-], better readability (default) + * - "strict" mode: only [a-zA-Z0-9], required for Mistral via OpenRouter + */ +export function sanitizeToolCallId(id: string, mode: ToolCallIdMode = "standard"): string { + if (!id || typeof id !== "string") { + return mode === "strict" ? "defaulttoolid" : "default_tool_id"; + } - return alphanumericOnly.length > 0 ? alphanumericOnly : "sanitizedtoolid"; + if (mode === "strict") { + // Some providers (e.g. Mistral via OpenRouter) require strictly alphanumeric tool call IDs. + const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, ""); + return alphanumericOnly.length > 0 ? alphanumericOnly : "sanitizedtoolid"; + } + + // Standard mode: allow underscores and hyphens for better readability in logs + const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_"); + const trimmed = sanitized.replace(/^[^a-zA-Z0-9_-]+/, ""); + return trimmed.length > 0 ? trimmed : "sanitized_tool_id"; } -export function isValidCloudCodeAssistToolId(id: string): boolean { +export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "standard"): boolean { if (!id || typeof id !== "string") return false; - // Strictly alphanumeric for maximum provider compatibility (e.g. Mistral via OpenRouter). - return /^[a-zA-Z0-9]+$/.test(id); + if (mode === "strict") { + // Strictly alphanumeric for providers like Mistral via OpenRouter + return /^[a-zA-Z0-9]+$/.test(id); + } + // Standard mode allows underscores and hyphens + return /^[a-zA-Z0-9_-]+$/.test(id); } function shortHash(text: string): string { return createHash("sha1").update(text).digest("hex").slice(0, 8); } -function makeUniqueToolId(params: { id: string; used: Set }): string { +function makeUniqueToolId(params: { + id: string; + used: Set; + mode: ToolCallIdMode; +}): string { const MAX_LEN = 40; - const base = sanitizeToolCallId(params.id).slice(0, MAX_LEN); + const base = sanitizeToolCallId(params.id, params.mode).slice(0, MAX_LEN); if (!params.used.has(base)) return base; - // Use alphanumeric-only suffixes to maintain strict compatibility. const hash = shortHash(params.id); - const maxBaseLen = MAX_LEN - hash.length; + // Use separator based on mode: underscore for standard (readable), none for strict + const separator = params.mode === "strict" ? "" : "_"; + const maxBaseLen = MAX_LEN - separator.length - hash.length; const clippedBase = base.length > maxBaseLen ? base.slice(0, maxBaseLen) : base; - const candidate = `${clippedBase}${hash}`; + const candidate = `${clippedBase}${separator}${hash}`; if (!params.used.has(candidate)) return candidate; for (let i = 2; i < 1000; i += 1) { - const suffix = `x${i}`; + const suffix = params.mode === "strict" ? `x${i}` : `_${i}`; const next = `${candidate.slice(0, MAX_LEN - suffix.length)}${suffix}`; if (!params.used.has(next)) return next; } - const ts = `t${Date.now()}`; + const ts = params.mode === "strict" ? `t${Date.now()}` : `_${Date.now()}`; return `${candidate.slice(0, MAX_LEN - ts.length)}${ts}`; } @@ -100,18 +124,27 @@ function rewriteToolResultIds(params: { } as Extract; } -export function sanitizeToolCallIdsForCloudCodeAssist(messages: AgentMessage[]): AgentMessage[] { - // Some providers (e.g. Mistral via OpenRouter) require strictly alphanumeric tool IDs. - // Use ^[a-zA-Z0-9]+$ pattern for maximum compatibility across all providers. - // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `ab`). - // Fix by applying a stable, transcript-wide mapping and de-duping via hash suffix. +/** + * Sanitize tool call IDs for provider compatibility. + * + * @param messages - The messages to sanitize + * @param mode - "standard" (default, allows _-) or "strict" (alphanumeric only for Mistral/OpenRouter) + */ +export function sanitizeToolCallIdsForCloudCodeAssist( + messages: AgentMessage[], + mode: ToolCallIdMode = "standard", +): AgentMessage[] { + // Standard mode: allows [a-zA-Z0-9_-] for better readability in session logs + // Strict mode: only [a-zA-Z0-9] for providers like Mistral via OpenRouter + // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `a_b` or `ab`). + // Fix by applying a stable, transcript-wide mapping and de-duping via suffix. const map = new Map(); const used = new Set(); const resolve = (id: string) => { const existing = map.get(id); if (existing) return existing; - const next = makeUniqueToolId({ id, used }); + const next = makeUniqueToolId({ id, used, mode }); map.set(id, next); used.add(next); return next; From 0704fe7dbbf1a204724a9bb98ddc1037d0258129 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 22 Jan 2026 00:38:48 +0000 Subject: [PATCH 3/4] fix: enforce Mistral tool call ids (#1372) (thanks @zerone0x) --- CHANGELOG.md | 1 + ...ool-call-tool-result-ids-unchanged.test.ts | 2 +- ...ssistant-text-blocks-but-preserves.test.ts | 2 +- ...mbedded-helpers.sanitizetoolcallid.test.ts | 12 +++- src/agents/pi-embedded-helpers/images.ts | 7 ++- ...ed-runner.sanitize-session-history.test.ts | 19 ++++++ src/agents/pi-embedded-runner/google.ts | 27 ++++++++- src/agents/tool-call-id.test.ts | 59 +++++++++++++++++-- src/agents/tool-call-id.ts | 55 ++++++++++++----- ...ists-allowlisted-models-model-list.test.ts | 13 ++-- ...tches-fuzzy-selection-is-ambiguous.test.ts | 11 ++-- ...uzzy-model-matches-model-directive.test.ts | 33 ++++++----- ...l-verbose-during-flight-run-toggle.test.ts | 3 +- 13 files changed, 193 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88375f48b..23dc120e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Docs: https://docs.clawd.bot ### Fixes - Gateway: keep auto bind loopback-first and add explicit tailnet binding to avoid Tailscale taking over local UI. (#1380) +- Agents: enforce 9-char alphanumeric tool call ids for Mistral providers. (#1372) Thanks @zerone0x. - Embedded runner: persist injected history images so attachments aren’t reloaded each turn. (#1374) Thanks @Nicell. - Nodes tool: include agent/node/gateway context in tool failure logs to speed approval debugging. - macOS: exec approvals now respect wildcard agent allowlists (`*`). diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts index a715dacfb..377bc53fe 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.keeps-tool-call-tool-result-ids-unchanged.test.ts @@ -119,7 +119,7 @@ describe("sanitizeSessionMessagesImages", () => { const toolCall = (assistant.content as Array<{ type?: string; id?: string }>).find( (b) => b.type === "toolCall", ); - // Strict mode strips all non-alphanumeric characters for Mistral/OpenRouter compatibility + // Strict mode strips all non-alphanumeric characters expect(toolCall?.id).toBe("call123fc456"); const toolResult = out[1] as unknown as { diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts index 6ce18c3fc..6c12dfacf 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.test.ts @@ -83,7 +83,7 @@ describe("sanitizeSessionMessagesImages", () => { toolCallIdMode: "strict", }); - // Strict mode strips all non-alphanumeric characters for Mistral/OpenRouter compatibility + // Strict mode strips all non-alphanumeric characters const assistant = out[0] as { content?: Array<{ id?: string }> }; expect(assistant.content?.[0]?.id).toBe("callabcitem123"); expect(assistant.content?.[1]?.id).toBe("callabcitem456"); diff --git a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts index b4c6e9396..9569afbf3 100644 --- a/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizetoolcallid.test.ts @@ -18,7 +18,7 @@ describe("sanitizeToolCallId", () => { }); }); - describe("strict mode (for Mistral/OpenRouter)", () => { + describe("strict mode (alphanumeric only)", () => { it("strips all non-alphanumeric characters", () => { expect(sanitizeToolCallId("call_abc-123", "strict")).toBe("callabc123"); expect(sanitizeToolCallId("call_abc|item:456", "strict")).toBe("callabcitem456"); @@ -30,4 +30,14 @@ describe("sanitizeToolCallId", () => { expect(sanitizeToolCallId("", "strict")).toBe("defaulttoolid"); }); }); + + describe("strict9 mode (Mistral tool call IDs)", () => { + it("returns alphanumeric IDs with length 9", () => { + const out = sanitizeToolCallId("call_abc|item:456", "strict9"); + expect(out).toMatch(/^[a-zA-Z0-9]{9}$/); + }); + it("returns default for empty IDs", () => { + expect(sanitizeToolCallId("", "strict9")).toMatch(/^[a-zA-Z0-9]{9}$/); + }); + }); }); diff --git a/src/agents/pi-embedded-helpers/images.ts b/src/agents/pi-embedded-helpers/images.ts index 862067b26..13376e68c 100644 --- a/src/agents/pi-embedded-helpers/images.ts +++ b/src/agents/pi-embedded-helpers/images.ts @@ -33,7 +33,12 @@ export async function sanitizeSessionMessagesImages( label: string, options?: { sanitizeToolCallIds?: boolean; - /** Mode for tool call ID sanitization: "standard" (default, preserves _-) or "strict" (alphanumeric only) */ + /** + * Mode for tool call ID sanitization: + * - "standard" (default, preserves _-) + * - "strict" (alphanumeric only) + * - "strict9" (alphanumeric only, length 9) + */ toolCallIdMode?: ToolCallIdMode; enforceToolCallLast?: boolean; preserveSignatures?: boolean; diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 87c149d9a..c7a9b3d05 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -54,6 +54,25 @@ describe("sanitizeSessionHistory", () => { ); }); + it("sanitizes tool call ids with strict9 for Mistral models", async () => { + vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); + + await sanitizeSessionHistory({ + messages: mockMessages, + modelApi: "openai-responses", + provider: "openrouter", + modelId: "mistralai/devstral-2512:free", + sessionManager: mockSessionManager, + sessionId: "test-session", + }); + + expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith( + mockMessages, + "session:history", + expect.objectContaining({ sanitizeToolCallIds: true, toolCallIdMode: "strict9" }), + ); + }); + it("does not sanitize tool call ids for non-Google, non-OpenAI APIs", async () => { vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 558098c20..6050090a4 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -14,6 +14,8 @@ import { log } from "./logger.js"; import { describeUnknownError } from "./utils.js"; import { isAntigravityClaude } from "../pi-embedded-helpers/google.js"; import { cleanToolSchemaForGemini } from "../pi-tools.schema.js"; +import { normalizeProviderId } from "../model-selection.js"; +import type { ToolCallIdMode } from "../tool-call-id.js"; const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap"; const GOOGLE_SCHEMA_UNSUPPORTED_KEYWORDS = new Set([ @@ -44,12 +46,29 @@ const OPENAI_TOOL_CALL_ID_APIS = new Set([ "openai-responses", "openai-codex-responses", ]); +const MISTRAL_MODEL_HINTS = [ + "mistral", + "mixtral", + "codestral", + "pixtral", + "devstral", + "ministral", + "mistralai", +]; function shouldSanitizeToolCallIds(modelApi?: string | null): boolean { if (!modelApi) return false; return isGoogleModelApi(modelApi) || OPENAI_TOOL_CALL_ID_APIS.has(modelApi); } +function isMistralModel(params: { provider?: string | null; modelId?: string | null }): boolean { + const provider = normalizeProviderId(params.provider ?? ""); + if (provider === "mistral") return true; + const modelId = (params.modelId ?? "").toLowerCase(); + if (!modelId) return false; + return MISTRAL_MODEL_HINTS.some((hint) => modelId.includes(hint)); +} + function findUnsupportedSchemaKeywords(schema: unknown, path: string): string[] { if (!schema || typeof schema !== "object") return []; if (Array.isArray(schema)) { @@ -191,12 +210,16 @@ export async function sanitizeSessionHistory(params: { sessionId: string; }): Promise { const isAntigravityClaudeModel = isAntigravityClaude(params.modelApi, params.modelId); - const provider = (params.provider ?? "").toLowerCase(); + const provider = normalizeProviderId(params.provider ?? ""); const modelId = (params.modelId ?? "").toLowerCase(); const isOpenRouterGemini = (provider === "openrouter" || provider === "opencode") && modelId.includes("gemini"); + const isMistral = isMistralModel({ provider, modelId }); + const toolCallIdMode: ToolCallIdMode | undefined = isMistral ? "strict9" : undefined; + const sanitizeToolCallIds = shouldSanitizeToolCallIds(params.modelApi) || isMistral; const sanitizedImages = await sanitizeSessionMessagesImages(params.messages, "session:history", { - sanitizeToolCallIds: shouldSanitizeToolCallIds(params.modelApi), + sanitizeToolCallIds, + toolCallIdMode, enforceToolCallLast: params.modelApi === "anthropic-messages", preserveSignatures: params.modelApi === "google-antigravity" && isAntigravityClaudeModel, sanitizeThoughtSignatures: isOpenRouterGemini diff --git a/src/agents/tool-call-id.test.ts b/src/agents/tool-call-id.test.ts index a76fa93d7..897bdf512 100644 --- a/src/agents/tool-call-id.test.ts +++ b/src/agents/tool-call-id.test.ts @@ -30,9 +30,7 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { const input = [ { role: "assistant", - content: [ - { type: "toolCall", id: "call|item:123", name: "read", arguments: {} }, - ], + content: [{ type: "toolCall", id: "call|item:123", name: "read", arguments: {} }], }, { role: "toolResult", @@ -141,13 +139,18 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { }); }); - describe("strict mode (for Mistral/OpenRouter)", () => { + describe("strict mode (alphanumeric only)", () => { it("strips underscores and hyphens from tool call IDs", () => { const input = [ { role: "assistant", content: [ - { type: "toolCall", id: "whatsapp_login_1768799841527_1", name: "login", arguments: {} }, + { + type: "toolCall", + id: "whatsapp_login_1768799841527_1", + name: "login", + arguments: {}, + }, ], }, { @@ -216,4 +219,50 @@ describe("sanitizeToolCallIdsForCloudCodeAssist", () => { expect(r2.toolCallId).toBe(b.id); }); }); + + describe("strict9 mode (Mistral tool call IDs)", () => { + it("enforces alphanumeric IDs with length 9", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_abc|item:123", name: "read", arguments: {} }, + { type: "toolCall", id: "call_abc|item:456", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_abc|item:123", + toolName: "read", + content: [{ type: "text", text: "one" }], + }, + { + role: "toolResult", + toolCallId: "call_abc|item:456", + toolName: "read", + content: [{ type: "text", text: "two" }], + }, + ] satisfies AgentMessage[]; + + const out = sanitizeToolCallIdsForCloudCodeAssist(input, "strict9"); + expect(out).not.toBe(input); + + const assistant = out[0] as Extract; + const a = assistant.content?.[0] as { id?: string }; + const b = assistant.content?.[1] as { id?: string }; + + expect(typeof a.id).toBe("string"); + expect(typeof b.id).toBe("string"); + expect(a.id).not.toBe(b.id); + expect(a.id?.length).toBe(9); + expect(b.id?.length).toBe(9); + expect(isValidCloudCodeAssistToolId(a.id as string, "strict9")).toBe(true); + expect(isValidCloudCodeAssistToolId(b.id as string, "strict9")).toBe(true); + + const r1 = out[1] as Extract; + const r2 = out[2] as Extract; + expect(r1.toolCallId).toBe(a.id); + expect(r2.toolCallId).toBe(b.id); + }); + }); }); diff --git a/src/agents/tool-call-id.ts b/src/agents/tool-call-id.ts index c2cd892cc..d5d3cf078 100644 --- a/src/agents/tool-call-id.ts +++ b/src/agents/tool-call-id.ts @@ -2,50 +2,76 @@ import { createHash } from "node:crypto"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; -export type ToolCallIdMode = "standard" | "strict"; +export type ToolCallIdMode = "standard" | "strict" | "strict9"; + +const STRICT9_LEN = 9; /** * Sanitize a tool call ID to be compatible with various providers. * * - "standard" mode: allows [a-zA-Z0-9_-], better readability (default) - * - "strict" mode: only [a-zA-Z0-9], required for Mistral via OpenRouter + * - "strict" mode: only [a-zA-Z0-9] + * - "strict9" mode: only [a-zA-Z0-9], length 9 (Mistral tool call requirement) */ export function sanitizeToolCallId(id: string, mode: ToolCallIdMode = "standard"): string { if (!id || typeof id !== "string") { + if (mode === "strict9") return "defaultid"; return mode === "strict" ? "defaulttoolid" : "default_tool_id"; } if (mode === "strict") { - // Some providers (e.g. Mistral via OpenRouter) require strictly alphanumeric tool call IDs. + // Some providers require strictly alphanumeric tool call IDs. const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, ""); return alphanumericOnly.length > 0 ? alphanumericOnly : "sanitizedtoolid"; } + if (mode === "strict9") { + const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, ""); + if (alphanumericOnly.length >= STRICT9_LEN) return alphanumericOnly.slice(0, STRICT9_LEN); + if (alphanumericOnly.length > 0) return shortHash(alphanumericOnly, STRICT9_LEN); + return shortHash("sanitized", STRICT9_LEN); + } + // Standard mode: allow underscores and hyphens for better readability in logs const sanitized = id.replace(/[^a-zA-Z0-9_-]/g, "_"); const trimmed = sanitized.replace(/^[^a-zA-Z0-9_-]+/, ""); return trimmed.length > 0 ? trimmed : "sanitized_tool_id"; } -export function isValidCloudCodeAssistToolId(id: string, mode: ToolCallIdMode = "standard"): boolean { +export function isValidCloudCodeAssistToolId( + id: string, + mode: ToolCallIdMode = "standard", +): boolean { if (!id || typeof id !== "string") return false; if (mode === "strict") { - // Strictly alphanumeric for providers like Mistral via OpenRouter + // Strictly alphanumeric for providers with tighter tool ID constraints return /^[a-zA-Z0-9]+$/.test(id); } + if (mode === "strict9") { + return /^[a-zA-Z0-9]{9}$/.test(id); + } // Standard mode allows underscores and hyphens return /^[a-zA-Z0-9_-]+$/.test(id); } -function shortHash(text: string): string { - return createHash("sha1").update(text).digest("hex").slice(0, 8); +function shortHash(text: string, length = 8): string { + return createHash("sha1").update(text).digest("hex").slice(0, length); } -function makeUniqueToolId(params: { - id: string; - used: Set; - mode: ToolCallIdMode; -}): string { +function makeUniqueToolId(params: { id: string; used: Set; mode: ToolCallIdMode }): string { + if (params.mode === "strict9") { + const base = sanitizeToolCallId(params.id, params.mode); + const candidate = base.length >= STRICT9_LEN ? base.slice(0, STRICT9_LEN) : ""; + if (candidate && !params.used.has(candidate)) return candidate; + + for (let i = 0; i < 1000; i += 1) { + const hashed = shortHash(`${params.id}:${i}`, STRICT9_LEN); + if (!params.used.has(hashed)) return hashed; + } + + return shortHash(`${params.id}:${Date.now()}`, STRICT9_LEN); + } + const MAX_LEN = 40; const base = sanitizeToolCallId(params.id, params.mode).slice(0, MAX_LEN); @@ -128,14 +154,15 @@ function rewriteToolResultIds(params: { * Sanitize tool call IDs for provider compatibility. * * @param messages - The messages to sanitize - * @param mode - "standard" (default, allows _-) or "strict" (alphanumeric only for Mistral/OpenRouter) + * @param mode - "standard" (default, allows _-), "strict" (alphanumeric only), or "strict9" (alphanumeric length 9) */ export function sanitizeToolCallIdsForCloudCodeAssist( messages: AgentMessage[], mode: ToolCallIdMode = "standard", ): AgentMessage[] { // Standard mode: allows [a-zA-Z0-9_-] for better readability in session logs - // Strict mode: only [a-zA-Z0-9] for providers like Mistral via OpenRouter + // Strict mode: only [a-zA-Z0-9] + // Strict9 mode: only [a-zA-Z0-9], length 9 (Mistral tool call requirement) // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `a_b` or `ab`). // Fix by applying a stable, transcript-wide mapping and de-duping via suffix. const map = new Map(); diff --git a/src/auto-reply/reply.directive.directive-behavior.lists-allowlisted-models-model-list.test.ts b/src/auto-reply/reply.directive.directive-behavior.lists-allowlisted-models-model-list.test.ts index 45a994432..2b42977cb 100644 --- a/src/auto-reply/reply.directive.directive-behavior.lists-allowlisted-models-model-list.test.ts +++ b/src/auto-reply/reply.directive.directive-behavior.lists-allowlisted-models-model-list.test.ts @@ -60,7 +60,7 @@ describe("directive behavior", () => { vi.restoreAllMocks(); }); - it("lists allowlisted models on /model list", async () => { + it("moves /model list to /models", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockReset(); const storePath = path.join(home, "sessions.json"); @@ -90,7 +90,7 @@ describe("directive behavior", () => { expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); - it("falls back to configured models when catalog is unavailable", async () => { + it("shows summary on /model when catalog is unavailable", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockReset(); vi.mocked(loadModelCatalog).mockResolvedValueOnce([]); @@ -116,12 +116,13 @@ describe("directive behavior", () => { const text = Array.isArray(res) ? res[0]?.text : res?.text; expect(text).toContain("Current: anthropic/claude-opus-4-5"); + expect(text).toContain("Switch: /model "); expect(text).toContain("Browse: /models (providers) or /models (models)"); expect(text).toContain("More: /model status"); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); - it("includes catalog models when no allowlist is set", async () => { + it("moves /model list to /models even when no allowlist is set", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockReset(); vi.mocked(loadModelCatalog).mockResolvedValueOnce([ @@ -156,7 +157,7 @@ describe("directive behavior", () => { expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); - it("merges config allowlist models even when catalog is present", async () => { + it("moves /model list to /models even when catalog is present", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockReset(); // Catalog present but missing custom providers: /model should still include @@ -207,7 +208,7 @@ describe("directive behavior", () => { expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); - it("does not repeat missing auth labels on /model list", async () => { + it("moves /model list to /models without listing auth labels", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockReset(); const storePath = path.join(home, "sessions.json"); @@ -231,6 +232,8 @@ describe("directive behavior", () => { const text = Array.isArray(res) ? res[0]?.text : res?.text; expect(text).toContain("Model listing moved."); + expect(text).toContain("Use: /models (providers) or /models (models)"); + expect(text).toContain("Switch: /model "); expect(text).not.toContain("missing (missing)"); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); diff --git a/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts b/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts index e42ced2db..271027882 100644 --- a/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts +++ b/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts @@ -67,7 +67,7 @@ describe("directive behavior", () => { vi.mocked(runEmbeddedPiAgent).mockReset(); const storePath = path.join(home, "sessions.json"); - await getReplyFromConfig( + const res = await getReplyFromConfig( { Body: "/model ki", From: "+1222", To: "+1222", CommandAuthorized: true }, {}, { @@ -103,10 +103,11 @@ describe("directive behavior", () => { }, ); - assertModelSelection(storePath, { - model: "kimi-k2-0905-preview", - provider: "moonshot", - }); + const text = Array.isArray(res) ? res[0]?.text : res?.text; + expect(text).toContain("Unrecognized model: ki"); + expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); + expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); + assertModelSelection(storePath); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); diff --git a/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts b/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts index c78919f97..cc7f54b22 100644 --- a/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts +++ b/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts @@ -65,7 +65,7 @@ describe("directive behavior", () => { vi.mocked(runEmbeddedPiAgent).mockReset(); const storePath = path.join(home, "sessions.json"); - await getReplyFromConfig( + const res = await getReplyFromConfig( { Body: "/model kimi", From: "+1222", To: "+1222", CommandAuthorized: true }, {}, { @@ -94,10 +94,11 @@ describe("directive behavior", () => { }, ); - assertModelSelection(storePath, { - model: "kimi-k2-0905-preview", - provider: "moonshot", - }); + const text = Array.isArray(res) ? res[0]?.text : res?.text; + expect(text).toContain("Unrecognized model: kimi"); + expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); + expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); + assertModelSelection(storePath); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); @@ -106,7 +107,7 @@ describe("directive behavior", () => { vi.mocked(runEmbeddedPiAgent).mockReset(); const storePath = path.join(home, "sessions.json"); - await getReplyFromConfig( + const res = await getReplyFromConfig( { Body: "/model kimi-k2-0905-preview", From: "+1222", @@ -140,10 +141,11 @@ describe("directive behavior", () => { }, ); - assertModelSelection(storePath, { - model: "kimi-k2-0905-preview", - provider: "moonshot", - }); + const text = Array.isArray(res) ? res[0]?.text : res?.text; + expect(text).toContain("Unrecognized model: kimi-k2-0905-preview"); + expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); + expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); + assertModelSelection(storePath); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); @@ -152,7 +154,7 @@ describe("directive behavior", () => { vi.mocked(runEmbeddedPiAgent).mockReset(); const storePath = path.join(home, "sessions.json"); - await getReplyFromConfig( + const res = await getReplyFromConfig( { Body: "/model moonshot/kimi", From: "+1222", To: "+1222", CommandAuthorized: true }, {}, { @@ -181,10 +183,11 @@ describe("directive behavior", () => { }, ); - assertModelSelection(storePath, { - model: "kimi-k2-0905-preview", - provider: "moonshot", - }); + const text = Array.isArray(res) ? res[0]?.text : res?.text; + expect(text).toContain("Unrecognized model: moonshot/kimi"); + expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); + expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); + assertModelSelection(storePath); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); diff --git a/src/auto-reply/reply.directive.directive-behavior.updates-tool-verbose-during-flight-run-toggle.test.ts b/src/auto-reply/reply.directive.directive-behavior.updates-tool-verbose-during-flight-run-toggle.test.ts index c271adef3..c3d6a5fdd 100644 --- a/src/auto-reply/reply.directive.directive-behavior.updates-tool-verbose-during-flight-run-toggle.test.ts +++ b/src/auto-reply/reply.directive.directive-behavior.updates-tool-verbose-during-flight-run-toggle.test.ts @@ -187,7 +187,7 @@ describe("directive behavior", () => { expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); }); }); - it("lists allowlisted models on /model", async () => { + it("shows summary on /model", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockReset(); const storePath = path.join(home, "sessions.json"); @@ -212,6 +212,7 @@ describe("directive behavior", () => { const text = Array.isArray(res) ? res[0]?.text : res?.text; expect(text).toContain("Current: anthropic/claude-opus-4-5"); + expect(text).toContain("Switch: /model "); expect(text).toContain("Browse: /models (providers) or /models (models)"); expect(text).toContain("More: /model status"); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); From f5cec1dd8bb8587967c53abc7fdbbeabbb0e3e1f Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 22 Jan 2026 01:16:59 +0000 Subject: [PATCH 4/4] test: update fuzzy model selection expectations (#1372) (thanks @zerone0x) --- ...tches-fuzzy-selection-is-ambiguous.test.ts | 9 ++++--- ...uzzy-model-matches-model-directive.test.ts | 27 ++++++++++--------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts b/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts index 271027882..b636b85d6 100644 --- a/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts +++ b/src/auto-reply/reply.directive.directive-behavior.prefers-alias-matches-fuzzy-selection-is-ambiguous.test.ts @@ -104,10 +104,11 @@ describe("directive behavior", () => { ); const text = Array.isArray(res) ? res[0]?.text : res?.text; - expect(text).toContain("Unrecognized model: ki"); - expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); - expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); - assertModelSelection(storePath); + expect(text).toContain("Model set to Kimi (moonshot/kimi-k2-0905-preview)."); + assertModelSelection(storePath, { + provider: "moonshot", + model: "kimi-k2-0905-preview", + }); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); diff --git a/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts b/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts index cc7f54b22..2f0a6cc21 100644 --- a/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts +++ b/src/auto-reply/reply.directive.directive-behavior.supports-fuzzy-model-matches-model-directive.test.ts @@ -95,10 +95,11 @@ describe("directive behavior", () => { ); const text = Array.isArray(res) ? res[0]?.text : res?.text; - expect(text).toContain("Unrecognized model: kimi"); - expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); - expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); - assertModelSelection(storePath); + expect(text).toContain("Model set to moonshot/kimi-k2-0905-preview."); + assertModelSelection(storePath, { + provider: "moonshot", + model: "kimi-k2-0905-preview", + }); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); @@ -142,10 +143,11 @@ describe("directive behavior", () => { ); const text = Array.isArray(res) ? res[0]?.text : res?.text; - expect(text).toContain("Unrecognized model: kimi-k2-0905-preview"); - expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); - expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); - assertModelSelection(storePath); + expect(text).toContain("Model set to moonshot/kimi-k2-0905-preview."); + assertModelSelection(storePath, { + provider: "moonshot", + model: "kimi-k2-0905-preview", + }); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); }); @@ -184,10 +186,11 @@ describe("directive behavior", () => { ); const text = Array.isArray(res) ? res[0]?.text : res?.text; - expect(text).toContain("Unrecognized model: moonshot/kimi"); - expect(text).toContain("Did you mean: moonshot/kimi-k2-0905-preview"); - expect(text).toContain("Try: /model moonshot/kimi-k2-0905-preview"); - assertModelSelection(storePath); + expect(text).toContain("Model set to moonshot/kimi-k2-0905-preview."); + assertModelSelection(storePath, { + provider: "moonshot", + model: "kimi-k2-0905-preview", + }); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); });