fix(agents): sanitize transcripts for strict tool APIs

This commit is contained in:
Peter Steinberger
2026-01-11 04:46:18 +00:00
parent f00038b383
commit dbe156e881
5 changed files with 279 additions and 72 deletions

View File

@@ -12,6 +12,11 @@ import {
} from "../auto-reply/thinking.js";
import type { ClawdbotConfig } from "../config/config.js";
import { formatSandboxToolPolicyBlockedMessage } from "./sandbox.js";
import {
isValidCloudCodeAssistToolId,
sanitizeToolCallId,
sanitizeToolCallIdsForCloudCodeAssist,
} from "./tool-call-id.js";
import { sanitizeContentBlocksImages } from "./tool-images.js";
import type { WorkspaceBootstrapFile } from "./workspace.js";
@@ -90,8 +95,11 @@ export async function sanitizeSessionMessagesImages(
): Promise<AgentMessage[]> {
// We sanitize historical session messages because Anthropic can reject a request
// if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
const sanitizedIds = options?.sanitizeToolCallIds
? sanitizeToolCallIdsForCloudCodeAssist(messages)
: messages;
const out: AgentMessage[] = [];
for (const msg of messages) {
for (const msg of sanitizedIds) {
if (!msg || typeof msg !== "object") {
out.push(msg);
continue;
@@ -105,28 +113,7 @@ export async function sanitizeSessionMessagesImages(
content as ContentBlock[],
label,
)) as unknown as typeof toolMsg.content;
const sanitizedToolCallId =
options?.sanitizeToolCallIds && toolMsg.toolCallId
? sanitizeToolCallId(toolMsg.toolCallId)
: undefined;
const toolUseId = (toolMsg as { toolUseId?: unknown }).toolUseId;
const sanitizedToolUseId =
options?.sanitizeToolCallIds &&
typeof toolUseId === "string" &&
toolUseId
? sanitizeToolCallId(toolUseId)
: undefined;
const sanitizedMsg = {
...toolMsg,
content: nextContent,
...(sanitizedToolCallId && {
toolCallId: sanitizedToolCallId,
}),
...(sanitizedToolUseId && {
toolUseId: sanitizedToolUseId,
}),
};
out.push(sanitizedMsg);
out.push({ ...toolMsg, content: nextContent });
continue;
}
@@ -176,33 +163,8 @@ export async function sanitizeSessionMessagesImages(
return filteredContent.slice(0, lastToolIndex + 1);
})()
: filteredContent;
const sanitizedContent = options?.sanitizeToolCallIds
? await Promise.all(
normalizedContent.map(async (block) => {
if (!block || typeof block !== "object") return block;
const type = (block as { type?: unknown }).type;
const id = (block as { id?: unknown }).id;
if (typeof id !== "string" || !id) return block;
// Cloud Code Assist tool blocks require ids matching ^[a-zA-Z0-9_-]+$.
if (
type === "functionCall" ||
type === "toolUse" ||
type === "toolCall"
) {
return {
...(block as unknown as Record<string, unknown>),
id: sanitizeToolCallId(id),
};
}
return block;
}),
)
: normalizedContent;
const finalContent = (await sanitizeContentBlocksImages(
sanitizedContent as unknown as ContentBlock[],
normalizedContent as unknown as ContentBlock[],
label,
)) as unknown as typeof assistantMsg.content;
if (finalContent.length === 0) {
@@ -621,25 +583,7 @@ export function isMessagingToolDuplicateNormalized(
// OpenAI Codex generates IDs like "call_abc123|item_456" with pipe characters,
// but Google requires IDs matching ^[a-zA-Z0-9_-]+$ pattern.
// This function sanitizes tool call IDs by replacing invalid characters with underscores.
/**
 * Rewrites a tool call ID so that it only contains characters from
 * `[a-zA-Z0-9_-]`; every other UTF-16 code unit becomes an underscore.
 *
 * @param id - The original tool call ID.
 * @returns The sanitized ID, or `"default_tool_id"` when `id` is empty or not
 *   a string.
 */
export function sanitizeToolCallId(id: string): string {
  // Runtime guard: the value may originate from untyped transcript data.
  if (typeof id !== "string" || id.length === 0) return "default_tool_id";
  // The replacement is one-for-one, so the result has the same (non-zero)
  // length as the input and consists solely of allowed characters. No extra
  // prefix trimming or empty-result fallback is required.
  return id.replace(/[^a-zA-Z0-9_-]/g, "_");
}
/**
 * Tells whether `id` is a non-empty string made up only of ASCII letters,
 * digits, underscores and hyphens.
 *
 * @param id - Candidate tool call ID.
 * @returns `true` when the ID matches `^[a-zA-Z0-9_-]+$`, `false` otherwise
 *   (including for empty or non-string values).
 */
export function isValidCloudCodeAssistToolId(id: string): boolean {
  const isNonEmptyString = typeof id === "string" && id.length > 0;
  return isNonEmptyString ? /^[a-zA-Z0-9_-]+$/.test(id) : false;
}
export { sanitizeToolCallId, isValidCloudCodeAssistToolId };
export function isMessagingToolDuplicate(
text: string,

View File

@@ -236,6 +236,21 @@ function buildContextPruningExtension(params: {
};
}
/**
 * Collects the extension paths handed to an embedded agent session.
 *
 * The resolved "transcript-sanitize" extension path always comes first; any
 * paths reported by `buildContextPruningExtension` are appended after it.
 *
 * @param params - Passed through unchanged to `buildContextPruningExtension`.
 * @returns The extension paths, transcript sanitizer first.
 */
function buildEmbeddedExtensionPaths(params: {
  cfg: ClawdbotConfig | undefined;
  sessionManager: SessionManager;
  provider: string;
  modelId: string;
  model: Model<Api> | undefined;
}): string[] {
  const sanitizerPath = resolvePiExtensionPath("transcript-sanitize");
  const { additionalExtensionPaths } = buildContextPruningExtension(params);
  if (!additionalExtensionPaths) return [sanitizerPath];
  return [sanitizerPath, ...additionalExtensionPaths];
}
export type EmbeddedPiAgentMeta = {
sessionId: string;
provider: string;
@@ -966,14 +981,13 @@ export async function compactEmbeddedPiSession(params: {
effectiveWorkspace,
agentDir,
);
const pruning = buildContextPruningExtension({
const additionalExtensionPaths = buildEmbeddedExtensionPaths({
cfg: params.config,
sessionManager,
provider,
modelId,
model,
});
const additionalExtensionPaths = pruning.additionalExtensionPaths;
const { builtInTools, customTools } = splitSdkTools({
tools,
@@ -1355,14 +1369,13 @@ export async function runEmbeddedPiAgent(params: {
effectiveWorkspace,
agentDir,
);
const pruning = buildContextPruningExtension({
const additionalExtensionPaths = buildEmbeddedExtensionPaths({
cfg: params.config,
sessionManager,
provider,
modelId,
model,
});
const additionalExtensionPaths = pruning.additionalExtensionPaths;
const { builtInTools, customTools } = splitSdkTools({
tools,

View File

@@ -0,0 +1,35 @@
/**
* Transcript repair/sanitization extension.
*
* Runs on every context build to prevent strict provider request rejections:
* - duplicate or displaced tool results (Anthropic-compatible APIs, MiniMax, Cloud Code Assist)
* - Cloud Code Assist tool call ID constraints + collision-safe sanitization
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type {
ContextEvent,
ExtensionAPI,
ExtensionContext,
} from "@mariozechner/pi-coding-agent";
import { isGoogleModelApi } from "../pi-embedded-helpers.js";
import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js";
/**
 * Registers a "context" handler that repairs the transcript in two steps:
 *
 * 1. `sanitizeToolUseResultPairing` is applied unconditionally.
 * 2. `sanitizeToolCallIdsForCloudCodeAssist` is applied only when
 *    `isGoogleModelApi` accepts the current model's API.
 *
 * The handler returns `undefined` when the resulting array is the very same
 * instance as `event.messages`, and `{ messages }` otherwise.
 *
 * @param api - Extension API used to subscribe to the "context" event.
 */
export default function transcriptSanitizeExtension(api: ExtensionAPI): void {
  api.on("context", (event: ContextEvent, ctx: ExtensionContext) => {
    // Both helpers signal "nothing changed" by returning their input array,
    // so plain reassignment keeps the identity check at the end meaningful.
    let messages = sanitizeToolUseResultPairing(
      event.messages as AgentMessage[],
    );
    if (isGoogleModelApi(ctx.model?.api)) {
      messages = sanitizeToolCallIdsForCloudCodeAssist(messages);
    }
    return messages === event.messages ? undefined : { messages };
  });
}

View File

@@ -0,0 +1,70 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { describe, expect, it } from "vitest";
import {
isValidCloudCodeAssistToolId,
sanitizeToolCallIdsForCloudCodeAssist,
} from "./tool-call-id.js";
describe("sanitizeToolCallIdsForCloudCodeAssist", () => {
  it("is a no-op for already-valid non-colliding IDs", () => {
    // One call/result pair whose shared ID already satisfies the ID pattern.
    const transcript = [
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "ok" }],
      },
    ] satisfies AgentMessage[];

    // Identity, not just deep equality: an untouched transcript must come
    // back as the same array instance.
    expect(sanitizeToolCallIdsForCloudCodeAssist(transcript)).toBe(transcript);
  });

  it("avoids collisions when sanitization would produce duplicate IDs", () => {
    // "|" and ":" are both replaced by "_", so a naive per-ID sanitization
    // would map both calls to "call_a_b".
    const transcript = [
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_a|b", name: "read", arguments: {} },
          { type: "toolCall", id: "call_a:b", name: "read", arguments: {} },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_a|b",
        toolName: "read",
        content: [{ type: "text", text: "one" }],
      },
      {
        role: "toolResult",
        toolCallId: "call_a:b",
        toolName: "read",
        content: [{ type: "text", text: "two" }],
      },
    ] satisfies AgentMessage[];

    const sanitized = sanitizeToolCallIdsForCloudCodeAssist(transcript);
    expect(sanitized).not.toBe(transcript);

    const assistantMsg = sanitized[0] as Extract<
      AgentMessage,
      { role: "assistant" }
    >;
    const firstCall = assistantMsg.content?.[0] as { id?: string };
    const secondCall = assistantMsg.content?.[1] as { id?: string };

    // Each rewritten call ID must be a valid string ...
    for (const call of [firstCall, secondCall]) {
      expect(typeof call.id).toBe("string");
      expect(isValidCloudCodeAssistToolId(call.id as string)).toBe(true);
    }
    // ... and the two must stay distinguishable.
    expect(firstCall.id).not.toBe(secondCall.id);

    // Each tool result must follow the call it originally referred to.
    const firstResult = sanitized[1] as Extract<
      AgentMessage,
      { role: "toolResult" }
    >;
    const secondResult = sanitized[2] as Extract<
      AgentMessage,
      { role: "toolResult" }
    >;
    expect(firstResult.toolCallId).toBe(firstCall.id);
    expect(secondResult.toolCallId).toBe(secondCall.id);
  });
});

145
src/agents/tool-call-id.ts Normal file
View File

@@ -0,0 +1,145 @@
import { createHash } from "node:crypto";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
/**
 * Rewrites a tool call ID so that it only contains characters from
 * `[a-zA-Z0-9_-]`; every other UTF-16 code unit becomes an underscore.
 *
 * Distinct inputs can map to the same output (e.g. `a|b` and `a:b` both
 * become `a_b`), so this function alone does not guarantee uniqueness.
 *
 * @param id - The original tool call ID.
 * @returns The sanitized ID, or `"default_tool_id"` when `id` is empty or not
 *   a string.
 */
export function sanitizeToolCallId(id: string): string {
  // Runtime guard: the value may originate from untyped transcript data.
  if (typeof id !== "string" || id.length === 0) return "default_tool_id";
  // The replacement is one-for-one, so the result has the same (non-zero)
  // length as the input and consists solely of allowed characters. No extra
  // prefix trimming or empty-result fallback is required.
  return id.replace(/[^a-zA-Z0-9_-]/g, "_");
}
/**
 * Tells whether `id` is a non-empty string made up only of ASCII letters,
 * digits, underscores and hyphens.
 *
 * @param id - Candidate tool call ID.
 * @returns `true` when the ID matches `^[a-zA-Z0-9_-]+$`, `false` otherwise
 *   (including for empty or non-string values).
 */
export function isValidCloudCodeAssistToolId(id: string): boolean {
  const isNonEmptyString = typeof id === "string" && id.length > 0;
  return isNonEmptyString ? /^[a-zA-Z0-9_-]+$/.test(id) : false;
}
/**
 * Returns the first 8 hexadecimal characters of the SHA-1 digest of `text`.
 *
 * @param text - Input to hash.
 * @returns An 8-character lowercase hex string.
 */
function shortHash(text: string): string {
  const hasher = createHash("sha1");
  hasher.update(text);
  const hexDigest = hasher.digest("hex");
  return hexDigest.substring(0, 8);
}
/**
 * Produces a sanitized tool ID that is not yet present in `params.used`.
 *
 * Resolution order:
 * 1. the plain sanitized ID;
 * 2. `<sanitized>_<hash>`, where the hash is derived from the ORIGINAL id so
 *    that different originals with the same sanitized form diverge, and the
 *    sanitized part is clipped so this form is at most 64 characters;
 * 3. `<sanitized>_<hash>_<n>` for n = 2, 3, ... until a free ID is found.
 *
 * The function does not modify `used`; the caller records the returned ID.
 *
 * NOTE(review): only form 2 is length-clipped; forms 1 and 3 can exceed 64
 * characters. Confirm whether the target API enforces a length limit.
 *
 * @param params.id - Original (unsanitized) tool call ID.
 * @param params.used - IDs that have already been handed out.
 * @returns An ID for which `params.used.has(result)` is `false`.
 */
function makeUniqueToolId(params: { id: string; used: Set<string> }): string {
  const { id, used } = params;

  const base = sanitizeToolCallId(id);
  if (!used.has(base)) return base;

  const hash = shortHash(id);
  // Reserve room for the "_" separator and the hash.
  const maxBaseLen = 64 - 1 - hash.length;
  const clippedBase =
    base.length > maxBaseLen ? base.slice(0, maxBaseLen) : base;
  const candidate = `${clippedBase}_${hash}`;
  if (!used.has(candidate)) return candidate;

  // `used` is finite, so some counter value is always free. Counting without
  // an upper bound keeps the result deterministic and guarantees it is
  // actually unused (a time-based suffix offers neither property).
  for (let i = 2; ; i += 1) {
    const next = `${candidate}_${i}`;
    if (!used.has(next)) return next;
  }
}
/**
 * Applies `resolve` to the `id` of every tool-call block in an assistant
 * message.
 *
 * A block is treated as a tool call when its `type` is `"functionCall"`,
 * `"toolUse"` or `"toolCall"` and its `id` is a non-empty string. All other
 * blocks are passed through untouched.
 *
 * @param params.message - Assistant message to inspect; never mutated.
 * @param params.resolve - Maps an original ID to its replacement.
 * @returns The same message instance when the content is not an array or no
 *   ID changed; otherwise a shallow copy with rewritten blocks.
 */
function rewriteAssistantToolCallIds(params: {
  message: Extract<AgentMessage, { role: "assistant" }>;
  resolve: (id: string) => string;
}): Extract<AgentMessage, { role: "assistant" }> {
  const { message, resolve } = params;
  const blocks = message.content;
  if (!Array.isArray(blocks)) return message;

  const isToolCallType = (value: unknown): boolean =>
    value === "functionCall" || value === "toolUse" || value === "toolCall";

  let rewritten = false;
  const updated = blocks.map((block) => {
    if (!block || typeof block !== "object") return block;
    const { type, id } = block as { type?: unknown; id?: unknown };
    if (!isToolCallType(type)) return block;
    if (typeof id !== "string" || id.length === 0) return block;

    const mapped = resolve(id);
    // Keep the original object when the ID is already acceptable so that
    // callers can detect "no change" by identity.
    if (mapped === id) return block;
    rewritten = true;
    return { ...(block as unknown as Record<string, unknown>), id: mapped };
  });

  return rewritten
    ? { ...message, content: updated as typeof message.content }
    : message;
}
/**
 * Applies `resolve` to the `toolCallId` and `toolUseId` fields of a tool
 * result message.
 *
 * Only non-empty string values are resolved; `toolCallId` is resolved before
 * `toolUseId`.
 *
 * @param params.message - Tool result message to inspect; never mutated.
 * @param params.resolve - Maps an original ID to its replacement.
 * @returns The same message instance when neither ID changed; otherwise a
 *   shallow copy carrying the rewritten ID(s).
 */
function rewriteToolResultIds(params: {
  message: Extract<AgentMessage, { role: "toolResult" }>;
  resolve: (id: string) => string;
}): Extract<AgentMessage, { role: "toolResult" }> {
  const { message, resolve } = params;

  // Treat anything that is not a non-empty string as "absent".
  const asId = (value: unknown): string | undefined =>
    typeof value === "string" && value ? value : undefined;

  const currentCallId = asId(message.toolCallId);
  const currentUseId = asId((message as { toolUseId?: unknown }).toolUseId);

  const mappedCallId =
    currentCallId === undefined ? undefined : resolve(currentCallId);
  const mappedUseId =
    currentUseId === undefined ? undefined : resolve(currentUseId);

  const unchanged =
    mappedCallId === currentCallId && mappedUseId === currentUseId;
  if (unchanged) return message;

  // Only overwrite fields for which a truthy replacement exists.
  const patch: { toolCallId?: string; toolUseId?: string } = {};
  if (mappedCallId) patch.toolCallId = mappedCallId;
  if (mappedUseId) patch.toolUseId = mappedUseId;

  return { ...message, ...patch } as Extract<
    AgentMessage,
    { role: "toolResult" }
  >;
}
/**
 * Rewrites tool call IDs across a whole transcript so that they are valid and
 * remain distinct from each other.
 *
 * A single original-to-replacement mapping is shared by all messages, so an
 * assistant tool call and the tool result that refers to it receive the same
 * replacement. Only messages with role `"assistant"` or `"toolResult"` are
 * inspected; everything else is passed through.
 *
 * @param messages - Transcript to sanitize; never mutated.
 * @returns The input array itself when no message changed; otherwise a new
 *   array in which changed messages are replaced by rewritten copies.
 */
export function sanitizeToolCallIdsForCloudCodeAssist(
  messages: AgentMessage[],
): AgentMessage[] {
  // Cloud Code Assist requires tool IDs matching ^[a-zA-Z0-9_-]+$.
  // Sanitization can introduce collisions (e.g. `a|b` and `a:b` -> `a_b`).
  // Fix by applying a stable, transcript-wide mapping and de-duping via suffix.
  const assigned = new Map<string, string>();
  const taken = new Set<string>();

  const resolve = (id: string): string => {
    const known = assigned.get(id);
    if (known) return known;
    const fresh = makeUniqueToolId({ id, used: taken });
    assigned.set(id, fresh);
    taken.add(fresh);
    return fresh;
  };

  let touched = false;
  const rewritten = messages.map((msg) => {
    if (!msg || typeof msg !== "object") return msg;

    let next: AgentMessage;
    switch ((msg as { role?: unknown }).role) {
      case "assistant":
        next = rewriteAssistantToolCallIds({
          message: msg as Extract<AgentMessage, { role: "assistant" }>,
          resolve,
        });
        break;
      case "toolResult":
        next = rewriteToolResultIds({
          message: msg as Extract<AgentMessage, { role: "toolResult" }>,
          resolve,
        });
        break;
      default:
        return msg;
    }

    // The rewrite helpers return their input when nothing changed, so an
    // identity comparison is enough to detect a modification.
    if (next !== msg) touched = true;
    return next;
  });

  return touched ? rewritten : messages;
}