fix(agents): sanitize transcripts for strict tool APIs
This commit is contained in:
@@ -12,6 +12,11 @@ import {
|
||||
} from "../auto-reply/thinking.js";
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
import { formatSandboxToolPolicyBlockedMessage } from "./sandbox.js";
|
||||
import {
|
||||
isValidCloudCodeAssistToolId,
|
||||
sanitizeToolCallId,
|
||||
sanitizeToolCallIdsForCloudCodeAssist,
|
||||
} from "./tool-call-id.js";
|
||||
import { sanitizeContentBlocksImages } from "./tool-images.js";
|
||||
import type { WorkspaceBootstrapFile } from "./workspace.js";
|
||||
|
||||
@@ -90,8 +95,11 @@ export async function sanitizeSessionMessagesImages(
|
||||
): Promise<AgentMessage[]> {
|
||||
// We sanitize historical session messages because Anthropic can reject a request
|
||||
// if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
|
||||
const sanitizedIds = options?.sanitizeToolCallIds
|
||||
? sanitizeToolCallIdsForCloudCodeAssist(messages)
|
||||
: messages;
|
||||
const out: AgentMessage[] = [];
|
||||
for (const msg of messages) {
|
||||
for (const msg of sanitizedIds) {
|
||||
if (!msg || typeof msg !== "object") {
|
||||
out.push(msg);
|
||||
continue;
|
||||
@@ -105,28 +113,7 @@ export async function sanitizeSessionMessagesImages(
|
||||
content as ContentBlock[],
|
||||
label,
|
||||
)) as unknown as typeof toolMsg.content;
|
||||
const sanitizedToolCallId =
|
||||
options?.sanitizeToolCallIds && toolMsg.toolCallId
|
||||
? sanitizeToolCallId(toolMsg.toolCallId)
|
||||
: undefined;
|
||||
const toolUseId = (toolMsg as { toolUseId?: unknown }).toolUseId;
|
||||
const sanitizedToolUseId =
|
||||
options?.sanitizeToolCallIds &&
|
||||
typeof toolUseId === "string" &&
|
||||
toolUseId
|
||||
? sanitizeToolCallId(toolUseId)
|
||||
: undefined;
|
||||
const sanitizedMsg = {
|
||||
...toolMsg,
|
||||
content: nextContent,
|
||||
...(sanitizedToolCallId && {
|
||||
toolCallId: sanitizedToolCallId,
|
||||
}),
|
||||
...(sanitizedToolUseId && {
|
||||
toolUseId: sanitizedToolUseId,
|
||||
}),
|
||||
};
|
||||
out.push(sanitizedMsg);
|
||||
out.push({ ...toolMsg, content: nextContent });
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -176,33 +163,8 @@ export async function sanitizeSessionMessagesImages(
|
||||
return filteredContent.slice(0, lastToolIndex + 1);
|
||||
})()
|
||||
: filteredContent;
|
||||
const sanitizedContent = options?.sanitizeToolCallIds
|
||||
? await Promise.all(
|
||||
normalizedContent.map(async (block) => {
|
||||
if (!block || typeof block !== "object") return block;
|
||||
|
||||
const type = (block as { type?: unknown }).type;
|
||||
const id = (block as { id?: unknown }).id;
|
||||
if (typeof id !== "string" || !id) return block;
|
||||
|
||||
// Cloud Code Assist tool blocks require ids matching ^[a-zA-Z0-9_-]+$.
|
||||
if (
|
||||
type === "functionCall" ||
|
||||
type === "toolUse" ||
|
||||
type === "toolCall"
|
||||
) {
|
||||
return {
|
||||
...(block as unknown as Record<string, unknown>),
|
||||
id: sanitizeToolCallId(id),
|
||||
};
|
||||
}
|
||||
|
||||
return block;
|
||||
}),
|
||||
)
|
||||
: normalizedContent;
|
||||
const finalContent = (await sanitizeContentBlocksImages(
|
||||
sanitizedContent as unknown as ContentBlock[],
|
||||
normalizedContent as unknown as ContentBlock[],
|
||||
label,
|
||||
)) as unknown as typeof assistantMsg.content;
|
||||
if (finalContent.length === 0) {
|
||||
@@ -621,25 +583,7 @@ export function isMessagingToolDuplicateNormalized(
|
||||
// OpenAI Codex generates IDs like "call_abc123|item_456" with pipe characters,
|
||||
// but Google requires IDs matching ^[a-zA-Z0-9_-]+$ pattern.
|
||||
// This function sanitizes tool call IDs by replacing invalid characters with underscores.
|
||||
|
||||
export function sanitizeToolCallId(id: string): string {
|
||||
if (!id || typeof id !== "string") return "default_tool_id";
|
||||
|
||||
const cloudCodeAssistPatternReplacement = id.replace(/[^a-zA-Z0-9_-]/g, "_");
|
||||
const trimmedInvalidStartChars = cloudCodeAssistPatternReplacement.replace(
|
||||
/^[^a-zA-Z0-9_-]+/,
|
||||
"",
|
||||
);
|
||||
|
||||
return trimmedInvalidStartChars.length > 0
|
||||
? trimmedInvalidStartChars
|
||||
: "sanitized_tool_id";
|
||||
}
|
||||
|
||||
export function isValidCloudCodeAssistToolId(id: string): boolean {
|
||||
if (!id || typeof id !== "string") return false;
|
||||
return /^[a-zA-Z0-9_-]+$/.test(id);
|
||||
}
|
||||
export { sanitizeToolCallId, isValidCloudCodeAssistToolId };
|
||||
|
||||
export function isMessagingToolDuplicate(
|
||||
text: string,
|
||||
|
||||
@@ -236,6 +236,21 @@ function buildContextPruningExtension(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the list of extension paths for an embedded session.
 *
 * The `transcript-sanitize` extension path is always first; any
 * `additionalExtensionPaths` reported by `buildContextPruningExtension`
 * are appended after it.
 *
 * @param params - Passed through unchanged to `buildContextPruningExtension`.
 * @returns The extension paths, transcript-sanitize first.
 */
function buildEmbeddedExtensionPaths(params: {
  cfg: ClawdbotConfig | undefined;
  sessionManager: SessionManager;
  provider: string;
  modelId: string;
  model: Model<Api> | undefined;
}): string[] {
  const paths = [resolvePiExtensionPath("transcript-sanitize")];

  // The pruning builder may report no extra paths at all.
  const pruning = buildContextPruningExtension(params);
  if (pruning.additionalExtensionPaths) {
    paths.push(...pruning.additionalExtensionPaths);
  }

  return paths;
}
|
||||
|
||||
export type EmbeddedPiAgentMeta = {
|
||||
sessionId: string;
|
||||
provider: string;
|
||||
@@ -966,14 +981,13 @@ export async function compactEmbeddedPiSession(params: {
|
||||
effectiveWorkspace,
|
||||
agentDir,
|
||||
);
|
||||
const pruning = buildContextPruningExtension({
|
||||
const additionalExtensionPaths = buildEmbeddedExtensionPaths({
|
||||
cfg: params.config,
|
||||
sessionManager,
|
||||
provider,
|
||||
modelId,
|
||||
model,
|
||||
});
|
||||
const additionalExtensionPaths = pruning.additionalExtensionPaths;
|
||||
|
||||
const { builtInTools, customTools } = splitSdkTools({
|
||||
tools,
|
||||
@@ -1355,14 +1369,13 @@ export async function runEmbeddedPiAgent(params: {
|
||||
effectiveWorkspace,
|
||||
agentDir,
|
||||
);
|
||||
const pruning = buildContextPruningExtension({
|
||||
const additionalExtensionPaths = buildEmbeddedExtensionPaths({
|
||||
cfg: params.config,
|
||||
sessionManager,
|
||||
provider,
|
||||
modelId,
|
||||
model,
|
||||
});
|
||||
const additionalExtensionPaths = pruning.additionalExtensionPaths;
|
||||
|
||||
const { builtInTools, customTools } = splitSdkTools({
|
||||
tools,
|
||||
|
||||
35
src/agents/pi-extensions/transcript-sanitize.ts
Normal file
35
src/agents/pi-extensions/transcript-sanitize.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Transcript repair/sanitization extension.
|
||||
*
|
||||
* Runs on every context build to prevent strict provider request rejections:
|
||||
* - duplicate or displaced tool results (Anthropic-compatible APIs, MiniMax, Cloud Code Assist)
|
||||
* - Cloud Code Assist tool call ID constraints + collision-safe sanitization
|
||||
*/
|
||||
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type {
|
||||
ContextEvent,
|
||||
ExtensionAPI,
|
||||
ExtensionContext,
|
||||
} from "@mariozechner/pi-coding-agent";
|
||||
|
||||
import { isGoogleModelApi } from "../pi-embedded-helpers.js";
|
||||
import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
|
||||
import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js";
|
||||
|
||||
/**
 * Registers a `context` handler that repairs the transcript.
 *
 * The handler always runs `sanitizeToolUseResultPairing`, and additionally
 * runs `sanitizeToolCallIdsForCloudCodeAssist` when the active model uses a
 * Google model API. It returns `undefined` when the message array reference
 * is unchanged, otherwise `{ messages }` with the repaired transcript.
 *
 * @param api - Extension API used to subscribe to the `context` event.
 */
export default function transcriptSanitizeExtension(api: ExtensionAPI): void {
  api.on("context", (event: ContextEvent, ctx: ExtensionContext) => {
    const original = event.messages as AgentMessage[];

    // Pairing repair is applied regardless of provider.
    // NOTE(review): presumably returns the same array reference when nothing
    // needed repair (the no-op check below relies on that) — confirm.
    let next = sanitizeToolUseResultPairing(original);

    // ID rewriting is only applied for Google model APIs.
    if (isGoogleModelApi(ctx.model?.api)) {
      next = sanitizeToolCallIdsForCloudCodeAssist(next);
    }

    // Same reference means no step produced a new array.
    if (next === original) return undefined;
    return { messages: next };
  });
}
|
||||
70
src/agents/tool-call-id.test.ts
Normal file
70
src/agents/tool-call-id.test.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import {
|
||||
isValidCloudCodeAssistToolId,
|
||||
sanitizeToolCallIdsForCloudCodeAssist,
|
||||
} from "./tool-call-id.js";
|
||||
|
||||
describe("sanitizeToolCallIdsForCloudCodeAssist", () => {
  // Valid, unique IDs must come back as the very same array instance.
  it("is a no-op for already-valid non-colliding IDs", () => {
    const input = [
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "ok" }],
      },
    ] satisfies AgentMessage[];

    const out = sanitizeToolCallIdsForCloudCodeAssist(input);
    expect(out).toBe(input);
  });

  // `call_a|b` and `call_a:b` both sanitize to `call_a_b`; the rewritten IDs
  // must stay distinct, valid, and matched with their tool results.
  it("avoids collisions when sanitization would produce duplicate IDs", () => {
    const input = [
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_a|b", name: "read", arguments: {} },
          { type: "toolCall", id: "call_a:b", name: "read", arguments: {} },
        ],
      },
      {
        role: "toolResult",
        toolCallId: "call_a|b",
        toolName: "read",
        content: [{ type: "text", text: "one" }],
      },
      {
        role: "toolResult",
        toolCallId: "call_a:b",
        toolName: "read",
        content: [{ type: "text", text: "two" }],
      },
    ] satisfies AgentMessage[];

    const out = sanitizeToolCallIdsForCloudCodeAssist(input);
    expect(out).not.toBe(input);

    const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>;
    const a = assistant.content?.[0] as { id?: string };
    const b = assistant.content?.[1] as { id?: string };
    expect(typeof a.id).toBe("string");
    expect(typeof b.id).toBe("string");
    expect(a.id).not.toBe(b.id);
    expect(isValidCloudCodeAssistToolId(a.id as string)).toBe(true);
    expect(isValidCloudCodeAssistToolId(b.id as string)).toBe(true);

    // Each result must follow the call it originally belonged to.
    const r1 = out[1] as Extract<AgentMessage, { role: "toolResult" }>;
    const r2 = out[2] as Extract<AgentMessage, { role: "toolResult" }>;
    expect(r1.toolCallId).toBe(a.id);
    expect(r2.toolCallId).toBe(b.id);
  });
});
|
||||
145
src/agents/tool-call-id.ts
Normal file
145
src/agents/tool-call-id.ts
Normal file
@@ -0,0 +1,145 @@
|
||||
import { createHash } from "node:crypto";
|
||||
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
/**
 * Rewrites a tool call ID so it matches `^[a-zA-Z0-9_-]+$`.
 *
 * Every character outside `[a-zA-Z0-9_-]` is replaced by `_`, one for one,
 * so a non-empty input always yields a non-empty output of the same length.
 *
 * @param id - Raw tool call ID.
 * @returns The sanitized ID, or `"default_tool_id"` when `id` is empty or
 *   not a string.
 */
export function sanitizeToolCallId(id: string): string {
  if (!id || typeof id !== "string") return "default_tool_id";

  // A single global pass is enough: after it no invalid character remains,
  // so no leading-character trim or empty-result fallback is needed.
  return id.replace(/[^a-zA-Z0-9_-]/g, "_");
}
|
||||
|
||||
/**
 * Reports whether `id` is a non-empty string made only of characters in
 * `[a-zA-Z0-9_-]`.
 *
 * @param id - Candidate tool call ID.
 * @returns `true` when `id` matches `^[a-zA-Z0-9_-]+$`; `false` for empty
 *   or non-string values.
 */
export function isValidCloudCodeAssistToolId(id: string): boolean {
  // The runtime type check guards against untyped callers.
  return typeof id === "string" && id.length > 0 && /^[a-zA-Z0-9_-]+$/.test(id);
}
|
||||
|
||||
/**
 * Returns the first 8 hex characters of the SHA-1 digest of `text`.
 *
 * @param text - Input to hash.
 * @returns An 8-character lowercase hex string.
 */
function shortHash(text: string): string {
  const digest = createHash("sha1").update(text).digest("hex");
  return digest.slice(0, 8);
}
|
||||
|
||||
/**
 * Produces a sanitized tool ID that is not yet present in `params.used`.
 *
 * The plain sanitized ID is returned when it is free. On a collision a
 * suffix derived from a hash of the original ID is appended, and if that is
 * taken too, a numeric counter is added. Every suffixed candidate is clipped
 * so it never exceeds 64 characters, and the result depends only on
 * `params.id` and the contents of `params.used` (no clock-based fallback).
 *
 * This function does not add the result to `params.used`; the caller does.
 *
 * @param params.id - Original (unsanitized) tool call ID.
 * @param params.used - Sanitized IDs already handed out.
 * @returns A sanitized ID not contained in `params.used`.
 */
function makeUniqueToolId(params: { id: string; used: Set<string> }): string {
  const maxLength = 64;

  const base = sanitizeToolCallId(params.id);
  if (!params.used.has(base)) return base;

  const hash = shortHash(params.id);

  // Clip the base so that base + suffix stays within the length budget.
  const withSuffix = (suffix: string): string => {
    const room = Math.max(0, maxLength - suffix.length);
    return `${base.slice(0, room)}${suffix}`;
  };

  const candidate = withSuffix(`_${hash}`);
  if (!params.used.has(candidate)) return candidate;

  // `used` is finite and every counter value yields a distinct candidate
  // (they differ after the last underscore), so this loop terminates.
  for (let i = 2; ; i += 1) {
    const next = withSuffix(`_${hash}_${i}`);
    if (!params.used.has(next)) return next;
  }
}
|
||||
|
||||
/**
 * Applies `resolve` to the `id` of every tool-call block in an assistant
 * message.
 *
 * Only blocks whose `type` is `functionCall`, `toolUse` or `toolCall` and
 * whose `id` is a non-empty string are considered. The input message is
 * never mutated: when no ID changes the same message object is returned,
 * otherwise a shallow copy with a new `content` array is returned.
 *
 * @param params.message - Assistant message to inspect.
 * @param params.resolve - Maps an original ID to its replacement.
 * @returns The original message, or a copy with rewritten IDs.
 */
function rewriteAssistantToolCallIds(params: {
  message: Extract<AgentMessage, { role: "assistant" }>;
  resolve: (id: string) => string;
}): Extract<AgentMessage, { role: "assistant" }> {
  const { message, resolve } = params;
  const content = message.content;
  if (!Array.isArray(content)) return message;

  let changed = false;
  const next = content.map((block) => {
    if (!block || typeof block !== "object") return block;

    const { type, id } = block as { type?: unknown; id?: unknown };
    const isToolCallBlock =
      type === "functionCall" || type === "toolUse" || type === "toolCall";
    if (!isToolCallBlock || typeof id !== "string" || !id) return block;

    const nextId = resolve(id);
    if (nextId === id) return block;

    changed = true;
    return { ...(block as unknown as Record<string, unknown>), id: nextId };
  });

  // Preserve identity so callers can detect "nothing changed" by reference.
  if (!changed) return message;
  return { ...message, content: next as typeof message.content };
}
|
||||
|
||||
/**
 * Applies `resolve` to the `toolCallId` and `toolUseId` fields of a tool
 * result message.
 *
 * A field is only rewritten when it is a non-empty string. The input message
 * is never mutated: when neither field changes the same message object is
 * returned, otherwise a shallow copy carrying the resolved IDs is returned.
 *
 * @param params.message - Tool result message to inspect.
 * @param params.resolve - Maps an original ID to its replacement.
 * @returns The original message, or a copy with rewritten IDs.
 */
function rewriteToolResultIds(params: {
  message: Extract<AgentMessage, { role: "toolResult" }>;
  resolve: (id: string) => string;
}): Extract<AgentMessage, { role: "toolResult" }> {
  const { message, resolve } = params;
  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === "string" && value ? value : undefined;

  const toolCallId = nonEmptyString(message.toolCallId);
  // `toolUseId` is read through a cast, i.e. it is not part of the declared
  // toolResult type here.
  const toolUseId = nonEmptyString(
    (message as { toolUseId?: unknown }).toolUseId,
  );

  const nextToolCallId = toolCallId ? resolve(toolCallId) : undefined;
  const nextToolUseId = toolUseId ? resolve(toolUseId) : undefined;

  // Preserve identity so callers can detect "nothing changed" by reference.
  if (nextToolCallId === toolCallId && nextToolUseId === toolUseId) {
    return message;
  }

  return {
    ...message,
    ...(nextToolCallId && { toolCallId: nextToolCallId }),
    ...(nextToolUseId && { toolUseId: nextToolUseId }),
  } as Extract<AgentMessage, { role: "toolResult" }>;
}
|
||||
|
||||
/**
 * Rewrites tool call IDs across a transcript so each one matches
 * `^[a-zA-Z0-9_-]+$`, while keeping distinct original IDs distinct.
 *
 * Sanitization alone can introduce collisions (e.g. `a|b` and `a:b` both
 * become `a_b`). One mapping from original ID to sanitized ID is therefore
 * shared by the whole transcript: the same original ID always resolves to
 * the same replacement, so assistant tool calls and their tool results stay
 * paired, and colliding replacements are de-duplicated with a suffix.
 *
 * Only `assistant` and `toolResult` messages are inspected. The input is
 * never mutated.
 *
 * @param messages - Transcript to sanitize.
 * @returns `messages` itself when no ID needed rewriting, otherwise a new
 *   array in which only the affected messages are replaced by copies.
 */
export function sanitizeToolCallIdsForCloudCodeAssist(
  messages: AgentMessage[],
): AgentMessage[] {
  // Original ID -> assigned replacement, and the set of replacements in use.
  const map = new Map<string, string>();
  const used = new Set<string>();

  const resolve = (id: string): string => {
    const existing = map.get(id);
    if (existing) return existing;

    const next = makeUniqueToolId({ id, used });
    map.set(id, next);
    used.add(next);
    return next;
  };

  let changed = false;
  const out = messages.map((msg) => {
    if (!msg || typeof msg !== "object") return msg;

    const role = (msg as { role?: unknown }).role;

    if (role === "assistant") {
      const next = rewriteAssistantToolCallIds({
        message: msg as Extract<AgentMessage, { role: "assistant" }>,
        resolve,
      });
      if (next !== msg) changed = true;
      return next;
    }

    if (role === "toolResult") {
      const next = rewriteToolResultIds({
        message: msg as Extract<AgentMessage, { role: "toolResult" }>,
        resolve,
      });
      if (next !== msg) changed = true;
      return next;
    }

    return msg;
  });

  // Returning the input array itself lets callers detect a no-op by reference.
  return changed ? out : messages;
}
|
||||
Reference in New Issue
Block a user