fix: gate transcript sanitization by provider

This commit is contained in:
Peter Steinberger
2026-01-23 00:28:41 +00:00
parent fac21e6eb4
commit db0235a26a
15 changed files with 307 additions and 212 deletions

View File

@@ -40,6 +40,7 @@ import {
import { createClawdbotCodingTools } from "../pi-tools.js";
import { resolveSandboxContext } from "../sandbox.js";
import { guardSessionManager } from "../session-tool-result-guard-wrapper.js";
import { resolveTranscriptPolicy } from "../transcript-policy.js";
import { acquireSessionWriteLock } from "../session-write-lock.js";
import {
applySkillEnvOverrides,
@@ -315,9 +316,16 @@ export async function compactEmbeddedPiSession(params: {
});
try {
await prewarmSessionFile(params.sessionFile);
const transcriptPolicy = resolveTranscriptPolicy({
modelApi: model.api,
provider,
modelId,
});
const sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), {
agentId: sessionAgentId,
sessionKey: params.sessionKey,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
stripFinalTags: transcriptPolicy.stripFinalTags,
});
trackSessionManagerAccess(params.sessionFile);
const settingsManager = SettingsManager.create(effectiveWorkspace, agentDir);
@@ -364,9 +372,14 @@ export async function compactEmbeddedPiSession(params: {
provider,
sessionManager,
sessionId: params.sessionId,
policy: transcriptPolicy,
});
const validatedGemini = validateGeminiTurns(prior);
const validated = validateAnthropicTurns(validatedGemini);
const validatedGemini = transcriptPolicy.validateGeminiTurns
? validateGeminiTurns(prior)
: prior;
const validated = transcriptPolicy.validateAnthropicTurns
? validateAnthropicTurns(validatedGemini)
: validatedGemini;
const limited = limitHistoryTurns(
validated,
getDmHistoryLimitFromSessionKey(params.sessionKey, params.config),

View File

@@ -12,10 +12,9 @@ import {
import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
import { log } from "./logger.js";
import { describeUnknownError } from "./utils.js";
import { isAntigravityClaude } from "../pi-embedded-helpers/google.js";
import { cleanToolSchemaForGemini } from "../pi-tools.schema.js";
import { normalizeProviderId } from "../model-selection.js";
import type { ToolCallIdMode } from "../tool-call-id.js";
import type { TranscriptPolicy } from "../transcript-policy.js";
import { resolveTranscriptPolicy } from "../transcript-policy.js";
const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap";
const GOOGLE_SCHEMA_UNSUPPORTED_KEYWORDS = new Set([
@@ -40,15 +39,6 @@ const GOOGLE_SCHEMA_UNSUPPORTED_KEYWORDS = new Set([
"minProperties",
"maxProperties",
]);
/**
 * Substrings that mark a model id as belonging to the Mistral family.
 * `isMistralModel` lowercases the model id before testing it against each
 * entry, so every hint is kept lowercase.
 */
const MISTRAL_MODEL_HINTS = [
  "mistral", "mixtral", "codestral", "pixtral",
  "devstral", "ministral", "mistralai",
];
// Accepts a non-empty run of base64-alphabet characters (A-Z, a-z, 0-9, "+", "/")
// optionally followed by up to two "=" padding characters; anchored to the whole string.
const ANTIGRAVITY_SIGNATURE_RE = /^[A-Za-z0-9+/]+={0,2}$/;
function isValidAntigravitySignature(value: unknown): value is string {
@@ -59,19 +49,6 @@ function isValidAntigravitySignature(value: unknown): value is string {
return ANTIGRAVITY_SIGNATURE_RE.test(trimmed);
}
/**
 * Reports whether tool call ids should be sanitized for the given model API.
 *
 * @param modelApi - API identifier of the model; may be absent, null, or empty.
 * @returns `false` when no API is supplied, otherwise whatever
 *   `isGoogleModelApi` answers for that API.
 */
function shouldSanitizeToolCallIds(modelApi?: string | null): boolean {
  // A missing/empty API never triggers sanitization; everything else is
  // delegated to the Google API check.
  return modelApi ? isGoogleModelApi(modelApi) : false;
}
/**
 * Determines whether a provider/model pair refers to a Mistral-family model.
 *
 * @param params.provider - Provider name; normalized via `normalizeProviderId`
 *   (null/undefined are treated as an empty string).
 * @param params.modelId - Model identifier; compared case-insensitively.
 * @returns `true` when the normalized provider is exactly "mistral", or when
 *   the lowercased model id contains any entry of `MISTRAL_MODEL_HINTS`.
 */
function isMistralModel(params: { provider?: string | null; modelId?: string | null }): boolean {
  const normalizedProvider = normalizeProviderId(params.provider ?? "");
  if (normalizedProvider === "mistral") {
    return true;
  }

  const loweredModelId = (params.modelId ?? "").toLowerCase();
  if (loweredModelId.length === 0) {
    // Nothing to match hints against.
    return false;
  }

  for (const hint of MISTRAL_MODEL_HINTS) {
    if (loweredModelId.includes(hint)) {
      return true;
    }
  }
  return false;
}
function sanitizeAntigravityThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
let touched = false;
const out: AgentMessage[] = [];
@@ -271,32 +248,33 @@ export async function sanitizeSessionHistory(params: {
provider?: string;
sessionManager: SessionManager;
sessionId: string;
policy?: TranscriptPolicy;
}): Promise<AgentMessage[]> {
const isAntigravityClaudeModel = isAntigravityClaude({
api: params.modelApi,
provider: params.provider,
modelId: params.modelId,
});
const provider = normalizeProviderId(params.provider ?? "");
const modelId = (params.modelId ?? "").toLowerCase();
const isOpenRouterGemini =
(provider === "openrouter" || provider === "opencode") && modelId.includes("gemini");
const isMistral = isMistralModel({ provider, modelId });
const toolCallIdMode: ToolCallIdMode | undefined = isMistral ? "strict9" : undefined;
const sanitizeToolCallIds = shouldSanitizeToolCallIds(params.modelApi) || isMistral;
const policy =
params.policy ??
resolveTranscriptPolicy({
modelApi: params.modelApi,
provider: params.provider,
modelId: params.modelId,
});
const sanitizedImages = await sanitizeSessionMessagesImages(params.messages, "session:history", {
sanitizeToolCallIds,
toolCallIdMode,
enforceToolCallLast: params.modelApi === "anthropic-messages",
preserveSignatures: isAntigravityClaudeModel,
sanitizeThoughtSignatures: isOpenRouterGemini
? { allowBase64Only: true, includeCamelCase: true }
: undefined,
sanitizeMode: policy.sanitizeMode,
sanitizeToolCallIds: policy.sanitizeToolCallIds,
toolCallIdMode: policy.toolCallIdMode,
enforceToolCallLast: policy.enforceToolCallLast,
preserveSignatures: policy.preserveSignatures,
sanitizeThoughtSignatures: policy.sanitizeThoughtSignatures,
});
const sanitizedThinking = isAntigravityClaudeModel
const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
? sanitizeAntigravityThinkingBlocks(sanitizedImages)
: sanitizedImages;
const repairedTools = sanitizeToolUseResultPairing(sanitizedThinking);
const repairedTools = policy.repairToolUseResultPairing
? sanitizeToolUseResultPairing(sanitizedThinking)
: sanitizedThinking;
if (!policy.applyGoogleTurnOrdering) {
return repairedTools;
}
return applyGoogleTurnOrderingFix({
messages: repairedTools,

View File

@@ -39,6 +39,7 @@ import {
import { createClawdbotCodingTools } from "../../pi-tools.js";
import { resolveSandboxContext } from "../../sandbox.js";
import { guardSessionManager } from "../../session-tool-result-guard-wrapper.js";
import { resolveTranscriptPolicy } from "../../transcript-policy.js";
import { acquireSessionWriteLock } from "../../session-write-lock.js";
import {
applySkillEnvOverrides,
@@ -369,10 +370,18 @@ export async function runEmbeddedAttempt(
.then(() => true)
.catch(() => false);
const transcriptPolicy = resolveTranscriptPolicy({
modelApi: params.model?.api,
provider: params.provider,
modelId: params.modelId,
});
await prewarmSessionFile(params.sessionFile);
sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), {
agentId: sessionAgentId,
sessionKey: params.sessionKey,
allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults,
stripFinalTags: transcriptPolicy.stripFinalTags,
});
trackSessionManagerAccess(params.sessionFile);
@@ -473,10 +482,15 @@ export async function runEmbeddedAttempt(
provider: params.provider,
sessionManager,
sessionId: params.sessionId,
policy: transcriptPolicy,
});
cacheTrace?.recordStage("session:sanitized", { messages: prior });
const validatedGemini = validateGeminiTurns(prior);
const validated = validateAnthropicTurns(validatedGemini);
const validatedGemini = transcriptPolicy.validateGeminiTurns
? validateGeminiTurns(prior)
: prior;
const validated = transcriptPolicy.validateAnthropicTurns
? validateAnthropicTurns(validatedGemini)
: validatedGemini;
const limited = limitHistoryTurns(
validated,
getDmHistoryLimitFromSessionKey(params.sessionKey, params.config),