fix: guard anthropic refusal trigger
This commit is contained in:
@@ -51,6 +51,18 @@ import { describeUnknownError } from "./utils.js";
|
||||
|
||||
type ApiKeyInfo = ResolvedProviderAuth;
|
||||
|
||||
// Avoid Anthropic's refusal test token poisoning session transcripts.
// The two constants below are consumed by scrubAnthropicRefusalMagic.
|
||||
// Literal token that is searched for in the prompt text.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
|
||||
// Text substituted for each occurrence of the token above.
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
|
||||
|
||||
/**
 * Replaces every occurrence of ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL in the
 * prompt with ANTHROPIC_MAGIC_STRING_REPLACEMENT.
 *
 * @param prompt - Prompt text to scan.
 * @returns The prompt with each occurrence of the token replaced, or the
 *   original string when the token does not appear.
 */
function scrubAnthropicRefusalMagic(prompt: string): string {
  // Splitting on the token yields a single segment when it is absent.
  const segments = prompt.split(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL);
  return segments.length === 1
    ? prompt
    : segments.join(ANTHROPIC_MAGIC_STRING_REPLACEMENT);
}
|
||||
|
||||
export async function runEmbeddedPiAgent(
|
||||
params: RunEmbeddedPiAgentParams,
|
||||
): Promise<EmbeddedPiRunResult> {
|
||||
@@ -211,6 +223,9 @@ export async function runEmbeddedPiAgent(
|
||||
attemptedThinking.add(thinkLevel);
|
||||
await fs.mkdir(resolvedWorkspace, { recursive: true });
|
||||
|
||||
const prompt =
|
||||
provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
|
||||
|
||||
const attempt = await runEmbeddedAttempt({
|
||||
sessionId: params.sessionId,
|
||||
sessionKey: params.sessionKey,
|
||||
@@ -228,7 +243,7 @@ export async function runEmbeddedPiAgent(
|
||||
agentDir,
|
||||
config: params.config,
|
||||
skillsSnapshot: params.skillsSnapshot,
|
||||
prompt: params.prompt,
|
||||
prompt,
|
||||
images: params.images,
|
||||
provider,
|
||||
modelId,
|
||||
|
||||
Reference in New Issue
Block a user