Merge pull request #754 from mcinteerj/fix/reasoning-tag-strip

fix(agent): prevent reasoning and tag leaks for Gemini 3/Antigravity models
This commit is contained in:
Peter Steinberger
2026-01-12 08:47:15 +00:00
committed by GitHub
7 changed files with 130 additions and 96 deletions

View File

@@ -93,6 +93,7 @@
- CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z. - CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z.
- Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44. - Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44.
- Agents: preserve reasoning items on tool-only turns. - Agents: preserve reasoning items on tool-only turns.
- Agents: enforce `<final>` gating for reasoning-tag providers to prevent tag/reasoning leaks. (#754) — thanks @mcinteerj.
- Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic. - Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic.
- Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson. - Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson.
- Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson. - Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson.

View File

@@ -183,7 +183,11 @@ export async function sanitizeSessionMessagesImages(
const GOOGLE_TURN_ORDER_BOOTSTRAP_TEXT = "(session bootstrap)"; const GOOGLE_TURN_ORDER_BOOTSTRAP_TEXT = "(session bootstrap)";
export function isGoogleModelApi(api?: string | null): boolean { export function isGoogleModelApi(api?: string | null): boolean {
return api === "google-gemini-cli" || api === "google-generative-ai"; return (
api === "google-gemini-cli" ||
api === "google-generative-ai" ||
api === "google-antigravity"
);
} }
export function sanitizeGoogleTurnOrdering( export function sanitizeGoogleTurnOrdering(

View File

@@ -43,6 +43,7 @@ import {
enqueueCommandInLane, enqueueCommandInLane,
} from "../process/command-queue.js"; } from "../process/command-queue.js";
import { normalizeMessageProvider } from "../utils/message-provider.js"; import { normalizeMessageProvider } from "../utils/message-provider.js";
import { isReasoningTagProvider } from "../utils/provider-utils.js";
import { resolveUserPath } from "../utils.js"; import { resolveUserPath } from "../utils.js";
import { resolveClawdbotAgentDir } from "./agent-paths.js"; import { resolveClawdbotAgentDir } from "./agent-paths.js";
import { resolveSessionAgentIds } from "./agent-scope.js"; import { resolveSessionAgentIds } from "./agent-scope.js";
@@ -1141,7 +1142,7 @@ export async function compactEmbeddedPiSession(params: {
sandbox, sandbox,
params.bashElevated, params.bashElevated,
); );
const reasoningTagHint = provider === "ollama"; const reasoningTagHint = isReasoningTagProvider(provider);
const userTimezone = resolveUserTimezone( const userTimezone = resolveUserTimezone(
params.config?.agents?.defaults?.userTimezone, params.config?.agents?.defaults?.userTimezone,
); );
@@ -1547,7 +1548,7 @@ export async function runEmbeddedPiAgent(params: {
sandbox, sandbox,
params.bashElevated, params.bashElevated,
); );
const reasoningTagHint = provider === "ollama"; const reasoningTagHint = isReasoningTagProvider(provider);
const userTimezone = resolveUserTimezone( const userTimezone = resolveUserTimezone(
params.config?.agents?.defaults?.userTimezone, params.config?.agents?.defaults?.userTimezone,
); );

View File

@@ -16,7 +16,7 @@ describe("subscribeEmbeddedPiSession", () => {
{ tag: "thought", open: "<thought>", close: "</thought>" }, { tag: "thought", open: "<thought>", close: "</thought>" },
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" }, { tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
] as const; ] as const;
it("filters to <final> and falls back when tags are malformed", () => { it("filters to <final> and suppresses output without a start tag", () => {
let handler: ((evt: unknown) => void) | undefined; let handler: ((evt: unknown) => void) | undefined;
const session: StubSession = { const session: StubSession = {
subscribe: (fn) => { subscribe: (fn) => {
@@ -38,6 +38,7 @@ describe("subscribeEmbeddedPiSession", () => {
onAgentEvent, onAgentEvent,
}); });
handler?.({ type: "message_start", message: { role: "assistant" } });
handler?.({ handler?.({
type: "message_update", type: "message_update",
message: { role: "assistant" }, message: { role: "assistant" },
@@ -53,11 +54,7 @@ describe("subscribeEmbeddedPiSession", () => {
onPartialReply.mockReset(); onPartialReply.mockReset();
handler?.({ handler?.({ type: "message_start", message: { role: "assistant" } });
type: "message_end",
message: { role: "assistant" },
});
handler?.({ handler?.({
type: "message_update", type: "message_update",
message: { role: "assistant" }, message: { role: "assistant" },
@@ -67,8 +64,7 @@ describe("subscribeEmbeddedPiSession", () => {
}, },
}); });
const secondPayload = onPartialReply.mock.calls[0][0]; expect(onPartialReply).not.toHaveBeenCalled();
expect(secondPayload.text).toContain("Oops no start");
}); });
it("does not require <final> when enforcement is off", () => { it("does not require <final> when enforcement is off", () => {

View File

@@ -36,11 +36,9 @@ import {
promoteThinkingTagsToBlocks, promoteThinkingTagsToBlocks,
} from "./pi-embedded-utils.js"; } from "./pi-embedded-utils.js";
const THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
const THINKING_OPEN_RE = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
const THINKING_CLOSE_RE = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
const THINKING_TAG_SCAN_RE = const THINKING_TAG_SCAN_RE =
/<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi; /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
const FINAL_TAG_SCAN_RE = /<\s*(\/?)\s*final\s*>/gi;
const TOOL_RESULT_MAX_CHARS = 8000; const TOOL_RESULT_MAX_CHARS = 8000;
const log = createSubsystemLogger("agent/embedded"); const log = createSubsystemLogger("agent/embedded");
const RAW_STREAM_ENABLED = process.env.CLAWDBOT_RAW_STREAM === "1"; const RAW_STREAM_ENABLED = process.env.CLAWDBOT_RAW_STREAM === "1";
@@ -111,37 +109,6 @@ function isToolResultError(result: unknown): boolean {
return normalized === "error" || normalized === "timeout"; return normalized === "error" || normalized === "timeout";
} }
function stripThinkingSegments(text: string): string {
if (!text || !THINKING_TAG_RE.test(text)) return text;
THINKING_TAG_RE.lastIndex = 0;
let result = "";
let lastIndex = 0;
let inThinking = false;
for (const match of text.matchAll(THINKING_TAG_RE)) {
const idx = match.index ?? 0;
if (!inThinking) {
result += text.slice(lastIndex, idx);
}
const tag = match[0].toLowerCase();
inThinking = !tag.includes("/");
lastIndex = idx + match[0].length;
}
if (!inThinking) {
result += text.slice(lastIndex);
}
return result;
}
function stripUnpairedThinkingTags(text: string): string {
if (!text) return text;
const hasOpen = THINKING_OPEN_RE.test(text);
const hasClose = THINKING_CLOSE_RE.test(text);
if (hasOpen && hasClose) return text;
if (!hasOpen) return text.replace(THINKING_CLOSE_RE, "");
if (!hasClose) return text.replace(THINKING_OPEN_RE, "");
return text;
}
function extractMessagingToolSend( function extractMessagingToolSend(
toolName: string, toolName: string,
args: Record<string, unknown>, args: Record<string, unknown>,
@@ -226,7 +193,7 @@ export function subscribeEmbeddedPiSession(params: {
let deltaBuffer = ""; let deltaBuffer = "";
let blockBuffer = ""; let blockBuffer = "";
// Track if a streamed chunk opened a <think> block (stateful across chunks). // Track if a streamed chunk opened a <think> block (stateful across chunks).
let blockThinkingActive = false; const blockState = { thinking: false, final: false };
let lastStreamedAssistant: string | undefined; let lastStreamedAssistant: string | undefined;
let lastStreamedReasoning: string | undefined; let lastStreamedReasoning: string | undefined;
let lastBlockReplyText: string | undefined; let lastBlockReplyText: string | undefined;
@@ -242,7 +209,8 @@ export function subscribeEmbeddedPiSession(params: {
deltaBuffer = ""; deltaBuffer = "";
blockBuffer = ""; blockBuffer = "";
blockChunker?.reset(); blockChunker?.reset();
blockThinkingActive = false; blockState.thinking = false;
blockState.final = false;
lastStreamedAssistant = undefined; lastStreamedAssistant = undefined;
lastBlockReplyText = undefined; lastBlockReplyText = undefined;
lastStreamedReasoning = undefined; lastStreamedReasoning = undefined;
@@ -337,27 +305,6 @@ export function subscribeEmbeddedPiSession(params: {
compactionRetryPromise = null; compactionRetryPromise = null;
} }
}; };
const FINAL_START_RE = /<\s*final\s*>/i;
const FINAL_END_RE = /<\s*\/\s*final\s*>/i;
// Local providers sometimes emit malformed tags; normalize before filtering.
const sanitizeFinalText = (text: string): string => {
if (!text) return text;
const hasStart = FINAL_START_RE.test(text);
const hasEnd = FINAL_END_RE.test(text);
if (hasStart && !hasEnd) return text.replace(FINAL_START_RE, "");
if (!hasStart && hasEnd) return text.replace(FINAL_END_RE, "");
return text;
};
const extractFinalText = (text: string): string | undefined => {
const cleaned = sanitizeFinalText(text);
const startMatch = FINAL_START_RE.exec(cleaned);
if (!startMatch) return undefined;
const startIndex = startMatch.index + startMatch[0].length;
const afterStart = cleaned.slice(startIndex);
const endMatch = FINAL_END_RE.exec(afterStart);
const endIndex = endMatch ? endMatch.index : afterStart.length;
return afterStart.slice(0, endIndex);
};
const blockChunking = params.blockReplyChunking; const blockChunking = params.blockReplyChunking;
const blockChunker = blockChunking const blockChunker = blockChunking
@@ -385,34 +332,85 @@ export function subscribeEmbeddedPiSession(params: {
} }
}; };
const stripBlockThinkingSegments = (text: string): string => { const stripBlockTags = (
text: string,
state: { thinking: boolean; final: boolean },
): string => {
if (!text) return text; if (!text) return text;
if (!blockThinkingActive && !THINKING_TAG_SCAN_RE.test(text)) return text;
// 1. Handle <think> blocks (stateful, strip content inside)
let processed = "";
THINKING_TAG_SCAN_RE.lastIndex = 0; THINKING_TAG_SCAN_RE.lastIndex = 0;
let result = "";
let lastIndex = 0; let lastIndex = 0;
let inThinking = blockThinkingActive; let inThinking = state.thinking;
for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) { for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) {
const idx = match.index ?? 0; const idx = match.index ?? 0;
if (!inThinking) { if (!inThinking) {
result += text.slice(lastIndex, idx); processed += text.slice(lastIndex, idx);
} }
const isClose = match[1] === "/"; const isClose = match[1] === "/";
inThinking = !isClose; inThinking = !isClose;
lastIndex = idx + match[0].length; lastIndex = idx + match[0].length;
} }
if (!inThinking) { if (!inThinking) {
result += text.slice(lastIndex); processed += text.slice(lastIndex);
} }
blockThinkingActive = inThinking; state.thinking = inThinking;
return result;
// 2. Handle <final> blocks (stateful, strip content OUTSIDE)
// If enforcement is disabled, we still strip the tags themselves to prevent
// hallucinations (e.g. Minimax copying the style) from leaking, but we
// do not enforce buffering/extraction logic.
if (!params.enforceFinalTag) {
FINAL_TAG_SCAN_RE.lastIndex = 0;
return processed.replace(FINAL_TAG_SCAN_RE, "");
}
// If enforcement is enabled, only return text that appeared inside a <final> block.
let result = "";
FINAL_TAG_SCAN_RE.lastIndex = 0;
let lastFinalIndex = 0;
let inFinal = state.final;
let everInFinal = state.final;
for (const match of processed.matchAll(FINAL_TAG_SCAN_RE)) {
const idx = match.index ?? 0;
const isClose = match[1] === "/";
if (!inFinal && !isClose) {
// Found <final> start tag.
inFinal = true;
everInFinal = true;
lastFinalIndex = idx + match[0].length;
} else if (inFinal && isClose) {
// Found </final> end tag.
result += processed.slice(lastFinalIndex, idx);
inFinal = false;
lastFinalIndex = idx + match[0].length;
}
}
if (inFinal) {
result += processed.slice(lastFinalIndex);
}
state.final = inFinal;
// Strict Mode: If enforcing final tags, we MUST NOT return content unless
// we have seen a <final> tag. Otherwise, we leak "thinking out loud" text
// (e.g. "**Locating Manulife**...") that the model emitted without <think> tags.
if (!everInFinal) {
return "";
}
// Hardened Cleanup: Remove any remaining <final> tags that might have been
// missed (e.g. nested tags or hallucinations) to prevent leakage.
return result.replace(FINAL_TAG_SCAN_RE, "");
}; };
const emitBlockChunk = (text: string) => { const emitBlockChunk = (text: string) => {
if (suppressBlockChunks) return; if (suppressBlockChunks) return;
// Strip <think> blocks across chunk boundaries to avoid leaking reasoning. // Strip <think> and <final> blocks across chunk boundaries to avoid leaking reasoning.
const strippedText = stripBlockThinkingSegments(text); const chunk = stripBlockTags(text, blockState).trimEnd();
const chunk = strippedText.trimEnd();
if (!chunk) return; if (!chunk) return;
if (chunk === lastBlockReplyText) return; if (chunk === lastBlockReplyText) return;
@@ -754,12 +752,10 @@ export function subscribeEmbeddedPiSession(params: {
emitReasoningStream(extractThinkingFromTaggedStream(deltaBuffer)); emitReasoningStream(extractThinkingFromTaggedStream(deltaBuffer));
} }
const cleaned = params.enforceFinalTag const next = stripBlockTags(deltaBuffer, {
? stripThinkingSegments(stripUnpairedThinkingTags(deltaBuffer)) thinking: false,
: stripThinkingSegments(deltaBuffer); final: false,
const next = params.enforceFinalTag }).trim();
? (extractFinalText(cleaned)?.trim() ?? cleaned.trim())
: cleaned.trim();
if (next && next !== lastStreamedAssistant) { if (next && next !== lastStreamedAssistant) {
lastStreamedAssistant = next; lastStreamedAssistant = next;
const { text: cleanedText, mediaUrls } = const { text: cleanedText, mediaUrls } =
@@ -822,13 +818,10 @@ export function subscribeEmbeddedPiSession(params: {
rawText, rawText,
rawThinking: extractAssistantThinking(assistantMessage), rawThinking: extractAssistantThinking(assistantMessage),
}); });
const cleaned = params.enforceFinalTag const text = stripBlockTags(rawText, {
? stripThinkingSegments(stripUnpairedThinkingTags(rawText)) thinking: false,
: stripThinkingSegments(rawText); final: false,
const baseText = });
params.enforceFinalTag && cleaned
? (extractFinalText(cleaned)?.trim() ?? cleaned)
: cleaned;
const rawThinking = const rawThinking =
includeReasoning || streamReasoning includeReasoning || streamReasoning
? extractAssistantThinking(assistantMessage) || ? extractAssistantThinking(assistantMessage) ||
@@ -837,7 +830,6 @@ export function subscribeEmbeddedPiSession(params: {
const formattedReasoning = rawThinking const formattedReasoning = rawThinking
? formatReasoningMessage(rawThinking) ? formatReasoningMessage(rawThinking)
: ""; : "";
const text = baseText;
const addedDuringMessage = const addedDuringMessage =
assistantTexts.length > assistantTextBaseline; assistantTexts.length > assistantTextBaseline;
@@ -919,7 +911,8 @@ export function subscribeEmbeddedPiSession(params: {
deltaBuffer = ""; deltaBuffer = "";
blockBuffer = ""; blockBuffer = "";
blockChunker?.reset(); blockChunker?.reset();
blockThinkingActive = false; blockState.thinking = false;
blockState.final = false;
lastStreamedAssistant = undefined; lastStreamedAssistant = undefined;
} }
} }
@@ -1001,7 +994,8 @@ export function subscribeEmbeddedPiSession(params: {
blockBuffer = ""; blockBuffer = "";
} }
} }
blockThinkingActive = false; blockState.thinking = false;
blockState.final = false;
if (pendingCompactionRetry > 0) { if (pendingCompactionRetry > 0) {
resolveCompactionRetry(); resolveCompactionRetry();
} else { } else {

View File

@@ -40,6 +40,7 @@ import {
import { normalizeMainKey } from "../routing/session-key.js"; import { normalizeMainKey } from "../routing/session-key.js";
import { defaultRuntime } from "../runtime.js"; import { defaultRuntime } from "../runtime.js";
import { INTERNAL_MESSAGE_PROVIDER } from "../utils/message-provider.js"; import { INTERNAL_MESSAGE_PROVIDER } from "../utils/message-provider.js";
import { isReasoningTagProvider } from "../utils/provider-utils.js";
import { resolveCommandAuthorization } from "./command-auth.js"; import { resolveCommandAuthorization } from "./command-auth.js";
import { hasControlCommand } from "./command-detection.js"; import { hasControlCommand } from "./command-detection.js";
import { import {
@@ -1155,6 +1156,7 @@ export async function getReplyFromConfig(
resolvedQueue.mode === "collect" || resolvedQueue.mode === "collect" ||
resolvedQueue.mode === "steer-backlog"; resolvedQueue.mode === "steer-backlog";
const authProfileId = sessionEntry?.authProfileOverride; const authProfileId = sessionEntry?.authProfileOverride;
const followupRun = { const followupRun = {
prompt: queuedBody, prompt: queuedBody,
messageId: sessionCtx.MessageSid, messageId: sessionCtx.MessageSid,
@@ -1193,7 +1195,7 @@ export async function getReplyFromConfig(
ownerNumbers: ownerNumbers:
command.ownerList.length > 0 ? command.ownerList : undefined, command.ownerList.length > 0 ? command.ownerList : undefined,
extraSystemPrompt: extraSystemPrompt || undefined, extraSystemPrompt: extraSystemPrompt || undefined,
...(provider === "ollama" ? { enforceFinalTag: true } : {}), ...(isReasoningTagProvider(provider) ? { enforceFinalTag: true } : {}),
}, },
}; };

View File

@@ -0,0 +1,36 @@
/**
* Utility functions for provider-specific logic and capabilities.
*/
/**
 * Returns true if the provider requires reasoning to be wrapped in tags
 * (e.g. <think> and <final>) in the text stream, rather than using native
 * API fields for reasoning/thinking.
 */
export function isReasoningTagProvider(
  provider: string | undefined | null,
): boolean {
  if (!provider) return false;
  const id = provider.trim().toLowerCase();
  // Providers recognized by exact identifier match.
  const exactIds = ["ollama", "google-gemini-cli", "google-generative-ai"];
  if (exactIds.includes(id)) {
    return true;
  }
  // google-antigravity also appears with model-suffixed variants
  // (e.g. google-antigravity/gemini-3), so match it anywhere in the id.
  // Minimax (M2.1 is chatty/reasoning-like) is likewise substring-matched.
  return id.includes("google-antigravity") || id.includes("minimax");
}