fix: enforce final tag gating (#754) (thanks @mcinteerj)
This commit is contained in:
@@ -93,6 +93,7 @@
|
|||||||
- CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z.
|
- CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z.
|
||||||
- Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44.
|
- Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44.
|
||||||
- Agents: preserve reasoning items on tool-only turns.
|
- Agents: preserve reasoning items on tool-only turns.
|
||||||
|
- Agents: enforce `<final>` gating for reasoning-tag providers to prevent tag/reasoning leaks. (#754) — thanks @mcinteerj.
|
||||||
- Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic.
|
- Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic.
|
||||||
- Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson.
|
- Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson.
|
||||||
- Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson.
|
- Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson.
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
|||||||
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
{ tag: "thought", open: "<thought>", close: "</thought>" },
|
||||||
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
{ tag: "antthinking", open: "<antthinking>", close: "</antthinking>" },
|
||||||
] as const;
|
] as const;
|
||||||
it("filters to <final> and falls back when tags are malformed", () => {
|
it("filters to <final> and suppresses output without a start tag", () => {
|
||||||
let handler: ((evt: unknown) => void) | undefined;
|
let handler: ((evt: unknown) => void) | undefined;
|
||||||
const session: StubSession = {
|
const session: StubSession = {
|
||||||
subscribe: (fn) => {
|
subscribe: (fn) => {
|
||||||
@@ -38,6 +38,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
|||||||
onAgentEvent,
|
onAgentEvent,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
handler?.({ type: "message_start", message: { role: "assistant" } });
|
||||||
handler?.({
|
handler?.({
|
||||||
type: "message_update",
|
type: "message_update",
|
||||||
message: { role: "assistant" },
|
message: { role: "assistant" },
|
||||||
@@ -53,11 +54,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
|||||||
|
|
||||||
onPartialReply.mockReset();
|
onPartialReply.mockReset();
|
||||||
|
|
||||||
handler?.({
|
handler?.({ type: "message_start", message: { role: "assistant" } });
|
||||||
type: "message_end",
|
|
||||||
message: { role: "assistant" },
|
|
||||||
});
|
|
||||||
|
|
||||||
handler?.({
|
handler?.({
|
||||||
type: "message_update",
|
type: "message_update",
|
||||||
message: { role: "assistant" },
|
message: { role: "assistant" },
|
||||||
@@ -67,8 +64,7 @@ describe("subscribeEmbeddedPiSession", () => {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const secondPayload = onPartialReply.mock.calls[0][0];
|
expect(onPartialReply).not.toHaveBeenCalled();
|
||||||
expect(secondPayload.text).toContain("Oops no start");
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("does not require <final> when enforcement is off", () => {
|
it("does not require <final> when enforcement is off", () => {
|
||||||
|
|||||||
@@ -36,9 +36,6 @@ import {
|
|||||||
promoteThinkingTagsToBlocks,
|
promoteThinkingTagsToBlocks,
|
||||||
} from "./pi-embedded-utils.js";
|
} from "./pi-embedded-utils.js";
|
||||||
|
|
||||||
const THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
|
|
||||||
const THINKING_OPEN_RE = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
|
|
||||||
const THINKING_CLOSE_RE = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
|
|
||||||
const THINKING_TAG_SCAN_RE =
|
const THINKING_TAG_SCAN_RE =
|
||||||
/<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
|
/<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
|
||||||
const FINAL_TAG_SCAN_RE = /<\s*(\/?)\s*final\s*>/gi;
|
const FINAL_TAG_SCAN_RE = /<\s*(\/?)\s*final\s*>/gi;
|
||||||
@@ -182,11 +179,6 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
}) => void;
|
}) => void;
|
||||||
enforceFinalTag?: boolean;
|
enforceFinalTag?: boolean;
|
||||||
}) {
|
}) {
|
||||||
if (params.enforceFinalTag) {
|
|
||||||
log.debug("subscribeEmbeddedPiSession: enforceFinalTag is ENABLED");
|
|
||||||
} else {
|
|
||||||
log.debug("subscribeEmbeddedPiSession: enforceFinalTag is DISABLED");
|
|
||||||
}
|
|
||||||
const assistantTexts: string[] = [];
|
const assistantTexts: string[] = [];
|
||||||
const toolMetas: Array<{ toolName?: string; meta?: string }> = [];
|
const toolMetas: Array<{ toolName?: string; meta?: string }> = [];
|
||||||
const toolMetaById = new Map<string, string | undefined>();
|
const toolMetaById = new Map<string, string | undefined>();
|
||||||
@@ -202,7 +194,6 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
let blockBuffer = "";
|
let blockBuffer = "";
|
||||||
// Track if a streamed chunk opened a <think> block (stateful across chunks).
|
// Track if a streamed chunk opened a <think> block (stateful across chunks).
|
||||||
const blockState = { thinking: false, final: false };
|
const blockState = { thinking: false, final: false };
|
||||||
const streamState = { thinking: false, final: false };
|
|
||||||
let lastStreamedAssistant: string | undefined;
|
let lastStreamedAssistant: string | undefined;
|
||||||
let lastStreamedReasoning: string | undefined;
|
let lastStreamedReasoning: string | undefined;
|
||||||
let lastBlockReplyText: string | undefined;
|
let lastBlockReplyText: string | undefined;
|
||||||
@@ -220,8 +211,6 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
blockChunker?.reset();
|
blockChunker?.reset();
|
||||||
blockState.thinking = false;
|
blockState.thinking = false;
|
||||||
blockState.final = false;
|
blockState.final = false;
|
||||||
streamState.thinking = false;
|
|
||||||
streamState.final = false;
|
|
||||||
lastStreamedAssistant = undefined;
|
lastStreamedAssistant = undefined;
|
||||||
lastBlockReplyText = undefined;
|
lastBlockReplyText = undefined;
|
||||||
lastStreamedReasoning = undefined;
|
lastStreamedReasoning = undefined;
|
||||||
@@ -373,6 +362,7 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
// hallucinations (e.g. Minimax copying the style) from leaking, but we
|
// hallucinations (e.g. Minimax copying the style) from leaking, but we
|
||||||
// do not enforce buffering/extraction logic.
|
// do not enforce buffering/extraction logic.
|
||||||
if (!params.enforceFinalTag) {
|
if (!params.enforceFinalTag) {
|
||||||
|
FINAL_TAG_SCAN_RE.lastIndex = 0;
|
||||||
return processed.replace(FINAL_TAG_SCAN_RE, "");
|
return processed.replace(FINAL_TAG_SCAN_RE, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -405,19 +395,6 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
}
|
}
|
||||||
state.final = inFinal;
|
state.final = inFinal;
|
||||||
|
|
||||||
// Log the result of the stripping for debugging purposes
|
|
||||||
if (params.enforceFinalTag && (everInFinal || processed.length > 0)) {
|
|
||||||
log.debug(JSON.stringify({
|
|
||||||
raw: processed.slice(0, 100),
|
|
||||||
stripped: result.slice(0, 100),
|
|
||||||
inFinal,
|
|
||||||
everInFinal,
|
|
||||||
rawLen: processed.length,
|
|
||||||
strippedLen: result.length,
|
|
||||||
tag: "DEBUG_STRIP"
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Strict Mode: If enforcing final tags, we MUST NOT return content unless
|
// Strict Mode: If enforcing final tags, we MUST NOT return content unless
|
||||||
// we have seen a <final> tag. Otherwise, we leak "thinking out loud" text
|
// we have seen a <final> tag. Otherwise, we leak "thinking out loud" text
|
||||||
// (e.g. "**Locating Manulife**...") that the model emitted without <think> tags.
|
// (e.g. "**Locating Manulife**...") that the model emitted without <think> tags.
|
||||||
@@ -936,8 +913,6 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
blockChunker?.reset();
|
blockChunker?.reset();
|
||||||
blockState.thinking = false;
|
blockState.thinking = false;
|
||||||
blockState.final = false;
|
blockState.final = false;
|
||||||
streamState.thinking = false;
|
|
||||||
streamState.final = false;
|
|
||||||
lastStreamedAssistant = undefined;
|
lastStreamedAssistant = undefined;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1021,8 +996,6 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
}
|
}
|
||||||
blockState.thinking = false;
|
blockState.thinking = false;
|
||||||
blockState.final = false;
|
blockState.final = false;
|
||||||
streamState.thinking = false;
|
|
||||||
streamState.final = false;
|
|
||||||
if (pendingCompactionRetry > 0) {
|
if (pendingCompactionRetry > 0) {
|
||||||
resolveCompactionRetry();
|
resolveCompactionRetry();
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1156,9 +1156,6 @@ export async function getReplyFromConfig(
|
|||||||
resolvedQueue.mode === "collect" ||
|
resolvedQueue.mode === "collect" ||
|
||||||
resolvedQueue.mode === "steer-backlog";
|
resolvedQueue.mode === "steer-backlog";
|
||||||
const authProfileId = sessionEntry?.authProfileOverride;
|
const authProfileId = sessionEntry?.authProfileOverride;
|
||||||
// DEBUG: Check provider for reasoning tag
|
|
||||||
const shouldEnforce = isReasoningTagProvider(provider);
|
|
||||||
// defaultRuntime.log(`[DEBUG] reply.ts: provider='${provider}', isReasoningTagProvider=${shouldEnforce}`);
|
|
||||||
|
|
||||||
const followupRun = {
|
const followupRun = {
|
||||||
prompt: queuedBody,
|
prompt: queuedBody,
|
||||||
@@ -1198,15 +1195,7 @@ export async function getReplyFromConfig(
|
|||||||
ownerNumbers:
|
ownerNumbers:
|
||||||
command.ownerList.length > 0 ? command.ownerList : undefined,
|
command.ownerList.length > 0 ? command.ownerList : undefined,
|
||||||
extraSystemPrompt: extraSystemPrompt || undefined,
|
extraSystemPrompt: extraSystemPrompt || undefined,
|
||||||
...(isReasoningTagProvider(provider)
|
...(isReasoningTagProvider(provider) ? { enforceFinalTag: true } : {}),
|
||||||
? (() => {
|
|
||||||
logVerbose(`[reply] Enforcing final tag for provider: ${provider}`);
|
|
||||||
return { enforceFinalTag: true };
|
|
||||||
})()
|
|
||||||
: (() => {
|
|
||||||
logVerbose(`[reply] NOT enforcing final tag for provider: ${provider}`);
|
|
||||||
return {};
|
|
||||||
})()),
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -7,10 +7,12 @@
|
|||||||
* (e.g. <think> and <final>) in the text stream, rather than using native
|
* (e.g. <think> and <final>) in the text stream, rather than using native
|
||||||
* API fields for reasoning/thinking.
|
* API fields for reasoning/thinking.
|
||||||
*/
|
*/
|
||||||
export function isReasoningTagProvider(provider: string | undefined | null): boolean {
|
export function isReasoningTagProvider(
|
||||||
|
provider: string | undefined | null,
|
||||||
|
): boolean {
|
||||||
if (!provider) return false;
|
if (!provider) return false;
|
||||||
const normalized = provider.trim().toLowerCase();
|
const normalized = provider.trim().toLowerCase();
|
||||||
|
|
||||||
// Check for exact matches or known prefixes/substrings for reasoning providers
|
// Check for exact matches or known prefixes/substrings for reasoning providers
|
||||||
if (
|
if (
|
||||||
normalized === "ollama" ||
|
normalized === "ollama" ||
|
||||||
@@ -19,7 +21,7 @@ export function isReasoningTagProvider(provider: string | undefined | null): boo
|
|||||||
) {
|
) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle google-antigravity and its model variations (e.g. google-antigravity/gemini-3)
|
// Handle google-antigravity and its model variations (e.g. google-antigravity/gemini-3)
|
||||||
if (normalized.includes("google-antigravity")) {
|
if (normalized.includes("google-antigravity")) {
|
||||||
return true;
|
return true;
|
||||||
@@ -29,6 +31,6 @@ export function isReasoningTagProvider(provider: string | undefined | null): boo
|
|||||||
if (normalized.includes("minimax")) {
|
if (normalized.includes("minimax")) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user