fix(agents): skip thinking tags in code spans
This commit is contained in:
16
CHANGELOG.md
16
CHANGELOG.md
@@ -61,6 +61,22 @@
|
||||
#### Gateway / Daemon / Sessions
|
||||
- Gateway: forward termination signals to respawned CLI child processes to avoid orphaned systemd runs. (#933) — thanks @roshanasingh4.
|
||||
- Gateway/UI: ship session defaults in the hello snapshot so the Control UI canonicalizes main session keys (no bare `main` alias).
|
||||
- Agents: skip thinking/final tag stripping inside Markdown code spans. (#939) — thanks @ngutman.
|
||||
- Browser: add tests for snapshot labels/efficient query params and labeled image responses.
|
||||
- macOS: ensure launchd log directory exists with a test-only override. (#909) — thanks @roshanasingh4.
|
||||
- macOS: format ConnectionsStore config to satisfy SwiftFormat lint. (#852) — thanks @mneves75.
|
||||
- Packaging: run `pnpm build` on `prepack` so npm publishes include fresh `dist/` output.
|
||||
- Telegram: register dock native commands with underscores to avoid `BOT_COMMAND_INVALID` (#929, fixes #901) — thanks @grp06.
|
||||
- Google: downgrade unsigned thinking blocks before send to avoid missing signature errors.
|
||||
- Agents: make user time zone and 24-hour time explicit in the system prompt. (#859) — thanks @CashWilliams.
|
||||
- Agents: strip downgraded tool call text without eating adjacent replies and filter thinking-tag leaks. (#905) — thanks @erikpr1994.
|
||||
- Agents: cap tool call IDs for OpenAI/OpenRouter to avoid request rejections. (#875) — thanks @j1philli.
|
||||
- Doctor: avoid re-adding WhatsApp config when only legacy ack reactions are set. (#927, fixes #900) — thanks @grp06.
|
||||
- Agents: scrub tuple `items` schemas for Gemini tool calls. (#926, fixes #746) — thanks @grp06.
|
||||
- Agents: stabilize sub-agent announce status from runtime outcomes and normalize Result/Notes. (#835) — thanks @roshanasingh4.
|
||||
- Apps: use canonical main session keys from gateway defaults across macOS/iOS/Android to avoid creating bare `main` sessions.
|
||||
- Embedded runner: suppress raw API error payloads from replies. (#924) — thanks @grp06.
|
||||
- Auth: normalize Claude Code CLI profile mode to oauth and auto-migrate config. (#855) — thanks @sebslight.
|
||||
- Daemon: clear persisted launchd disabled state before bootstrap (fixes `daemon install` after uninstall). (#849) — thanks @ndraiman.
|
||||
- Sessions: return deep clones (`structuredClone`) so cached session entries can't be mutated. (#934) — thanks @ronak-guliani.
|
||||
- Heartbeat: keep `updatedAt` monotonic when restoring heartbeat sessions. (#934) — thanks @ronak-guliani.
|
||||
|
||||
103
src/agents/pi-embedded-subscribe.code-span-awareness.test.ts
Normal file
103
src/agents/pi-embedded-subscribe.code-span-awareness.test.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { subscribeEmbeddedPiSession } from "./pi-embedded-subscribe.js";
|
||||
|
||||
/**
 * Minimal session stand-in used by the tests in this file.
 *
 * Only `subscribe` is modelled: it receives an event listener and returns a
 * function (presumably an unsubscribe callback — the stubs below return a
 * no-op). The stub is cast to the real session parameter type at each call.
 */
type StubSession = {
|
||||
subscribe: (fn: (evt: unknown) => void) => () => void;
|
||||
};
|
||||
|
||||
describe("subscribeEmbeddedPiSession thinking tag code span awareness", () => {
  /**
   * Subscribes to a fresh stub session, pushes one assistant `text_delta`
   * event carrying `delta`, asserts that `onPartialReply` fired, and returns
   * the `text` of the most recent partial reply.
   */
  const partialTextFor = (delta: string) => {
    let handler: ((evt: unknown) => void) | undefined;
    const session: StubSession = {
      subscribe: (fn) => {
        // Capture the listener so the test can drive events by hand.
        handler = fn;
        return () => {};
      },
    };

    const onPartialReply = vi.fn();

    subscribeEmbeddedPiSession({
      session: session as unknown as Parameters<typeof subscribeEmbeddedPiSession>[0]["session"],
      runId: "run",
      onPartialReply,
    });

    handler?.({
      type: "message_update",
      message: { role: "assistant" },
      assistantMessageEvent: {
        type: "text_delta",
        delta,
      },
    });

    expect(onPartialReply).toHaveBeenCalled();
    const lastCall = onPartialReply.mock.calls[onPartialReply.mock.calls.length - 1];
    return lastCall[0].text;
  };

  it("does not strip thinking tags inside inline code backticks", () => {
    const text = partialTextFor("The fix strips leaked `<thinking>` tags from messages.");

    expect(text).toContain("`<thinking>`");
  });

  it("does not strip thinking tags inside fenced code blocks", () => {
    const text = partialTextFor(
      "Example:\n ````\n<thinking>code example</thinking>\n ````\nDone.",
    );

    expect(text).toContain("<thinking>code example</thinking>");
  });

  it("still strips actual thinking tags outside code spans", () => {
    const text = partialTextFor("Hello <thinking>internal thought</thinking> world");

    // The tag body is removed while the surrounding prose is kept.
    expect(text).not.toContain("internal thought");
    expect(text).toContain("Hello");
    expect(text).toContain("world");
  });
});
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { AgentEvent } from "@mariozechner/pi-agent-core";
|
||||
|
||||
import { emitAgentEvent } from "../infra/agent-events.js";
|
||||
import { createInlineCodeState } from "../markdown/code-spans.js";
|
||||
import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js";
|
||||
|
||||
export function handleAgentStart(ctx: EmbeddedPiSubscribeContext) {
|
||||
@@ -75,6 +76,7 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
|
||||
|
||||
ctx.state.blockState.thinking = false;
|
||||
ctx.state.blockState.final = false;
|
||||
ctx.state.blockState.inlineCode = createInlineCodeState();
|
||||
|
||||
if (ctx.state.pendingCompactionRetry > 0) {
|
||||
ctx.resolveCompactionRetry();
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
formatReasoningMessage,
|
||||
promoteThinkingTagsToBlocks,
|
||||
} from "./pi-embedded-utils.js";
|
||||
import { createInlineCodeState } from "../markdown/code-spans.js";
|
||||
|
||||
export function handleMessageStart(
|
||||
ctx: EmbeddedPiSubscribeContext,
|
||||
@@ -103,6 +104,7 @@ export function handleMessageUpdate(
|
||||
.stripBlockTags(ctx.state.deltaBuffer, {
|
||||
thinking: false,
|
||||
final: false,
|
||||
inlineCode: createInlineCodeState(),
|
||||
})
|
||||
.trim();
|
||||
if (next && next !== ctx.state.lastStreamedAssistant) {
|
||||
@@ -240,5 +242,6 @@ export function handleMessageEnd(
|
||||
ctx.blockChunker?.reset();
|
||||
ctx.state.blockState.thinking = false;
|
||||
ctx.state.blockState.final = false;
|
||||
ctx.state.blockState.inlineCode = createInlineCodeState();
|
||||
ctx.state.lastStreamedAssistant = undefined;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { AgentEvent, AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
import type { ReasoningLevel } from "../auto-reply/thinking.js";
|
||||
import type { InlineCodeState } from "../markdown/code-spans.js";
|
||||
import type { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
|
||||
import type { MessagingToolSend } from "./pi-embedded-messaging.js";
|
||||
import type {
|
||||
@@ -27,7 +28,7 @@ export type EmbeddedPiSubscribeState = {
|
||||
|
||||
deltaBuffer: string;
|
||||
blockBuffer: string;
|
||||
blockState: { thinking: boolean; final: boolean };
|
||||
blockState: { thinking: boolean; final: boolean; inlineCode: InlineCodeState };
|
||||
lastStreamedAssistant?: string;
|
||||
lastStreamedReasoning?: string;
|
||||
lastBlockReplyText?: string;
|
||||
@@ -56,7 +57,10 @@ export type EmbeddedPiSubscribeContext = {
|
||||
|
||||
shouldEmitToolResult: () => boolean;
|
||||
emitToolSummary: (toolName?: string, meta?: string) => void;
|
||||
stripBlockTags: (text: string, state: { thinking: boolean; final: boolean }) => string;
|
||||
stripBlockTags: (
|
||||
text: string,
|
||||
state: { thinking: boolean; final: boolean; inlineCode?: InlineCodeState },
|
||||
) => string;
|
||||
emitBlockChunk: (text: string) => void;
|
||||
flushBlockReplyBuffer: () => void;
|
||||
emitReasoningStream: (text: string) => void;
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
|
||||
import { formatToolAggregate } from "../auto-reply/tool-meta.js";
|
||||
import { createSubsystemLogger } from "../logging.js";
|
||||
import type { InlineCodeState } from "../markdown/code-spans.js";
|
||||
import { buildCodeSpanIndex, createInlineCodeState } from "../markdown/code-spans.js";
|
||||
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
|
||||
import {
|
||||
isMessagingToolDuplicateNormalized,
|
||||
@@ -38,7 +40,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
|
||||
deltaBuffer: "",
|
||||
blockBuffer: "",
|
||||
// Track if a streamed chunk opened a <think> block (stateful across chunks).
|
||||
blockState: { thinking: false, final: false },
|
||||
blockState: { thinking: false, final: false, inlineCode: createInlineCodeState() },
|
||||
lastStreamedAssistant: undefined,
|
||||
lastStreamedReasoning: undefined,
|
||||
lastBlockReplyText: undefined,
|
||||
@@ -72,6 +74,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
|
||||
blockChunker?.reset();
|
||||
state.blockState.thinking = false;
|
||||
state.blockState.final = false;
|
||||
state.blockState.inlineCode = createInlineCodeState();
|
||||
state.lastStreamedAssistant = undefined;
|
||||
state.lastBlockReplyText = undefined;
|
||||
state.lastStreamedReasoning = undefined;
|
||||
@@ -185,9 +188,15 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
|
||||
}
|
||||
};
|
||||
|
||||
const stripBlockTags = (text: string, state: { thinking: boolean; final: boolean }): string => {
|
||||
const stripBlockTags = (
|
||||
text: string,
|
||||
state: { thinking: boolean; final: boolean; inlineCode?: InlineCodeState },
|
||||
): string => {
|
||||
if (!text) return text;
|
||||
|
||||
const inlineStateStart = state.inlineCode ?? createInlineCodeState();
|
||||
const codeSpans = buildCodeSpanIndex(text, inlineStateStart);
|
||||
|
||||
// 1. Handle <think> blocks (stateful, strip content inside)
|
||||
let processed = "";
|
||||
THINKING_TAG_SCAN_RE.lastIndex = 0;
|
||||
@@ -195,6 +204,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
|
||||
let inThinking = state.thinking;
|
||||
for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) {
|
||||
const idx = match.index ?? 0;
|
||||
if (codeSpans.isInside(idx)) continue;
|
||||
if (!inThinking) {
|
||||
processed += text.slice(lastIndex, idx);
|
||||
}
|
||||
@@ -211,9 +221,11 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
|
||||
// If enforcement is disabled, we still strip the tags themselves to prevent
|
||||
// hallucinations (e.g. Minimax copying the style) from leaking, but we
|
||||
// do not enforce buffering/extraction logic.
|
||||
const finalCodeSpans = buildCodeSpanIndex(processed, inlineStateStart);
|
||||
if (!params.enforceFinalTag) {
|
||||
state.inlineCode = finalCodeSpans.inlineState;
|
||||
FINAL_TAG_SCAN_RE.lastIndex = 0;
|
||||
return processed.replace(FINAL_TAG_SCAN_RE, "");
|
||||
return stripTagsOutsideCodeSpans(processed, FINAL_TAG_SCAN_RE, finalCodeSpans.isInside);
|
||||
}
|
||||
|
||||
// If enforcement is enabled, only return text that appeared inside a <final> block.
|
||||
@@ -225,6 +237,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
|
||||
|
||||
for (const match of processed.matchAll(FINAL_TAG_SCAN_RE)) {
|
||||
const idx = match.index ?? 0;
|
||||
if (finalCodeSpans.isInside(idx)) continue;
|
||||
const isClose = match[1] === "/";
|
||||
|
||||
if (!inFinal && !isClose) {
|
||||
@@ -254,7 +267,27 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
|
||||
|
||||
// Hardened Cleanup: Remove any remaining <final> tags that might have been
|
||||
// missed (e.g. nested tags or hallucinations) to prevent leakage.
|
||||
return result.replace(FINAL_TAG_SCAN_RE, "");
|
||||
const resultCodeSpans = buildCodeSpanIndex(result, inlineStateStart);
|
||||
state.inlineCode = resultCodeSpans.inlineState;
|
||||
return stripTagsOutsideCodeSpans(result, FINAL_TAG_SCAN_RE, resultCodeSpans.isInside);
|
||||
};
|
||||
|
||||
/**
 * Removes every match of `pattern` from `text`, except matches whose start
 * offset is reported by `isInside` as lying within a code span.
 *
 * @param text - Text to scrub.
 * @param pattern - Tag-matching regex; used with `matchAll`, so it must carry
 *   the global flag.
 * @param isInside - Predicate telling whether an offset in `text` is inside a
 *   code span.
 * @returns `text` with the unprotected matches cut out.
 */
const stripTagsOutsideCodeSpans = (
  text: string,
  pattern: RegExp,
  isInside: (index: number) => boolean,
) => {
  const kept: string[] = [];
  let cursor = 0;
  // matchAll starts from the regex's current lastIndex; rewind a shared regex first.
  pattern.lastIndex = 0;
  for (const hit of text.matchAll(pattern)) {
    const start = hit.index ?? 0;
    if (!isInside(start)) {
      // Keep everything up to the tag, then jump past the tag itself.
      kept.push(text.slice(cursor, start));
      cursor = start + hit[0].length;
    }
  }
  kept.push(text.slice(cursor));
  return kept.join("");
};
|
||||
|
||||
const emitBlockChunk = (text: string) => {
|
||||
|
||||
108
src/markdown/code-spans.ts
Normal file
108
src/markdown/code-spans.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
import { parseFenceSpans, type FenceSpan } from "./fences.js";
|
||||
|
||||
/**
 * Inline code-span scanner state, carried from one chunk of text to the next.
 *
 * - `open`: whether an inline code span is still open at the end of the
 *   scanned text.
 * - `ticks`: length of the backtick run that opened the current span; `0`
 *   when no span is open.
 */
export type InlineCodeState = {
|
||||
open: boolean;
|
||||
ticks: number;
|
||||
};
|
||||
|
||||
/**
 * Creates a fresh scanner state representing "not inside any inline code span".
 *
 * @returns A new state object with `open` false and `ticks` 0.
 */
export function createInlineCodeState(): InlineCodeState {
  const closedState: InlineCodeState = { open: false, ticks: 0 };
  return closedState;
}
|
||||
|
||||
/**
 * Result of scanning a piece of text for inline code spans.
 *
 * - `spans`: `[start, end]` offset pairs; lookups treat `start` as inclusive
 *   and `end` as exclusive.
 * - `state`: scanner state after the last character, suitable for seeding the
 *   scan of the next chunk.
 */
type InlineCodeSpansResult = {
|
||||
spans: Array<[number, number]>;
|
||||
state: InlineCodeState;
|
||||
};
|
||||
|
||||
/**
 * Lookup built for one piece of text by `buildCodeSpanIndex`.
 *
 * - `inlineState`: inline code-span state after scanning the text.
 * - `isInside`: returns true when the given offset falls within a fence span
 *   or an inline code span of that text.
 */
export type CodeSpanIndex = {
|
||||
inlineState: InlineCodeState;
|
||||
isInside: (index: number) => boolean;
|
||||
};
|
||||
|
||||
/**
 * Builds a lookup that tells whether an offset in `text` lies inside code,
 * meaning either a fence span or an inline code span.
 *
 * @param text - Text to index.
 * @param inlineState - Optional inline-span state to start from; it is copied,
 *   not mutated. When omitted, scanning starts outside any span.
 * @returns The `isInside` predicate plus the inline state after scanning `text`.
 */
export function buildCodeSpanIndex(
  text: string,
  inlineState?: InlineCodeState,
): CodeSpanIndex {
  const fenceSpans = parseFenceSpans(text);

  // Work on a copy so the caller's state object is never aliased.
  let seedState: InlineCodeState;
  if (inlineState) {
    seedState = { open: inlineState.open, ticks: inlineState.ticks };
  } else {
    seedState = createInlineCodeState();
  }

  const scanned = parseInlineCodeSpans(text, fenceSpans, seedState);

  const isInside = (index: number): boolean => {
    if (isInsideFenceSpan(index, fenceSpans)) return true;
    return isInsideInlineSpan(index, scanned.spans);
  };

  return {
    inlineState: scanned.state,
    isInside,
  };
}
|
||||
|
||||
/**
 * Scans `text` for inline code spans delimited by backtick runs.
 *
 * Regions covered by `fenceSpans` are skipped entirely. A span opens at the
 * first backtick run seen while closed and closes at the next run of the same
 * length; runs of any other length seen while open are ignored. A span still
 * open at the end of `text` is recorded as reaching `text.length`.
 *
 * @param text - Text to scan.
 * @param fenceSpans - Fence spans of `text`, excluded from scanning.
 * @param initialState - State to start from; an already-open span is treated
 *   as starting at offset 0.
 * @returns The collected `[start, end]` spans and the state after the scan.
 */
function parseInlineCodeSpans(
  text: string,
  fenceSpans: FenceSpan[],
  initialState: InlineCodeState,
): InlineCodeSpansResult {
  const collected: Array<[number, number]> = [];
  let isOpen = initialState.open;
  let openerTicks = initialState.ticks;
  // A span inherited from the previous chunk covers this text from offset 0.
  let spanStart = isOpen ? 0 : -1;

  let pos = 0;
  while (pos < text.length) {
    const enclosingFence = findFenceSpanAtInclusive(fenceSpans, pos);
    if (enclosingFence) {
      // Jump over the whole fence; its backticks never delimit inline spans.
      pos = enclosingFence.end;
    } else if (text[pos] !== "`") {
      pos += 1;
    } else {
      // Consume the full run of consecutive backticks.
      const runStart = pos;
      do {
        pos += 1;
      } while (pos < text.length && text[pos] === "`");
      const runLength = pos - runStart;

      if (!isOpen) {
        isOpen = true;
        openerTicks = runLength;
        spanStart = runStart;
      } else if (runLength === openerTicks) {
        // Only a run of matching length closes the span.
        collected.push([spanStart, pos]);
        isOpen = false;
        openerTicks = 0;
        spanStart = -1;
      }
    }
  }

  if (isOpen) {
    // Unterminated span: protect everything to the end of this text.
    collected.push([spanStart, text.length]);
  }

  return {
    spans: collected,
    state: { open: isOpen, ticks: openerTicks },
  };
}
|
||||
|
||||
/**
 * Returns the first fence span containing `index` (`start` inclusive, `end`
 * exclusive), or `undefined` when no span contains it.
 */
function findFenceSpanAtInclusive(spans: FenceSpan[], index: number): FenceSpan | undefined {
  for (const candidate of spans) {
    if (candidate.start <= index && index < candidate.end) {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Reports whether `index` falls within any fence span (`start` inclusive,
 * `end` exclusive).
 */
function isInsideFenceSpan(index: number, spans: FenceSpan[]): boolean {
  for (const fence of spans) {
    if (fence.start <= index && index < fence.end) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Reports whether `index` falls within any `[start, end]` inline span, treating
 * `start` as inclusive and `end` as exclusive.
 */
function isInsideInlineSpan(index: number, spans: Array<[number, number]>): boolean {
  for (const [from, to] of spans) {
    if (from <= index && index < to) {
      return true;
    }
  }
  return false;
}
|
||||
Reference in New Issue
Block a user