diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c33025aa..76377ecda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,22 @@ #### Gateway / Daemon / Sessions - Gateway: forward termination signals to respawned CLI child processes to avoid orphaned systemd runs. (#933) — thanks @roshanasingh4. - Gateway/UI: ship session defaults in the hello snapshot so the Control UI canonicalizes main session keys (no bare `main` alias). +- Agents: skip thinking/final tag stripping inside Markdown code spans. (#939) — thanks @ngutman. +- Browser: add tests for snapshot labels/efficient query params and labeled image responses. +- macOS: ensure launchd log directory exists with a test-only override. (#909) — thanks @roshanasingh4. +- macOS: format ConnectionsStore config to satisfy SwiftFormat lint. (#852) — thanks @mneves75. +- Packaging: run `pnpm build` on `prepack` so npm publishes include fresh `dist/` output. +- Telegram: register dock native commands with underscores to avoid `BOT_COMMAND_INVALID` (#929, fixes #901) — thanks @grp06. +- Google: downgrade unsigned thinking blocks before send to avoid missing signature errors. +- Agents: make user time zone and 24-hour time explicit in the system prompt. (#859) — thanks @CashWilliams. +- Agents: strip downgraded tool call text without eating adjacent replies and filter thinking-tag leaks. (#905) — thanks @erikpr1994. +- Agents: cap tool call IDs for OpenAI/OpenRouter to avoid request rejections. (#875) — thanks @j1philli. +- Doctor: avoid re-adding WhatsApp config when only legacy ack reactions are set. (#927, fixes #900) — thanks @grp06. +- Agents: scrub tuple `items` schemas for Gemini tool calls. (#926, fixes #746) — thanks @grp06. +- Agents: stabilize sub-agent announce status from runtime outcomes and normalize Result/Notes. (#835) — thanks @roshanasingh4. +- Apps: use canonical main session keys from gateway defaults across macOS/iOS/Android to avoid creating bare `main` sessions. +- Embedded runner: suppress raw API error payloads from replies. (#924) — thanks @grp06. +- Auth: normalize Claude Code CLI profile mode to oauth and auto-migrate config. (#855) — thanks @sebslight. - Daemon: clear persisted launchd disabled state before bootstrap (fixes `daemon install` after uninstall). (#849) — thanks @ndraiman. - Sessions: return deep clones (`structuredClone`) so cached session entries can't be mutated. (#934) — thanks @ronak-guliani. - Heartbeat: keep `updatedAt` monotonic when restoring heartbeat sessions. (#934) — thanks @ronak-guliani. diff --git a/src/agents/pi-embedded-subscribe.code-span-awareness.test.ts b/src/agents/pi-embedded-subscribe.code-span-awareness.test.ts new file mode 100644 index 000000000..f74a579ef --- /dev/null +++ b/src/agents/pi-embedded-subscribe.code-span-awareness.test.ts @@ -0,0 +1,103 @@ +import { describe, expect, it, vi } from "vitest"; +import { subscribeEmbeddedPiSession } from "./pi-embedded-subscribe.js"; + +type StubSession = { + subscribe: (fn: (evt: unknown) => void) => () => void; +}; + +describe("subscribeEmbeddedPiSession thinking tag code span awareness", () => { + it("does not strip thinking tags inside inline code backticks", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const onPartialReply = vi.fn(); + + subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run", + onPartialReply, + }); + + handler?.({ + type: "message_update", + message: { role: "assistant" }, + assistantMessageEvent: { + type: "text_delta", + delta: "The fix strips leaked `` tags from messages.", + }, + }); + + expect(onPartialReply).toHaveBeenCalled(); + const lastCall = onPartialReply.mock.calls[onPartialReply.mock.calls.length - 1]; + expect(lastCall[0].text).toContain("``"); + }); + + it("does not strip thinking tags inside fenced code blocks", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const onPartialReply = vi.fn(); + + subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run", + onPartialReply, + }); + + handler?.({ + type: "message_update", + message: { role: "assistant" }, + assistantMessageEvent: { + type: "text_delta", + delta: "Example:\n ````\ncode example\n ````\nDone.", + }, + }); + + expect(onPartialReply).toHaveBeenCalled(); + const lastCall = onPartialReply.mock.calls[onPartialReply.mock.calls.length - 1]; + expect(lastCall[0].text).toContain("code example"); + }); + + it("still strips actual thinking tags outside code spans", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const onPartialReply = vi.fn(); + + subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run", + onPartialReply, + }); + + handler?.({ + type: "message_update", + message: { role: "assistant" }, + assistantMessageEvent: { + type: "text_delta", + delta: "Hello internal thought world", + }, + }); + + expect(onPartialReply).toHaveBeenCalled(); + const lastCall = onPartialReply.mock.calls[onPartialReply.mock.calls.length - 1]; + expect(lastCall[0].text).not.toContain("internal thought"); + expect(lastCall[0].text).toContain("Hello"); + expect(lastCall[0].text).toContain("world"); + }); +}); diff --git a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts index 53038b6ad..bd6a3b00d 100644 --- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts +++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts @@ -1,6 +1,7 @@ import type { AgentEvent } from "@mariozechner/pi-agent-core"; import { emitAgentEvent } from "../infra/agent-events.js"; +import { createInlineCodeState } from "../markdown/code-spans.js"; import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js"; export function handleAgentStart(ctx: EmbeddedPiSubscribeContext) { @@ -75,6 +76,7 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) { ctx.state.blockState.thinking = false; ctx.state.blockState.final = false; + ctx.state.blockState.inlineCode = createInlineCodeState(); if (ctx.state.pendingCompactionRetry > 0) { ctx.resolveCompactionRetry(); diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.ts b/src/agents/pi-embedded-subscribe.handlers.messages.ts index 98f937141..9e17a1700 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts @@ -17,6 +17,7 @@ import { formatReasoningMessage, promoteThinkingTagsToBlocks, } from "./pi-embedded-utils.js"; +import { createInlineCodeState } from "../markdown/code-spans.js"; export function handleMessageStart( ctx: EmbeddedPiSubscribeContext, @@ -103,6 +104,7 @@ export function handleMessageUpdate( .stripBlockTags(ctx.state.deltaBuffer, { thinking: false, final: false, + inlineCode: createInlineCodeState(), }) .trim(); if (next && next !== ctx.state.lastStreamedAssistant) { @@ -240,5 +242,6 @@ export function handleMessageEnd( ctx.blockChunker?.reset(); ctx.state.blockState.thinking = false; ctx.state.blockState.final = false; + ctx.state.blockState.inlineCode = createInlineCodeState(); ctx.state.lastStreamedAssistant = undefined; } diff --git a/src/agents/pi-embedded-subscribe.handlers.types.ts b/src/agents/pi-embedded-subscribe.handlers.types.ts index 48963e3ff..1bc9c4af0 100644 --- a/src/agents/pi-embedded-subscribe.handlers.types.ts +++ b/src/agents/pi-embedded-subscribe.handlers.types.ts @@ -1,6 +1,7 @@ import type { AgentEvent, AgentMessage } from "@mariozechner/pi-agent-core"; import type { ReasoningLevel } from "../auto-reply/thinking.js"; +import type { InlineCodeState } from "../markdown/code-spans.js"; import type { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js"; import type { MessagingToolSend } from "./pi-embedded-messaging.js"; import type { @@ -27,7 +28,7 @@ export type EmbeddedPiSubscribeState = { deltaBuffer: string; blockBuffer: string; - blockState: { thinking: boolean; final: boolean }; + blockState: { thinking: boolean; final: boolean; inlineCode: InlineCodeState }; lastStreamedAssistant?: string; lastStreamedReasoning?: string; lastBlockReplyText?: string; @@ -56,7 +57,10 @@ export type EmbeddedPiSubscribeContext = { shouldEmitToolResult: () => boolean; emitToolSummary: (toolName?: string, meta?: string) => void; - stripBlockTags: (text: string, state: { thinking: boolean; final: boolean }) => string; + stripBlockTags: ( + text: string, + state: { thinking: boolean; final: boolean; inlineCode?: InlineCodeState }, + ) => string; emitBlockChunk: (text: string) => void; flushBlockReplyBuffer: () => void; emitReasoningStream: (text: string) => void; diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts index 0475a19f8..0de93e941 100644 --- a/src/agents/pi-embedded-subscribe.ts +++ b/src/agents/pi-embedded-subscribe.ts @@ -1,6 +1,8 @@ import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js"; import { formatToolAggregate } from "../auto-reply/tool-meta.js"; import { createSubsystemLogger } from "../logging.js"; +import type { InlineCodeState } from "../markdown/code-spans.js"; +import { buildCodeSpanIndex, createInlineCodeState } from "../markdown/code-spans.js"; import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js"; import { isMessagingToolDuplicateNormalized, @@ -38,7 +40,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar deltaBuffer: "", blockBuffer: "", // Track if a streamed chunk opened a block (stateful across chunks). - blockState: { thinking: false, final: false }, + blockState: { thinking: false, final: false, inlineCode: createInlineCodeState() }, lastStreamedAssistant: undefined, lastStreamedReasoning: undefined, lastBlockReplyText: undefined, @@ -72,6 +74,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar blockChunker?.reset(); state.blockState.thinking = false; state.blockState.final = false; + state.blockState.inlineCode = createInlineCodeState(); state.lastStreamedAssistant = undefined; state.lastBlockReplyText = undefined; state.lastStreamedReasoning = undefined; @@ -185,9 +188,15 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar } }; - const stripBlockTags = (text: string, state: { thinking: boolean; final: boolean }): string => { + const stripBlockTags = ( + text: string, + state: { thinking: boolean; final: boolean; inlineCode?: InlineCodeState }, + ): string => { if (!text) return text; + const inlineStateStart = state.inlineCode ?? createInlineCodeState(); + const codeSpans = buildCodeSpanIndex(text, inlineStateStart); + // 1. Handle blocks (stateful, strip content inside) let processed = ""; THINKING_TAG_SCAN_RE.lastIndex = 0; @@ -195,6 +204,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar let inThinking = state.thinking; for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) { const idx = match.index ?? 0; + if (codeSpans.isInside(idx)) continue; if (!inThinking) { processed += text.slice(lastIndex, idx); } @@ -211,9 +221,11 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar // If enforcement is disabled, we still strip the tags themselves to prevent // hallucinations (e.g. Minimax copying the style) from leaking, but we // do not enforce buffering/extraction logic. + const finalCodeSpans = buildCodeSpanIndex(processed, inlineStateStart); if (!params.enforceFinalTag) { + state.inlineCode = finalCodeSpans.inlineState; FINAL_TAG_SCAN_RE.lastIndex = 0; - return processed.replace(FINAL_TAG_SCAN_RE, ""); + return stripTagsOutsideCodeSpans(processed, FINAL_TAG_SCAN_RE, finalCodeSpans.isInside); } // If enforcement is enabled, only return text that appeared inside a block. @@ -225,6 +237,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar for (const match of processed.matchAll(FINAL_TAG_SCAN_RE)) { const idx = match.index ?? 0; + if (finalCodeSpans.isInside(idx)) continue; const isClose = match[1] === "/"; if (!inFinal && !isClose) { @@ -254,7 +267,27 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar // Hardened Cleanup: Remove any remaining tags that might have been // missed (e.g. nested tags or hallucinations) to prevent leakage. - return result.replace(FINAL_TAG_SCAN_RE, ""); + const resultCodeSpans = buildCodeSpanIndex(result, inlineStateStart); + state.inlineCode = resultCodeSpans.inlineState; + return stripTagsOutsideCodeSpans(result, FINAL_TAG_SCAN_RE, resultCodeSpans.isInside); + }; + + const stripTagsOutsideCodeSpans = ( + text: string, + pattern: RegExp, + isInside: (index: number) => boolean, + ) => { + let output = ""; + let lastIndex = 0; + pattern.lastIndex = 0; + for (const match of text.matchAll(pattern)) { + const idx = match.index ?? 0; + if (isInside(idx)) continue; + output += text.slice(lastIndex, idx); + lastIndex = idx + match[0].length; + } + output += text.slice(lastIndex); + return output; }; const emitBlockChunk = (text: string) => { diff --git a/src/markdown/code-spans.ts b/src/markdown/code-spans.ts new file mode 100644 index 000000000..8644c90a0 --- /dev/null +++ b/src/markdown/code-spans.ts @@ -0,0 +1,108 @@ +import { parseFenceSpans, type FenceSpan } from "./fences.js"; + +export type InlineCodeState = { + open: boolean; + ticks: number; +}; + +export function createInlineCodeState(): InlineCodeState { + return { open: false, ticks: 0 }; +} + +type InlineCodeSpansResult = { + spans: Array<[number, number]>; + state: InlineCodeState; +}; + +export type CodeSpanIndex = { + inlineState: InlineCodeState; + isInside: (index: number) => boolean; +}; + +export function buildCodeSpanIndex( + text: string, + inlineState?: InlineCodeState, +): CodeSpanIndex { + const fenceSpans = parseFenceSpans(text); + const startState = inlineState + ? { open: inlineState.open, ticks: inlineState.ticks } + : createInlineCodeState(); + const { spans: inlineSpans, state: nextInlineState } = parseInlineCodeSpans( + text, + fenceSpans, + startState, + ); + + return { + inlineState: nextInlineState, + isInside: (index: number) => + isInsideFenceSpan(index, fenceSpans) || isInsideInlineSpan(index, inlineSpans), + }; +} + +function parseInlineCodeSpans( + text: string, + fenceSpans: FenceSpan[], + initialState: InlineCodeState, +): InlineCodeSpansResult { + const spans: Array<[number, number]> = []; + let open = initialState.open; + let ticks = initialState.ticks; + let openStart = open ? 0 : -1; + + let i = 0; + while (i < text.length) { + const fence = findFenceSpanAtInclusive(fenceSpans, i); + if (fence) { + i = fence.end; + continue; + } + + if (text[i] !== "`") { + i += 1; + continue; + } + + const runStart = i; + let runLength = 0; + while (i < text.length && text[i] === "`") { + runLength += 1; + i += 1; + } + + if (!open) { + open = true; + ticks = runLength; + openStart = runStart; + continue; + } + + if (runLength === ticks) { + spans.push([openStart, i]); + open = false; + ticks = 0; + openStart = -1; + } + } + + if (open) { + spans.push([openStart, text.length]); + } + + return { + spans, + state: { open, ticks }, + }; +} + +function findFenceSpanAtInclusive(spans: FenceSpan[], index: number): FenceSpan | undefined { + return spans.find((span) => index >= span.start && index < span.end); +} + +function isInsideFenceSpan(index: number, spans: FenceSpan[]): boolean { + return spans.some((span) => index >= span.start && index < span.end); +} + +function isInsideInlineSpan(index: number, spans: Array<[number, number]>): boolean { + return spans.some(([start, end]) => index >= start && index < end); +}