From 2bd9e84851b7169b9909ee6b7d0326cd77415f05 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 15 Jan 2026 05:57:44 +0000 Subject: [PATCH] fix(agents): strip tool leak text (#905) Thanks @erikpr1994. Co-authored-by: Erik Pastor Rios --- CHANGELOG.md | 1 + src/agents/pi-embedded-utils.test.ts | 37 ++++++++ src/agents/pi-embedded-utils.ts | 127 +++++++++++++++++++++++++-- 3 files changed, 159 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75f140711..419acfe99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ - Browser: add tests for snapshot labels/efficient query params and labeled image responses. - Telegram: register dock native commands with underscores to avoid `BOT_COMMAND_INVALID` (#929, fixes #901) — thanks @grp06. - Google: downgrade unsigned thinking blocks before send to avoid missing signature errors. +- Agents: strip downgraded tool call text without eating adjacent replies and filter thinking-tag leaks. (#905) — thanks @erikpr1994. - Agents: cap tool call IDs for OpenAI/OpenRouter to avoid request rejections. (#875) — thanks @j1philli. - Doctor: avoid re-adding WhatsApp config when only legacy ack reactions are set. (#927, fixes #900) — thanks @grp06. - Agents: scrub tuple `items` schemas for Gemini tool calls. (#926, fixes #746) — thanks @grp06. diff --git a/src/agents/pi-embedded-utils.test.ts b/src/agents/pi-embedded-utils.test.ts index a3d322757..92717a2a7 100644 --- a/src/agents/pi-embedded-utils.test.ts +++ b/src/agents/pi-embedded-utils.test.ts @@ -329,6 +329,27 @@ Arguments: { "action": "act", "request": "click button" }`, expect(result).toBe("Let me check that for you."); }); + it("preserves trailing text after downgraded tool call blocks", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `Intro text. 
+[Tool Call: read (ID: toolu_1)] +Arguments: { + "path": "/tmp/file.txt" +} +Back to the user.`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("Intro text.\nBack to the user."); + }); + it("handles multiple text blocks with tool calls and results", () => { const msg: AssistantMessage = { role: "assistant", @@ -375,6 +396,22 @@ File contents here`, expect(result).toBe("Aquí está tu respuesta."); }); + it("strips thinking tags with attributes", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `<think reason="deliberate">Hidden</think>Visible`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("Visible"); + }); + it("strips thinking tags without closing tag", () => { const msg: AssistantMessage = { role: "assistant", diff --git a/src/agents/pi-embedded-utils.ts b/src/agents/pi-embedded-utils.ts index d3ccd4485..814221a28 100644 --- a/src/agents/pi-embedded-utils.ts +++ b/src/agents/pi-embedded-utils.ts @@ -31,12 +31,127 @@ function stripDowngradedToolCallText(text: string): string { if (!text) return text; if (!/\[Tool (?:Call|Result)/i.test(text)) return text; + const consumeJsonish = ( + input: string, + start: number, + options?: { allowLeadingNewlines?: boolean }, + ): number | null => { + const { allowLeadingNewlines = false } = options ?? 
{}; + let index = start; + while (index < input.length) { + const ch = input[index]; + if (ch === " " || ch === "\t") { + index += 1; + continue; + } + if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) { + index += 1; + continue; + } + break; + } + if (index >= input.length) return null; + + const startChar = input[index]; + if (startChar === "{" || startChar === "[") { + let depth = 0; + let inString = false; + let escape = false; + for (let i = index; i < input.length; i += 1) { + const ch = input[i]; + if (inString) { + if (escape) { + escape = false; + } else if (ch === "\\") { + escape = true; + } else if (ch === "\"") { + inString = false; + } + continue; + } + if (ch === "\"") { + inString = true; + continue; + } + if (ch === "{" || ch === "[") { + depth += 1; + continue; + } + if (ch === "}" || ch === "]") { + depth -= 1; + if (depth === 0) return i + 1; + } + } + return null; + } + + if (startChar === "\"") { + let escape = false; + for (let i = index + 1; i < input.length; i += 1) { + const ch = input[i]; + if (escape) { + escape = false; + continue; + } + if (ch === "\\") { + escape = true; + continue; + } + if (ch === "\"") return i + 1; + } + return null; + } + + let end = index; + while (end < input.length && input[end] !== "\n" && input[end] !== "\r") { + end += 1; + } + return end; + }; + + const stripToolCalls = (input: string): string => { + const markerRe = /\[Tool Call:[^\]]*\]/gi; + let result = ""; + let cursor = 0; + for (const match of input.matchAll(markerRe)) { + const start = match.index ?? 
0; + if (start < cursor) continue; + result += input.slice(cursor, start); + let index = start + match[0].length; + while (index < input.length && (input[index] === " " || input[index] === "\t")) { + index += 1; + } + if (input[index] === "\r") { + index += 1; + if (input[index] === "\n") index += 1; + } else if (input[index] === "\n") { + index += 1; + } + while (index < input.length && (input[index] === " " || input[index] === "\t")) { + index += 1; + } + if (input.slice(index, index + 9).toLowerCase() === "arguments") { + index += 9; + if (input[index] === ":") index += 1; + if (input[index] === " ") index += 1; + const end = consumeJsonish(input, index, { allowLeadingNewlines: true }); + if (end !== null) index = end; + } + if ( + (input[index] === "\n" || input[index] === "\r") && + (result.endsWith("\n") || result.endsWith("\r") || result.length === 0) + ) { + if (input[index] === "\r") index += 1; + if (input[index] === "\n") index += 1; + } + cursor = index; + } + result += input.slice(cursor); + return result; + }; + // Remove [Tool Call: name (ID: ...)] blocks and their Arguments. - // Match until the next [Tool marker or end of string. - let cleaned = text.replace( - /\[Tool Call:[^\]]*\]\n?(?:Arguments:[\s\S]*?)?(?=\n*\[Tool |\n*$)/gi, - "", - ); + let cleaned = stripToolCalls(text); // Remove [Tool Result for ID ...] blocks and their content. cleaned = cleaned.replace( @@ -57,7 +172,7 @@ function stripThinkingTagsFromText(text: string): string { // Quick check to avoid regex overhead when no tags present. if (!/(?:think(?:ing)?|thought|antthinking)/i.test(text)) return text; - const tagRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi; + const tagRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi; let result = ""; let lastIndex = 0; let inThinking = false;