fix(agents): strip tool leak text (#905)

Thanks @erikpr1994. Co-authored-by: Erik Pastor Rios <erikpastorrios1994@gmail.com>
2026-01-15 05:57:44 +00:00
parent 5c2eedc340
commit 2bd9e84851
3 changed files with 159 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@
 - Browser: add tests for snapshot labels/efficient query params and labeled image responses.
 - Telegram: register dock native commands with underscores to avoid `BOT_COMMAND_INVALID` (#929, fixes #901) — thanks @grp06.
 - Google: downgrade unsigned thinking blocks before send to avoid missing signature errors.
+- Agents: strip downgraded tool call text without eating adjacent replies and filter thinking-tag leaks. (#905) — thanks @erikpr1994.
 - Agents: cap tool call IDs for OpenAI/OpenRouter to avoid request rejections. (#875) — thanks @j1philli.
 - Doctor: avoid re-adding WhatsApp config when only legacy ack reactions are set. (#927, fixes #900) — thanks @grp06.
 - Agents: scrub tuple `items` schemas for Gemini tool calls. (#926, fixes #746) — thanks @grp06.
--- a/src/agents/pi-embedded-utils.test.ts
+++ b/src/agents/pi-embedded-utils.test.ts
@@ -329,6 +329,27 @@ Arguments: { "action": "act", "request": "click button" }`,
    expect(result).toBe("Let me check that for you.");
  });

+  it("preserves trailing text after downgraded tool call blocks", () => { // text that follows the Arguments payload must not be stripped with it
+    const msg: AssistantMessage = {
+      role: "assistant",
+      content: [
+        {
+          type: "text",
+          text: `Intro text.
+[Tool Call: read (ID: toolu_1)]
+Arguments: {
+  "path": "/tmp/file.txt"
+}
+Back to the user.`,
+        },
+      ],
+      timestamp: Date.now(),
+    };
+
+    const result = extractAssistantText(msg);
+    expect(result).toBe("Intro text.\nBack to the user."); // marker and multi-line arguments are gone; a single newline joins the two kept lines
+  });
+
  it("handles multiple text blocks with tool calls and results", () => {
    const msg: AssistantMessage = {
      role: "assistant",
@@ -375,6 +396,22 @@ File contents here`,
    expect(result).toBe("Aquí está tu respuesta.");
  });

+  it("strips thinking tags with attributes", () => { // an opening tag that carries attributes must still be recognized
+    const msg: AssistantMessage = {
+      role: "assistant",
+      content: [
+        {
+          type: "text",
+          text: `<think reason="deliberate">Hidden</think>Visible`,
+        },
+      ],
+      timestamp: Date.now(),
+    };
+
+    const result = extractAssistantText(msg);
+    expect(result).toBe("Visible"); // only the text outside the think element remains
+  });
+
  it("strips thinking tags without closing tag", () => {
    const msg: AssistantMessage = {
      role: "assistant",
--- a/src/agents/pi-embedded-utils.ts
+++ b/src/agents/pi-embedded-utils.ts
@@ -31,12 +31,127 @@ function stripDowngradedToolCallText(text: string): string {
  if (!text) return text;
  if (!/\[Tool (?:Call|Result)/i.test(text)) return text;

+  const consumeJsonish = ( // Locates the end of a JSON-ish value: a bracketed object/array, a quoted string, or a bare rest-of-line token.
+    input: string, // text being scanned
+    start: number, // index at which to begin looking for the value
+    options?: { allowLeadingNewlines?: boolean }, // when true, line breaks before the value are skipped as well
+  ): number | null => { // returns the index just past the value, or null when nothing is found or the value is unterminated
+    const { allowLeadingNewlines = false } = options ?? {};
+    let index = start;
+    while (index < input.length) { // skip leading spaces/tabs (and line breaks if allowed)
+      const ch = input[index];
+      if (ch === " " || ch === "\t") {
+        index += 1;
+        continue;
+      }
+      if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
+        index += 1;
+        continue;
+      }
+      break;
+    }
+    if (index >= input.length) return null; // only whitespace remained
+
+    const startChar = input[index];
+    if (startChar === "{" || startChar === "[") { // bracketed value: scan forward until the nesting depth returns to zero
+      let depth = 0;
+      let inString = false;
+      let escape = false;
+      for (let i = index; i < input.length; i += 1) {
+        const ch = input[i];
+        if (inString) { // brackets inside double-quoted strings do not affect depth
+          if (escape) {
+            escape = false; // the character after a backslash is taken literally
+          } else if (ch === "\\") {
+            escape = true;
+          } else if (ch === "\"") {
+            inString = false;
+          }
+          continue;
+        }
+        if (ch === "\"") {
+          inString = true;
+          continue;
+        }
+        if (ch === "{" || ch === "[") {
+          depth += 1;
+          continue;
+        }
+        if (ch === "}" || ch === "]") { // closers are not matched by type; either one decrements depth
+          depth -= 1;
+          if (depth === 0) return i + 1; // index just past the outermost closer
+        }
+      }
+      return null; // input ended before the brackets balanced
+    }
+
+    if (startChar === "\"") { // quoted string value: scan to the first unescaped closing quote
+      let escape = false;
+      for (let i = index + 1; i < input.length; i += 1) {
+        const ch = input[i];
+        if (escape) {
+          escape = false;
+          continue;
+        }
+        if (ch === "\\") {
+          escape = true;
+          continue;
+        }
+        if (ch === "\"") return i + 1; // index just past the closing quote
+      }
+      return null; // unterminated string
+    }
+
+    let end = index; // any other value: consume up to, but not including, the next line break
+    while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
+      end += 1;
+    }
+    return end;
+  };
+
+  const stripToolCalls = (input: string): string => { // Removes every "[Tool Call: ...]" marker and its optional Arguments payload, keeping the surrounding text.
+    const markerRe = /\[Tool Call:[^\]]*\]/gi;
+    let result = ""; // output accumulated so far
+    let cursor = 0; // position in input up to which text has already been copied or dropped
+    for (const match of input.matchAll(markerRe)) {
+      const start = match.index ?? 0;
+      if (start < cursor) continue; // marker lies inside a span that was already consumed (e.g. within a previous payload)
+      result += input.slice(cursor, start); // keep the text that precedes the marker
+      let index = start + match[0].length;
+      while (index < input.length && (input[index] === " " || input[index] === "\t")) { // skip spaces/tabs right after the marker
+        index += 1;
+      }
+      if (input[index] === "\r") { // skip at most one line break: CRLF, lone CR, or LF
+        index += 1;
+        if (input[index] === "\n") index += 1;
+      } else if (input[index] === "\n") {
+        index += 1;
+      }
+      while (index < input.length && (input[index] === " " || input[index] === "\t")) { // skip indentation before a possible "Arguments" label
+        index += 1;
+      }
+      if (input.slice(index, index + 9).toLowerCase() === "arguments") { // case-insensitive label check; 9 is the label's length
+        index += 9;
+        if (input[index] === ":") index += 1; // the colon is optional
+        if (input[index] === " ") index += 1;
+        const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
+        if (end !== null) index = end; // on null only the label is dropped; whatever follows it stays in the output
+      }
+      if ( // drop one line break after the block only when the output is empty or already ends with one, so no blank line is left behind
+        (input[index] === "\n" || input[index] === "\r") &&
+        (result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
+      ) {
+        if (input[index] === "\r") index += 1;
+        if (input[index] === "\n") index += 1;
+      }
+      cursor = index;
+    }
+    result += input.slice(cursor); // append everything after the last handled marker
+    return result;
+  };
+
  // Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
-  // Match until the next [Tool marker or end of string.
-  let cleaned = text.replace(
-    /\[Tool Call:[^\]]*\]\n?(?:Arguments:[\s\S]*?)?(?=\n*\[Tool |\n*$)/gi,
-    "",
-  );
+  let cleaned = stripToolCalls(text);

  // Remove [Tool Result for ID ...] blocks and their content.
  cleaned = cleaned.replace(
@@ -57,7 +172,7 @@ function stripThinkingTagsFromText(text: string): string {
  // Quick check to avoid regex overhead when no tags present.
  if (!/(?:think(?:ing)?|thought|antthinking)/i.test(text)) return text;

-  const tagRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
+  const tagRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;
  let result = "";
  let lastIndex = 0;
  let inThinking = false;