diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c0e4f459..5c0501029 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,8 +23,9 @@ - Sandbox: support tool-policy groups in `tools.sandbox.tools` (e.g. `group:memory`, `group:fs`) to reduce config churn. ### Fixes +- Models/MiniMax: strip malformed tool invocation XML (`<invoke>...</invoke>` and `</minimax:tool_call>`) from assistant text to prevent tool call leaks into user messages. (#809 — thanks @latitudeki5223) - Tools/Models: MiniMax vision now uses the Coding Plan VLM endpoint (`/v1/coding_plan/vlm`) so the `image` tool works with MiniMax keys (also accepts `@/path/to/file.png`-style inputs). -- Gateway/macOS: reduce noisy loopback WS “closed before connect” logs during tests. +- Gateway/macOS: reduce noisy loopback WS "closed before connect" logs during tests. - Auto-reply: resolve ambiguous `/model` fuzzy matches by picking the best candidate instead of erroring. ## 2026.1.12-1 diff --git a/src/agents/pi-embedded-utils.test.ts b/src/agents/pi-embedded-utils.test.ts new file mode 100644 index 000000000..2b32c1466 --- /dev/null +++ b/src/agents/pi-embedded-utils.test.ts @@ -0,0 +1,196 @@ +import type { AssistantMessage } from "@mariozechner/pi-ai"; +import { describe, expect, it } from "vitest"; +import { extractAssistantText } from "./pi-embedded-utils.js"; + +describe("extractAssistantText", () => { + it("strips Minimax tool invocation XML from text", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: ` +netstat -tlnp | grep 18789 + +`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe(""); + }); + + it("strips multiple tool invocations", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `Let me check that. 
+/home/admin/test.txt + +`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("Let me check that."); + }); + + it("keeps invoke snippets without Minimax markers", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `Example:\n\nls\n`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe( + `Example:\n\nls\n`, + ); + }); + + it("preserves normal text without tool invocations", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: "This is a normal response without any tool calls.", + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("This is a normal response without any tool calls."); + }); + + it("strips Minimax tool invocations with extra attributes", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `Before\nls\n\nAfter`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("Before\nAfter"); + }); + + it("strips tool XML mixed with regular content", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `I'll help you with that. +ls -la + +Here are the results.`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("I'll help you with that.\nHere are the results."); + }); + + it("handles multiple invoke blocks in one message", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `First check. +file1.txt + +Second check. 
+pwd + +Done.`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("First check.\nSecond check.\nDone."); + }); + + it("handles stray closing tags without opening tags", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: "Some text here.</minimax:tool_call>More text.", + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("Some text here.More text."); + }); + + it("returns empty string when message is only tool invocations", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: ` +test + +`, + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe(""); + }); + + it("handles multiple text blocks", () => { + const msg: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: "First block.", + }, + { + type: "text", + text: ` +ls + +`, + }, + { + type: "text", + text: "Third block.", + }, + ], + timestamp: Date.now(), + }; + + const result = extractAssistantText(msg); + expect(result).toBe("First block.\nThird block."); + }); +}); diff --git a/src/agents/pi-embedded-utils.ts b/src/agents/pi-embedded-utils.ts index 40c3ec1ce..19e7afd62 100644 --- a/src/agents/pi-embedded-utils.ts +++ b/src/agents/pi-embedded-utils.ts @@ -1,6 +1,26 @@ import type { AssistantMessage } from "@mariozechner/pi-ai"; import { formatToolDetail, resolveToolDisplay } from "./tool-display.js"; +/** + * Strip malformed Minimax tool invocations that leak into text content. + * Minimax sometimes embeds tool calls as XML in text blocks instead of + * proper structured tool calls. This removes: + * - <invoke ...>...</invoke> blocks + * - </minimax:tool_call> closing tags + */ +function stripMinimaxToolCallXml(text: string): string { + if (!text) return text; + if (!/minimax:tool_call/i.test(text)) return text; + + // Remove <invoke ...>...</invoke> blocks (non-greedy to handle multiple). 
+ let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, ""); + + // Remove stray minimax tool tags. + cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, ""); + + return cleaned; +} + export function extractAssistantText(msg: AssistantMessage): string { const isTextBlock = ( block: unknown, @@ -13,7 +33,7 @@ export function extractAssistantText(msg: AssistantMessage): string { const blocks = Array.isArray(msg.content) ? msg.content .filter(isTextBlock) - .map((c) => c.text.trim()) + .map((c) => stripMinimaxToolCallXml(c.text).trim()) .filter(Boolean) : []; return blocks.join("\n").trim();