import type { AssistantMessage } from "@mariozechner/pi-ai";
import { sanitizeUserFacingText } from "./pi-embedded-helpers.js";
import { formatToolDetail, resolveToolDisplay } from "./tool-display.js";

/**
 * Strip malformed Minimax tool invocations that leak into text content.
 * Minimax sometimes embeds tool calls as XML in text blocks instead of
 * proper structured tool calls. This removes:
 * - `<invoke ...>...</invoke>` blocks
 * - stray `<minimax:tool_call>` / `</minimax:tool_call>` tags
 *
 * Text that does not mention `minimax:tool_call` is returned untouched.
 *
 * @param text - Raw text of a single text block.
 * @returns The text with Minimax tool-call XML removed (not trimmed).
 */
function stripMinimaxToolCallXml(text: string): string {
  if (!text) return text;
  // Cheap guard: only run the replacements when the Minimax marker is present.
  if (!/minimax:tool_call/i.test(text)) return text;

  // Remove <invoke ...>...</invoke> blocks (non-greedy to handle multiple).
  // The pattern must be anchored on the opening "<invoke" so that only the
  // invocation itself is removed, not everything from the first ">" onwards.
  let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");

  // Remove stray minimax tool tags.
  cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, "");

  return cleaned;
}

/**
 * Strip downgraded tool call text representations that leak into text content.
 * When replaying history to Gemini, tool calls without `thought_signature` are
 * downgraded to text blocks like `[Tool Call: name (ID: ...)]`. These should
 * not be shown to users.
 *
 * @param text - Raw text of a single text block.
 * @returns The trimmed text with `[Tool Call: ...]` markers (plus a following
 *   `Arguments` payload) and `[Tool Result for ID ...]` sections removed. Text
 *   without any `[Tool Call` / `[Tool Result` marker is returned unchanged.
 */
function stripDowngradedToolCallText(text: string): string {
  if (!text) return text;
  if (!/\[Tool (?:Call|Result)/i.test(text)) return text;

  /**
   * Find the end offset of a JSON-like value starting at `start`.
   *
   * Leading spaces/tabs (and newlines when `allowLeadingNewlines` is set) are
   * skipped first. Then:
   * - `{` / `[`: scans to the matching closer, ignoring brackets inside
   *   double-quoted strings; returns null when unbalanced.
   * - `"`: scans to the closing unescaped quote; returns null when unterminated.
   * - anything else: consumes up to (not including) the next line break.
   *
   * Returns null when only whitespace remains.
   */
  const consumeJsonish = (
    input: string,
    start: number,
    options?: { allowLeadingNewlines?: boolean },
  ): number | null => {
    const { allowLeadingNewlines = false } = options ?? {};
    let index = start;
    while (index < input.length) {
      const ch = input[index];
      if (ch === " " || ch === "\t") {
        index += 1;
        continue;
      }
      if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
        index += 1;
        continue;
      }
      break;
    }
    if (index >= input.length) return null;

    const startChar = input[index];
    if (startChar === "{" || startChar === "[") {
      let depth = 0;
      let inString = false;
      let escape = false;
      for (let i = index; i < input.length; i += 1) {
        const ch = input[i];
        if (inString) {
          // Inside a string only escapes and the closing quote matter.
          if (escape) {
            escape = false;
          } else if (ch === "\\") {
            escape = true;
          } else if (ch === '"') {
            inString = false;
          }
          continue;
        }
        if (ch === '"') {
          inString = true;
          continue;
        }
        if (ch === "{" || ch === "[") {
          depth += 1;
          continue;
        }
        if (ch === "}" || ch === "]") {
          depth -= 1;
          if (depth === 0) return i + 1;
        }
      }
      return null;
    }

    if (startChar === '"') {
      let escape = false;
      for (let i = index + 1; i < input.length; i += 1) {
        const ch = input[i];
        if (escape) {
          escape = false;
          continue;
        }
        if (ch === "\\") {
          escape = true;
          continue;
        }
        if (ch === '"') return i + 1;
      }
      return null;
    }

    // Bare value: runs to the end of the current line.
    let end = index;
    while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
      end += 1;
    }
    return end;
  };

  /**
   * Remove every `[Tool Call: ...]` marker together with an optional
   * `Arguments` payload on the same or the next line.
   */
  const stripToolCalls = (input: string): string => {
    const markerRe = /\[Tool Call:[^\]]*\]/gi;
    let result = "";
    let cursor = 0;
    for (const match of input.matchAll(markerRe)) {
      const start = match.index ?? 0;
      // Marker sits inside a span that was already consumed (e.g. in arguments).
      if (start < cursor) continue;
      result += input.slice(cursor, start);

      let index = start + match[0].length;
      // Skip trailing blanks, at most one line break, then leading blanks.
      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
        index += 1;
      }
      if (input[index] === "\r") {
        index += 1;
        if (input[index] === "\n") index += 1;
      } else if (input[index] === "\n") {
        index += 1;
      }
      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
        index += 1;
      }

      if (input.slice(index, index + 9).toLowerCase() === "arguments") {
        index += 9;
        if (input[index] === ":") index += 1;
        if (input[index] === " ") index += 1;
        const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
        if (end !== null) index = end;
      }

      // Swallow one line break after the removed block when the output is
      // already at a line start, so no blank line is left behind.
      if (
        (input[index] === "\n" || input[index] === "\r") &&
        (result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
      ) {
        if (input[index] === "\r") index += 1;
        if (input[index] === "\n") index += 1;
      }
      cursor = index;
    }
    result += input.slice(cursor);
    return result;
  };

  // Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
  let cleaned = stripToolCalls(text);

  // Remove [Tool Result for ID ...] blocks and their content.
  cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");

  return cleaned.trim();
}

/**
 * Strip thinking tags and their content from text.
 * This is a safety net for cases where the model outputs `<think>`,
 * `<thinking>`, `<thought>` or `<antthinking>` tags that slip through other
 * filtering mechanisms.
 *
 * Text before an unmatched closing tag is dropped, and text after an
 * unclosed opening tag is dropped as well.
 *
 * @param text - Raw text of a single text block.
 * @returns The trimmed text without thinking sections; the input is returned
 *   as-is when it does not contain any of the tag names.
 */
function stripThinkingTagsFromText(text: string): string {
  if (!text) return text;
  // Quick check to avoid regex overhead when no tags present.
  if (!/(?:think(?:ing)?|thought|antthinking)/i.test(text)) return text;

  const tagRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;
  let result = "";
  let lastIndex = 0;
  let inThinking = false;

  for (const match of text.matchAll(tagRe)) {
    const idx = match.index ?? 0;
    const isClose = match[1] === "/";

    if (!inThinking && !isClose) {
      // Opening tag - save text before it.
      result += text.slice(lastIndex, idx);
      inThinking = true;
    } else if (inThinking && isClose) {
      // Closing tag - skip content inside.
      inThinking = false;
    }

    lastIndex = idx + match[0].length;
  }

  // Append remaining text if we're not inside thinking.
  if (!inThinking) {
    result += text.slice(lastIndex);
  }

  return result.trim();
}

/**
 * Collect the user-facing text of an assistant message.
 *
 * Every `text` content block is cleaned (Minimax tool-call XML, downgraded
 * tool-call text, thinking tags), empty results are dropped, the rest is
 * joined with newlines and passed through `sanitizeUserFacingText`.
 *
 * @param msg - Assistant message whose content blocks are inspected.
 * @returns The sanitized text; non-array content yields the sanitized empty string.
 */
export function extractAssistantText(msg: AssistantMessage): string {
  const isTextBlock = (block: unknown): block is { type: "text"; text: string } => {
    if (!block || typeof block !== "object") return false;
    const rec = block as Record<string, unknown>;
    return rec.type === "text" && typeof rec.text === "string";
  };

  const blocks = Array.isArray(msg.content)
    ? msg.content
        .filter(isTextBlock)
        .map((c) =>
          stripThinkingTagsFromText(
            stripDowngradedToolCallText(stripMinimaxToolCallXml(c.text)),
          ).trim(),
        )
        .filter(Boolean)
    : [];
  const extracted = blocks.join("\n").trim();
  return sanitizeUserFacingText(extracted);
}

/**
 * Collect the contents of all structured `thinking` blocks of a message.
 *
 * @param msg - Assistant message whose content blocks are inspected.
 * @returns The trimmed thinking texts joined with newlines, or "" when the
 *   content is not an array or holds no non-empty thinking block.
 */
export function extractAssistantThinking(msg: AssistantMessage): string {
  if (!Array.isArray(msg.content)) return "";
  const blocks = msg.content
    .map((block) => {
      if (!block || typeof block !== "object") return "";
      const record = block as unknown as Record<string, unknown>;
      if (record.type === "thinking" && typeof record.thinking === "string") {
        return record.thinking.trim();
      }
      return "";
    })
    .filter(Boolean);
  return blocks.join("\n").trim();
}

/**
 * Format reasoning text for display.
 *
 * @param text - Raw reasoning text.
 * @returns "" for blank input, otherwise the trimmed text wrapped in
 *   underscores below a `Reasoning:` line.
 */
export function formatReasoningMessage(text: string): string {
  const trimmed = text.trim();
  if (!trimmed) return "";
  // Show reasoning in italics (cursive) for markdown-friendly surfaces (Discord, etc.).
  // Keep the plain "Reasoning:" prefix so existing parsing/detection keeps working.
  return `Reasoning:\n_${trimmed}_`;
}

/** One segment produced by {@link splitThinkingTaggedText}. */
type ThinkTaggedSplitBlock =
  | { type: "thinking"; thinking: string }
  | { type: "text"; text: string };

/**
 * Split text that uses thinking tags into ordered thinking/text segments.
 *
 * @param text - Text that may contain `<think>`-style tags (attribute-free).
 * @returns The segments in document order, or null when the text (ignoring
 *   leading whitespace) does not start with `<`, contains no opening or no
 *   closing tag, ends inside an unclosed thinking section, or yields no
 *   non-empty thinking segment. Thinking segments are trimmed; text segments
 *   are kept verbatim and skipped only when empty.
 */
export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | null {
  const trimmedStart = text.trimStart();
  // Avoid false positives: only treat it as structured thinking when it begins
  // with a tag (common for local/OpenAI-compat providers that emulate
  // reasoning blocks via <think> tags).
  if (!trimmedStart.startsWith("<")) return null;
  const openRe = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
  const closeRe = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
  if (!openRe.test(trimmedStart)) return null;
  if (!closeRe.test(text)) return null;

  const scanRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
  let inThinking = false;
  let cursor = 0;
  let thinkingStart = 0;
  const blocks: ThinkTaggedSplitBlock[] = [];

  const pushText = (value: string) => {
    if (!value) return;
    blocks.push({ type: "text", text: value });
  };
  const pushThinking = (value: string) => {
    const cleaned = value.trim();
    if (!cleaned) return;
    blocks.push({ type: "thinking", thinking: cleaned });
  };

  for (const match of text.matchAll(scanRe)) {
    const index = match.index ?? 0;
    const isClose = Boolean(match[1]?.includes("/"));

    if (!inThinking && !isClose) {
      pushText(text.slice(cursor, index));
      thinkingStart = index + match[0].length;
      inThinking = true;
      continue;
    }

    if (inThinking && isClose) {
      pushThinking(text.slice(thinkingStart, index));
      cursor = index + match[0].length;
      inThinking = false;
    }
  }

  // An unclosed thinking section means the text is not cleanly structured.
  if (inThinking) return null;
  pushText(text.slice(cursor));

  const hasThinking = blocks.some((b) => b.type === "thinking");
  if (!hasThinking) return null;
  return blocks;
}

/**
 * Convert tag-delimited thinking inside text blocks into `thinking` blocks.
 *
 * Mutates `message.content` in place. Nothing happens when the content is not
 * an array, when it already holds a `thinking` block, or when no text block
 * can be split by {@link splitThinkingTaggedText}.
 *
 * @param message - Assistant message to rewrite.
 */
export function promoteThinkingTagsToBlocks(message: AssistantMessage): void {
  if (!Array.isArray(message.content)) return;
  const hasThinkingBlock = message.content.some((block) => block.type === "thinking");
  if (hasThinkingBlock) return;

  const next: AssistantMessage["content"] = [];
  let changed = false;

  for (const block of message.content) {
    if (block.type !== "text") {
      next.push(block);
      continue;
    }
    const split = splitThinkingTaggedText(block.text);
    if (!split) {
      next.push(block);
      continue;
    }
    changed = true;
    for (const part of split) {
      if (part.type === "thinking") {
        next.push({ type: "thinking", thinking: part.thinking });
      } else if (part.type === "text") {
        // Drop whitespace left over directly after a closing tag.
        const cleaned = part.text.trimStart();
        if (cleaned) next.push({ type: "text", text: cleaned });
      }
    }
  }

  if (!changed) return;
  message.content = next;
}

/**
 * Extract the content found between thinking tags.
 *
 * Only text that is followed by another tag is collected, so the tail of an
 * unclosed section is not part of the result.
 *
 * @param text - Text that may contain `<think>`-style tags.
 * @returns The concatenated, trimmed thinking content, or "" when there is none.
 */
export function extractThinkingFromTaggedText(text: string): string {
  if (!text) return "";
  const scanRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
  let result = "";
  let lastIndex = 0;
  let inThinking = false;
  for (const match of text.matchAll(scanRe)) {
    const idx = match.index ?? 0;
    if (inThinking) {
      result += text.slice(lastIndex, idx);
    }
    const isClose = match[1] === "/";
    inThinking = !isClose;
    lastIndex = idx + match[0].length;
  }
  return result.trim();
}

/**
 * Extract thinking content from partially streamed tagged text.
 *
 * Content of closed sections wins when it is non-empty. Otherwise, when the
 * last opening tag has no closing tag after it, everything following that
 * opening tag is returned.
 *
 * @param text - Possibly incomplete text containing `<think>`-style tags.
 * @returns The trimmed thinking content, or "" when none is available.
 */
export function extractThinkingFromTaggedStream(text: string): string {
  if (!text) return "";
  const closed = extractThinkingFromTaggedText(text);
  if (closed) return closed;

  const openRe = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
  const closeRe = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
  const openMatches = [...text.matchAll(openRe)];
  if (openMatches.length === 0) return "";
  const closeMatches = [...text.matchAll(closeRe)];
  const lastOpen = openMatches[openMatches.length - 1];
  const lastClose = closeMatches[closeMatches.length - 1];
  // The last section is already closed; `closed` is empty at this point.
  if (lastClose && (lastClose.index ?? -1) > (lastOpen.index ?? -1)) {
    return closed;
  }
  const start = (lastOpen.index ?? 0) + lastOpen[0].length;
  return text.slice(start).trim();
}

/**
 * Derive a short tool detail string from a tool name and its arguments.
 *
 * @param toolName - Name of the tool.
 * @param args - Arguments the tool was called with.
 * @returns Whatever `formatToolDetail` yields for the display resolved by
 *   `resolveToolDisplay`; may be undefined.
 */
export function inferToolMetaFromArgs(toolName: string, args: unknown): string | undefined {
  const display = resolveToolDisplay({ name: toolName, args });
  return formatToolDetail(display);
}