376 lines
12 KiB
TypeScript
376 lines
12 KiB
TypeScript
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
|
import { sanitizeUserFacingText } from "./pi-embedded-helpers.js";
|
|
import { formatToolDetail, resolveToolDisplay } from "./tool-display.js";
|
|
|
|
/**
 * Remove Minimax tool-invocation XML that leaked into a text block.
 *
 * Minimax sometimes writes tool calls as XML inside the text instead of
 * emitting structured tool calls. When the text mentions `minimax:tool_call`
 * this strips:
 * - every `<invoke ...>...</invoke>` block, and
 * - any `<minimax:tool_call>` / `</minimax:tool_call>` wrapper tag.
 *
 * @param text Raw text of a single content block.
 * @returns The text without the leaked XML; returned unchanged when it is
 *   empty or carries no `minimax:tool_call` marker.
 */
function stripMinimaxToolCallXml(text: string): string {
  // Fast path: nothing to do unless the Minimax wrapper name shows up somewhere.
  const mentionsMinimax = Boolean(text) && /minimax:tool_call/i.test(text);
  if (!mentionsMinimax) return text;

  // Lazy body match so several <invoke> blocks are removed one by one.
  const invokeBlockRe = /<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi;
  // Opening or closing wrapper tags left behind once the invokes are gone.
  const wrapperTagRe = /<\/?minimax:tool_call>/gi;

  return text.replace(invokeBlockRe, "").replace(wrapperTagRe, "");
}
|
|
|
|
/**
 * Remove downgraded tool-call transcripts that leaked into a text block.
 *
 * When replaying history to Gemini, tool calls without `thought_signature` are
 * downgraded to text such as `[Tool Call: name (ID: ...)]`; users should not
 * see those. Two shapes are removed:
 * - `[Tool Call: ...]` markers, plus an optional `Arguments` payload that
 *   follows on the same line or on the next one;
 * - `[Tool Result for ID ...]` markers, plus everything up to the next
 *   `[Tool ` marker or the end of the text.
 *
 * @param text Raw text of a single content block.
 * @returns The input unchanged when it is empty or contains neither marker;
 *   otherwise the cleaned text, trimmed.
 */
function stripDowngradedToolCallText(text: string): string {
  if (!text || !/\[Tool (?:Call|Result)/i.test(text)) return text;

  const isBlank = (ch: string | undefined): boolean => ch === " " || ch === "\t";
  const isLineBreak = (ch: string | undefined): boolean => ch === "\n" || ch === "\r";

  /** Advance past spaces and tabs starting at `from`. */
  const skipBlanks = (source: string, from: number): number => {
    let pos = from;
    while (pos < source.length && isBlank(source[pos])) pos += 1;
    return pos;
  };

  /**
   * Find where the argument payload that begins at or after `from` ends.
   * Returns the index just past the payload, or null when there is no payload
   * or a bracketed/quoted payload is never terminated.
   */
  const findPayloadEnd = (
    source: string,
    from: number,
    skipLeadingNewlines: boolean,
  ): number | null => {
    let pos = from;
    while (
      pos < source.length &&
      (isBlank(source[pos]) || (skipLeadingNewlines && isLineBreak(source[pos])))
    ) {
      pos += 1;
    }
    if (pos >= source.length) return null;

    const opener = source[pos];

    if (opener === "{" || opener === "[") {
      // Balance brackets, ignoring any that appear inside double-quoted strings.
      let nesting = 0;
      let quoted = false;
      let escaped = false;
      for (let scan = pos; scan < source.length; scan += 1) {
        const ch = source[scan];
        if (quoted) {
          if (escaped) escaped = false;
          else if (ch === "\\") escaped = true;
          else if (ch === '"') quoted = false;
        } else if (ch === '"') {
          quoted = true;
        } else if (ch === "{" || ch === "[") {
          nesting += 1;
        } else if (ch === "}" || ch === "]") {
          nesting -= 1;
          if (nesting === 0) return scan + 1;
        }
      }
      return null;
    }

    if (opener === '"') {
      // Quoted payload: ends at the first unescaped closing quote.
      let escaped = false;
      for (let scan = pos + 1; scan < source.length; scan += 1) {
        const ch = source[scan];
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') return scan + 1;
      }
      return null;
    }

    // Bare payload: runs to the end of the current line.
    let stop = pos;
    while (stop < source.length && !isLineBreak(source[stop])) stop += 1;
    return stop;
  };

  /** Drop every `[Tool Call: ...]` marker and the `Arguments` payload after it. */
  const dropToolCalls = (source: string): string => {
    const keyword = "arguments";
    let output = "";
    let resumeAt = 0;

    for (const marker of source.matchAll(/\[Tool Call:[^\]]*\]/gi)) {
      const markerStart = marker.index ?? 0;
      // A marker swallowed by a previous payload has already been removed.
      if (markerStart < resumeAt) continue;
      output += source.slice(resumeAt, markerStart);

      // Step over trailing blanks, at most one line break, then indentation.
      let pos = skipBlanks(source, markerStart + marker[0].length);
      if (source[pos] === "\r") {
        pos += 1;
        if (source[pos] === "\n") pos += 1;
      } else if (source[pos] === "\n") {
        pos += 1;
      }
      pos = skipBlanks(source, pos);

      if (source.slice(pos, pos + keyword.length).toLowerCase() === keyword) {
        pos += keyword.length;
        if (source[pos] === ":") pos += 1;
        if (source[pos] === " ") pos += 1;
        const payloadEnd = findPayloadEnd(source, pos, true);
        if (payloadEnd !== null) pos = payloadEnd;
      }

      // Swallow the line break after the block only when the kept text already
      // ends a line (or is still empty), so no blank line is left behind.
      const keptEndsLine =
        output.length === 0 || output.endsWith("\n") || output.endsWith("\r");
      if (isLineBreak(source[pos]) && keptEndsLine) {
        if (source[pos] === "\r") pos += 1;
        if (source[pos] === "\n") pos += 1;
      }

      resumeAt = pos;
    }

    return output + source.slice(resumeAt);
  };

  return (
    dropToolCalls(text)
      // Remove [Tool Result for ID ...] blocks and their content.
      .replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "")
      .trim()
  );
}
|
|
|
|
/**
 * Remove thinking tags (`think`, `thinking`, `thought`, `antthinking`) and the
 * content they enclose from a piece of text.
 *
 * This is a safety net for model output whose thinking tags slipped through
 * other filters. Notes on the exact behaviour:
 * - an opening tag that is never closed hides everything after it;
 * - text located before an unmatched closing tag is discarded as well;
 * - the result is trimmed, except on the early-return paths (empty input or
 *   no tag name anywhere in the text), which hand the input back untouched.
 *
 * @param text Text that may contain thinking tags.
 * @returns The text with thinking sections removed.
 */
function stripThinkingTagsFromText(text: string): string {
  // Cheap pre-check so ordinary text never pays for the tag scan.
  const mayContainTag =
    Boolean(text) && /(?:think(?:ing)?|thought|antthinking)/i.test(text);
  if (!mayContainTag) return text;

  const visibleParts: string[] = [];
  let resumeAt = 0;
  let hidden = false;

  for (const tag of text.matchAll(
    /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi,
  )) {
    const tagStart = tag.index ?? 0;
    const closing = tag[1] === "/";

    if (hidden) {
      // Only a closing tag ends a hidden section; nested openers are ignored.
      if (closing) hidden = false;
    } else if (!closing) {
      // Entering a hidden section: keep what came before the opener.
      visibleParts.push(text.slice(resumeAt, tagStart));
      hidden = true;
    }

    resumeAt = tagStart + tag[0].length;
  }

  // The tail is visible only when the last thinking section was closed.
  if (!hidden) visibleParts.push(text.slice(resumeAt));

  return visibleParts.join("").trim();
}
|
|
|
|
/**
 * Build the user-facing text of an assistant message.
 *
 * Every `text` block is passed through the Minimax XML, downgraded tool-call
 * and thinking-tag strippers (in that order) and trimmed. Blocks that end up
 * empty are dropped and the rest are joined with newlines. The joined text is
 * finally handed to `sanitizeUserFacingText`.
 *
 * @param msg Assistant message whose content blocks are inspected.
 * @returns The sanitized text; built from an empty string when the message
 *   content is not an array or has no usable text block.
 */
export function extractAssistantText(msg: AssistantMessage): string {
  const visible: string[] = [];

  if (Array.isArray(msg.content)) {
    for (const block of msg.content) {
      if (!block || typeof block !== "object") continue;
      const fields = block as unknown as Record<string, unknown>;
      if (fields.type !== "text" || typeof fields.text !== "string") continue;

      const withoutXml = stripMinimaxToolCallXml(fields.text);
      const withoutToolText = stripDowngradedToolCallText(withoutXml);
      const cleaned = stripThinkingTagsFromText(withoutToolText).trim();
      if (cleaned) visible.push(cleaned);
    }
  }

  return sanitizeUserFacingText(visible.join("\n").trim());
}
|
|
|
|
/**
 * Collect the reasoning text carried by the `thinking` blocks of a message.
 *
 * @param msg Assistant message whose content blocks are inspected.
 * @returns The trimmed `thinking` strings of all thinking blocks joined with
 *   newlines (blank ones skipped), or an empty string when the content is not
 *   an array or holds no thinking text.
 */
export function extractAssistantThinking(msg: AssistantMessage): string {
  if (!Array.isArray(msg.content)) return "";

  const thoughts: string[] = [];
  for (const entry of msg.content) {
    if (!entry || typeof entry !== "object") continue;
    const fields = entry as unknown as Record<string, unknown>;
    if (fields.type !== "thinking" || typeof fields.thinking !== "string") continue;

    const body = fields.thinking.trim();
    if (body) thoughts.push(body);
  }

  return thoughts.join("\n").trim();
}
|
|
|
|
/**
 * Format reasoning text for display.
 *
 * The body is wrapped in underscores so markdown-friendly surfaces (Discord,
 * etc.) render it in italics, and it keeps the plain `Reasoning:` prefix so
 * existing parsing/detection keeps working.
 *
 * @param text Raw reasoning text.
 * @returns The formatted message, or an empty string for blank input.
 */
export function formatReasoningMessage(text: string): string {
  const body = text.trim();
  return body ? `Reasoning:\n_${body}_` : "";
}
|
|
|
|
type ThinkTaggedSplitBlock =
  | { type: "thinking"; thinking: string }
  | { type: "text"; text: string };

/**
 * Split text that emulates reasoning via think tags into ordered blocks.
 *
 * To avoid false positives the text is only treated as structured thinking
 * when, ignoring leading whitespace, it begins with an opening think tag
 * (`think`, `thinking`, `thought` or `antthinking`), as is common for
 * local/OpenAI-compatible providers. The opening-tag check is anchored to the
 * start of the text, so unrelated markup such as `<b>...</b>` in front of a
 * think tag does not qualify.
 *
 * Thinking segments are trimmed and dropped when empty; text segments are
 * emitted verbatim and dropped only when they are the empty string.
 *
 * @param text Raw text of a single content block.
 * @returns The blocks in document order, or `null` when the text does not
 *   start with an opening think tag, has no closing tag, leaves a thinking
 *   section unclosed, or yields no non-empty thinking block.
 */
export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | null {
  // Anchored on purpose: the first thing in the text must be an opening tag.
  const leadingOpenRe = /^<\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
  const closeRe = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
  if (!leadingOpenRe.test(text.trimStart())) return null;
  if (!closeRe.test(text)) return null;

  const scanRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
  const blocks: ThinkTaggedSplitBlock[] = [];
  let inThinking = false;
  let cursor = 0; // start of the pending visible-text segment
  let thinkingStart = 0; // start of the current thinking segment

  const pushText = (value: string): void => {
    if (!value) return;
    blocks.push({ type: "text", text: value });
  };
  const pushThinking = (value: string): void => {
    const cleaned = value.trim();
    if (!cleaned) return;
    blocks.push({ type: "thinking", thinking: cleaned });
  };

  for (const match of text.matchAll(scanRe)) {
    const index = match.index ?? 0;
    const isClose = match[1] === "/";

    if (!inThinking && !isClose) {
      // Opening tag: flush the visible text collected so far.
      pushText(text.slice(cursor, index));
      thinkingStart = index + match[0].length;
      inThinking = true;
      continue;
    }

    if (inThinking && isClose) {
      // Closing tag: emit the enclosed reasoning and resume visible text.
      pushThinking(text.slice(thinkingStart, index));
      cursor = index + match[0].length;
      inThinking = false;
    }
    // Nested openers and unmatched closers are left where they are.
  }

  // An unclosed thinking section means the text is not well-formed.
  if (inThinking) return null;
  pushText(text.slice(cursor));

  return blocks.some((block) => block.type === "thinking") ? blocks : null;
}
|
|
|
|
/**
 * Turn think-tagged text inside a message into real `thinking` blocks, in place.
 *
 * Nothing happens when the content is not an array, when the message already
 * carries a `thinking` block, or when no text block can be split by
 * `splitThinkingTaggedText`. Otherwise each splittable text block is replaced
 * by its parts: thinking parts become `thinking` blocks, text parts are
 * left-trimmed and kept only when non-empty. All other blocks keep their
 * position.
 *
 * @param message Assistant message; `message.content` is reassigned only when
 *   at least one text block was split.
 */
export function promoteThinkingTagsToBlocks(message: AssistantMessage): void {
  if (!Array.isArray(message.content)) return;
  // Respect structured reasoning that the provider already supplied.
  if (message.content.some((entry) => entry.type === "thinking")) return;

  const rebuilt: AssistantMessage["content"] = [];
  let didSplit = false;

  for (const entry of message.content) {
    const parts = entry.type === "text" ? splitThinkingTaggedText(entry.text) : null;
    if (parts === null) {
      rebuilt.push(entry);
      continue;
    }

    didSplit = true;
    for (const part of parts) {
      if (part.type === "thinking") {
        rebuilt.push({ type: "thinking", thinking: part.thinking });
        continue;
      }
      const visible = part.text.trimStart();
      if (visible) rebuilt.push({ type: "text", text: visible });
    }
  }

  if (didSplit) message.content = rebuilt;
}
|
|
|
|
/**
 * Concatenate the text enclosed by think tags (`think`, `thinking`, `thought`,
 * `antthinking`).
 *
 * Only content followed by another tag is collected, so whatever comes after
 * a final unclosed opening tag is not included. A nested opening tag keeps the
 * section open and its own markup is dropped.
 *
 * @param text Text that may contain think tags.
 * @returns The collected reasoning, trimmed; empty when there is none.
 */
export function extractThinkingFromTaggedText(text: string): string {
  if (!text) return "";

  const captured: string[] = [];
  let segmentStart = 0;
  let insideThinking = false;

  for (const tag of text.matchAll(/<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi)) {
    const tagStart = tag.index ?? 0;
    if (insideThinking) captured.push(text.slice(segmentStart, tagStart));
    // Any opening tag (re)enters thinking; any closing tag leaves it.
    insideThinking = tag[1] !== "/";
    segmentStart = tagStart + tag[0].length;
  }

  return captured.join("").trim();
}
|
|
|
|
/**
 * Extract reasoning from think-tagged text that may still be streaming.
 *
 * Content of closed sections, as returned by `extractThinkingFromTaggedText`,
 * wins when there is any. Otherwise, when the last opening tag has no closing
 * tag after it, everything after that opening tag is returned, trimmed.
 *
 * @param text Text received so far.
 * @returns The reasoning text, or an empty string when none is available.
 */
export function extractThinkingFromTaggedStream(text: string): string {
  if (!text) return "";

  const completed = extractThinkingFromTaggedText(text);
  if (completed) return completed;

  /** Last match of a global pattern in `text`, if any. */
  const lastMatchOf = (pattern: RegExp): RegExpMatchArray | undefined => {
    let latest: RegExpMatchArray | undefined;
    for (const found of text.matchAll(pattern)) latest = found;
    return latest;
  };

  const finalOpen = lastMatchOf(/<\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi);
  if (!finalOpen) return "";

  const finalClose = lastMatchOf(/<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi);
  if (finalClose && (finalClose.index ?? -1) > (finalOpen.index ?? -1)) {
    // The last section is already closed; `completed` is empty at this point.
    return completed;
  }

  // Still inside an open section: surface what has arrived so far.
  const bodyStart = (finalOpen.index ?? 0) + finalOpen[0].length;
  return text.slice(bodyStart).trim();
}
|
|
|
|
/**
 * Derive the detail string for a tool call from its name and arguments.
 *
 * @param toolName Name of the tool, passed to `resolveToolDisplay` as `name`.
 * @param args Arguments of the call, passed through unchanged.
 * @returns Whatever `formatToolDetail` produces for the resolved display.
 */
export function inferToolMetaFromArgs(toolName: string, args: unknown): string | undefined {
  return formatToolDetail(resolveToolDisplay({ name: toolName, args }));
}
|