Files
clawdbot/src/agents/pi-embedded-utils.ts
2026-01-16 03:01:23 +00:00

376 lines
12 KiB
TypeScript

import type { AssistantMessage } from "@mariozechner/pi-ai";
import { sanitizeUserFacingText } from "./pi-embedded-helpers.js";
import { formatToolDetail, resolveToolDisplay } from "./tool-display.js";
/**
* Strip malformed Minimax tool invocations that leak into text content.
* Minimax sometimes embeds tool calls as XML in text blocks instead of
* proper structured tool calls. This removes:
* - <invoke name="...">...</invoke> blocks
* - </minimax:tool_call> closing tags
*/
function stripMinimaxToolCallXml(text: string): string {
if (!text) return text;
if (!/minimax:tool_call/i.test(text)) return text;
// Remove <invoke ...>...</invoke> blocks (non-greedy to handle multiple).
let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
// Remove stray minimax tool tags.
cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, "");
return cleaned;
}
/**
* Strip downgraded tool call text representations that leak into text content.
* When replaying history to Gemini, tool calls without `thought_signature` are
* downgraded to text blocks like `[Tool Call: name (ID: ...)]`. These should
* not be shown to users.
*/
function stripDowngradedToolCallText(text: string): string {
if (!text) return text;
if (!/\[Tool (?:Call|Result)/i.test(text)) return text;
const consumeJsonish = (
input: string,
start: number,
options?: { allowLeadingNewlines?: boolean },
): number | null => {
const { allowLeadingNewlines = false } = options ?? {};
let index = start;
while (index < input.length) {
const ch = input[index];
if (ch === " " || ch === "\t") {
index += 1;
continue;
}
if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
index += 1;
continue;
}
break;
}
if (index >= input.length) return null;
const startChar = input[index];
if (startChar === "{" || startChar === "[") {
let depth = 0;
let inString = false;
let escape = false;
for (let i = index; i < input.length; i += 1) {
const ch = input[i];
if (inString) {
if (escape) {
escape = false;
} else if (ch === "\\") {
escape = true;
} else if (ch === '"') {
inString = false;
}
continue;
}
if (ch === '"') {
inString = true;
continue;
}
if (ch === "{" || ch === "[") {
depth += 1;
continue;
}
if (ch === "}" || ch === "]") {
depth -= 1;
if (depth === 0) return i + 1;
}
}
return null;
}
if (startChar === '"') {
let escape = false;
for (let i = index + 1; i < input.length; i += 1) {
const ch = input[i];
if (escape) {
escape = false;
continue;
}
if (ch === "\\") {
escape = true;
continue;
}
if (ch === '"') return i + 1;
}
return null;
}
let end = index;
while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
end += 1;
}
return end;
};
const stripToolCalls = (input: string): string => {
const markerRe = /\[Tool Call:[^\]]*\]/gi;
let result = "";
let cursor = 0;
for (const match of input.matchAll(markerRe)) {
const start = match.index ?? 0;
if (start < cursor) continue;
result += input.slice(cursor, start);
let index = start + match[0].length;
while (index < input.length && (input[index] === " " || input[index] === "\t")) {
index += 1;
}
if (input[index] === "\r") {
index += 1;
if (input[index] === "\n") index += 1;
} else if (input[index] === "\n") {
index += 1;
}
while (index < input.length && (input[index] === " " || input[index] === "\t")) {
index += 1;
}
if (input.slice(index, index + 9).toLowerCase() === "arguments") {
index += 9;
if (input[index] === ":") index += 1;
if (input[index] === " ") index += 1;
const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
if (end !== null) index = end;
}
if (
(input[index] === "\n" || input[index] === "\r") &&
(result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
) {
if (input[index] === "\r") index += 1;
if (input[index] === "\n") index += 1;
}
cursor = index;
}
result += input.slice(cursor);
return result;
};
// Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
let cleaned = stripToolCalls(text);
// Remove [Tool Result for ID ...] blocks and their content.
cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");
return cleaned.trim();
}
/**
* Strip thinking tags and their content from text.
* This is a safety net for cases where the model outputs <think> tags
* that slip through other filtering mechanisms.
*/
function stripThinkingTagsFromText(text: string): string {
if (!text) return text;
// Quick check to avoid regex overhead when no tags present.
if (!/(?:think(?:ing)?|thought|antthinking)/i.test(text)) return text;
const tagRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;
let result = "";
let lastIndex = 0;
let inThinking = false;
for (const match of text.matchAll(tagRe)) {
const idx = match.index ?? 0;
const isClose = match[1] === "/";
if (!inThinking && !isClose) {
// Opening tag - save text before it.
result += text.slice(lastIndex, idx);
inThinking = true;
} else if (inThinking && isClose) {
// Closing tag - skip content inside.
inThinking = false;
}
lastIndex = idx + match[0].length;
}
// Append remaining text if we're not inside thinking.
if (!inThinking) {
result += text.slice(lastIndex);
}
return result.trim();
}
export function extractAssistantText(msg: AssistantMessage): string {
const isTextBlock = (block: unknown): block is { type: "text"; text: string } => {
if (!block || typeof block !== "object") return false;
const rec = block as Record<string, unknown>;
return rec.type === "text" && typeof rec.text === "string";
};
const blocks = Array.isArray(msg.content)
? msg.content
.filter(isTextBlock)
.map((c) =>
stripThinkingTagsFromText(
stripDowngradedToolCallText(stripMinimaxToolCallXml(c.text)),
).trim(),
)
.filter(Boolean)
: [];
const extracted = blocks.join("\n").trim();
return sanitizeUserFacingText(extracted);
}
export function extractAssistantThinking(msg: AssistantMessage): string {
if (!Array.isArray(msg.content)) return "";
const blocks = msg.content
.map((block) => {
if (!block || typeof block !== "object") return "";
const record = block as unknown as Record<string, unknown>;
if (record.type === "thinking" && typeof record.thinking === "string") {
return record.thinking.trim();
}
return "";
})
.filter(Boolean);
return blocks.join("\n").trim();
}
export function formatReasoningMessage(text: string): string {
const trimmed = text.trim();
if (!trimmed) return "";
// Show reasoning in italics (cursive) for markdown-friendly surfaces (Discord, etc.).
// Keep the plain "Reasoning:" prefix so existing parsing/detection keeps working.
return `Reasoning:\n_${trimmed}_`;
}
type ThinkTaggedSplitBlock =
| { type: "thinking"; thinking: string }
| { type: "text"; text: string };
export function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | null {
const trimmedStart = text.trimStart();
// Avoid false positives: only treat it as structured thinking when it begins
// with a think tag (common for local/OpenAI-compat providers that emulate
// reasoning blocks via tags).
if (!trimmedStart.startsWith("<")) return null;
const openRe = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
const closeRe = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
if (!openRe.test(trimmedStart)) return null;
if (!closeRe.test(text)) return null;
const scanRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
let inThinking = false;
let cursor = 0;
let thinkingStart = 0;
const blocks: ThinkTaggedSplitBlock[] = [];
const pushText = (value: string) => {
if (!value) return;
blocks.push({ type: "text", text: value });
};
const pushThinking = (value: string) => {
const cleaned = value.trim();
if (!cleaned) return;
blocks.push({ type: "thinking", thinking: cleaned });
};
for (const match of text.matchAll(scanRe)) {
const index = match.index ?? 0;
const isClose = Boolean(match[1]?.includes("/"));
if (!inThinking && !isClose) {
pushText(text.slice(cursor, index));
thinkingStart = index + match[0].length;
inThinking = true;
continue;
}
if (inThinking && isClose) {
pushThinking(text.slice(thinkingStart, index));
cursor = index + match[0].length;
inThinking = false;
}
}
if (inThinking) return null;
pushText(text.slice(cursor));
const hasThinking = blocks.some((b) => b.type === "thinking");
if (!hasThinking) return null;
return blocks;
}
export function promoteThinkingTagsToBlocks(message: AssistantMessage): void {
if (!Array.isArray(message.content)) return;
const hasThinkingBlock = message.content.some((block) => block.type === "thinking");
if (hasThinkingBlock) return;
const next: AssistantMessage["content"] = [];
let changed = false;
for (const block of message.content) {
if (block.type !== "text") {
next.push(block);
continue;
}
const split = splitThinkingTaggedText(block.text);
if (!split) {
next.push(block);
continue;
}
changed = true;
for (const part of split) {
if (part.type === "thinking") {
next.push({ type: "thinking", thinking: part.thinking });
} else if (part.type === "text") {
const cleaned = part.text.trimStart();
if (cleaned) next.push({ type: "text", text: cleaned });
}
}
}
if (!changed) return;
message.content = next;
}
export function extractThinkingFromTaggedText(text: string): string {
if (!text) return "";
const scanRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
let result = "";
let lastIndex = 0;
let inThinking = false;
for (const match of text.matchAll(scanRe)) {
const idx = match.index ?? 0;
if (inThinking) {
result += text.slice(lastIndex, idx);
}
const isClose = match[1] === "/";
inThinking = !isClose;
lastIndex = idx + match[0].length;
}
return result.trim();
}
export function extractThinkingFromTaggedStream(text: string): string {
if (!text) return "";
const closed = extractThinkingFromTaggedText(text);
if (closed) return closed;
const openRe = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
const closeRe = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
const openMatches = [...text.matchAll(openRe)];
if (openMatches.length === 0) return "";
const closeMatches = [...text.matchAll(closeRe)];
const lastOpen = openMatches[openMatches.length - 1];
const lastClose = closeMatches[closeMatches.length - 1];
if (lastClose && (lastClose.index ?? -1) > (lastOpen.index ?? -1)) {
return closed;
}
const start = (lastOpen.index ?? 0) + lastOpen[0].length;
return text.slice(start).trim();
}
export function inferToolMetaFromArgs(toolName: string, args: unknown): string | undefined {
const display = resolveToolDisplay({ name: toolName, args });
return formatToolDetail(display);
}