114 lines
3.7 KiB
TypeScript
114 lines
3.7 KiB
TypeScript
import type { AgentTool, AgentToolResult } from "@mariozechner/pi-ai";
|
|
import { codingTools, readTool } from "@mariozechner/pi-coding-agent";
|
|
|
|
import { detectMime } from "../media/mime.js";
|
|
|
|
// TODO(steipete): Remove this wrapper once pi-mono ships file-magic MIME detection
// for `read` image payloads in `@mariozechner/pi-coding-agent` (then switch back to `codingTools` directly).
|
|
/** One entry of the `content` array carried by an `AgentToolResult`. */
type ToolContentBlock = AgentToolResult<unknown>["content"][number];

/** The member of `ToolContentBlock` whose discriminant is `type: "image"`. */
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;

/** The member of `ToolContentBlock` whose discriminant is `type: "text"`. */
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
|
|
|
/** Upper bound on how many base64 characters are decoded for sniffing. */
const SNIFF_MAX_BASE64_CHARS = 256;

/** Prefixes shorter than this many (aligned) base64 characters are not sniffed. */
const SNIFF_MIN_BASE64_CHARS = 8;

/**
 * Guesses a MIME type from the leading bytes of a base64 payload.
 *
 * Only a prefix of the payload is decoded: at most {@link SNIFF_MAX_BASE64_CHARS}
 * characters, rounded down to a multiple of four so the slice ends on a whole
 * base64 group.
 *
 * @param base64 - Base64 text; leading and trailing whitespace is ignored.
 * @returns The value reported by `detectMime` for the decoded prefix, or
 *   `undefined` when the input is blank, when fewer than
 *   {@link SNIFF_MIN_BASE64_CHARS} aligned characters are available, or when
 *   decoding/detection throws.
 */
function sniffMimeFromBase64(base64: string): string | undefined {
  const trimmed = base64.trim();
  if (!trimmed) return undefined;

  const take = Math.min(SNIFF_MAX_BASE64_CHARS, trimmed.length);
  // Drop the remainder so the slice is a whole number of 4-character groups.
  const sliceLen = take - (take % 4);
  if (sliceLen < SNIFF_MIN_BASE64_CHARS) return undefined;

  try {
    const head = Buffer.from(trimmed.slice(0, sliceLen), "base64");
    return detectMime({ buffer: head });
  } catch {
    // Sniffing is best-effort: any failure means "unknown", not an error.
    return undefined;
  }
}
|
|
|
|
/**
 * Matches the leading `Read image file [<mime>]` header only; anything after
 * the closing bracket is outside the match.
 */
const READ_IMAGE_HEADER_PATTERN = /^Read image file \[[^\]\r\n]*\]/;

/**
 * Replaces the MIME type shown in a `Read image file [...]` header.
 *
 * Only the bracketed token directly after `Read image file ` is replaced; any
 * text following the header is preserved. Text that does not start with a
 * complete header is returned unchanged.
 *
 * @param text - Text block content to inspect.
 * @param mimeType - MIME type to place inside the brackets.
 * @returns The text with its header updated, or `text` itself when no header
 *   is present.
 */
function rewriteReadImageHeader(text: string, mimeType: string): string {
  // pi-coding-agent uses: "Read image file [image/png]"
  // A function replacer keeps `$`-sequences in `mimeType` literal.
  return text.replace(READ_IMAGE_HEADER_PATTERN, () => `Read image file [${mimeType}]`);
}
|
|
|
|
/**
 * Makes the declared MIME type of a `read` image result agree with its bytes.
 *
 * The first content block that is an image with string `data` and `mimeType`
 * is sniffed via `sniffMimeFromBase64`. When the sniffed type is an `image/*`
 * type that differs from the declared one, a copy of the result is returned in
 * which that block carries the sniffed type and every text block has been
 * passed through `rewriteReadImageHeader`. Other image blocks are left as they
 * are, because their payloads were not inspected.
 *
 * @param result - Tool result to inspect; it is never mutated.
 * @param filePath - Path used only to label thrown error messages.
 * @returns `result` itself when there is no such image block, when sniffing
 *   yields nothing, or when the declared type already matches; otherwise a
 *   shallow copy with corrected `content`.
 * @throws Error when the image payload is blank, or when the sniffed type does
 *   not start with `image/`.
 */
function normalizeReadImageResult(
  result: AgentToolResult<unknown>,
  filePath: string,
): AgentToolResult<unknown> {
  const content = Array.isArray(result.content) ? result.content : [];

  const image = content.find(
    (b): b is ImageContentBlock =>
      !!b &&
      typeof b === "object" &&
      (b as { type?: unknown }).type === "image" &&
      typeof (b as { data?: unknown }).data === "string" &&
      typeof (b as { mimeType?: unknown }).mimeType === "string",
  );
  if (!image) return result;

  if (!image.data.trim()) {
    throw new Error(`read: image payload is empty (${filePath})`);
  }

  const sniffed = sniffMimeFromBase64(image.data);
  // Nothing detected: keep whatever the tool declared.
  if (!sniffed) return result;

  if (!sniffed.startsWith("image/")) {
    throw new Error(
      `read: file looks like ${sniffed} but was treated as ${image.mimeType} (${filePath})`,
    );
  }

  if (sniffed === image.mimeType) return result;

  const nextContent = content.map((block) => {
    // Only relabel the block whose payload was actually sniffed.
    if (block === image) {
      return { ...image, mimeType: sniffed } satisfies ImageContentBlock;
    }
    if (
      block &&
      typeof block === "object" &&
      (block as { type?: unknown }).type === "text" &&
      typeof (block as { text?: unknown }).text === "string"
    ) {
      const b = block as TextContentBlock & { text: string };
      return { ...b, text: rewriteReadImageHeader(b.text, sniffed) } satisfies TextContentBlock;
    }
    return block;
  });

  return { ...result, content: nextContent };
}
|
|
|
|
// `AgentTool` with both type arguments left as `any`, used where tools are handled generically in this file.
type AnyAgentTool = AgentTool<any, any>;
|
|
|
|
/**
 * Wraps a tool so that every result it produces is passed through
 * `normalizeReadImageResult`.
 *
 * All properties of `base` are copied as-is; only `execute` is replaced. The
 * replacement awaits `base.execute` with the same three arguments and then
 * normalizes the result, using `params.path` (when it is a string) to label
 * errors and `"<unknown>"` otherwise.
 *
 * @param base - Tool to wrap.
 * @returns A new tool object; `base` is not modified.
 */
function createClawdisReadTool(base: AnyAgentTool): AnyAgentTool {
  return {
    ...base,
    execute: async (toolCallId, params, signal) => {
      const result: AgentToolResult<unknown> = await base.execute(toolCallId, params, signal);

      // `params` is untyped here, so only trust `path` after checking it at runtime.
      const path: unknown =
        params && typeof params === "object"
          ? (params as Record<string, unknown>).path
          : undefined;
      const filePath = typeof path === "string" ? path : "<unknown>";

      return normalizeReadImageResult(result, filePath);
    },
  };
}
|
|
|
|
/**
 * Builds the coding tool list with the `read` tool replaced by its wrapped
 * variant.
 *
 * Every entry of `codingTools` is returned unchanged except the one whose
 * `name` equals `readTool.name`, which is passed through
 * `createClawdisReadTool`.
 *
 * @returns A new array; `codingTools` itself is not modified.
 */
export function createClawdisCodingTools(): AnyAgentTool[] {
  // Single widening cast for the whole list; elements need no further assertion.
  const tools = codingTools as unknown as AnyAgentTool[];
  return tools.map((tool) => (tool.name === readTool.name ? createClawdisReadTool(tool) : tool));
}
|