fix(pi): harden image read mime
This commit is contained in:
44
src/agents/pi-embedded-utils.ts
Normal file
44
src/agents/pi-embedded-utils.ts
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||||
|
|
||||||
|
/**
 * Joins the text blocks of an assistant message into a single string.
 *
 * Only entries shaped like `{ type: "text", text: string }` contribute. Each
 * text is trimmed, blank results are dropped, and the rest are joined with
 * newlines.
 *
 * @param msg - Assistant message whose `content` is inspected.
 * @returns The combined text, or `""` when `content` is not an array or holds
 *   no non-blank text blocks.
 */
export function extractAssistantText(msg: AssistantMessage): string {
  if (!Array.isArray(msg.content)) return "";

  const pieces: string[] = [];
  for (const entry of msg.content as unknown[]) {
    // Skip anything that is not a non-null object (null is the only falsy object).
    if (entry === null || typeof entry !== "object") continue;
    const fields = entry as Record<string, unknown>;
    if (fields.type !== "text" || typeof fields.text !== "string") continue;
    const cleaned = fields.text.trim();
    if (cleaned) pieces.push(cleaned);
  }
  return pieces.join("\n").trim();
}
|
||||||
|
|
||||||
|
/**
 * Derives a short human-readable label for a tool call from its arguments.
 *
 * - `read`: the `path`, suffixed with `:<offset>-<offset + limit>` when both
 *   `offset` and `limit` are numbers.
 * - `edit` / `write`: the `path`.
 * - `bash`: the `command`.
 * - anything else (or when the preferred field is missing/empty): `path` if it
 *   is a string, otherwise `command`.
 *
 * @param toolName - Name of the tool being invoked.
 * @param args - Raw tool arguments; anything that is not a non-null object yields `undefined`.
 * @returns The label, or `undefined` when nothing usable is present.
 */
export function inferToolMetaFromArgs(
  toolName: string,
  args: unknown,
): string | undefined {
  if (args === null || typeof args !== "object") return undefined;

  const fields = args as Record<string, unknown>;
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;
  const asNumber = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  const filePath = asString(fields.path);
  const shellCommand = asString(fields.command);

  switch (toolName) {
    case "read": {
      if (!filePath) break;
      const start = asNumber(fields.offset);
      const count = asNumber(fields.limit);
      if (start === undefined || count === undefined) return filePath;
      return `${filePath}:${start}-${start + count}`;
    }
    case "edit":
    case "write":
      if (filePath) return filePath;
      break;
    case "bash":
      if (shellCommand) return shellCommand;
      break;
  }

  // Generic fallback; note `??` keeps an empty-string path rather than falling through.
  return filePath ?? shellCommand;
}
|
||||||
@@ -19,7 +19,6 @@ import {
|
|||||||
} from "@mariozechner/pi-ai";
|
} from "@mariozechner/pi-ai";
|
||||||
import {
|
import {
|
||||||
AgentSession,
|
AgentSession,
|
||||||
codingTools,
|
|
||||||
messageTransformer,
|
messageTransformer,
|
||||||
SessionManager,
|
SessionManager,
|
||||||
SettingsManager,
|
SettingsManager,
|
||||||
@@ -34,7 +33,12 @@ import { splitMediaFromOutput } from "../media/parse.js";
|
|||||||
import { enqueueCommand } from "../process/command-queue.js";
|
import { enqueueCommand } from "../process/command-queue.js";
|
||||||
import { resolveUserPath } from "../utils.js";
|
import { resolveUserPath } from "../utils.js";
|
||||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
|
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
|
||||||
|
import {
|
||||||
|
extractAssistantText,
|
||||||
|
inferToolMetaFromArgs,
|
||||||
|
} from "./pi-embedded-utils.js";
|
||||||
import { getAnthropicOAuthToken } from "./pi-oauth.js";
|
import { getAnthropicOAuthToken } from "./pi-oauth.js";
|
||||||
|
import { createClawdisCodingTools } from "./pi-tools.js";
|
||||||
import { buildAgentSystemPrompt } from "./system-prompt.js";
|
import { buildAgentSystemPrompt } from "./system-prompt.js";
|
||||||
import { loadWorkspaceBootstrapFiles } from "./workspace.js";
|
import { loadWorkspaceBootstrapFiles } from "./workspace.js";
|
||||||
|
|
||||||
@@ -86,49 +90,6 @@ function resolveModel(
|
|||||||
return model as Model<Api> | undefined;
|
return model as Model<Api> | undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Joins the text blocks of an assistant message into a single string.
 *
 * Only entries shaped like `{ type: "text", text: string }` contribute. Each
 * text is trimmed, blank results are dropped, and the rest are joined with
 * newlines.
 *
 * @param msg - Assistant message whose `content` is inspected.
 * @returns The combined text, or `""` when `content` is not an array or holds
 *   no non-blank text blocks.
 */
function extractAssistantText(msg: AssistantMessage): string {
  if (!Array.isArray(msg.content)) return "";

  const pieces: string[] = [];
  for (const entry of msg.content as unknown[]) {
    // Skip anything that is not a non-null object (null is the only falsy object).
    if (entry === null || typeof entry !== "object") continue;
    const fields = entry as Record<string, unknown>;
    if (fields.type !== "text" || typeof fields.text !== "string") continue;
    const cleaned = fields.text.trim();
    if (cleaned) pieces.push(cleaned);
  }
  return pieces.join("\n").trim();
}
|
|
||||||
|
|
||||||
/**
 * Derives a short human-readable label for a tool call from its arguments.
 *
 * - `read`: the `path`, suffixed with `:<offset>-<offset + limit>` when both
 *   `offset` and `limit` are numbers.
 * - `edit` / `write`: the `path`.
 * - `bash`: the `command`.
 * - anything else (or when the preferred field is missing/empty): `path` if it
 *   is a string, otherwise `command`.
 *
 * @param toolName - Name of the tool being invoked.
 * @param args - Raw tool arguments; anything that is not a non-null object yields `undefined`.
 * @returns The label, or `undefined` when nothing usable is present.
 */
function inferToolMetaFromArgs(
  toolName: string,
  args: unknown,
): string | undefined {
  if (args === null || typeof args !== "object") return undefined;

  const fields = args as Record<string, unknown>;
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;
  const asNumber = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  const filePath = asString(fields.path);
  const shellCommand = asString(fields.command);

  switch (toolName) {
    case "read": {
      if (!filePath) break;
      const start = asNumber(fields.offset);
      const count = asNumber(fields.limit);
      if (start === undefined || count === undefined) return filePath;
      return `${filePath}:${start}-${start + count}`;
    }
    case "edit":
    case "write":
      if (filePath) return filePath;
      break;
    case "bash":
      if (shellCommand) return shellCommand;
      break;
  }

  // Generic fallback; note `??` keeps an empty-string path rather than falling through.
  return filePath ?? shellCommand;
}
|
|
||||||
|
|
||||||
async function ensureSessionHeader(params: {
|
async function ensureSessionHeader(params: {
|
||||||
sessionFile: string;
|
sessionFile: string;
|
||||||
sessionId: string;
|
sessionId: string;
|
||||||
@@ -239,7 +200,7 @@ export async function runEmbeddedPiAgent(params: {
|
|||||||
systemPrompt,
|
systemPrompt,
|
||||||
model,
|
model,
|
||||||
thinkingLevel,
|
thinkingLevel,
|
||||||
tools: codingTools,
|
tools: createClawdisCodingTools(),
|
||||||
},
|
},
|
||||||
messageTransformer,
|
messageTransformer,
|
||||||
queueMode: settingsManager.getQueueMode(),
|
queueMode: settingsManager.getQueueMode(),
|
||||||
|
|||||||
32
src/agents/pi-tools.test.ts
Normal file
32
src/agents/pi-tools.test.ts
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { createClawdisCodingTools } from "./pi-tools.js";
|
||||||
|
|
||||||
|
// Base64 of a tiny PNG; written to disk under a misleading ".jpg" name below.
const PNG_1x1 =
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";

describe("createClawdisCodingTools", () => {
  it("sniffs mime from bytes when extension lies", async () => {
    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-pi-"));
    try {
      const filePath = path.join(tmpDir, "image.jpg"); // actually PNG bytes
      await fs.writeFile(filePath, Buffer.from(PNG_1x1, "base64"));

      const read = createClawdisCodingTools().find((t) => t.name === "read");
      expect(read).toBeTruthy();
      if (!read) throw new Error("read tool missing");

      const res = await read.execute("toolCallId", { path: filePath });
      const image = res.content.find(
        (b): b is { type: "image"; mimeType: string } =>
          !!b &&
          typeof b === "object" &&
          (b as Record<string, unknown>).type === "image" &&
          typeof (b as Record<string, unknown>).mimeType === "string",
      );

      // The wrapped read tool must report the type found in the bytes,
      // not the one implied by the ".jpg" extension.
      expect(image?.mimeType).toBe("image/png");
    } finally {
      // Remove the scratch directory even when an assertion above fails,
      // so repeated runs do not pile up temp directories.
      await fs.rm(tmpDir, { recursive: true, force: true });
    }
  });
});
|
||||||
127
src/agents/pi-tools.ts
Normal file
127
src/agents/pi-tools.ts
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
import type { AgentTool } from "@mariozechner/pi-ai";
|
||||||
|
import { codingTools, readTool } from "@mariozechner/pi-coding-agent";
|
||||||
|
|
||||||
|
import { detectMime } from "../media/mime.js";
|
||||||
|
|
||||||
|
/**
 * Image entry of a tool result. `data` holds the encoded payload (this file
 * decodes it as base64 when sniffing) and `mimeType` is the declared type.
 */
type ImageContentBlock = {
  type: "image";
  data: string;
  mimeType: string;
};

/** Plain-text entry of a tool result. */
type TextContentBlock = {
  type: "text";
  text: string;
};

/**
 * Minimal structural view of a tool's return value as used in this file:
 * a list of content blocks (image, text, or any other object shape that is
 * passed through untouched) plus optional opaque `details`.
 */
type ToolResult = {
  content: (ImageContentBlock | TextContentBlock | Record<string, unknown>)[];
  details?: unknown;
};
|
||||||
|
|
||||||
|
/**
 * Guesses a MIME type from the leading bytes of a base64 payload.
 *
 * Only the first 256 characters (rounded down to a whole number of
 * 4-character base64 groups) are decoded and handed to `detectMime`.
 *
 * @param base64 - Base64 text; surrounding whitespace is ignored.
 * @returns Whatever `detectMime` reports for the decoded prefix, or
 *   `undefined` when the input is blank, shorter than 8 usable characters,
 *   or decoding/detection throws.
 */
function sniffMimeFromBase64(base64: string): string | undefined {
  const payload = base64.trim();
  if (payload.length === 0) return undefined;

  // Whole base64 quads only, capped at 256 characters.
  const capped = Math.min(payload.length, 256);
  const prefixLen = capped - (capped % 4);
  if (prefixLen < 8) return undefined;

  try {
    return detectMime({
      buffer: Buffer.from(payload.slice(0, prefixLen), "base64"),
    });
  } catch {
    // Best-effort sniffing: any failure just means "unknown".
    return undefined;
  }
}
|
||||||
|
|
||||||
|
/**
 * Replaces the MIME type inside a read-tool image header line.
 *
 * pi-coding-agent uses: "Read image file [image/png]". Text that starts with
 * that prefix and ends with "]" is rebuilt with `mimeType`; any other text is
 * returned unchanged.
 *
 * @param text - Text block content to inspect.
 * @param mimeType - MIME type to place between the brackets.
 * @returns The rewritten header, or `text` itself when it is not a header.
 */
function rewriteReadImageHeader(text: string, mimeType: string): string {
  const looksLikeHeader =
    text.startsWith("Read image file [") && text.endsWith("]");
  return looksLikeHeader ? `Read image file [${mimeType}]` : text;
}
|
||||||
|
|
||||||
|
/**
 * Corrects the declared MIME type of a read-tool image result using the
 * image bytes themselves.
 *
 * The first well-formed image block (string `data` and `mimeType`) is sniffed.
 * When the sniffed type is an image type that differs from the declared one,
 * a copy of the result is returned in which that image block carries the
 * sniffed type and any "Read image file [...]" text header is rewritten to
 * match. Other blocks are passed through untouched, including any other image
 * blocks, whose bytes were never inspected.
 *
 * @param result - Raw result returned by the underlying read tool.
 * @param filePath - Path used only for error messages.
 * @returns `result` itself when there is nothing to fix (no image block, type
 *   could not be sniffed, or the declared type already matches); otherwise a
 *   shallow copy with corrected content.
 * @throws Error when the image payload is blank, or when the bytes are
 *   recognised as a non-image type.
 */
function normalizeReadImageResult(
  result: ToolResult,
  filePath: string,
): ToolResult {
  const content = Array.isArray(result.content) ? result.content : [];

  const image = content.find(
    (b): b is ImageContentBlock =>
      !!b &&
      typeof b === "object" &&
      (b as ImageContentBlock).type === "image" &&
      typeof (b as ImageContentBlock).data === "string" &&
      typeof (b as ImageContentBlock).mimeType === "string",
  );
  if (!image) return result;

  if (!image.data.trim()) {
    throw new Error(`read: image payload is empty (${filePath})`);
  }

  const sniffed = sniffMimeFromBase64(image.data);
  // Unknown bytes: trust the declared type rather than guessing.
  if (!sniffed) return result;

  if (!sniffed.startsWith("image/")) {
    throw new Error(
      `read: file looks like ${sniffed} but was treated as ${image.mimeType} (${filePath})`,
    );
  }

  if (sniffed === image.mimeType) return result;

  const nextContent = content.map((block) => {
    // Relabel only the block whose bytes were actually sniffed; stamping the
    // sniffed type onto every image block would mislabel unrelated images.
    if (block === image) {
      return { ...image, mimeType: sniffed };
    }
    if (
      block &&
      typeof block === "object" &&
      (block as TextContentBlock).type === "text" &&
      typeof (block as TextContentBlock).text === "string"
    ) {
      const textBlock = block as TextContentBlock;
      return {
        ...textBlock,
        text: rewriteReadImageHeader(textBlock.text, sniffed),
      };
    }
    return block;
  });

  return { ...result, content: nextContent };
}
|
||||||
|
|
||||||
|
/**
 * Wraps a read tool so that its image results are passed through
 * `normalizeReadImageResult` before being returned.
 *
 * All other properties of `base` are copied as-is; only `execute` is replaced.
 *
 * @param base - The tool to wrap.
 * @returns A tool with the same shape whose `execute` delegates to
 *   `base.execute` and then normalises the result.
 */
function createClawdisReadTool(base: AgentTool): AgentTool {
  return {
    ...base,
    execute: async (toolCallId, params, signal) => {
      const raw = await base.execute(toolCallId, params, signal);

      // The path is used only to label error messages from normalisation.
      const requestedPath =
        params && typeof params === "object"
          ? (params as Record<string, unknown>).path
          : undefined;
      const label =
        typeof requestedPath === "string" ? requestedPath : "<unknown>";

      return normalizeReadImageResult(raw as ToolResult, label);
    },
  };
}
|
||||||
|
|
||||||
|
/**
 * Builds the coding tool list used by the embedded agent: the stock
 * `codingTools`, with the tool named like `readTool` swapped for the wrapped
 * variant from `createClawdisReadTool`.
 *
 * @returns A new array; `codingTools` itself is not modified.
 */
export function createClawdisCodingTools(): AgentTool[] {
  const tools: AgentTool[] = [];
  for (const tool of codingTools) {
    tools.push(
      tool.name === readTool.name ? createClawdisReadTool(tool) : tool,
    );
  }
  return tools;
}
|
||||||
Reference in New Issue
Block a user