fix: resolve camera tool handling

This commit is contained in:
Peter Steinberger
2026-01-02 17:44:25 +00:00
parent 8e48cffe3b
commit 43f6b9ef32
7 changed files with 105 additions and 37 deletions

View File

@@ -1,6 +1,8 @@
import { beforeEach, describe, expect, it, vi } from "vitest"; import { beforeEach, describe, expect, it, vi } from "vitest";
const callGateway = vi.fn(); const { callGateway } = vi.hoisted(() => ({
callGateway: vi.fn(),
}));
vi.mock("../gateway/call.js", () => ({ callGateway })); vi.mock("../gateway/call.js", () => ({ callGateway }));
vi.mock("../media/image-ops.js", () => ({ vi.mock("../media/image-ops.js", () => ({

View File

@@ -865,20 +865,6 @@ function createCanvasTool(): AnyAgentTool {
Number.isFinite(params.quality) Number.isFinite(params.quality)
? params.quality ? params.quality
: undefined; : undefined;
const delayMs =
typeof params.delayMs === "number" &&
Number.isFinite(params.delayMs)
? params.delayMs
: undefined;
const deviceId =
typeof params.deviceId === "string" && params.deviceId.trim()
? params.deviceId.trim()
: undefined;
const delayMs =
typeof params.delayMs === "number" &&
Number.isFinite(params.delayMs)
? params.delayMs
: undefined;
const raw = (await invoke("canvas.snapshot", { const raw = (await invoke("canvas.snapshot", {
format, format,
maxWidth, maxWidth,
@@ -889,8 +875,7 @@ function createCanvasTool(): AnyAgentTool {
ext: payload.format === "jpeg" ? "jpg" : payload.format, ext: payload.format === "jpeg" ? "jpg" : payload.format,
}); });
await writeBase64ToFile(filePath, payload.base64); await writeBase64ToFile(filePath, payload.base64);
const mimeType = const mimeType = imageMimeFromFormat(payload.format) ?? "image/png";
imageMimeFromFormat(payload.format) ?? "image/png";
return await imageResult({ return await imageResult({
label: "canvas:snapshot", label: "canvas:snapshot",
path: filePath, path: filePath,
@@ -1139,6 +1124,15 @@ function createNodesTool(): AnyAgentTool {
Number.isFinite(params.quality) Number.isFinite(params.quality)
? params.quality ? params.quality
: undefined; : undefined;
const delayMs =
typeof params.delayMs === "number" &&
Number.isFinite(params.delayMs)
? params.delayMs
: undefined;
const deviceId =
typeof params.deviceId === "string" && params.deviceId.trim()
? params.deviceId.trim()
: undefined;
const content: AgentToolResult<unknown>["content"] = []; const content: AgentToolResult<unknown>["content"] = [];
const details: Array<Record<string, unknown>> = []; const details: Array<Record<string, unknown>> = [];
@@ -1158,10 +1152,23 @@ function createNodesTool(): AnyAgentTool {
idempotencyKey: crypto.randomUUID(), idempotencyKey: crypto.randomUUID(),
})) as { payload?: unknown }; })) as { payload?: unknown };
const payload = parseCameraSnapPayload(raw?.payload); const payload = parseCameraSnapPayload(raw?.payload);
const normalizedFormat = payload.format.toLowerCase();
if (
normalizedFormat !== "jpg" &&
normalizedFormat !== "jpeg" &&
normalizedFormat !== "png"
) {
throw new Error(
`unsupported camera.snap format: ${payload.format}`,
);
}
const isJpeg =
normalizedFormat === "jpg" || normalizedFormat === "jpeg";
const filePath = cameraTempPath({ const filePath = cameraTempPath({
kind: "snap", kind: "snap",
facing, facing,
ext: payload.format === "jpeg" ? "jpg" : payload.format, ext: isJpeg ? "jpg" : "png",
}); });
await writeBase64ToFile(filePath, payload.base64); await writeBase64ToFile(filePath, payload.base64);
content.push({ type: "text", text: `MEDIA:${filePath}` }); content.push({ type: "text", text: `MEDIA:${filePath}` });
@@ -1169,7 +1176,8 @@ function createNodesTool(): AnyAgentTool {
type: "image", type: "image",
data: payload.base64, data: payload.base64,
mimeType: mimeType:
imageMimeFromFormat(payload.format) ?? "image/png", imageMimeFromFormat(payload.format) ??
(isJpeg ? "image/jpeg" : "image/png"),
}); });
details.push({ details.push({
facing, facing,

View File

@@ -32,4 +32,32 @@ describe("tool image sanitizing", () => {
expect(size).toBeLessThanOrEqual(5 * 1024 * 1024); expect(size).toBeLessThanOrEqual(5 * 1024 * 1024);
expect(image.mimeType).toBe("image/jpeg"); expect(image.mimeType).toBe("image/jpeg");
}, 20_000); }, 20_000);
// Regression case: image data that is really a JPEG but is labelled as PNG
// is expected to come back from sanitizing labelled as JPEG.
it("corrects mismatched jpeg mimeType", async () => {
  // Render a 10x10 solid red square and encode it as JPEG.
  const redSquare = sharp({
    create: {
      width: 10,
      height: 10,
      channels: 3,
      background: { r: 255, g: 0, b: 0 },
    },
  });
  const jpegBytes = await redSquare.jpeg().toBuffer();

  // The declared mimeType is deliberately wrong for the encoded data.
  const input = [
    {
      type: "image" as const,
      data: jpegBytes.toString("base64"),
      mimeType: "image/png",
    },
  ];

  const sanitized = await sanitizeContentBlocksImages(input, "test");
  const imageBlock = sanitized.find((entry) => entry.type === "image");
  if (!imageBlock || imageBlock.type !== "image") {
    throw new Error("expected image block");
  }

  expect(imageBlock.mimeType).toBe("image/jpeg");
});
}); });

View File

@@ -31,6 +31,15 @@ function isTextBlock(block: unknown): block is TextContentBlock {
return rec.type === "text" && typeof rec.text === "string"; return rec.type === "text" && typeof rec.text === "string";
} }
/**
 * Guesses an image MIME type from the leading characters of a base64 payload.
 *
 * Surrounding whitespace is ignored. Only the prefixes in the table below are
 * recognised; any other input, including an empty or whitespace-only string,
 * yields `undefined`.
 *
 * @param base64 - Base64-encoded data to inspect.
 * @returns `"image/jpeg"`, `"image/png"`, `"image/gif"`, or `undefined` when
 *   no known prefix matches.
 */
function inferMimeTypeFromBase64(base64: string): string | undefined {
  const payload = base64.trim();
  if (payload.length === 0) {
    return undefined;
  }

  // Checked in order; the first matching prefix decides the result.
  const prefixToMime: ReadonlyArray<readonly [string, string]> = [
    ["/9j/", "image/jpeg"],
    ["iVBOR", "image/png"],
    ["R0lGOD", "image/gif"],
  ];

  for (const [prefix, mime] of prefixToMime) {
    if (payload.startsWith(prefix)) {
      return mime;
    }
  }
  return undefined;
}
async function resizeImageBase64IfNeeded(params: { async function resizeImageBase64IfNeeded(params: {
base64: string; base64: string;
mimeType: string; mimeType: string;
@@ -127,13 +136,19 @@ export async function sanitizeContentBlocksImages(
} }
try { try {
const inferredMimeType = inferMimeTypeFromBase64(data);
const mimeType = inferredMimeType ?? block.mimeType;
const resized = await resizeImageBase64IfNeeded({ const resized = await resizeImageBase64IfNeeded({
base64: data, base64: data,
mimeType: block.mimeType, mimeType,
maxDimensionPx, maxDimensionPx,
maxBytes, maxBytes,
}); });
out.push({ ...block, data: resized.base64, mimeType: resized.mimeType }); out.push({
...block,
data: resized.base64,
mimeType: resized.resized ? resized.mimeType : mimeType,
});
} catch (err) { } catch (err) {
out.push({ out.push({
type: "text", type: "text",

View File

@@ -43,6 +43,8 @@ type NodesRpcOpts = {
format?: string; format?: string;
maxWidth?: string; maxWidth?: string;
quality?: string; quality?: string;
delayMs?: string;
deviceId?: string;
duration?: string; duration?: string;
screen?: string; screen?: string;
fps?: string; fps?: string;
@@ -888,7 +890,9 @@ export function registerNodesCli(program: Command) {
const name = const name =
typeof device.name === "string" ? device.name : "Unknown Camera"; typeof device.name === "string" ? device.name : "Unknown Camera";
const position = const position =
typeof device.position === "string" ? device.position : "unspecified"; typeof device.position === "string"
? device.position
: "unspecified";
defaultRuntime.log(`${name} (${position})${id ? `${id}` : ""}`); defaultRuntime.log(`${name} (${position})${id ? `${id}` : ""}`);
} }
} catch (err) { } catch (err) {
@@ -908,7 +912,10 @@ export function registerNodesCli(program: Command) {
.option("--device-id <id>", "Camera device id (from nodes camera list)") .option("--device-id <id>", "Camera device id (from nodes camera list)")
.option("--max-width <px>", "Max width in px (optional)") .option("--max-width <px>", "Max width in px (optional)")
.option("--quality <0-1>", "JPEG quality (default 0.9)") .option("--quality <0-1>", "JPEG quality (default 0.9)")
.option("--delay-ms <ms>", "Delay before capture in ms (macOS default 2000)") .option(
"--delay-ms <ms>",
"Delay before capture in ms (macOS default 2000)",
)
.option( .option(
"--invoke-timeout <ms>", "--invoke-timeout <ms>",
"Node invoke timeout in ms (default 20000)", "Node invoke timeout in ms (default 20000)",
@@ -940,7 +947,9 @@ export function registerNodesCli(program: Command) {
const delayMs = opts.delayMs const delayMs = opts.delayMs
? Number.parseInt(String(opts.delayMs), 10) ? Number.parseInt(String(opts.delayMs), 10)
: undefined; : undefined;
const deviceId = opts.deviceId ? String(opts.deviceId).trim() : undefined; const deviceId = opts.deviceId
? String(opts.deviceId).trim()
: undefined;
const timeoutMs = opts.invokeTimeout const timeoutMs = opts.invokeTimeout
? Number.parseInt(String(opts.invokeTimeout), 10) ? Number.parseInt(String(opts.invokeTimeout), 10)
: undefined; : undefined;
@@ -1037,20 +1046,22 @@ export function registerNodesCli(program: Command) {
const timeoutMs = opts.invokeTimeout const timeoutMs = opts.invokeTimeout
? Number.parseInt(String(opts.invokeTimeout), 10) ? Number.parseInt(String(opts.invokeTimeout), 10)
: undefined; : undefined;
const deviceId = opts.deviceId ? String(opts.deviceId).trim() : undefined; const deviceId = opts.deviceId
? String(opts.deviceId).trim()
: undefined;
const invokeParams: Record<string, unknown> = { const invokeParams: Record<string, unknown> = {
nodeId, nodeId,
command: "camera.clip", command: "camera.clip",
params: { params: {
facing, facing,
durationMs: Number.isFinite(durationMs) ? durationMs : undefined, durationMs: Number.isFinite(durationMs) ? durationMs : undefined,
includeAudio, includeAudio,
format: "mp4", format: "mp4",
deviceId: deviceId || undefined, deviceId: deviceId || undefined,
}, },
idempotencyKey: randomIdempotencyKey(), idempotencyKey: randomIdempotencyKey(),
}; };
if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) { if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
invokeParams.timeoutMs = timeoutMs; invokeParams.timeoutMs = timeoutMs;
} }

View File

@@ -419,7 +419,9 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
return; return;
} }
const channelName = const channelName =
interaction.channel && "name" in interaction.channel interaction.channel &&
"name" in interaction.channel &&
typeof interaction.channel.name === "string"
? interaction.channel.name ? interaction.channel.name
: undefined; : undefined;
const channelSlug = channelName const channelSlug = channelName
@@ -459,7 +461,9 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
} }
} else if (isGroupDm) { } else if (isGroupDm) {
const channelName = const channelName =
interaction.channel && "name" in interaction.channel interaction.channel &&
"name" in interaction.channel &&
typeof interaction.channel.name === "string"
? interaction.channel.name ? interaction.channel.name
: undefined; : undefined;
const channelSlug = channelName const channelSlug = channelName

View File

@@ -26,7 +26,7 @@ export async function callGateway<T = unknown>(
const timeoutMs = opts.timeoutMs ?? 10_000; const timeoutMs = opts.timeoutMs ?? 10_000;
const config = loadConfig(); const config = loadConfig();
const isRemoteMode = config.gateway?.mode === "remote"; const isRemoteMode = config.gateway?.mode === "remote";
const remote = isRemoteMode ? config.gateway.remote : undefined; const remote = isRemoteMode ? config.gateway?.remote : undefined;
const url = const url =
(typeof opts.url === "string" && opts.url.trim().length > 0 (typeof opts.url === "string" && opts.url.trim().length > 0
? opts.url.trim() ? opts.url.trim()