diff --git a/src/agents/clawdis-tools.camera.test.ts b/src/agents/clawdis-tools.camera.test.ts index e41b09d91..a97bfde64 100644 --- a/src/agents/clawdis-tools.camera.test.ts +++ b/src/agents/clawdis-tools.camera.test.ts @@ -1,6 +1,8 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; -const callGateway = vi.fn(); +const { callGateway } = vi.hoisted(() => ({ + callGateway: vi.fn(), +})); vi.mock("../gateway/call.js", () => ({ callGateway })); vi.mock("../media/image-ops.js", () => ({ diff --git a/src/agents/clawdis-tools.ts b/src/agents/clawdis-tools.ts index d62e16cc6..b538c244d 100644 --- a/src/agents/clawdis-tools.ts +++ b/src/agents/clawdis-tools.ts @@ -865,20 +865,6 @@ function createCanvasTool(): AnyAgentTool { Number.isFinite(params.quality) ? params.quality : undefined; - const delayMs = - typeof params.delayMs === "number" && - Number.isFinite(params.delayMs) - ? params.delayMs - : undefined; - const deviceId = - typeof params.deviceId === "string" && params.deviceId.trim() - ? params.deviceId.trim() - : undefined; - const delayMs = - typeof params.delayMs === "number" && - Number.isFinite(params.delayMs) - ? params.delayMs - : undefined; const raw = (await invoke("canvas.snapshot", { format, maxWidth, @@ -889,8 +875,7 @@ function createCanvasTool(): AnyAgentTool { ext: payload.format === "jpeg" ? "jpg" : payload.format, }); await writeBase64ToFile(filePath, payload.base64); - const mimeType = - imageMimeFromFormat(payload.format) ?? "image/png"; + const mimeType = imageMimeFromFormat(payload.format) ?? "image/png"; return await imageResult({ label: "canvas:snapshot", path: filePath, @@ -1139,6 +1124,15 @@ function createNodesTool(): AnyAgentTool { Number.isFinite(params.quality) ? params.quality : undefined; + const delayMs = + typeof params.delayMs === "number" && + Number.isFinite(params.delayMs) + ? params.delayMs + : undefined; + const deviceId = + typeof params.deviceId === "string" && params.deviceId.trim() + ? params.deviceId.trim() + : undefined; const content: AgentToolResult["content"] = []; const details: Array> = []; @@ -1158,10 +1152,23 @@ function createNodesTool(): AnyAgentTool { idempotencyKey: crypto.randomUUID(), })) as { payload?: unknown }; const payload = parseCameraSnapPayload(raw?.payload); + const normalizedFormat = payload.format.toLowerCase(); + if ( + normalizedFormat !== "jpg" && + normalizedFormat !== "jpeg" && + normalizedFormat !== "png" + ) { + throw new Error( + `unsupported camera.snap format: ${payload.format}`, + ); + } + + const isJpeg = + normalizedFormat === "jpg" || normalizedFormat === "jpeg"; const filePath = cameraTempPath({ kind: "snap", facing, - ext: payload.format === "jpeg" ? "jpg" : payload.format, + ext: isJpeg ? "jpg" : "png", }); await writeBase64ToFile(filePath, payload.base64); content.push({ type: "text", text: `MEDIA:${filePath}` }); @@ -1169,7 +1176,8 @@ function createNodesTool(): AnyAgentTool { type: "image", data: payload.base64, mimeType: - imageMimeFromFormat(payload.format) ?? "image/png", + imageMimeFromFormat(payload.format) ?? + (isJpeg ? "image/jpeg" : "image/png"), }); details.push({ facing, diff --git a/src/agents/tool-images.test.ts b/src/agents/tool-images.test.ts index c6a5baffd..8a0e5f0c6 100644 --- a/src/agents/tool-images.test.ts +++ b/src/agents/tool-images.test.ts @@ -32,4 +32,32 @@ describe("tool image sanitizing", () => { expect(size).toBeLessThanOrEqual(5 * 1024 * 1024); expect(image.mimeType).toBe("image/jpeg"); }, 20_000); + + it("corrects mismatched jpeg mimeType", async () => { + const jpeg = await sharp({ + create: { + width: 10, + height: 10, + channels: 3, + background: { r: 255, g: 0, b: 0 }, + }, + }) + .jpeg() + .toBuffer(); + + const blocks = [ + { + type: "image" as const, + data: jpeg.toString("base64"), + mimeType: "image/png", + }, + ]; + + const out = await sanitizeContentBlocksImages(blocks, "test"); + const image = out.find((b) => b.type === "image"); + if (!image || image.type !== "image") { + throw new Error("expected image block"); + } + expect(image.mimeType).toBe("image/jpeg"); + }); }); diff --git a/src/agents/tool-images.ts b/src/agents/tool-images.ts index 5182d5f3c..a5915957d 100644 --- a/src/agents/tool-images.ts +++ b/src/agents/tool-images.ts @@ -31,6 +31,15 @@ function isTextBlock(block: unknown): block is TextContentBlock { return rec.type === "text" && typeof rec.text === "string"; } +function inferMimeTypeFromBase64(base64: string): string | undefined { + const trimmed = base64.trim(); + if (!trimmed) return undefined; + if (trimmed.startsWith("/9j/")) return "image/jpeg"; + if (trimmed.startsWith("iVBOR")) return "image/png"; + if (trimmed.startsWith("R0lGOD")) return "image/gif"; + return undefined; +} + async function resizeImageBase64IfNeeded(params: { base64: string; mimeType: string; @@ -127,13 +136,19 @@ export async function sanitizeContentBlocksImages( } try { + const inferredMimeType = inferMimeTypeFromBase64(data); + const mimeType = inferredMimeType ?? block.mimeType; const resized = await resizeImageBase64IfNeeded({ base64: data, - mimeType: block.mimeType, + mimeType, maxDimensionPx, maxBytes, }); - out.push({ ...block, data: resized.base64, mimeType: resized.mimeType }); + out.push({ + ...block, + data: resized.base64, + mimeType: resized.resized ? resized.mimeType : mimeType, + }); } catch (err) { out.push({ type: "text", diff --git a/src/cli/nodes-cli.ts b/src/cli/nodes-cli.ts index 4661b6ca6..e5bc2ecd3 100644 --- a/src/cli/nodes-cli.ts +++ b/src/cli/nodes-cli.ts @@ -43,6 +43,8 @@ type NodesRpcOpts = { format?: string; maxWidth?: string; quality?: string; + delayMs?: string; + deviceId?: string; duration?: string; screen?: string; fps?: string; @@ -888,7 +890,9 @@ export function registerNodesCli(program: Command) { const name = typeof device.name === "string" ? device.name : "Unknown Camera"; const position = - typeof device.position === "string" ? device.position : "unspecified"; + typeof device.position === "string" + ? device.position + : "unspecified"; defaultRuntime.log(`${name} (${position})${id ? ` — ${id}` : ""}`); } } catch (err) { @@ -908,7 +912,10 @@ export function registerNodesCli(program: Command) { .option("--device-id ", "Camera device id (from nodes camera list)") .option("--max-width ", "Max width in px (optional)") .option("--quality <0-1>", "JPEG quality (default 0.9)") - .option("--delay-ms ", "Delay before capture in ms (macOS default 2000)") + .option( + "--delay-ms ", + "Delay before capture in ms (macOS default 2000)", + ) .option( "--invoke-timeout ", "Node invoke timeout in ms (default 20000)", @@ -940,7 +947,9 @@ export function registerNodesCli(program: Command) { const delayMs = opts.delayMs ? Number.parseInt(String(opts.delayMs), 10) : undefined; - const deviceId = opts.deviceId ? String(opts.deviceId).trim() : undefined; + const deviceId = opts.deviceId + ? String(opts.deviceId).trim() + : undefined; const timeoutMs = opts.invokeTimeout ? Number.parseInt(String(opts.invokeTimeout), 10) : undefined; @@ -1037,20 +1046,22 @@ export function registerNodesCli(program: Command) { const timeoutMs = opts.invokeTimeout ? Number.parseInt(String(opts.invokeTimeout), 10) : undefined; - const deviceId = opts.deviceId ? String(opts.deviceId).trim() : undefined; + const deviceId = opts.deviceId + ? String(opts.deviceId).trim() + : undefined; const invokeParams: Record = { nodeId, command: "camera.clip", - params: { - facing, - durationMs: Number.isFinite(durationMs) ? durationMs : undefined, - includeAudio, - format: "mp4", - deviceId: deviceId || undefined, - }, - idempotencyKey: randomIdempotencyKey(), - }; + params: { + facing, + durationMs: Number.isFinite(durationMs) ? durationMs : undefined, + includeAudio, + format: "mp4", + deviceId: deviceId || undefined, + }, + idempotencyKey: randomIdempotencyKey(), + }; if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) { invokeParams.timeoutMs = timeoutMs; } diff --git a/src/discord/monitor.ts b/src/discord/monitor.ts index e5449074a..619b4c9dc 100644 --- a/src/discord/monitor.ts +++ b/src/discord/monitor.ts @@ -419,7 +419,9 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { return; } const channelName = - interaction.channel && "name" in interaction.channel + interaction.channel && + "name" in interaction.channel && + typeof interaction.channel.name === "string" ? interaction.channel.name : undefined; const channelSlug = channelName @@ -459,7 +461,9 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { } } else if (isGroupDm) { const channelName = - interaction.channel && "name" in interaction.channel + interaction.channel && + "name" in interaction.channel && + typeof interaction.channel.name === "string" ? interaction.channel.name : undefined; const channelSlug = channelName diff --git a/src/gateway/call.ts b/src/gateway/call.ts index 32399258e..bcef4c613 100644 --- a/src/gateway/call.ts +++ b/src/gateway/call.ts @@ -26,7 +26,7 @@ export async function callGateway( const timeoutMs = opts.timeoutMs ?? 10_000; const config = loadConfig(); const isRemoteMode = config.gateway?.mode === "remote"; - const remote = isRemoteMode ? config.gateway.remote : undefined; + const remote = isRemoteMode ? config.gateway?.remote : undefined; const url = (typeof opts.url === "string" && opts.url.trim().length > 0 ? opts.url.trim()