diff --git a/CHANGELOG.md b/CHANGELOG.md index 25a1392ac..5054c3542 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - Onboarding/Gateway: persist non-interactive gateway token auth in config; add WS wizard + gateway tool-calling regression coverage. - Gateway/Control UI: make `chat.send` non-blocking, wire Stop to `chat.abort`, and treat `/stop` as an out-of-band abort. (#653) - Gateway/Control UI: allow `chat.abort` without `runId` (abort active runs), suppress post-abort chat streaming, and prune stuck chat runs. (#653) +- Gateway/Control UI: sniff image attachments for chat.send, drop non-images, and log mismatches. (#670) — thanks @cristip73. - CLI: `clawdbot sessions` now includes `elev:*` + `usage:*` flags in the table output. - CLI/Pairing: accept positional provider for `pairing list|approve` (npm-run compatible); update docs/bot hints. - Branding: normalize user-facing “ClawdBot”/“CLAWDBOT” → “Clawdbot” (CLI, status, docs). diff --git a/src/gateway/chat-attachments.test.ts b/src/gateway/chat-attachments.test.ts index e07116636..2cc47fb48 100644 --- a/src/gateway/chat-attachments.test.ts +++ b/src/gateway/chat-attachments.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest"; import { buildMessageWithAttachments, type ChatAttachment, + parseMessageWithAttachments, } from "./chat-attachments.js"; const PNG_1x1 = @@ -56,3 +57,65 @@ describe("buildMessageWithAttachments", () => { ).toThrow(/exceeds size limit/i); }); }); + +describe("parseMessageWithAttachments", () => { + it("sniffs mime when missing", async () => { + const logs: string[] = []; + const parsed = await parseMessageWithAttachments( + "see this", + [ + { + type: "image", + fileName: "dot.png", + content: PNG_1x1, + }, + ], + { log: { warn: (message) => logs.push(message) } }, + ); + expect(parsed.message).toBe("see this"); + expect(parsed.images).toHaveLength(1); + expect(parsed.images[0]?.mimeType).toBe("image/png"); + expect(parsed.images[0]?.data).toBe(PNG_1x1); + expect(logs).toHaveLength(0); + }); + + it("drops non-image payloads and logs", async () => { + const logs: string[] = []; + const pdf = Buffer.from("%PDF-1.4\n").toString("base64"); + const parsed = await parseMessageWithAttachments( + "x", + [ + { + type: "file", + mimeType: "image/png", + fileName: "not-image.pdf", + content: pdf, + }, + ], + { log: { warn: (message) => logs.push(message) } }, + ); + expect(parsed.images).toHaveLength(0); + expect(logs).toHaveLength(1); + expect(logs[0]).toMatch(/non-image/i); + }); + + it("prefers sniffed mime type and logs mismatch", async () => { + const logs: string[] = []; + const parsed = await parseMessageWithAttachments( + "x", + [ + { + type: "image", + mimeType: "image/jpeg", + fileName: "dot.png", + content: PNG_1x1, + }, + ], + { log: { warn: (message) => logs.push(message) } }, + ); + expect(parsed.images).toHaveLength(1); + expect(parsed.images[0]?.mimeType).toBe("image/png"); + expect(logs).toHaveLength(1); + expect(logs[0]).toMatch(/mime mismatch/i); + }); +}); diff --git a/src/gateway/chat-attachments.ts b/src/gateway/chat-attachments.ts index 50082dc93..e24fc4e2c 100644 --- a/src/gateway/chat-attachments.ts +++ b/src/gateway/chat-attachments.ts @@ -1,3 +1,5 @@ +import { detectMime } from "../media/mime.js"; + export type ChatAttachment = { type?: string; mimeType?: string; @@ -16,17 +18,50 @@ export type ParsedMessageWithImages = { images: ChatImageContent[]; }; +type AttachmentLog = { + warn: (message: string) => void; +}; + +function normalizeMime(mime?: string): string | undefined { + if (!mime) return undefined; + const cleaned = mime.split(";")[0]?.trim().toLowerCase(); + return cleaned || undefined; +} + +async function sniffMimeFromBase64( + base64: string, +): Promise { + const trimmed = base64.trim(); + if (!trimmed) return undefined; + + const take = Math.min(256, trimmed.length); + const sliceLen = take - (take % 4); + if (sliceLen < 8) return undefined; + + try { + const head = Buffer.from(trimmed.slice(0, sliceLen), "base64"); + return await detectMime({ buffer: head }); + } catch { + return undefined; + } +} + +function isImageMime(mime?: string): boolean { + return typeof mime === "string" && mime.startsWith("image/"); +} + /** * Parse attachments and extract images as structured content blocks. * Returns the message text and an array of image content blocks * compatible with Claude API's image format. */ -export function parseMessageWithAttachments( +export async function parseMessageWithAttachments( message: string, attachments: ChatAttachment[] | undefined, - opts?: { maxBytes?: number }, -): ParsedMessageWithImages { + opts?: { maxBytes?: number; log?: AttachmentLog }, +): Promise { const maxBytes = opts?.maxBytes ?? 5_000_000; // 5 MB + const log = opts?.log; if (!attachments || attachments.length === 0) { return { message, images: [] }; } @@ -42,9 +77,6 @@ export function parseMessageWithAttachments( if (typeof content !== "string") { throw new Error(`attachment ${label}: content must be base64 string`); } - if (!mime.startsWith("image/")) { - throw new Error(`attachment ${label}: only image/* supported`); - } let sizeBytes = 0; let b64 = content.trim(); @@ -68,10 +100,30 @@ export function parseMessageWithAttachments( ); } + const providedMime = normalizeMime(mime); + const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64)); + if (sniffedMime && !isImageMime(sniffedMime)) { + log?.warn( + `attachment ${label}: detected non-image (${sniffedMime}), dropping`, + ); + continue; + } + if (!sniffedMime && !isImageMime(providedMime)) { + log?.warn( + `attachment ${label}: unable to detect image mime type, dropping`, + ); + continue; + } + if (sniffedMime && providedMime && sniffedMime !== providedMime) { + log?.warn( + `attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`, + ); + } + images.push({ type: "image", data: b64, - mimeType: mime, + mimeType: sniffedMime ?? providedMime ?? mime, }); } diff --git a/src/gateway/server-bridge.ts b/src/gateway/server-bridge.ts index bd24912b8..87ac5a955 100644 --- a/src/gateway/server-bridge.ts +++ b/src/gateway/server-bridge.ts @@ -814,16 +814,16 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) { ).toString("base64") : undefined, })) - .filter((a) => a.content && a.mimeType) ?? []; + .filter((a) => a.content) ?? []; let parsedMessage = p.message; let parsedImages: ChatImageContent[] = []; if (normalizedAttachments.length > 0) { try { - const parsed = parseMessageWithAttachments( + const parsed = await parseMessageWithAttachments( p.message, normalizedAttachments, - { maxBytes: 5_000_000 }, + { maxBytes: 5_000_000, log: ctx.logBridge }, ); parsedMessage = parsed.message; parsedImages = parsed.images; diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index eacabe0c6..c3afb65d4 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -200,15 +200,15 @@ export const chatHandlers: GatewayRequestHandlers = { ).toString("base64") : undefined, })) - .filter((a) => a.content && a.mimeType) ?? []; + .filter((a) => a.content) ?? []; let parsedMessage = p.message; let parsedImages: ChatImageContent[] = []; if (normalizedAttachments.length > 0) { try { - const parsed = parseMessageWithAttachments( + const parsed = await parseMessageWithAttachments( p.message, normalizedAttachments, - { maxBytes: 5_000_000 }, + { maxBytes: 5_000_000, log: context.logGateway }, ); parsedMessage = parsed.message; parsedImages = parsed.images; diff --git a/src/gateway/server-methods/types.ts b/src/gateway/server-methods/types.ts index 79545fe80..613faa32a 100644 --- a/src/gateway/server-methods/types.ts +++ b/src/gateway/server-methods/types.ts @@ -32,6 +32,7 @@ export type GatewayRequestContext = { getHealthCache: () => HealthSummary | null; refreshHealthSnapshot: (opts?: { probe?: boolean }) => Promise; logHealth: { error: (message: string) => void }; + logGateway: { warn: (message: string) => void }; incrementPresenceVersion: () => number; getHealthVersion: () => number; broadcast: ( diff --git a/src/gateway/server.ts b/src/gateway/server.ts index a56f07605..2f77446bc 100644 --- a/src/gateway/server.ts +++ b/src/gateway/server.ts @@ -1674,6 +1674,7 @@ export async function startGatewayServer( getHealthCache: () => healthCache, refreshHealthSnapshot, logHealth, + logGateway: log, incrementPresenceVersion: () => { presenceVersion += 1; return presenceVersion;