fix: sniff chat attachment mime (#670) (thanks @cristip73)

This commit is contained in:
Peter Steinberger
2026-01-10 20:06:33 +01:00
parent c4e76eb635
commit 193ebba657
7 changed files with 131 additions and 13 deletions

View File

@@ -21,6 +21,7 @@
- Onboarding/Gateway: persist non-interactive gateway token auth in config; add WS wizard + gateway tool-calling regression coverage. - Onboarding/Gateway: persist non-interactive gateway token auth in config; add WS wizard + gateway tool-calling regression coverage.
- Gateway/Control UI: make `chat.send` non-blocking, wire Stop to `chat.abort`, and treat `/stop` as an out-of-band abort. (#653) - Gateway/Control UI: make `chat.send` non-blocking, wire Stop to `chat.abort`, and treat `/stop` as an out-of-band abort. (#653)
- Gateway/Control UI: allow `chat.abort` without `runId` (abort active runs), suppress post-abort chat streaming, and prune stuck chat runs. (#653) - Gateway/Control UI: allow `chat.abort` without `runId` (abort active runs), suppress post-abort chat streaming, and prune stuck chat runs. (#653)
- Gateway/Control UI: sniff image attachments for `chat.send`, drop non-images, and log mismatches. (#670) — thanks @cristip73.
- CLI: `clawdbot sessions` now includes `elev:*` + `usage:*` flags in the table output. - CLI: `clawdbot sessions` now includes `elev:*` + `usage:*` flags in the table output.
- CLI/Pairing: accept positional provider for `pairing list|approve` (npm-run compatible); update docs/bot hints. - CLI/Pairing: accept positional provider for `pairing list|approve` (npm-run compatible); update docs/bot hints.
- Branding: normalize user-facing “ClawdBot”/“CLAWDBOT” → “Clawdbot” (CLI, status, docs). - Branding: normalize user-facing “ClawdBot”/“CLAWDBOT” → “Clawdbot” (CLI, status, docs).

View File

@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { import {
buildMessageWithAttachments, buildMessageWithAttachments,
type ChatAttachment, type ChatAttachment,
parseMessageWithAttachments,
} from "./chat-attachments.js"; } from "./chat-attachments.js";
const PNG_1x1 = const PNG_1x1 =
@@ -56,3 +57,65 @@ describe("buildMessageWithAttachments", () => {
).toThrow(/exceeds size limit/i); ).toThrow(/exceeds size limit/i);
}); });
}); });
describe("parseMessageWithAttachments", () => {
  /**
   * Creates an empty warning buffer together with the options object that
   * routes parser warnings into it, so every test starts from a clean log.
   */
  const makeWarnSink = () => {
    const logs: string[] = [];
    const opts = {
      log: { warn: (message: string) => logs.push(message) },
    };
    return { logs, opts };
  };

  it("sniffs mime when missing", async () => {
    const { logs, opts } = makeWarnSink();

    // No mimeType is supplied: the parser has to derive it from the bytes.
    const parsed = await parseMessageWithAttachments(
      "see this",
      [{ type: "image", fileName: "dot.png", content: PNG_1x1 }],
      opts,
    );

    expect(parsed.message).toBe("see this");
    expect(parsed.images).toHaveLength(1);
    const image = parsed.images[0];
    expect(image?.mimeType).toBe("image/png");
    expect(image?.data).toBe(PNG_1x1);
    // Nothing was declared, so there is nothing to disagree with.
    expect(logs).toHaveLength(0);
  });

  it("drops non-image payloads and logs", async () => {
    const { logs, opts } = makeWarnSink();
    // PDF header bytes, deliberately mislabelled as image/png below.
    const pdf = Buffer.from("%PDF-1.4\n").toString("base64");

    const parsed = await parseMessageWithAttachments(
      "x",
      [
        {
          type: "file",
          mimeType: "image/png",
          fileName: "not-image.pdf",
          content: pdf,
        },
      ],
      opts,
    );

    expect(parsed.images).toHaveLength(0);
    expect(logs).toHaveLength(1);
    expect(logs[0]).toMatch(/non-image/i);
  });

  it("prefers sniffed mime type and logs mismatch", async () => {
    const { logs, opts } = makeWarnSink();

    // PNG bytes declared as JPEG: the sniffed type is expected to win.
    const parsed = await parseMessageWithAttachments(
      "x",
      [
        {
          type: "image",
          mimeType: "image/jpeg",
          fileName: "dot.png",
          content: PNG_1x1,
        },
      ],
      opts,
    );

    expect(parsed.images).toHaveLength(1);
    expect(parsed.images[0]?.mimeType).toBe("image/png");
    expect(logs).toHaveLength(1);
    expect(logs[0]).toMatch(/mime mismatch/i);
  });
});

View File

@@ -1,3 +1,5 @@
import { detectMime } from "../media/mime.js";
export type ChatAttachment = { export type ChatAttachment = {
type?: string; type?: string;
mimeType?: string; mimeType?: string;
@@ -16,17 +18,50 @@ export type ParsedMessageWithImages = {
images: ChatImageContent[]; images: ChatImageContent[];
}; };
/**
 * Minimal logger shape accepted through the `log` option of
 * `parseMessageWithAttachments`; only a `warn` method is required.
 */
type AttachmentLog = {
// Receives a single, already-formatted warning message.
warn: (message: string) => void;
};
/**
 * Reduces a MIME string to its bare, lower-cased type.
 *
 * Everything from the first `;` onward (parameters such as `charset=...`)
 * is discarded and surrounding whitespace is trimmed.
 *
 * @param mime - Raw MIME value, possibly carrying parameters.
 * @returns The normalized type, or `undefined` when the input is missing
 *   or nothing remains after stripping.
 */
function normalizeMime(mime?: string): string | undefined {
  if (!mime) {
    return undefined;
  }
  const paramsStart = mime.indexOf(";");
  const bare = paramsStart === -1 ? mime : mime.slice(0, paramsStart);
  const essence = bare.trim().toLowerCase();
  return essence.length > 0 ? essence : undefined;
}
/**
 * Best-effort MIME detection from the leading portion of a base64 payload.
 *
 * At most the first 256 base64 characters are decoded, rounded down to a
 * multiple of four so the slice ends on a complete base64 quantum. The
 * decoded bytes are handed to `detectMime`.
 *
 * @param base64 - Base64-encoded content; surrounding whitespace is ignored.
 * @returns Whatever `detectMime` resolves to, or `undefined` when the input
 *   is empty, shorter than 8 usable characters, or detection throws.
 */
async function sniffMimeFromBase64(
  base64: string,
): Promise<string | undefined> {
  const payload = base64.trim();
  if (payload.length === 0) {
    return undefined;
  }

  // Cap the inspected prefix, then align it down to a 4-character boundary.
  const available = Math.min(256, payload.length);
  const alignedLen = available - (available % 4);
  if (alignedLen < 8) {
    return undefined;
  }

  try {
    const headBytes = Buffer.from(payload.slice(0, alignedLen), "base64");
    const detected = await detectMime({ buffer: headBytes });
    return detected;
  } catch {
    // Detection is advisory only; a failure means "unknown", not an error.
    return undefined;
  }
}
/**
 * Tells whether a MIME value belongs to the `image/` family.
 *
 * The comparison is a case-sensitive prefix check, so callers are expected
 * to pass an already lower-cased value.
 *
 * @param mime - MIME type to test; may be absent.
 * @returns `true` only for strings beginning with `image/`.
 */
function isImageMime(mime?: string): boolean {
  if (typeof mime !== "string") {
    return false;
  }
  return mime.indexOf("image/") === 0;
}
/** /**
* Parse attachments and extract images as structured content blocks. * Parse attachments and extract images as structured content blocks.
* Returns the message text and an array of image content blocks * Returns the message text and an array of image content blocks
* compatible with Claude API's image format. * compatible with Claude API's image format.
*/ */
export function parseMessageWithAttachments( export async function parseMessageWithAttachments(
message: string, message: string,
attachments: ChatAttachment[] | undefined, attachments: ChatAttachment[] | undefined,
opts?: { maxBytes?: number }, opts?: { maxBytes?: number; log?: AttachmentLog },
): ParsedMessageWithImages { ): Promise<ParsedMessageWithImages> {
const maxBytes = opts?.maxBytes ?? 5_000_000; // 5 MB const maxBytes = opts?.maxBytes ?? 5_000_000; // 5 MB
const log = opts?.log;
if (!attachments || attachments.length === 0) { if (!attachments || attachments.length === 0) {
return { message, images: [] }; return { message, images: [] };
} }
@@ -42,9 +77,6 @@ export function parseMessageWithAttachments(
if (typeof content !== "string") { if (typeof content !== "string") {
throw new Error(`attachment ${label}: content must be base64 string`); throw new Error(`attachment ${label}: content must be base64 string`);
} }
if (!mime.startsWith("image/")) {
throw new Error(`attachment ${label}: only image/* supported`);
}
let sizeBytes = 0; let sizeBytes = 0;
let b64 = content.trim(); let b64 = content.trim();
@@ -68,10 +100,30 @@ export function parseMessageWithAttachments(
); );
} }
const providedMime = normalizeMime(mime);
const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
if (sniffedMime && !isImageMime(sniffedMime)) {
log?.warn(
`attachment ${label}: detected non-image (${sniffedMime}), dropping`,
);
continue;
}
if (!sniffedMime && !isImageMime(providedMime)) {
log?.warn(
`attachment ${label}: unable to detect image mime type, dropping`,
);
continue;
}
if (sniffedMime && providedMime && sniffedMime !== providedMime) {
log?.warn(
`attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`,
);
}
images.push({ images.push({
type: "image", type: "image",
data: b64, data: b64,
mimeType: mime, mimeType: sniffedMime ?? providedMime ?? mime,
}); });
} }

View File

@@ -814,16 +814,16 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) {
).toString("base64") ).toString("base64")
: undefined, : undefined,
})) }))
.filter((a) => a.content && a.mimeType) ?? []; .filter((a) => a.content) ?? [];
let parsedMessage = p.message; let parsedMessage = p.message;
let parsedImages: ChatImageContent[] = []; let parsedImages: ChatImageContent[] = [];
if (normalizedAttachments.length > 0) { if (normalizedAttachments.length > 0) {
try { try {
const parsed = parseMessageWithAttachments( const parsed = await parseMessageWithAttachments(
p.message, p.message,
normalizedAttachments, normalizedAttachments,
{ maxBytes: 5_000_000 }, { maxBytes: 5_000_000, log: ctx.logBridge },
); );
parsedMessage = parsed.message; parsedMessage = parsed.message;
parsedImages = parsed.images; parsedImages = parsed.images;

View File

@@ -200,15 +200,15 @@ export const chatHandlers: GatewayRequestHandlers = {
).toString("base64") ).toString("base64")
: undefined, : undefined,
})) }))
.filter((a) => a.content && a.mimeType) ?? []; .filter((a) => a.content) ?? [];
let parsedMessage = p.message; let parsedMessage = p.message;
let parsedImages: ChatImageContent[] = []; let parsedImages: ChatImageContent[] = [];
if (normalizedAttachments.length > 0) { if (normalizedAttachments.length > 0) {
try { try {
const parsed = parseMessageWithAttachments( const parsed = await parseMessageWithAttachments(
p.message, p.message,
normalizedAttachments, normalizedAttachments,
{ maxBytes: 5_000_000 }, { maxBytes: 5_000_000, log: context.logGateway },
); );
parsedMessage = parsed.message; parsedMessage = parsed.message;
parsedImages = parsed.images; parsedImages = parsed.images;

View File

@@ -32,6 +32,7 @@ export type GatewayRequestContext = {
getHealthCache: () => HealthSummary | null; getHealthCache: () => HealthSummary | null;
refreshHealthSnapshot: (opts?: { probe?: boolean }) => Promise<HealthSummary>; refreshHealthSnapshot: (opts?: { probe?: boolean }) => Promise<HealthSummary>;
logHealth: { error: (message: string) => void }; logHealth: { error: (message: string) => void };
logGateway: { warn: (message: string) => void };
incrementPresenceVersion: () => number; incrementPresenceVersion: () => number;
getHealthVersion: () => number; getHealthVersion: () => number;
broadcast: ( broadcast: (

View File

@@ -1674,6 +1674,7 @@ export async function startGatewayServer(
getHealthCache: () => healthCache, getHealthCache: () => healthCache,
refreshHealthSnapshot, refreshHealthSnapshot,
logHealth, logHealth,
logGateway: log,
incrementPresenceVersion: () => { incrementPresenceVersion: () => {
presenceVersion += 1; presenceVersion += 1;
return presenceVersion; return presenceVersion;