Merge pull request #670 from cristip73/fix/ios-image-attachments
fix: enable image attachments in chat messages for Claude API
This commit is contained in:
@@ -21,6 +21,7 @@
|
||||
- Onboarding/Gateway: persist non-interactive gateway token auth in config; add WS wizard + gateway tool-calling regression coverage.
|
||||
- Gateway/Control UI: make `chat.send` non-blocking, wire Stop to `chat.abort`, and treat `/stop` as an out-of-band abort. (#653)
|
||||
- Gateway/Control UI: allow `chat.abort` without `runId` (abort active runs), suppress post-abort chat streaming, and prune stuck chat runs. (#653)
|
||||
- Gateway/Control UI: sniff image attachments for `chat.send`, drop non-images, and log mismatches. (#670) — thanks @cristip73.
|
||||
- CLI: `clawdbot sessions` now includes `elev:*` + `usage:*` flags in the table output.
|
||||
- CLI/Pairing: accept positional provider for `pairing list|approve` (npm-run compatible); update docs/bot hints.
|
||||
- Branding: normalize user-facing “ClawdBot”/“CLAWDBOT” → “Clawdbot” (CLI, status, docs).
|
||||
|
||||
@@ -8,7 +8,12 @@ import type {
|
||||
AgentTool,
|
||||
ThinkingLevel,
|
||||
} from "@mariozechner/pi-agent-core";
|
||||
import type { Api, AssistantMessage, Model } from "@mariozechner/pi-ai";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
ImageContent,
|
||||
Model,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import {
|
||||
createAgentSession,
|
||||
discoverAuthStorage,
|
||||
@@ -1009,6 +1014,8 @@ export async function runEmbeddedPiAgent(params: {
|
||||
config?: ClawdbotConfig;
|
||||
skillsSnapshot?: SkillSnapshot;
|
||||
prompt: string;
|
||||
/** Optional image attachments for multimodal messages. */
|
||||
images?: ImageContent[];
|
||||
provider?: string;
|
||||
model?: string;
|
||||
authProfileId?: string;
|
||||
@@ -1434,7 +1441,9 @@ export async function runEmbeddedPiAgent(params: {
|
||||
`embedded run prompt start: runId=${params.runId} sessionId=${params.sessionId}`,
|
||||
);
|
||||
try {
|
||||
await session.prompt(params.prompt);
|
||||
await session.prompt(params.prompt, {
|
||||
images: params.images,
|
||||
});
|
||||
} catch (err) {
|
||||
promptError = err;
|
||||
} finally {
|
||||
|
||||
@@ -66,8 +66,17 @@ import {
|
||||
} from "../utils/message-provider.js";
|
||||
import { normalizeE164 } from "../utils.js";
|
||||
|
||||
/** Image content block for Claude API multimodal messages. */
|
||||
type ImageContent = {
|
||||
type: "image";
|
||||
data: string;
|
||||
mimeType: string;
|
||||
};
|
||||
|
||||
type AgentCommandOpts = {
|
||||
message: string;
|
||||
/** Optional image attachments for multimodal messages. */
|
||||
images?: ImageContent[];
|
||||
to?: string;
|
||||
sessionId?: string;
|
||||
sessionKey?: string;
|
||||
@@ -450,6 +459,7 @@ export async function agentCommand(
|
||||
config: cfg,
|
||||
skillsSnapshot,
|
||||
prompt: body,
|
||||
images: opts.images,
|
||||
provider: providerOverride,
|
||||
model: modelOverride,
|
||||
authProfileId: sessionEntry?.authProfileOverride,
|
||||
|
||||
@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildMessageWithAttachments,
|
||||
type ChatAttachment,
|
||||
parseMessageWithAttachments,
|
||||
} from "./chat-attachments.js";
|
||||
|
||||
const PNG_1x1 =
|
||||
@@ -56,3 +57,65 @@ describe("buildMessageWithAttachments", () => {
|
||||
).toThrow(/exceeds size limit/i);
|
||||
});
|
||||
});
|
||||
|
||||
describe("parseMessageWithAttachments", () => {
  /** Create a log sink that records every warning passed to it. */
  const captureWarnings = () => {
    const warnings: string[] = [];
    const log = {
      warn: (message: string) => {
        warnings.push(message);
      },
    };
    return { warnings, log };
  };

  it("sniffs mime when missing", async () => {
    const { warnings, log } = captureWarnings();
    // No mimeType supplied: the parser has to rely on the sniffed type.
    const attachment: ChatAttachment = {
      type: "image",
      fileName: "dot.png",
      content: PNG_1x1,
    };

    const { message, images } = await parseMessageWithAttachments(
      "see this",
      [attachment],
      { log },
    );

    expect(message).toBe("see this");
    expect(images).toHaveLength(1);
    expect(images[0]?.mimeType).toBe("image/png");
    expect(images[0]?.data).toBe(PNG_1x1);
    expect(warnings).toHaveLength(0);
  });

  it("drops non-image payloads and logs", async () => {
    const { warnings, log } = captureWarnings();
    // PDF bytes that are declared as a PNG by the sender.
    const attachment: ChatAttachment = {
      type: "file",
      mimeType: "image/png",
      fileName: "not-image.pdf",
      content: Buffer.from("%PDF-1.4\n").toString("base64"),
    };

    const { images } = await parseMessageWithAttachments("x", [attachment], {
      log,
    });

    expect(images).toHaveLength(0);
    expect(warnings).toHaveLength(1);
    expect(warnings[0]).toMatch(/non-image/i);
  });

  it("prefers sniffed mime type and logs mismatch", async () => {
    const { warnings, log } = captureWarnings();
    // PNG bytes that are declared as JPEG by the sender.
    const attachment: ChatAttachment = {
      type: "image",
      mimeType: "image/jpeg",
      fileName: "dot.png",
      content: PNG_1x1,
    };

    const { images } = await parseMessageWithAttachments("x", [attachment], {
      log,
    });

    expect(images).toHaveLength(1);
    expect(images[0]?.mimeType).toBe("image/png");
    expect(warnings).toHaveLength(1);
    expect(warnings[0]).toMatch(/mime mismatch/i);
  });
});
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { detectMime } from "../media/mime.js";
|
||||
|
||||
export type ChatAttachment = {
|
||||
type?: string;
|
||||
mimeType?: string;
|
||||
@@ -5,6 +7,133 @@ export type ChatAttachment = {
|
||||
content?: unknown;
|
||||
};
|
||||
|
||||
/** One image extracted from a chat attachment, as a structured content block. */
export type ChatImageContent = {
  type: "image";
  /** Base64 payload with any `data:` URL prefix already removed. */
  data: string;
  /** Normalized MIME type; the sniffed type wins over the declared one. */
  mimeType: string;
};

/** Result of `parseMessageWithAttachments`: the message text plus accepted images. */
export type ParsedMessageWithImages = {
  message: string;
  images: ChatImageContent[];
};

/** Minimal logger surface used to report dropped or mislabelled attachments. */
type AttachmentLog = {
  warn: (message: string) => void;
};
|
||||
|
||||
/**
 * Reduce a MIME string to its bare `type/subtype` form.
 *
 * Everything from the first `;` onward (parameters such as `charset=...`) is
 * discarded, surrounding whitespace is trimmed and the result is lowercased.
 *
 * @param mime - Raw MIME string, possibly carrying parameters.
 * @returns The normalized MIME type, or `undefined` when the input is missing
 *   or nothing remains after stripping.
 */
function normalizeMime(mime?: string): string | undefined {
  if (!mime) {
    return undefined;
  }
  const [essence = ""] = mime.split(";");
  const normalized = essence.trim().toLowerCase();
  return normalized === "" ? undefined : normalized;
}
|
||||
|
||||
/**
 * Best-effort MIME detection from the leading bytes of a base64 payload.
 *
 * Only the first 256 base64 characters at most are decoded, rounded down to a
 * whole number of 4-character groups, and handed to `detectMime`.
 *
 * @param base64 - Base64 text without a `data:` URL prefix.
 * @returns Whatever `detectMime` reports, or `undefined` when the input is
 *   blank, shorter than 8 usable characters, or detection throws.
 */
async function sniffMimeFromBase64(
  base64: string,
): Promise<string | undefined> {
  const payload = base64.trim();
  if (payload.length === 0) {
    return undefined;
  }

  // Cap the sample, then align it to complete 4-character base64 groups.
  const capped = Math.min(256, payload.length);
  const usable = capped - (capped % 4);
  if (usable < 8) {
    return undefined;
  }

  try {
    const head = Buffer.from(payload.slice(0, usable), "base64");
    return await detectMime({ buffer: head });
  } catch {
    // Detection is advisory only; a failure means the type is unknown.
    return undefined;
  }
}
|
||||
|
||||
/**
 * Tell whether a MIME type belongs to the `image/*` family.
 *
 * @param mime - MIME type to test; the comparison is a case-sensitive prefix
 *   check, so the value should already be lowercased.
 * @returns `true` only for a string that starts with `image/`.
 */
function isImageMime(mime?: string): boolean {
  if (typeof mime !== "string") {
    return false;
  }
  return mime.startsWith("image/");
}
|
||||
|
||||
/**
 * Parse attachments and extract images as structured content blocks.
 *
 * Each attachment's `content` must be a base64 string, optionally wrapped in
 * a `data:<mime>;base64,` URL prefix, which is stripped. The payload is
 * validated, measured against the size limit, and its leading bytes are
 * sniffed for a MIME type. Attachments that are not images are dropped with
 * a warning rather than failing the whole message. The message text is
 * returned unchanged.
 *
 * @param message - Chat message text; passed through as-is.
 * @param attachments - Attachments to inspect; `undefined` or empty yields no
 *   images.
 * @param opts - `maxBytes` caps the decoded size of each attachment (default
 *   5,000,000 bytes); `log` receives a warning for every dropped or
 *   mislabelled attachment.
 * @returns The message plus the accepted images, each carrying the sniffed
 *   MIME type when one was detected and the declared type otherwise.
 * @throws Error when an attachment's content is not a string, is not
 *   well-formed base64, is empty, or decodes to more than `maxBytes` bytes.
 */
export async function parseMessageWithAttachments(
  message: string,
  attachments: ChatAttachment[] | undefined,
  opts?: { maxBytes?: number; log?: AttachmentLog },
): Promise<ParsedMessageWithImages> {
  const maxBytes = opts?.maxBytes ?? 5_000_000; // 5 MB
  const log = opts?.log;
  if (!attachments || attachments.length === 0) {
    return { message, images: [] };
  }

  // Hoisted so the patterns are not rebuilt for every attachment.
  const dataUrlPattern = /^data:[^;]+;base64,(.*)$/;
  // Base64 alphabet with at most two padding characters, at the end only.
  const base64Pattern = /^[A-Za-z0-9+\/]*={0,2}$/;

  const images: ChatImageContent[] = [];

  for (const [idx, att] of attachments.entries()) {
    if (!att) continue;
    const content = att.content;
    const label = att.fileName || att.type || `attachment-${idx + 1}`;

    if (typeof content !== "string") {
      throw new Error(`attachment ${label}: content must be base64 string`);
    }

    let b64 = content.trim();
    // Strip data URL prefix if present (e.g., "data:image/jpeg;base64,...")
    const dataUrlMatch = dataUrlPattern.exec(b64);
    if (dataUrlMatch) {
      b64 = dataUrlMatch[1] ?? "";
    }

    // Node's base64 decoder never throws on malformed input, so the payload
    // has to be validated explicitly. Padding is only legal at the very end;
    // anything else would be forwarded as corrupt image data.
    if (b64.length % 4 !== 0 || !base64Pattern.test(b64)) {
      throw new Error(`attachment ${label}: invalid base64 content`);
    }

    // For validated base64 the decoded size follows from the length, which
    // avoids allocating the whole (untrusted) payload just to measure it.
    const padding = b64.endsWith("==") ? 2 : b64.endsWith("=") ? 1 : 0;
    const sizeBytes = (b64.length / 4) * 3 - padding;
    if (sizeBytes <= 0) {
      throw new Error(`attachment ${label}: empty content`);
    }
    if (sizeBytes > maxBytes) {
      throw new Error(
        `attachment ${label}: exceeds size limit (${sizeBytes} > ${maxBytes} bytes)`,
      );
    }

    const providedMime = normalizeMime(att.mimeType);
    const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));

    // The sniffed type is authoritative: a detected non-image is dropped even
    // when the sender declared an image type.
    if (sniffedMime !== undefined && !isImageMime(sniffedMime)) {
      log?.warn(
        `attachment ${label}: detected non-image (${sniffedMime}), dropping`,
      );
      continue;
    }

    // With nothing sniffed, fall back to the declared type, which must be an
    // image type for the attachment to be kept.
    const mimeType = sniffedMime ?? providedMime;
    if (mimeType === undefined || !isImageMime(mimeType)) {
      log?.warn(
        `attachment ${label}: unable to detect image mime type, dropping`,
      );
      continue;
    }

    if (
      sniffedMime !== undefined &&
      providedMime !== undefined &&
      sniffedMime !== providedMime
    ) {
      log?.warn(
        `attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`,
      );
    }

    images.push({ type: "image", data: b64, mimeType });
  }

  return { message, images };
}
|
||||
|
||||
/**
|
||||
* @deprecated Use parseMessageWithAttachments instead.
|
||||
* This function converts images to markdown data URLs which Claude API cannot process as images.
|
||||
*/
|
||||
export function buildMessageWithAttachments(
|
||||
message: string,
|
||||
attachments: ChatAttachment[] | undefined,
|
||||
|
||||
@@ -43,7 +43,10 @@ import {
|
||||
isChatStopCommandText,
|
||||
resolveChatRunExpiresAtMs,
|
||||
} from "./chat-abort.js";
|
||||
import { buildMessageWithAttachments } from "./chat-attachments.js";
|
||||
import {
|
||||
type ChatImageContent,
|
||||
parseMessageWithAttachments,
|
||||
} from "./chat-attachments.js";
|
||||
import {
|
||||
ErrorCodes,
|
||||
errorShape,
|
||||
@@ -793,32 +796,37 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) {
|
||||
};
|
||||
const stopCommand = isChatStopCommandText(p.message);
|
||||
const normalizedAttachments =
|
||||
p.attachments?.map((a) => ({
|
||||
type: typeof a?.type === "string" ? a.type : undefined,
|
||||
mimeType:
|
||||
typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
||||
fileName:
|
||||
typeof a?.fileName === "string" ? a.fileName : undefined,
|
||||
content:
|
||||
typeof a?.content === "string"
|
||||
? a.content
|
||||
: ArrayBuffer.isView(a?.content)
|
||||
? Buffer.from(
|
||||
a.content.buffer,
|
||||
a.content.byteOffset,
|
||||
a.content.byteLength,
|
||||
).toString("base64")
|
||||
: undefined,
|
||||
})) ?? [];
|
||||
p.attachments
|
||||
?.map((a) => ({
|
||||
type: typeof a?.type === "string" ? a.type : undefined,
|
||||
mimeType:
|
||||
typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
||||
fileName:
|
||||
typeof a?.fileName === "string" ? a.fileName : undefined,
|
||||
content:
|
||||
typeof a?.content === "string"
|
||||
? a.content
|
||||
: ArrayBuffer.isView(a?.content)
|
||||
? Buffer.from(
|
||||
a.content.buffer,
|
||||
a.content.byteOffset,
|
||||
a.content.byteLength,
|
||||
).toString("base64")
|
||||
: undefined,
|
||||
}))
|
||||
.filter((a) => a.content) ?? [];
|
||||
|
||||
let messageWithAttachments = p.message;
|
||||
let parsedMessage = p.message;
|
||||
let parsedImages: ChatImageContent[] = [];
|
||||
if (normalizedAttachments.length > 0) {
|
||||
try {
|
||||
messageWithAttachments = buildMessageWithAttachments(
|
||||
const parsed = await parseMessageWithAttachments(
|
||||
p.message,
|
||||
normalizedAttachments,
|
||||
{ maxBytes: 5_000_000 },
|
||||
{ maxBytes: 5_000_000, log: ctx.logBridge },
|
||||
);
|
||||
parsedMessage = parsed.message;
|
||||
parsedImages = parsed.images;
|
||||
} catch (err) {
|
||||
return {
|
||||
ok: false,
|
||||
@@ -922,7 +930,8 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) {
|
||||
};
|
||||
void agentCommand(
|
||||
{
|
||||
message: messageWithAttachments,
|
||||
message: parsedMessage,
|
||||
images: parsedImages.length > 0 ? parsedImages : undefined,
|
||||
sessionId,
|
||||
sessionKey: p.sessionKey,
|
||||
runId: clientRunId,
|
||||
|
||||
@@ -13,7 +13,10 @@ import {
|
||||
isChatStopCommandText,
|
||||
resolveChatRunExpiresAtMs,
|
||||
} from "../chat-abort.js";
|
||||
import { buildMessageWithAttachments } from "../chat-attachments.js";
|
||||
import {
|
||||
type ChatImageContent,
|
||||
parseMessageWithAttachments,
|
||||
} from "../chat-attachments.js";
|
||||
import {
|
||||
ErrorCodes,
|
||||
errorShape,
|
||||
@@ -181,29 +184,34 @@ export const chatHandlers: GatewayRequestHandlers = {
|
||||
};
|
||||
const stopCommand = isChatStopCommandText(p.message);
|
||||
const normalizedAttachments =
|
||||
p.attachments?.map((a) => ({
|
||||
type: typeof a?.type === "string" ? a.type : undefined,
|
||||
mimeType: typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
||||
fileName: typeof a?.fileName === "string" ? a.fileName : undefined,
|
||||
content:
|
||||
typeof a?.content === "string"
|
||||
? a.content
|
||||
: ArrayBuffer.isView(a?.content)
|
||||
? Buffer.from(
|
||||
a.content.buffer,
|
||||
a.content.byteOffset,
|
||||
a.content.byteLength,
|
||||
).toString("base64")
|
||||
: undefined,
|
||||
})) ?? [];
|
||||
let messageWithAttachments = p.message;
|
||||
p.attachments
|
||||
?.map((a) => ({
|
||||
type: typeof a?.type === "string" ? a.type : undefined,
|
||||
mimeType: typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
||||
fileName: typeof a?.fileName === "string" ? a.fileName : undefined,
|
||||
content:
|
||||
typeof a?.content === "string"
|
||||
? a.content
|
||||
: ArrayBuffer.isView(a?.content)
|
||||
? Buffer.from(
|
||||
a.content.buffer,
|
||||
a.content.byteOffset,
|
||||
a.content.byteLength,
|
||||
).toString("base64")
|
||||
: undefined,
|
||||
}))
|
||||
.filter((a) => a.content) ?? [];
|
||||
let parsedMessage = p.message;
|
||||
let parsedImages: ChatImageContent[] = [];
|
||||
if (normalizedAttachments.length > 0) {
|
||||
try {
|
||||
messageWithAttachments = buildMessageWithAttachments(
|
||||
const parsed = await parseMessageWithAttachments(
|
||||
p.message,
|
||||
normalizedAttachments,
|
||||
{ maxBytes: 5_000_000 },
|
||||
{ maxBytes: 5_000_000, log: context.logGateway },
|
||||
);
|
||||
parsedMessage = parsed.message;
|
||||
parsedImages = parsed.images;
|
||||
} catch (err) {
|
||||
respond(
|
||||
false,
|
||||
@@ -312,7 +320,8 @@ export const chatHandlers: GatewayRequestHandlers = {
|
||||
|
||||
void agentCommand(
|
||||
{
|
||||
message: messageWithAttachments,
|
||||
message: parsedMessage,
|
||||
images: parsedImages.length > 0 ? parsedImages : undefined,
|
||||
sessionId,
|
||||
sessionKey: p.sessionKey,
|
||||
runId: clientRunId,
|
||||
|
||||
@@ -32,6 +32,7 @@ export type GatewayRequestContext = {
|
||||
getHealthCache: () => HealthSummary | null;
|
||||
refreshHealthSnapshot: (opts?: { probe?: boolean }) => Promise<HealthSummary>;
|
||||
logHealth: { error: (message: string) => void };
|
||||
logGateway: { warn: (message: string) => void };
|
||||
incrementPresenceVersion: () => number;
|
||||
getHealthVersion: () => number;
|
||||
broadcast: (
|
||||
|
||||
@@ -1674,6 +1674,7 @@ export async function startGatewayServer(
|
||||
getHealthCache: () => healthCache,
|
||||
refreshHealthSnapshot,
|
||||
logHealth,
|
||||
logGateway: log,
|
||||
incrementPresenceVersion: () => {
|
||||
presenceVersion += 1;
|
||||
return presenceVersion;
|
||||
|
||||
Reference in New Issue
Block a user