fix: enable image attachments in chat messages for Claude API

Images were previously converted to markdown data URLs, which the Claude API
treats as plain text rather than as actual images.

Changes:
- Add parseMessageWithAttachments() that returns {message, images[]}
- Pass images through the stack to session.prompt() as content blocks
- Filter null/empty attachments before parsing
- Strip data URL prefix if client sends it

This enables iOS and other clients to send images that Claude can actually see.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
cristip73
2026-01-10 19:17:32 +02:00
committed by Peter Steinberger
parent 0279f09459
commit c4e76eb635
5 changed files with 156 additions and 42 deletions

View File

@@ -8,7 +8,12 @@ import type {
AgentTool, AgentTool,
ThinkingLevel, ThinkingLevel,
} from "@mariozechner/pi-agent-core"; } from "@mariozechner/pi-agent-core";
import type { Api, AssistantMessage, Model } from "@mariozechner/pi-ai"; import type {
Api,
AssistantMessage,
ImageContent,
Model,
} from "@mariozechner/pi-ai";
import { import {
createAgentSession, createAgentSession,
discoverAuthStorage, discoverAuthStorage,
@@ -1009,6 +1014,8 @@ export async function runEmbeddedPiAgent(params: {
config?: ClawdbotConfig; config?: ClawdbotConfig;
skillsSnapshot?: SkillSnapshot; skillsSnapshot?: SkillSnapshot;
prompt: string; prompt: string;
/** Optional image attachments for multimodal messages. */
images?: ImageContent[];
provider?: string; provider?: string;
model?: string; model?: string;
authProfileId?: string; authProfileId?: string;
@@ -1434,7 +1441,9 @@ export async function runEmbeddedPiAgent(params: {
`embedded run prompt start: runId=${params.runId} sessionId=${params.sessionId}`, `embedded run prompt start: runId=${params.runId} sessionId=${params.sessionId}`,
); );
try { try {
await session.prompt(params.prompt); await session.prompt(params.prompt, {
images: params.images,
});
} catch (err) { } catch (err) {
promptError = err; promptError = err;
} finally { } finally {

View File

@@ -66,8 +66,17 @@ import {
} from "../utils/message-provider.js"; } from "../utils/message-provider.js";
import { normalizeE164 } from "../utils.js"; import { normalizeE164 } from "../utils.js";
/**
 * Image content block for Claude API multimodal messages.
 *
 * NOTE(review): a type with the same name is imported from
 * "@mariozechner/pi-ai" by the embedded runner; this local copy presumably
 * mirrors its shape — confirm they stay structurally compatible.
 */
type ImageContent = {
/** Discriminant tag; always the literal "image". */
type: "image";
/** Image payload; presumably raw base64 without a data-URL prefix — confirm against callers. */
data: string;
/** MIME type of the image; presumably an image/* value — confirm against callers. */
mimeType: string;
};
type AgentCommandOpts = { type AgentCommandOpts = {
message: string; message: string;
/** Optional image attachments for multimodal messages. */
images?: ImageContent[];
to?: string; to?: string;
sessionId?: string; sessionId?: string;
sessionKey?: string; sessionKey?: string;
@@ -450,6 +459,7 @@ export async function agentCommand(
config: cfg, config: cfg,
skillsSnapshot, skillsSnapshot,
prompt: body, prompt: body,
images: opts.images,
provider: providerOverride, provider: providerOverride,
model: modelOverride, model: modelOverride,
authProfileId: sessionEntry?.authProfileOverride, authProfileId: sessionEntry?.authProfileOverride,

View File

@@ -5,6 +5,83 @@ export type ChatAttachment = {
content?: unknown; content?: unknown;
}; };
/**
 * Structured image block produced by parseMessageWithAttachments for each
 * accepted attachment.
 */
export type ChatImageContent = {
/** Discriminant tag; always the literal "image". */
type: "image";
/** Base64 payload of the image, with any leading data-URL prefix already removed. */
data: string;
/** The attachment's MIME type; the parser only emits values starting with "image/". */
mimeType: string;
};
/** Result of parseMessageWithAttachments: the message text plus the extracted image blocks. */
export type ParsedMessageWithImages = {
/** The message text, returned exactly as it was passed in. */
message: string;
/** One block per accepted attachment, in input order; empty when there are no attachments. */
images: ChatImageContent[];
};
/**
 * Parse chat attachments into structured image content blocks.
 *
 * The message text is returned unchanged. Each attachment is validated and
 * emitted as a `{ type: "image", data, mimeType }` block whose `data` is raw
 * base64; a leading `data:<mediatype>;base64,` prefix (including media types
 * that carry parameters) is stripped if the client sent one.
 *
 * Null/undefined entries in `attachments` are skipped.
 *
 * @param message - Message text, passed through untouched.
 * @param attachments - Attachments to validate; may be undefined or empty.
 * @param opts - `maxBytes` caps the decoded size of each attachment
 *   (default 5,000,000 bytes).
 * @returns The original message plus one image block per accepted attachment,
 *   in input order.
 * @throws Error when an attachment's content is not a string, its MIME type
 *   does not start with `image/`, its content is empty or not well-formed
 *   base64, or its decoded size exceeds `maxBytes`.
 */
export function parseMessageWithAttachments(
  message: string,
  attachments: ChatAttachment[] | undefined,
  opts?: { maxBytes?: number },
): ParsedMessageWithImages {
  const maxBytes = opts?.maxBytes ?? 5_000_000; // 5 MB
  if (!attachments || attachments.length === 0) {
    return { message, images: [] };
  }

  // Built once per call rather than once per attachment.
  const dataUrlPrefix = /^data:[^,]*;base64,/;
  const nonBase64Char = /[^A-Za-z0-9+/]/;

  const images: ChatImageContent[] = [];
  for (const [idx, att] of attachments.entries()) {
    if (!att) continue;
    const mime = att.mimeType ?? "";
    const content = att.content;
    const label = att.fileName || att.type || `attachment-${idx + 1}`;

    if (typeof content !== "string") {
      throw new Error(`attachment ${label}: content must be base64 string`);
    }
    if (!mime.startsWith("image/")) {
      throw new Error(`attachment ${label}: only image/* supported`);
    }

    // Strip data URL prefix if present (e.g., "data:image/jpeg;base64,...").
    const trimmed = content.trim();
    const prefix = dataUrlPrefix.exec(trimmed);
    const b64 = prefix ? trimmed.slice(prefix[0].length) : trimmed;

    // Report empty payloads explicitly instead of as a size-limit violation.
    if (b64.length === 0) {
      throw new Error(`attachment ${label}: content is empty`);
    }

    // Base64 sanity: length is a multiple of 4, at most two '=' and only as
    // trailing padding, and everything before the padding is in the alphabet.
    const padding = b64.endsWith("==") ? 2 : b64.endsWith("=") ? 1 : 0;
    const payload = padding > 0 ? b64.slice(0, -padding) : b64;
    if (b64.length % 4 !== 0 || nonBase64Char.test(payload)) {
      throw new Error(`attachment ${label}: invalid base64 content`);
    }

    // For validated base64 the decoded length is exact arithmetic, so there is
    // no need to allocate a decoded buffer just to measure it.
    const sizeBytes = (b64.length / 4) * 3 - padding;
    if (sizeBytes > maxBytes) {
      throw new Error(
        `attachment ${label}: exceeds size limit (${sizeBytes} > ${maxBytes} bytes)`,
      );
    }

    images.push({
      type: "image",
      data: b64,
      mimeType: mime,
    });
  }

  return { message, images };
}
/**
* @deprecated Use parseMessageWithAttachments instead.
* This function converts images to markdown data URLs which Claude API cannot process as images.
*/
export function buildMessageWithAttachments( export function buildMessageWithAttachments(
message: string, message: string,
attachments: ChatAttachment[] | undefined, attachments: ChatAttachment[] | undefined,

View File

@@ -43,7 +43,10 @@ import {
isChatStopCommandText, isChatStopCommandText,
resolveChatRunExpiresAtMs, resolveChatRunExpiresAtMs,
} from "./chat-abort.js"; } from "./chat-abort.js";
import { buildMessageWithAttachments } from "./chat-attachments.js"; import {
type ChatImageContent,
parseMessageWithAttachments,
} from "./chat-attachments.js";
import { import {
ErrorCodes, ErrorCodes,
errorShape, errorShape,
@@ -793,32 +796,37 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) {
}; };
const stopCommand = isChatStopCommandText(p.message); const stopCommand = isChatStopCommandText(p.message);
const normalizedAttachments = const normalizedAttachments =
p.attachments?.map((a) => ({ p.attachments
type: typeof a?.type === "string" ? a.type : undefined, ?.map((a) => ({
mimeType: type: typeof a?.type === "string" ? a.type : undefined,
typeof a?.mimeType === "string" ? a.mimeType : undefined, mimeType:
fileName: typeof a?.mimeType === "string" ? a.mimeType : undefined,
typeof a?.fileName === "string" ? a.fileName : undefined, fileName:
content: typeof a?.fileName === "string" ? a.fileName : undefined,
typeof a?.content === "string" content:
? a.content typeof a?.content === "string"
: ArrayBuffer.isView(a?.content) ? a.content
? Buffer.from( : ArrayBuffer.isView(a?.content)
a.content.buffer, ? Buffer.from(
a.content.byteOffset, a.content.buffer,
a.content.byteLength, a.content.byteOffset,
).toString("base64") a.content.byteLength,
: undefined, ).toString("base64")
})) ?? []; : undefined,
}))
.filter((a) => a.content && a.mimeType) ?? [];
let messageWithAttachments = p.message; let parsedMessage = p.message;
let parsedImages: ChatImageContent[] = [];
if (normalizedAttachments.length > 0) { if (normalizedAttachments.length > 0) {
try { try {
messageWithAttachments = buildMessageWithAttachments( const parsed = parseMessageWithAttachments(
p.message, p.message,
normalizedAttachments, normalizedAttachments,
{ maxBytes: 5_000_000 }, { maxBytes: 5_000_000 },
); );
parsedMessage = parsed.message;
parsedImages = parsed.images;
} catch (err) { } catch (err) {
return { return {
ok: false, ok: false,
@@ -922,7 +930,8 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) {
}; };
void agentCommand( void agentCommand(
{ {
message: messageWithAttachments, message: parsedMessage,
images: parsedImages.length > 0 ? parsedImages : undefined,
sessionId, sessionId,
sessionKey: p.sessionKey, sessionKey: p.sessionKey,
runId: clientRunId, runId: clientRunId,

View File

@@ -13,7 +13,10 @@ import {
isChatStopCommandText, isChatStopCommandText,
resolveChatRunExpiresAtMs, resolveChatRunExpiresAtMs,
} from "../chat-abort.js"; } from "../chat-abort.js";
import { buildMessageWithAttachments } from "../chat-attachments.js"; import {
type ChatImageContent,
parseMessageWithAttachments,
} from "../chat-attachments.js";
import { import {
ErrorCodes, ErrorCodes,
errorShape, errorShape,
@@ -181,29 +184,34 @@ export const chatHandlers: GatewayRequestHandlers = {
}; };
const stopCommand = isChatStopCommandText(p.message); const stopCommand = isChatStopCommandText(p.message);
const normalizedAttachments = const normalizedAttachments =
p.attachments?.map((a) => ({ p.attachments
type: typeof a?.type === "string" ? a.type : undefined, ?.map((a) => ({
mimeType: typeof a?.mimeType === "string" ? a.mimeType : undefined, type: typeof a?.type === "string" ? a.type : undefined,
fileName: typeof a?.fileName === "string" ? a.fileName : undefined, mimeType: typeof a?.mimeType === "string" ? a.mimeType : undefined,
content: fileName: typeof a?.fileName === "string" ? a.fileName : undefined,
typeof a?.content === "string" content:
? a.content typeof a?.content === "string"
: ArrayBuffer.isView(a?.content) ? a.content
? Buffer.from( : ArrayBuffer.isView(a?.content)
a.content.buffer, ? Buffer.from(
a.content.byteOffset, a.content.buffer,
a.content.byteLength, a.content.byteOffset,
).toString("base64") a.content.byteLength,
: undefined, ).toString("base64")
})) ?? []; : undefined,
let messageWithAttachments = p.message; }))
.filter((a) => a.content && a.mimeType) ?? [];
let parsedMessage = p.message;
let parsedImages: ChatImageContent[] = [];
if (normalizedAttachments.length > 0) { if (normalizedAttachments.length > 0) {
try { try {
messageWithAttachments = buildMessageWithAttachments( const parsed = parseMessageWithAttachments(
p.message, p.message,
normalizedAttachments, normalizedAttachments,
{ maxBytes: 5_000_000 }, { maxBytes: 5_000_000 },
); );
parsedMessage = parsed.message;
parsedImages = parsed.images;
} catch (err) { } catch (err) {
respond( respond(
false, false,
@@ -312,7 +320,8 @@ export const chatHandlers: GatewayRequestHandlers = {
void agentCommand( void agentCommand(
{ {
message: messageWithAttachments, message: parsedMessage,
images: parsedImages.length > 0 ? parsedImages : undefined,
sessionId, sessionId,
sessionKey: p.sessionKey, sessionKey: p.sessionKey,
runId: clientRunId, runId: clientRunId,