fix: enable image attachments in chat messages for Claude API
Images were previously converted to markdown data URLs, which the Claude API
treats as plain text rather than as actual images.
Changes:
- Add parseMessageWithAttachments() that returns {message, images[]}
- Pass images through the stack to session.prompt() as content blocks
- Filter null/empty attachments before parsing
- Strip data URL prefix if client sends it
This enables iOS and other clients to send images that Claude can actually see.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
0279f09459
commit
c4e76eb635
@@ -8,7 +8,12 @@ import type {
|
|||||||
AgentTool,
|
AgentTool,
|
||||||
ThinkingLevel,
|
ThinkingLevel,
|
||||||
} from "@mariozechner/pi-agent-core";
|
} from "@mariozechner/pi-agent-core";
|
||||||
import type { Api, AssistantMessage, Model } from "@mariozechner/pi-ai";
|
import type {
|
||||||
|
Api,
|
||||||
|
AssistantMessage,
|
||||||
|
ImageContent,
|
||||||
|
Model,
|
||||||
|
} from "@mariozechner/pi-ai";
|
||||||
import {
|
import {
|
||||||
createAgentSession,
|
createAgentSession,
|
||||||
discoverAuthStorage,
|
discoverAuthStorage,
|
||||||
@@ -1009,6 +1014,8 @@ export async function runEmbeddedPiAgent(params: {
|
|||||||
config?: ClawdbotConfig;
|
config?: ClawdbotConfig;
|
||||||
skillsSnapshot?: SkillSnapshot;
|
skillsSnapshot?: SkillSnapshot;
|
||||||
prompt: string;
|
prompt: string;
|
||||||
|
/** Optional image attachments for multimodal messages. */
|
||||||
|
images?: ImageContent[];
|
||||||
provider?: string;
|
provider?: string;
|
||||||
model?: string;
|
model?: string;
|
||||||
authProfileId?: string;
|
authProfileId?: string;
|
||||||
@@ -1434,7 +1441,9 @@ export async function runEmbeddedPiAgent(params: {
|
|||||||
`embedded run prompt start: runId=${params.runId} sessionId=${params.sessionId}`,
|
`embedded run prompt start: runId=${params.runId} sessionId=${params.sessionId}`,
|
||||||
);
|
);
|
||||||
try {
|
try {
|
||||||
await session.prompt(params.prompt);
|
await session.prompt(params.prompt, {
|
||||||
|
images: params.images,
|
||||||
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
promptError = err;
|
promptError = err;
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
@@ -66,8 +66,17 @@ import {
|
|||||||
} from "../utils/message-provider.js";
|
} from "../utils/message-provider.js";
|
||||||
import { normalizeE164 } from "../utils.js";
|
import { normalizeE164 } from "../utils.js";
|
||||||
|
|
||||||
|
/** Image content block for Claude API multimodal messages. */
|
||||||
|
type ImageContent = {
|
||||||
|
type: "image";
|
||||||
|
data: string;
|
||||||
|
mimeType: string;
|
||||||
|
};
|
||||||
|
|
||||||
type AgentCommandOpts = {
|
type AgentCommandOpts = {
|
||||||
message: string;
|
message: string;
|
||||||
|
/** Optional image attachments for multimodal messages. */
|
||||||
|
images?: ImageContent[];
|
||||||
to?: string;
|
to?: string;
|
||||||
sessionId?: string;
|
sessionId?: string;
|
||||||
sessionKey?: string;
|
sessionKey?: string;
|
||||||
@@ -450,6 +459,7 @@ export async function agentCommand(
|
|||||||
config: cfg,
|
config: cfg,
|
||||||
skillsSnapshot,
|
skillsSnapshot,
|
||||||
prompt: body,
|
prompt: body,
|
||||||
|
images: opts.images,
|
||||||
provider: providerOverride,
|
provider: providerOverride,
|
||||||
model: modelOverride,
|
model: modelOverride,
|
||||||
authProfileId: sessionEntry?.authProfileOverride,
|
authProfileId: sessionEntry?.authProfileOverride,
|
||||||
|
|||||||
@@ -5,6 +5,83 @@ export type ChatAttachment = {
|
|||||||
content?: unknown;
|
content?: unknown;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** A single base64-encoded image, shaped as an image content block. */
export type ChatImageContent = {
  type: "image";
  /** Raw base64 payload (no `data:` URL prefix). */
  data: string;
  /** MIME type as supplied by the client, e.g. `image/png`. */
  mimeType: string;
};

/** Result of {@link parseMessageWithAttachments}. */
export type ParsedMessageWithImages = {
  /** The message text, returned unchanged. */
  message: string;
  /** One entry per accepted attachment, in input order. */
  images: ChatImageContent[];
};

/**
 * Validate chat attachments and turn them into structured image content
 * blocks instead of embedding them in the message text.
 *
 * Null/undefined entries in `attachments` are skipped. Every remaining entry
 * must be an `image/*` attachment whose `content` is a base64 string,
 * optionally wrapped in a `data:<mime>[;param...];base64,` URL prefix (the
 * prefix is stripped; the attachment's own `mimeType` is what is reported).
 *
 * @param message - Message text; passed through untouched.
 * @param attachments - Attachments to validate; `undefined`/empty yields no images.
 * @param opts - `maxBytes` caps the decoded size of each attachment
 *   (default 5,000,000 bytes).
 * @returns The original message plus the extracted image blocks.
 * @throws Error if an attachment's content is not a string, is not `image/*`,
 *   is empty, is not well-formed padded base64, or exceeds `maxBytes`.
 */
export function parseMessageWithAttachments(
  message: string,
  attachments: ChatAttachment[] | undefined,
  opts?: { maxBytes?: number },
): ParsedMessageWithImages {
  const maxBytes = opts?.maxBytes ?? 5_000_000; // 5 MB
  if (!attachments || attachments.length === 0) {
    return { message, images: [] };
  }

  // Hoisted so the patterns are not rebuilt for every attachment.
  // Allows extra media-type parameters before ";base64," (e.g. ";charset=utf-8").
  const dataUrlPrefix = /^data:[^,]*;base64,/i;
  // Standard alphabet, with at most two '=' and only at the very end.
  const paddedBase64 = /^[A-Za-z0-9+/]*={0,2}$/;

  const images: ChatImageContent[] = [];

  for (const [idx, att] of attachments.entries()) {
    if (!att) continue;
    const mime = att.mimeType ?? "";
    const content = att.content;
    // `||` on purpose: an empty fileName/type should fall through to the next label.
    const label = att.fileName || att.type || `attachment-${idx + 1}`;

    if (typeof content !== "string") {
      throw new Error(`attachment ${label}: content must be base64 string`);
    }
    if (!mime.startsWith("image/")) {
      throw new Error(`attachment ${label}: only image/* supported`);
    }

    // Strip data URL prefix if present (e.g., "data:image/jpeg;base64,...").
    const b64 = content.trim().replace(dataUrlPrefix, "");

    if (b64.length === 0) {
      // Reported separately so an empty payload is not described as "too large".
      throw new Error(`attachment ${label}: content is empty`);
    }
    if (b64.length % 4 !== 0 || !paddedBase64.test(b64)) {
      throw new Error(`attachment ${label}: invalid base64 content`);
    }

    // Decoded size follows directly from the validated length and padding,
    // so the payload never has to be decoded just to be measured.
    const padding = b64.endsWith("==") ? 2 : b64.endsWith("=") ? 1 : 0;
    const sizeBytes = (b64.length / 4) * 3 - padding;
    if (sizeBytes > maxBytes) {
      throw new Error(
        `attachment ${label}: exceeds size limit (${sizeBytes} > ${maxBytes} bytes)`,
      );
    }

    images.push({
      type: "image",
      data: b64,
      mimeType: mime,
    });
  }

  return { message, images };
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use parseMessageWithAttachments instead.
|
||||||
|
* This function converts images to markdown data URLs which Claude API cannot process as images.
|
||||||
|
*/
|
||||||
export function buildMessageWithAttachments(
|
export function buildMessageWithAttachments(
|
||||||
message: string,
|
message: string,
|
||||||
attachments: ChatAttachment[] | undefined,
|
attachments: ChatAttachment[] | undefined,
|
||||||
|
|||||||
@@ -43,7 +43,10 @@ import {
|
|||||||
isChatStopCommandText,
|
isChatStopCommandText,
|
||||||
resolveChatRunExpiresAtMs,
|
resolveChatRunExpiresAtMs,
|
||||||
} from "./chat-abort.js";
|
} from "./chat-abort.js";
|
||||||
import { buildMessageWithAttachments } from "./chat-attachments.js";
|
import {
|
||||||
|
type ChatImageContent,
|
||||||
|
parseMessageWithAttachments,
|
||||||
|
} from "./chat-attachments.js";
|
||||||
import {
|
import {
|
||||||
ErrorCodes,
|
ErrorCodes,
|
||||||
errorShape,
|
errorShape,
|
||||||
@@ -793,32 +796,37 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) {
|
|||||||
};
|
};
|
||||||
const stopCommand = isChatStopCommandText(p.message);
|
const stopCommand = isChatStopCommandText(p.message);
|
||||||
const normalizedAttachments =
|
const normalizedAttachments =
|
||||||
p.attachments?.map((a) => ({
|
p.attachments
|
||||||
type: typeof a?.type === "string" ? a.type : undefined,
|
?.map((a) => ({
|
||||||
mimeType:
|
type: typeof a?.type === "string" ? a.type : undefined,
|
||||||
typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
mimeType:
|
||||||
fileName:
|
typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
||||||
typeof a?.fileName === "string" ? a.fileName : undefined,
|
fileName:
|
||||||
content:
|
typeof a?.fileName === "string" ? a.fileName : undefined,
|
||||||
typeof a?.content === "string"
|
content:
|
||||||
? a.content
|
typeof a?.content === "string"
|
||||||
: ArrayBuffer.isView(a?.content)
|
? a.content
|
||||||
? Buffer.from(
|
: ArrayBuffer.isView(a?.content)
|
||||||
a.content.buffer,
|
? Buffer.from(
|
||||||
a.content.byteOffset,
|
a.content.buffer,
|
||||||
a.content.byteLength,
|
a.content.byteOffset,
|
||||||
).toString("base64")
|
a.content.byteLength,
|
||||||
: undefined,
|
).toString("base64")
|
||||||
})) ?? [];
|
: undefined,
|
||||||
|
}))
|
||||||
|
.filter((a) => a.content && a.mimeType) ?? [];
|
||||||
|
|
||||||
let messageWithAttachments = p.message;
|
let parsedMessage = p.message;
|
||||||
|
let parsedImages: ChatImageContent[] = [];
|
||||||
if (normalizedAttachments.length > 0) {
|
if (normalizedAttachments.length > 0) {
|
||||||
try {
|
try {
|
||||||
messageWithAttachments = buildMessageWithAttachments(
|
const parsed = parseMessageWithAttachments(
|
||||||
p.message,
|
p.message,
|
||||||
normalizedAttachments,
|
normalizedAttachments,
|
||||||
{ maxBytes: 5_000_000 },
|
{ maxBytes: 5_000_000 },
|
||||||
);
|
);
|
||||||
|
parsedMessage = parsed.message;
|
||||||
|
parsedImages = parsed.images;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
return {
|
return {
|
||||||
ok: false,
|
ok: false,
|
||||||
@@ -922,7 +930,8 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) {
|
|||||||
};
|
};
|
||||||
void agentCommand(
|
void agentCommand(
|
||||||
{
|
{
|
||||||
message: messageWithAttachments,
|
message: parsedMessage,
|
||||||
|
images: parsedImages.length > 0 ? parsedImages : undefined,
|
||||||
sessionId,
|
sessionId,
|
||||||
sessionKey: p.sessionKey,
|
sessionKey: p.sessionKey,
|
||||||
runId: clientRunId,
|
runId: clientRunId,
|
||||||
|
|||||||
@@ -13,7 +13,10 @@ import {
|
|||||||
isChatStopCommandText,
|
isChatStopCommandText,
|
||||||
resolveChatRunExpiresAtMs,
|
resolveChatRunExpiresAtMs,
|
||||||
} from "../chat-abort.js";
|
} from "../chat-abort.js";
|
||||||
import { buildMessageWithAttachments } from "../chat-attachments.js";
|
import {
|
||||||
|
type ChatImageContent,
|
||||||
|
parseMessageWithAttachments,
|
||||||
|
} from "../chat-attachments.js";
|
||||||
import {
|
import {
|
||||||
ErrorCodes,
|
ErrorCodes,
|
||||||
errorShape,
|
errorShape,
|
||||||
@@ -181,29 +184,34 @@ export const chatHandlers: GatewayRequestHandlers = {
|
|||||||
};
|
};
|
||||||
const stopCommand = isChatStopCommandText(p.message);
|
const stopCommand = isChatStopCommandText(p.message);
|
||||||
const normalizedAttachments =
|
const normalizedAttachments =
|
||||||
p.attachments?.map((a) => ({
|
p.attachments
|
||||||
type: typeof a?.type === "string" ? a.type : undefined,
|
?.map((a) => ({
|
||||||
mimeType: typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
type: typeof a?.type === "string" ? a.type : undefined,
|
||||||
fileName: typeof a?.fileName === "string" ? a.fileName : undefined,
|
mimeType: typeof a?.mimeType === "string" ? a.mimeType : undefined,
|
||||||
content:
|
fileName: typeof a?.fileName === "string" ? a.fileName : undefined,
|
||||||
typeof a?.content === "string"
|
content:
|
||||||
? a.content
|
typeof a?.content === "string"
|
||||||
: ArrayBuffer.isView(a?.content)
|
? a.content
|
||||||
? Buffer.from(
|
: ArrayBuffer.isView(a?.content)
|
||||||
a.content.buffer,
|
? Buffer.from(
|
||||||
a.content.byteOffset,
|
a.content.buffer,
|
||||||
a.content.byteLength,
|
a.content.byteOffset,
|
||||||
).toString("base64")
|
a.content.byteLength,
|
||||||
: undefined,
|
).toString("base64")
|
||||||
})) ?? [];
|
: undefined,
|
||||||
let messageWithAttachments = p.message;
|
}))
|
||||||
|
.filter((a) => a.content && a.mimeType) ?? [];
|
||||||
|
let parsedMessage = p.message;
|
||||||
|
let parsedImages: ChatImageContent[] = [];
|
||||||
if (normalizedAttachments.length > 0) {
|
if (normalizedAttachments.length > 0) {
|
||||||
try {
|
try {
|
||||||
messageWithAttachments = buildMessageWithAttachments(
|
const parsed = parseMessageWithAttachments(
|
||||||
p.message,
|
p.message,
|
||||||
normalizedAttachments,
|
normalizedAttachments,
|
||||||
{ maxBytes: 5_000_000 },
|
{ maxBytes: 5_000_000 },
|
||||||
);
|
);
|
||||||
|
parsedMessage = parsed.message;
|
||||||
|
parsedImages = parsed.images;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
respond(
|
respond(
|
||||||
false,
|
false,
|
||||||
@@ -312,7 +320,8 @@ export const chatHandlers: GatewayRequestHandlers = {
|
|||||||
|
|
||||||
void agentCommand(
|
void agentCommand(
|
||||||
{
|
{
|
||||||
message: messageWithAttachments,
|
message: parsedMessage,
|
||||||
|
images: parsedImages.length > 0 ? parsedImages : undefined,
|
||||||
sessionId,
|
sessionId,
|
||||||
sessionKey: p.sessionKey,
|
sessionKey: p.sessionKey,
|
||||||
runId: clientRunId,
|
runId: clientRunId,
|
||||||
|
|||||||
Reference in New Issue
Block a user