186 lines
5.4 KiB
TypeScript
186 lines
5.4 KiB
TypeScript
import { detectMime } from "../media/mime.js";
|
|
|
|
export type ChatAttachment = {
|
|
type?: string;
|
|
mimeType?: string;
|
|
fileName?: string;
|
|
content?: unknown;
|
|
};
|
|
|
|
export type ChatImageContent = {
|
|
type: "image";
|
|
data: string;
|
|
mimeType: string;
|
|
};
|
|
|
|
export type ParsedMessageWithImages = {
|
|
message: string;
|
|
images: ChatImageContent[];
|
|
};
|
|
|
|
type AttachmentLog = {
|
|
warn: (message: string) => void;
|
|
};
|
|
|
|
function normalizeMime(mime?: string): string | undefined {
|
|
if (!mime) return undefined;
|
|
const cleaned = mime.split(";")[0]?.trim().toLowerCase();
|
|
return cleaned || undefined;
|
|
}
|
|
|
|
async function sniffMimeFromBase64(
|
|
base64: string,
|
|
): Promise<string | undefined> {
|
|
const trimmed = base64.trim();
|
|
if (!trimmed) return undefined;
|
|
|
|
const take = Math.min(256, trimmed.length);
|
|
const sliceLen = take - (take % 4);
|
|
if (sliceLen < 8) return undefined;
|
|
|
|
try {
|
|
const head = Buffer.from(trimmed.slice(0, sliceLen), "base64");
|
|
return await detectMime({ buffer: head });
|
|
} catch {
|
|
return undefined;
|
|
}
|
|
}
|
|
|
|
function isImageMime(mime?: string): boolean {
|
|
return typeof mime === "string" && mime.startsWith("image/");
|
|
}
|
|
|
|
/**
|
|
* Parse attachments and extract images as structured content blocks.
|
|
* Returns the message text and an array of image content blocks
|
|
* compatible with Claude API's image format.
|
|
*/
|
|
export async function parseMessageWithAttachments(
|
|
message: string,
|
|
attachments: ChatAttachment[] | undefined,
|
|
opts?: { maxBytes?: number; log?: AttachmentLog },
|
|
): Promise<ParsedMessageWithImages> {
|
|
const maxBytes = opts?.maxBytes ?? 5_000_000; // 5 MB
|
|
const log = opts?.log;
|
|
if (!attachments || attachments.length === 0) {
|
|
return { message, images: [] };
|
|
}
|
|
|
|
const images: ChatImageContent[] = [];
|
|
|
|
for (const [idx, att] of attachments.entries()) {
|
|
if (!att) continue;
|
|
const mime = att.mimeType ?? "";
|
|
const content = att.content;
|
|
const label = att.fileName || att.type || `attachment-${idx + 1}`;
|
|
|
|
if (typeof content !== "string") {
|
|
throw new Error(`attachment ${label}: content must be base64 string`);
|
|
}
|
|
|
|
let sizeBytes = 0;
|
|
let b64 = content.trim();
|
|
// Strip data URL prefix if present (e.g., "data:image/jpeg;base64,...")
|
|
const dataUrlMatch = /^data:[^;]+;base64,(.*)$/.exec(b64);
|
|
if (dataUrlMatch) {
|
|
b64 = dataUrlMatch[1];
|
|
}
|
|
// Basic base64 sanity: length multiple of 4 and charset check.
|
|
if (b64.length % 4 !== 0 || /[^A-Za-z0-9+/=]/.test(b64)) {
|
|
throw new Error(`attachment ${label}: invalid base64 content`);
|
|
}
|
|
try {
|
|
sizeBytes = Buffer.from(b64, "base64").byteLength;
|
|
} catch {
|
|
throw new Error(`attachment ${label}: invalid base64 content`);
|
|
}
|
|
if (sizeBytes <= 0 || sizeBytes > maxBytes) {
|
|
throw new Error(
|
|
`attachment ${label}: exceeds size limit (${sizeBytes} > ${maxBytes} bytes)`,
|
|
);
|
|
}
|
|
|
|
const providedMime = normalizeMime(mime);
|
|
const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
|
|
if (sniffedMime && !isImageMime(sniffedMime)) {
|
|
log?.warn(
|
|
`attachment ${label}: detected non-image (${sniffedMime}), dropping`,
|
|
);
|
|
continue;
|
|
}
|
|
if (!sniffedMime && !isImageMime(providedMime)) {
|
|
log?.warn(
|
|
`attachment ${label}: unable to detect image mime type, dropping`,
|
|
);
|
|
continue;
|
|
}
|
|
if (sniffedMime && providedMime && sniffedMime !== providedMime) {
|
|
log?.warn(
|
|
`attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`,
|
|
);
|
|
}
|
|
|
|
images.push({
|
|
type: "image",
|
|
data: b64,
|
|
mimeType: sniffedMime ?? providedMime ?? mime,
|
|
});
|
|
}
|
|
|
|
return { message, images };
|
|
}
|
|
|
|
/**
|
|
* @deprecated Use parseMessageWithAttachments instead.
|
|
* This function converts images to markdown data URLs which Claude API cannot process as images.
|
|
*/
|
|
export function buildMessageWithAttachments(
|
|
message: string,
|
|
attachments: ChatAttachment[] | undefined,
|
|
opts?: { maxBytes?: number },
|
|
): string {
|
|
const maxBytes = opts?.maxBytes ?? 2_000_000; // 2 MB
|
|
if (!attachments || attachments.length === 0) return message;
|
|
|
|
const blocks: string[] = [];
|
|
|
|
for (const [idx, att] of attachments.entries()) {
|
|
if (!att) continue;
|
|
const mime = att.mimeType ?? "";
|
|
const content = att.content;
|
|
const label = att.fileName || att.type || `attachment-${idx + 1}`;
|
|
|
|
if (typeof content !== "string") {
|
|
throw new Error(`attachment ${label}: content must be base64 string`);
|
|
}
|
|
if (!mime.startsWith("image/")) {
|
|
throw new Error(`attachment ${label}: only image/* supported`);
|
|
}
|
|
|
|
let sizeBytes = 0;
|
|
const b64 = content.trim();
|
|
// Basic base64 sanity: length multiple of 4 and charset check.
|
|
if (b64.length % 4 !== 0 || /[^A-Za-z0-9+/=]/.test(b64)) {
|
|
throw new Error(`attachment ${label}: invalid base64 content`);
|
|
}
|
|
try {
|
|
sizeBytes = Buffer.from(b64, "base64").byteLength;
|
|
} catch {
|
|
throw new Error(`attachment ${label}: invalid base64 content`);
|
|
}
|
|
if (sizeBytes <= 0 || sizeBytes > maxBytes) {
|
|
throw new Error(
|
|
`attachment ${label}: exceeds size limit (${sizeBytes} > ${maxBytes} bytes)`,
|
|
);
|
|
}
|
|
|
|
const safeLabel = label.replace(/\s+/g, "_");
|
|
const dataUrl = ``;
|
|
blocks.push(dataUrl);
|
|
}
|
|
|
|
if (blocks.length === 0) return message;
|
|
const separator = message.trim().length > 0 ? "\n\n" : "";
|
|
return `${message}${separator}${blocks.join("\n\n")}`;
|
|
}
|