fix: clamp tool images to 5MB
This commit is contained in:
@@ -12,6 +12,7 @@
|
|||||||
- iOS/Android nodes: status pill now surfaces camera activity instead of overlay toasts.
|
- iOS/Android nodes: status pill now surfaces camera activity instead of overlay toasts.
|
||||||
- iOS/Android nodes: camera snaps recompress to keep base64 payloads under 5 MB.
|
- iOS/Android nodes: camera snaps recompress to keep base64 payloads under 5 MB.
|
||||||
- CLI: avoid spurious gateway close errors after successful request/response cycles.
|
- CLI: avoid spurious gateway close errors after successful request/response cycles.
|
||||||
|
- Agent runtime: clamp tool-result images to the 5 MB Anthropic limit to avoid hard request rejections.
|
||||||
- Tests: add Swift Testing coverage for camera errors and Kotest coverage for Android bridge endpoints.
|
- Tests: add Swift Testing coverage for camera errors and Kotest coverage for Android bridge endpoints.
|
||||||
|
|
||||||
## 2.0.0-beta4 — 2025-12-27
|
## 2.0.0-beta4 — 2025-12-27
|
||||||
|
|||||||
35
src/agents/tool-images.test.ts
Normal file
35
src/agents/tool-images.test.ts
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import sharp from "sharp";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { sanitizeContentBlocksImages } from "./tool-images.js";
|
||||||
|
|
||||||
|
describe("tool image sanitizing", () => {
|
||||||
|
it("shrinks oversized images to <=5MB", async () => {
|
||||||
|
const width = 2800;
|
||||||
|
const height = 2800;
|
||||||
|
const raw = Buffer.alloc(width * height * 3, 0xff);
|
||||||
|
const bigPng = await sharp(raw, {
|
||||||
|
raw: { width, height, channels: 3 },
|
||||||
|
})
|
||||||
|
.png({ compressionLevel: 0 })
|
||||||
|
.toBuffer();
|
||||||
|
expect(bigPng.byteLength).toBeGreaterThan(5 * 1024 * 1024);
|
||||||
|
|
||||||
|
const blocks = [
|
||||||
|
{
|
||||||
|
type: "image" as const,
|
||||||
|
data: bigPng.toString("base64"),
|
||||||
|
mimeType: "image/png",
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const out = await sanitizeContentBlocksImages(blocks, "test");
|
||||||
|
const image = out.find((b) => b.type === "image");
|
||||||
|
if (!image || image.type !== "image") {
|
||||||
|
throw new Error("expected image block");
|
||||||
|
}
|
||||||
|
const size = Buffer.from(image.data, "base64").byteLength;
|
||||||
|
expect(size).toBeLessThanOrEqual(5 * 1024 * 1024);
|
||||||
|
expect(image.mimeType).toBe("image/jpeg");
|
||||||
|
}, 20_000);
|
||||||
|
});
|
||||||
@@ -1,19 +1,19 @@
|
|||||||
import type { AgentToolResult } from "@mariozechner/pi-ai";
|
import type { AgentToolResult } from "@mariozechner/pi-ai";
|
||||||
|
|
||||||
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
|
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
|
||||||
import { detectMime } from "../media/mime.js";
|
|
||||||
|
|
||||||
type ToolContentBlock = AgentToolResult<unknown>["content"][number];
|
type ToolContentBlock = AgentToolResult<unknown>["content"][number];
|
||||||
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
||||||
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
||||||
|
|
||||||
// Anthropic Messages API limitation (observed in Clawdis sessions):
|
// Anthropic Messages API limitations (observed in Clawdis sessions):
|
||||||
// When sending many images in a single request (e.g. via session history + tool results),
|
// - Images over ~2000px per side can fail in multi-image requests.
|
||||||
// Anthropic rejects any image where *either* dimension exceeds 2000px.
|
// - Images over 5MB are rejected by the API.
|
||||||
//
|
//
|
||||||
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
|
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
|
||||||
// all base64 image blocks above this limit while preserving aspect ratio.
|
// and recompress base64 image blocks when they exceed these limits.
|
||||||
const MAX_IMAGE_DIMENSION_PX = 2000;
|
const MAX_IMAGE_DIMENSION_PX = 2000;
|
||||||
|
const MAX_IMAGE_BYTES = 5 * 1024 * 1024;
|
||||||
|
|
||||||
function isImageBlock(block: unknown): block is ImageContentBlock {
|
function isImageBlock(block: unknown): block is ImageContentBlock {
|
||||||
if (!block || typeof block !== "object") return false;
|
if (!block || typeof block !== "object") return false;
|
||||||
@@ -35,66 +35,75 @@ async function resizeImageBase64IfNeeded(params: {
|
|||||||
base64: string;
|
base64: string;
|
||||||
mimeType: string;
|
mimeType: string;
|
||||||
maxDimensionPx: number;
|
maxDimensionPx: number;
|
||||||
|
maxBytes: number;
|
||||||
}): Promise<{ base64: string; mimeType: string; resized: boolean }> {
|
}): Promise<{ base64: string; mimeType: string; resized: boolean }> {
|
||||||
const buf = Buffer.from(params.base64, "base64");
|
const buf = Buffer.from(params.base64, "base64");
|
||||||
const meta = await getImageMetadata(buf);
|
const meta = await getImageMetadata(buf);
|
||||||
const width = meta?.width;
|
const width = meta?.width;
|
||||||
const height = meta?.height;
|
const height = meta?.height;
|
||||||
if (
|
const overBytes = buf.byteLength > params.maxBytes;
|
||||||
typeof width !== "number" ||
|
const maxDim = Math.max(width ?? 0, height ?? 0);
|
||||||
typeof height !== "number" ||
|
if (typeof width !== "number" || typeof height !== "number") {
|
||||||
(width <= params.maxDimensionPx && height <= params.maxDimensionPx)
|
if (!overBytes) {
|
||||||
|
return {
|
||||||
|
base64: params.base64,
|
||||||
|
mimeType: params.mimeType,
|
||||||
|
resized: false,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} else if (
|
||||||
|
!overBytes &&
|
||||||
|
width <= params.maxDimensionPx &&
|
||||||
|
height <= params.maxDimensionPx
|
||||||
) {
|
) {
|
||||||
return { base64: params.base64, mimeType: params.mimeType, resized: false };
|
return { base64: params.base64, mimeType: params.mimeType, resized: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
const mime = params.mimeType.toLowerCase();
|
const qualities = [85, 75, 65, 55, 45, 35];
|
||||||
let out: Buffer;
|
const sideStart = maxDim > 0 ? Math.min(params.maxDimensionPx, maxDim) : params.maxDimensionPx;
|
||||||
try {
|
const sideGrid = [sideStart, 1800, 1600, 1400, 1200, 1000, 800]
|
||||||
const mod = (await import("sharp")) as unknown as {
|
.map((v) => Math.min(params.maxDimensionPx, v))
|
||||||
default?: typeof import("sharp");
|
.filter((v, i, arr) => v > 0 && arr.indexOf(v) === i)
|
||||||
};
|
.sort((a, b) => b - a);
|
||||||
const sharp = mod.default ?? (mod as unknown as typeof import("sharp"));
|
|
||||||
const img = sharp(buf, { failOnError: false }).resize({
|
let smallest: { buffer: Buffer; size: number } | null = null;
|
||||||
width: params.maxDimensionPx,
|
for (const side of sideGrid) {
|
||||||
height: params.maxDimensionPx,
|
for (const quality of qualities) {
|
||||||
fit: "inside",
|
const out = await resizeToJpeg({
|
||||||
withoutEnlargement: true,
|
buffer: buf,
|
||||||
});
|
maxSide: side,
|
||||||
if (mime === "image/jpeg" || mime === "image/jpg") {
|
quality,
|
||||||
out = await img.jpeg({ quality: 85 }).toBuffer();
|
withoutEnlargement: true,
|
||||||
} else if (mime === "image/webp") {
|
});
|
||||||
out = await img.webp({ quality: 85 }).toBuffer();
|
if (!smallest || out.byteLength < smallest.size) {
|
||||||
} else if (mime === "image/png") {
|
smallest = { buffer: out, size: out.byteLength };
|
||||||
out = await img.png().toBuffer();
|
}
|
||||||
} else {
|
if (out.byteLength <= params.maxBytes) {
|
||||||
out = await img.png().toBuffer();
|
return {
|
||||||
|
base64: out.toString("base64"),
|
||||||
|
mimeType: "image/jpeg",
|
||||||
|
resized: true,
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch {
|
|
||||||
// Bun can't load sharp native addons. Fall back to a JPEG conversion.
|
|
||||||
out = await resizeToJpeg({
|
|
||||||
buffer: buf,
|
|
||||||
maxSide: params.maxDimensionPx,
|
|
||||||
quality: 85,
|
|
||||||
withoutEnlargement: true,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const sniffed = await detectMime({ buffer: out.slice(0, 256) });
|
const best = smallest?.buffer ?? buf;
|
||||||
const nextMime = sniffed?.startsWith("image/") ? sniffed : params.mimeType;
|
const maxMb = (params.maxBytes / (1024 * 1024)).toFixed(0);
|
||||||
|
const gotMb = (best.byteLength / (1024 * 1024)).toFixed(2);
|
||||||
return { base64: out.toString("base64"), mimeType: nextMime, resized: true };
|
throw new Error(`Image could not be reduced below ${maxMb}MB (got ${gotMb}MB)`);
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function sanitizeContentBlocksImages(
|
export async function sanitizeContentBlocksImages(
|
||||||
blocks: ToolContentBlock[],
|
blocks: ToolContentBlock[],
|
||||||
label: string,
|
label: string,
|
||||||
opts: { maxDimensionPx?: number } = {},
|
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
|
||||||
): Promise<ToolContentBlock[]> {
|
): Promise<ToolContentBlock[]> {
|
||||||
const maxDimensionPx = Math.max(
|
const maxDimensionPx = Math.max(
|
||||||
opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX,
|
opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX,
|
||||||
1,
|
1,
|
||||||
);
|
);
|
||||||
|
const maxBytes = Math.max(opts.maxBytes ?? MAX_IMAGE_BYTES, 1);
|
||||||
const out: ToolContentBlock[] = [];
|
const out: ToolContentBlock[] = [];
|
||||||
|
|
||||||
for (const block of blocks) {
|
for (const block of blocks) {
|
||||||
@@ -117,6 +126,7 @@ export async function sanitizeContentBlocksImages(
|
|||||||
base64: data,
|
base64: data,
|
||||||
mimeType: block.mimeType,
|
mimeType: block.mimeType,
|
||||||
maxDimensionPx,
|
maxDimensionPx,
|
||||||
|
maxBytes,
|
||||||
});
|
});
|
||||||
out.push({ ...block, data: resized.base64, mimeType: resized.mimeType });
|
out.push({ ...block, data: resized.base64, mimeType: resized.mimeType });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -133,7 +143,7 @@ export async function sanitizeContentBlocksImages(
|
|||||||
export async function sanitizeToolResultImages(
|
export async function sanitizeToolResultImages(
|
||||||
result: AgentToolResult<unknown>,
|
result: AgentToolResult<unknown>,
|
||||||
label: string,
|
label: string,
|
||||||
opts: { maxDimensionPx?: number } = {},
|
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
|
||||||
): Promise<AgentToolResult<unknown>> {
|
): Promise<AgentToolResult<unknown>> {
|
||||||
const content = Array.isArray(result.content) ? result.content : [];
|
const content = Array.isArray(result.content) ? result.content : [];
|
||||||
if (!content.some((b) => isImageBlock(b) || isTextBlock(b))) return result;
|
if (!content.some((b) => isImageBlock(b) || isTextBlock(b))) return result;
|
||||||
|
|||||||
Reference in New Issue
Block a user