From 9c06689569edb5a5cf68fd349bcd64c805800bee Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 18 Jan 2026 15:19:25 +0000 Subject: [PATCH] fix: sanitize oversized image payloads --- CHANGELOG.md | 1 + ...ded-helpers.classifyfailoverreason.test.ts | 5 ++ ...dded-helpers.image-dimension-error.test.ts | 16 +++++ ...lpers.iscloudcodeassistformaterror.test.ts | 5 ++ src/agents/pi-embedded-helpers.ts | 2 + src/agents/pi-embedded-helpers/errors.ts | 31 ++++++++- src/agents/pi-embedded-runner/run.ts | 21 ++++++ src/agents/pi-embedded-runner/run/images.ts | 25 ++++++- src/agents/tool-images.test.ts | 52 +++++++++++++- src/agents/tool-images.ts | 67 +++++++++++++++---- 10 files changed, 208 insertions(+), 17 deletions(-) create mode 100644 src/agents/pi-embedded-helpers.image-dimension-error.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 312ea5d99..a3b627d1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.clawd.bot ### Fixes - Auth profiles: keep auto-pinned preference while allowing rotation on failover; user pins stay locked. (#1138) — thanks @cheeeee. +- Agents: sanitize oversized image payloads before send and surface image-dimension errors. - macOS: avoid touching launchd in Remote over SSH so quitting the app no longer disables the remote gateway. (#1105) - Memory: index atomically so failed reindex preserves the previous memory database. (#1151) - Memory: avoid sqlite-vec unique constraint failures when reindexing duplicate chunk ids. (#1151) diff --git a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts index d9b8fb1c6..bb449a6e4 100644 --- a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts +++ b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts @@ -26,6 +26,11 @@ describe("classifyFailoverReason", () => { expect(classifyFailoverReason("deadline exceeded")).toBe("timeout"); expect(classifyFailoverReason("string should match pattern")).toBe("format"); expect(classifyFailoverReason("bad request")).toBeNull(); + expect( + classifyFailoverReason( + "messages.84.content.1.image.source.base64.data: At least one of the image dimensions exceed max allowed size for many-image requests: 2000 pixels", + ), + ).toBeNull(); }); it("classifies OpenAI usage limit errors as rate_limit", () => { expect(classifyFailoverReason("You have hit your ChatGPT usage limit (plus plan)")).toBe( diff --git a/src/agents/pi-embedded-helpers.image-dimension-error.test.ts b/src/agents/pi-embedded-helpers.image-dimension-error.test.ts new file mode 100644 index 000000000..b3417b9b1 --- /dev/null +++ b/src/agents/pi-embedded-helpers.image-dimension-error.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from "vitest"; + +import { isImageDimensionErrorMessage, parseImageDimensionError } from "./pi-embedded-helpers.js"; + +describe("image dimension errors", () => { + it("parses anthropic image dimension errors", () => { + const raw = + "400 {\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\",\"message\":\"messages.84.content.1.image.source.base64.data: At least one of the image dimensions exceed max allowed size for many-image requests: 2000 pixels\"}}"; + const parsed = parseImageDimensionError(raw); + expect(parsed).not.toBeNull(); + expect(parsed?.maxDimensionPx).toBe(2000); + expect(parsed?.messageIndex).toBe(84); + expect(parsed?.contentIndex).toBe(1); + expect(isImageDimensionErrorMessage(raw)).toBe(true); + }); +}); diff --git a/src/agents/pi-embedded-helpers.iscloudcodeassistformaterror.test.ts b/src/agents/pi-embedded-helpers.iscloudcodeassistformaterror.test.ts index a2ecf5062..ca0c7861f 100644 --- a/src/agents/pi-embedded-helpers.iscloudcodeassistformaterror.test.ts +++ b/src/agents/pi-embedded-helpers.iscloudcodeassistformaterror.test.ts @@ -23,5 +23,10 @@ describe("isCloudCodeAssistFormatError", () => { }); it("ignores unrelated errors", () => { expect(isCloudCodeAssistFormatError("rate limit exceeded")).toBe(false); + expect( + isCloudCodeAssistFormatError( + "400 {\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\",\"message\":\"messages.84.content.1.image.source.base64.data: At least one of the image dimensions exceed max allowed size for many-image requests: 2000 pixels\"}}", + ), + ).toBe(false); }); }); diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 48beb95a2..47a19183b 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -21,11 +21,13 @@ export { isContextOverflowError, isFailoverAssistantError, isFailoverErrorMessage, + isImageDimensionErrorMessage, isOverloadedErrorMessage, isRawApiErrorPayload, isRateLimitAssistantError, isRateLimitErrorMessage, isTimeoutErrorMessage, + parseImageDimensionError, } from "./pi-embedded-helpers/errors.js"; export { downgradeGeminiHistory, diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index f4e4736ab..4d3dbfde5 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -339,7 +339,6 @@ const ERROR_PATTERNS = { "no api key found", ], format: [ - "invalid_request_error", "string should match pattern", "tool_use.id", "tool_use_id", @@ -348,6 +347,10 @@ const ERROR_PATTERNS = { ], } as const; +const IMAGE_DIMENSION_ERROR_RE = + /image dimensions exceed max allowed size for many-image requests:\s*(\d+)\s*pixels/i; +const IMAGE_DIMENSION_PATH_RE = /messages\.(\d+)\.content\.(\d+)\.image/i; + function matchesErrorPatterns(raw: string, patterns: readonly ErrorPattern[]): boolean { if (!raw) return false; const value = raw.toLowerCase(); @@ -390,8 +393,31 @@ export function isOverloadedErrorMessage(raw: string): boolean { return matchesErrorPatterns(raw, ERROR_PATTERNS.overloaded); } +export function parseImageDimensionError(raw: string): { + maxDimensionPx?: number; + messageIndex?: number; + contentIndex?: number; + raw: string; +} | null { + if (!raw) return null; + const lower = raw.toLowerCase(); + if (!lower.includes("image dimensions exceed max allowed size")) return null; + const limitMatch = raw.match(IMAGE_DIMENSION_ERROR_RE); + const pathMatch = raw.match(IMAGE_DIMENSION_PATH_RE); + return { + maxDimensionPx: limitMatch?.[1] ? Number.parseInt(limitMatch[1], 10) : undefined, + messageIndex: pathMatch?.[1] ? Number.parseInt(pathMatch[1], 10) : undefined, + contentIndex: pathMatch?.[2] ? Number.parseInt(pathMatch[2], 10) : undefined, + raw, + }; +} + +export function isImageDimensionErrorMessage(raw: string): boolean { + return Boolean(parseImageDimensionError(raw)); +} + export function isCloudCodeAssistFormatError(raw: string): boolean { - return matchesErrorPatterns(raw, ERROR_PATTERNS.format); + return !isImageDimensionErrorMessage(raw) && matchesErrorPatterns(raw, ERROR_PATTERNS.format); } export function isAuthAssistantError(msg: AssistantMessage | undefined): boolean { @@ -400,6 +426,7 @@ export function isAuthAssistantError(msg: AssistantMessage | undefined): boolean } export function classifyFailoverReason(raw: string): FailoverReason | null { + if (isImageDimensionErrorMessage(raw)) return null; if (isRateLimitErrorMessage(raw)) return "rate_limit"; if (isOverloadedErrorMessage(raw)) return "rate_limit"; if (isCloudCodeAssistFormatError(raw)) return "format"; diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 82bafe0b4..c0e17b103 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -31,6 +31,7 @@ import { isContextOverflowError, isFailoverAssistantError, isFailoverErrorMessage, + parseImageDimensionError, isRateLimitAssistantError, isTimeoutErrorMessage, pickFallbackThinkingLevel, @@ -357,6 +358,26 @@ export async function runEmbeddedPiAgent( const failoverFailure = isFailoverAssistantError(lastAssistant); const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? ""); const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError; + const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? ""); + + if (imageDimensionError && lastProfileId) { + const details = [ + imageDimensionError.messageIndex !== undefined + ? `message=${imageDimensionError.messageIndex}` + : null, + imageDimensionError.contentIndex !== undefined + ? `content=${imageDimensionError.contentIndex}` + : null, + imageDimensionError.maxDimensionPx !== undefined + ? `limit=${imageDimensionError.maxDimensionPx}px` + : null, + ] + .filter(Boolean) + .join(" "); + log.warn( + `Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`, + ); + } // Treat timeout as potential rate limit (Antigravity hangs on rate limit) const shouldRotate = (!aborted && failoverFailure) || timedOut; diff --git a/src/agents/pi-embedded-runner/run/images.ts b/src/agents/pi-embedded-runner/run/images.ts index ac9cbf103..6bb7bef9b 100644 --- a/src/agents/pi-embedded-runner/run/images.ts +++ b/src/agents/pi-embedded-runner/run/images.ts @@ -5,6 +5,7 @@ import { fileURLToPath } from "node:url"; import type { ImageContent } from "@mariozechner/pi-ai"; import { assertSandboxPath } from "../../sandbox-paths.js"; +import { sanitizeImageBlocks } from "../../tool-images.js"; import { extractTextFromMessage } from "../../../tui/tui-formatters.js"; import { loadWebMedia } from "../../../web/media.js"; import { resolveUserPath } from "../../../utils.js"; @@ -48,6 +49,17 @@ function isImageExtension(filePath: string): boolean { return IMAGE_EXTENSIONS.has(ext); } +async function sanitizeImagesWithLog( + images: ImageContent[], + label: string, +): Promise { + const { images: sanitized, dropped } = await sanitizeImageBlocks(images, label); + if (dropped > 0) { + log.warn(`Native image: dropped ${dropped} image(s) after sanitization (${label}).`); + } + return sanitized; +} + /** * Detects image references in a user prompt. * @@ -392,9 +404,18 @@ export async function detectAndLoadPromptImages(params: { } } + const sanitizedPromptImages = await sanitizeImagesWithLog(promptImages, "prompt:images"); + const sanitizedHistoryImagesByIndex = new Map(); + for (const [index, images] of historyImagesByIndex) { + const sanitized = await sanitizeImagesWithLog(images, `history:images:${index}`); + if (sanitized.length > 0) { + sanitizedHistoryImagesByIndex.set(index, sanitized); + } + } + return { - images: promptImages, - historyImagesByIndex, + images: sanitizedPromptImages, + historyImagesByIndex: sanitizedHistoryImagesByIndex, detectedRefs: allRefs, loadedCount, skippedCount, diff --git a/src/agents/tool-images.test.ts b/src/agents/tool-images.test.ts index 8a0e5f0c6..f656c13ae 100644 --- a/src/agents/tool-images.test.ts +++ b/src/agents/tool-images.test.ts @@ -1,7 +1,7 @@ import sharp from "sharp"; import { describe, expect, it } from "vitest"; -import { sanitizeContentBlocksImages } from "./tool-images.js"; +import { sanitizeContentBlocksImages, sanitizeImageBlocks } from "./tool-images.js"; describe("tool image sanitizing", () => { it("shrinks oversized images to <=5MB", async () => { @@ -33,6 +33,56 @@ describe("tool image sanitizing", () => { expect(image.mimeType).toBe("image/jpeg"); }, 20_000); + it("sanitizes image arrays and reports drops", async () => { + const width = 2600; + const height = 400; + const raw = Buffer.alloc(width * height * 3, 0x7f); + const png = await sharp(raw, { + raw: { width, height, channels: 3 }, + }) + .png({ compressionLevel: 9 }) + .toBuffer(); + + const images = [ + { type: "image" as const, data: png.toString("base64"), mimeType: "image/png" }, + ]; + const { images: out, dropped } = await sanitizeImageBlocks(images, "test"); + expect(dropped).toBe(0); + expect(out.length).toBe(1); + const meta = await sharp(Buffer.from(out[0].data, "base64")).metadata(); + expect(meta.width).toBeLessThanOrEqual(2000); + expect(meta.height).toBeLessThanOrEqual(2000); + }, 20_000); + + it("shrinks images that exceed max dimension even if size is small", async () => { + const width = 2600; + const height = 400; + const raw = Buffer.alloc(width * height * 3, 0x7f); + const png = await sharp(raw, { + raw: { width, height, channels: 3 }, + }) + .png({ compressionLevel: 9 }) + .toBuffer(); + + const blocks = [ + { + type: "image" as const, + data: png.toString("base64"), + mimeType: "image/png", + }, + ]; + + const out = await sanitizeContentBlocksImages(blocks, "test"); + const image = out.find((b) => b.type === "image"); + if (!image || image.type !== "image") { + throw new Error("expected image block"); + } + const meta = await sharp(Buffer.from(image.data, "base64")).metadata(); + expect(meta.width).toBeLessThanOrEqual(2000); + expect(meta.height).toBeLessThanOrEqual(2000); + expect(image.mimeType).toBe("image/jpeg"); + }, 20_000); + it("corrects mismatched jpeg mimeType", async () => { const jpeg = await sharp({ create: { diff --git a/src/agents/tool-images.ts b/src/agents/tool-images.ts index 274b832fd..3ae8b124d 100644 --- a/src/agents/tool-images.ts +++ b/src/agents/tool-images.ts @@ -1,5 +1,7 @@ import type { AgentToolResult } from "@mariozechner/pi-agent-core"; +import type { ImageContent } from "@mariozechner/pi-ai"; +import { createSubsystemLogger } from "../logging.js"; import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js"; type ToolContentBlock = AgentToolResult["content"][number]; @@ -14,6 +16,7 @@ type TextContentBlock = Extract; // and recompress base64 image blocks when they exceed these limits. const MAX_IMAGE_DIMENSION_PX = 2000; const MAX_IMAGE_BYTES = 5 * 1024 * 1024; +const log = createSubsystemLogger("agents/tool-images"); function isImageBlock(block: unknown): block is ImageContentBlock { if (!block || typeof block !== "object") return false; @@ -41,26 +44,41 @@ async function resizeImageBase64IfNeeded(params: { mimeType: string; maxDimensionPx: number; maxBytes: number; -}): Promise<{ base64: string; mimeType: string; resized: boolean }> { + label?: string; +}): Promise<{ + base64: string; + mimeType: string; + resized: boolean; + width?: number; + height?: number; +}> { const buf = Buffer.from(params.base64, "base64"); const meta = await getImageMetadata(buf); const width = meta?.width; const height = meta?.height; const overBytes = buf.byteLength > params.maxBytes; - const maxDim = Math.max(width ?? 0, height ?? 0); - if (typeof width !== "number" || typeof height !== "number") { - if (!overBytes) { - return { - base64: params.base64, - mimeType: params.mimeType, - resized: false, - }; - } - } else if (!overBytes && width <= params.maxDimensionPx && height <= params.maxDimensionPx) { - return { base64: params.base64, mimeType: params.mimeType, resized: false }; + const hasDimensions = typeof width === "number" && typeof height === "number"; + if (hasDimensions && !overBytes && width <= params.maxDimensionPx && height <= params.maxDimensionPx) { + return { + base64: params.base64, + mimeType: params.mimeType, + resized: false, + width, + height, + }; + } + if (hasDimensions && (width > params.maxDimensionPx || height > params.maxDimensionPx || overBytes)) { + log.warn("Image exceeds limits; resizing", { + label: params.label, + width, + height, + maxDimensionPx: params.maxDimensionPx, + maxBytes: params.maxBytes, + }); } const qualities = [85, 75, 65, 55, 45, 35]; + const maxDim = hasDimensions ? Math.max(width ?? 0, height ?? 0) : params.maxDimensionPx; const sideStart = maxDim > 0 ? Math.min(params.maxDimensionPx, maxDim) : params.maxDimensionPx; const sideGrid = [sideStart, 1800, 1600, 1400, 1200, 1000, 800] .map((v) => Math.min(params.maxDimensionPx, v)) @@ -80,10 +98,23 @@ async function resizeImageBase64IfNeeded(params: { smallest = { buffer: out, size: out.byteLength }; } if (out.byteLength <= params.maxBytes) { + log.info("Image resized", { + label: params.label, + width, + height, + maxDimensionPx: params.maxDimensionPx, + maxBytes: params.maxBytes, + originalBytes: buf.byteLength, + resizedBytes: out.byteLength, + quality, + side, + }); return { base64: out.toString("base64"), mimeType: "image/jpeg", resized: true, + width, + height, }; } } @@ -127,6 +158,7 @@ export async function sanitizeContentBlocksImages( mimeType, maxDimensionPx, maxBytes, + label, }); out.push({ ...block, @@ -144,6 +176,17 @@ export async function sanitizeContentBlocksImages( return out; } +export async function sanitizeImageBlocks( + images: ImageContent[], + label: string, + opts: { maxDimensionPx?: number; maxBytes?: number } = {}, +): Promise<{ images: ImageContent[]; dropped: number }> { + if (images.length === 0) return { images, dropped: 0 }; + const sanitized = await sanitizeContentBlocksImages(images as ToolContentBlock[], label, opts); + const next = sanitized.filter(isImageBlock) as ImageContent[]; + return { images: next, dropped: Math.max(0, images.length - next.length) }; +} + export async function sanitizeToolResultImages( result: AgentToolResult, label: string,