From 56fe23549cd42665d6070ff535cd95285db75e39 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 13 Dec 2025 18:08:00 +0000 Subject: [PATCH] feat(browser): clamp screenshots under 5MB --- src/browser/cdp.ts | 22 ++++++++++- src/browser/screenshot.test.ts | 49 ++++++++++++++++++++++++ src/browser/screenshot.ts | 69 ++++++++++++++++++++++++++++++++++ src/browser/server.ts | 37 ++++++++++++++++-- 4 files changed, 173 insertions(+), 4 deletions(-) create mode 100644 src/browser/screenshot.test.ts create mode 100644 src/browser/screenshot.ts diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index a85471a52..f572f3fd4 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -81,6 +81,19 @@ async function fetchJson(url: string, timeoutMs = 1500): Promise { export async function captureScreenshotPng(opts: { wsUrl: string; fullPage?: boolean; +}): Promise { + return await captureScreenshot({ + wsUrl: opts.wsUrl, + fullPage: opts.fullPage, + format: "png", + }); +} + +export async function captureScreenshot(opts: { + wsUrl: string; + fullPage?: boolean; + format?: "png" | "jpeg"; + quality?: number; // jpeg only (0..100) }): Promise { const ws = new WebSocket(opts.wsUrl, { handshakeTimeout: 5000 }); const { send, closeWithError } = createCdpSender(ws); @@ -110,8 +123,15 @@ export async function captureScreenshotPng(opts: { } } + const format = opts.format ?? "png"; + const quality = + format === "jpeg" + ? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85))) + : undefined; + const result = (await send("Page.captureScreenshot", { - format: "png", + format, + ...(quality !== undefined ? { quality } : {}), fromSurface: true, captureBeyondViewport: true, ...(clip ? { clip } : {}), diff --git a/src/browser/screenshot.test.ts b/src/browser/screenshot.test.ts new file mode 100644 index 000000000..2d5350301 --- /dev/null +++ b/src/browser/screenshot.test.ts @@ -0,0 +1,49 @@ +import crypto from "node:crypto"; + +import sharp from "sharp"; +import { describe, expect, it } from "vitest"; + +import { normalizeBrowserScreenshot } from "./screenshot.js"; + +describe("browser screenshot normalization", () => { + it("shrinks oversized images to <=2000x2000 and <=5MB", async () => { + const width = 2800; + const height = 2800; + const raw = crypto.randomBytes(width * height * 3); + const bigPng = await sharp(raw, { raw: { width, height, channels: 3 } }) + .png({ compressionLevel: 0 }) + .toBuffer(); + + const normalized = await normalizeBrowserScreenshot(bigPng, { + maxSide: 2000, + maxBytes: 5 * 1024 * 1024, + }); + + expect(normalized.buffer.byteLength).toBeLessThanOrEqual(5 * 1024 * 1024); + const meta = await sharp(normalized.buffer).metadata(); + expect(Number(meta.width)).toBeLessThanOrEqual(2000); + expect(Number(meta.height)).toBeLessThanOrEqual(2000); + expect(normalized.buffer[0]).toBe(0xff); + expect(normalized.buffer[1]).toBe(0xd8); + }, 20_000); + + it("keeps already-small screenshots unchanged", async () => { + const jpeg = await sharp({ + create: { + width: 800, + height: 600, + channels: 3, + background: { r: 255, g: 0, b: 0 }, + }, + }) + .jpeg({ quality: 80 }) + .toBuffer(); + + const normalized = await normalizeBrowserScreenshot(jpeg, { + maxSide: 2000, + maxBytes: 5 * 1024 * 1024, + }); + + expect(normalized.buffer.equals(jpeg)).toBe(true); + }); +}); diff --git a/src/browser/screenshot.ts b/src/browser/screenshot.ts new file mode 100644 index 000000000..6c5005ff5 --- /dev/null +++ b/src/browser/screenshot.ts @@ -0,0 +1,69 @@ +import sharp from "sharp"; + +export const DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE = 2000; +export const DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES = 5 * 1024 * 1024; + +export async function normalizeBrowserScreenshot( + buffer: Buffer, + opts?: { + maxSide?: number; + maxBytes?: number; + }, +): Promise<{ buffer: Buffer; contentType?: "image/jpeg" }> { + const maxSide = Math.max( + 1, + Math.round(opts?.maxSide ?? DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE), + ); + const maxBytes = Math.max( + 1, + Math.round(opts?.maxBytes ?? DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES), + ); + + const meta = await sharp(buffer, { failOnError: false }).metadata(); + const width = Number(meta.width ?? 0); + const height = Number(meta.height ?? 0); + const maxDim = Math.max(width, height); + + if ( + buffer.byteLength <= maxBytes && + (maxDim === 0 || (width <= maxSide && height <= maxSide)) + ) { + return { buffer }; + } + + const qualities = [85, 75, 65, 55, 45, 35]; + const sideStart = maxDim > 0 ? Math.min(maxSide, maxDim) : maxSide; + const sideGrid = [sideStart, 1800, 1600, 1400, 1200, 1000, 800] + .map((v) => Math.min(maxSide, v)) + .filter((v, i, arr) => v > 0 && arr.indexOf(v) === i) + .sort((a, b) => b - a); + + let smallest: { buffer: Buffer; size: number } | null = null; + + for (const side of sideGrid) { + for (const quality of qualities) { + const out = await sharp(buffer, { failOnError: false }) + .resize({ + width: side, + height: side, + fit: "inside", + withoutEnlargement: true, + }) + .jpeg({ quality, mozjpeg: true }) + .toBuffer(); + + if (!smallest || out.byteLength < smallest.size) { + smallest = { buffer: out, size: out.byteLength }; + } + + if (out.byteLength <= maxBytes) { + return { buffer: out, contentType: "image/jpeg" }; + } + } + } + + const best = smallest?.buffer ?? buffer; + throw new Error( + `Browser screenshot could not be reduced below ${(maxBytes / (1024 * 1024)).toFixed(0)}MB (got ${(best.byteLength / (1024 * 1024)).toFixed(2)}MB)`, + ); +} diff --git a/src/browser/server.ts b/src/browser/server.ts index 9f7178722..62ad1872a 100644 --- a/src/browser/server.ts +++ b/src/browser/server.ts @@ -6,7 +6,11 @@ import { loadConfig } from "../config/config.js"; import { logError, logInfo, logWarn } from "../logger.js"; import { ensureMediaDir, saveMediaBuffer } from "../media/store.js"; import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; -import { captureScreenshotPng, createTargetViaCdp } from "./cdp.js"; +import { + captureScreenshot, + captureScreenshotPng, + createTargetViaCdp, +} from "./cdp.js"; import { isChromeReachable, launchClawdChrome, @@ -17,6 +21,11 @@ import { resolveBrowserConfig, shouldStartLocalBrowserServer, } from "./config.js"; +import { + DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, + normalizeBrowserScreenshot, +} from "./screenshot.js"; export type BrowserTab = { targetId: string; @@ -299,9 +308,31 @@ export async function startBrowserControlServerFromConfig( : tabs.at(0); if (!chosen?.wsUrl) return jsonError(res, 404, "tab not found"); - const png = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage }); + let shot: Buffer = Buffer.alloc(0); + let contentTypeHint: "image/jpeg" | "image/png" = "image/jpeg"; + try { + shot = await captureScreenshot({ + wsUrl: chosen.wsUrl, + fullPage, + format: "jpeg", + quality: 85, + }); + } catch { + contentTypeHint = "image/png"; + shot = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage }); + } + + const normalized = await normalizeBrowserScreenshot(shot, { + maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, + maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + }); await ensureMediaDir(); - const saved = await saveMediaBuffer(png, "image/png", "browser"); + const saved = await saveMediaBuffer( + normalized.buffer, + normalized.contentType ?? contentTypeHint, + "browser", + DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + ); const filePath = path.resolve(saved.path); res.json({ ok: true,