feat(browser): clamp screenshots under 5MB

This commit is contained in:
Peter Steinberger
2025-12-13 18:08:00 +00:00
parent 867d7e5d25
commit 56fe23549c
4 changed files with 173 additions and 4 deletions

View File

@@ -81,6 +81,19 @@ async function fetchJson<T>(url: string, timeoutMs = 1500): Promise<T> {
/**
 * Capture a screenshot as PNG.
 *
 * Convenience wrapper that forwards `wsUrl` and `fullPage` to
 * `captureScreenshot` and pins the output format to `"png"`.
 *
 * @param opts.wsUrl - WebSocket URL passed through to `captureScreenshot`.
 * @param opts.fullPage - Optional flag passed through unchanged.
 * @returns The image bytes resolved by `captureScreenshot`.
 */
export async function captureScreenshotPng(opts: {
  wsUrl: string;
  fullPage?: boolean;
}): Promise<Buffer> {
  const { wsUrl, fullPage } = opts;
  const png = await captureScreenshot({ wsUrl, fullPage, format: "png" });
  return png;
}
export async function captureScreenshot(opts: {
wsUrl: string;
fullPage?: boolean;
format?: "png" | "jpeg";
quality?: number; // jpeg only (0..100)
}): Promise<Buffer> {
const ws = new WebSocket(opts.wsUrl, { handshakeTimeout: 5000 });
const { send, closeWithError } = createCdpSender(ws);
@@ -110,8 +123,15 @@ export async function captureScreenshotPng(opts: {
}
}
const format = opts.format ?? "png";
const quality =
format === "jpeg"
? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85)))
: undefined;
const result = (await send("Page.captureScreenshot", {
format: "png",
format,
...(quality !== undefined ? { quality } : {}),
fromSurface: true,
captureBeyondViewport: true,
...(clip ? { clip } : {}),

View File

@@ -0,0 +1,49 @@
import crypto from "node:crypto";
import sharp from "sharp";
import { describe, expect, it } from "vitest";
import { normalizeBrowserScreenshot } from "./screenshot.js";
describe("browser screenshot normalization", () => {
  // Budget used by both cases: 2000px per side, 5 MiB encoded.
  const limits = { maxSide: 2000, maxBytes: 5 * 1024 * 1024 };

  it("shrinks oversized images to <=2000x2000 and <=5MB", async () => {
    const side = 2800;
    // Random RGB noise written with zlib level 0, so the PNG is roughly the
    // raw side * side * 3 bytes and exceeds both the pixel and byte limits.
    const noise = crypto.randomBytes(side * side * 3);
    const oversizedPng = await sharp(noise, {
      raw: { width: side, height: side, channels: 3 },
    })
      .png({ compressionLevel: 0 })
      .toBuffer();

    const { buffer: result } = await normalizeBrowserScreenshot(
      oversizedPng,
      limits,
    );

    expect(result.byteLength).toBeLessThanOrEqual(limits.maxBytes);

    const { width: outWidth, height: outHeight } =
      await sharp(result).metadata();
    expect(Number(outWidth)).toBeLessThanOrEqual(limits.maxSide);
    expect(Number(outHeight)).toBeLessThanOrEqual(limits.maxSide);

    // The first two bytes must be the JPEG start-of-image marker (FF D8).
    expect(result[0]).toBe(0xff);
    expect(result[1]).toBe(0xd8);
  }, 20_000);

  it("keeps already-small screenshots unchanged", async () => {
    // Solid red 800x600 JPEG: comfortably inside both limits.
    const solidRed = sharp({
      create: {
        width: 800,
        height: 600,
        channels: 3,
        background: { r: 255, g: 0, b: 0 },
      },
    });
    const smallJpeg = await solidRed.jpeg({ quality: 80 }).toBuffer();

    const result = await normalizeBrowserScreenshot(smallJpeg, limits);

    // Pass-through: the returned bytes are identical to the input.
    expect(result.buffer.equals(smallJpeg)).toBe(true);
  });
});

69
src/browser/screenshot.ts Normal file
View File

@@ -0,0 +1,69 @@
import sharp from "sharp";
/** Default cap, in pixels, on a normalized screenshot's width and height. */
export const DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE = 2000;
/** Default cap, in bytes, on a normalized screenshot's encoded size (5 * 1024 * 1024). */
export const DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES = 5 * 1024 * 1024;
/**
 * Make a screenshot fit within a pixel and byte budget.
 *
 * When `buffer` already satisfies both limits it is returned untouched and no
 * `contentType` is reported. If sharp reports no dimensions (treated as 0),
 * only the byte limit decides that early return. Otherwise the image is
 * re-encoded as JPEG: a descending ladder of bounding-box sizes is walked
 * and, for each size, a descending list of JPEG qualities is tried. The first
 * encode at or under `maxBytes` is returned with `contentType: "image/jpeg"`.
 *
 * @param buffer - Encoded image bytes; decoded with sharp.
 * @param opts.maxSide - Maximum width and height in pixels. Rounded and
 *   clamped to at least 1. Defaults to `DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE`.
 * @param opts.maxBytes - Maximum encoded size in bytes. Rounded and clamped
 *   to at least 1. Defaults to `DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES`.
 * @returns The original buffer (no `contentType`) or a JPEG re-encode.
 * @throws Error when no size/quality combination fits within `maxBytes`.
 */
export async function normalizeBrowserScreenshot(
  buffer: Buffer,
  opts?: {
    maxSide?: number;
    maxBytes?: number;
  },
): Promise<{ buffer: Buffer; contentType?: "image/jpeg" }> {
  const maxSide = Math.max(
    1,
    Math.round(opts?.maxSide ?? DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE),
  );
  const maxBytes = Math.max(
    1,
    Math.round(opts?.maxBytes ?? DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES),
  );

  const meta = await sharp(buffer, { failOnError: false }).metadata();
  const width = Number(meta.width ?? 0);
  const height = Number(meta.height ?? 0);
  const maxDim = Math.max(width, height);

  // Fast path: nothing to do, so hand back the caller's bytes unchanged.
  const fitsBytes = buffer.byteLength <= maxBytes;
  const fitsSide = maxDim === 0 || maxDim <= maxSide;
  if (fitsBytes && fitsSide) {
    return { buffer };
  }

  const qualities = [85, 75, 65, 55, 45, 35];
  const sideStart = maxDim > 0 ? Math.min(maxSide, maxDim) : maxSide;
  // Candidates above `sideStart` are dropped: with `withoutEnlargement` they
  // would encode exactly the same pixels as `sideStart` itself, repeating
  // every quality step for no gain.
  const sideGrid = [
    ...new Set(
      [sideStart, 1800, 1600, 1400, 1200, 1000, 800].map((v) =>
        Math.min(maxSide, v),
      ),
    ),
  ]
    .filter((v) => v > 0 && v <= sideStart)
    .sort((a, b) => b - a);

  // Smallest encode seen so far; only used for the error message.
  let smallest: Buffer | null = null;
  for (const side of sideGrid) {
    for (const quality of qualities) {
      const out = await sharp(buffer, { failOnError: false })
        .resize({
          width: side,
          height: side,
          fit: "inside",
          withoutEnlargement: true,
        })
        .jpeg({ quality, mozjpeg: true })
        .toBuffer();
      if (out.byteLength <= maxBytes) {
        return { buffer: out, contentType: "image/jpeg" };
      }
      if (!smallest || out.byteLength < smallest.byteLength) {
        smallest = out;
      }
    }
  }

  const best = smallest ?? buffer;
  const bytesPerMb = 1024 * 1024;
  // Keep up to two decimals so fractional limits are not rounded to a
  // misleading whole number; whole-MB limits still print as e.g. "5MB".
  const limitMb = Number((maxBytes / bytesPerMb).toFixed(2));
  throw new Error(
    `Browser screenshot could not be reduced below ${limitMb}MB (got ${(best.byteLength / bytesPerMb).toFixed(2)}MB)`,
  );
}

View File

@@ -6,7 +6,11 @@ import { loadConfig } from "../config/config.js";
import { logError, logInfo, logWarn } from "../logger.js";
import { ensureMediaDir, saveMediaBuffer } from "../media/store.js";
import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import { captureScreenshotPng, createTargetViaCdp } from "./cdp.js";
import {
captureScreenshot,
captureScreenshotPng,
createTargetViaCdp,
} from "./cdp.js";
import {
isChromeReachable,
launchClawdChrome,
@@ -17,6 +21,11 @@ import {
resolveBrowserConfig,
shouldStartLocalBrowserServer,
} from "./config.js";
import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
normalizeBrowserScreenshot,
} from "./screenshot.js";
export type BrowserTab = {
targetId: string;
@@ -299,9 +308,31 @@ export async function startBrowserControlServerFromConfig(
: tabs.at(0);
if (!chosen?.wsUrl) return jsonError(res, 404, "tab not found");
const png = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage });
let shot: Buffer<ArrayBufferLike> = Buffer.alloc(0);
let contentTypeHint: "image/jpeg" | "image/png" = "image/jpeg";
try {
shot = await captureScreenshot({
wsUrl: chosen.wsUrl,
fullPage,
format: "jpeg",
quality: 85,
});
} catch {
contentTypeHint = "image/png";
shot = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage });
}
const normalized = await normalizeBrowserScreenshot(shot, {
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
});
await ensureMediaDir();
const saved = await saveMediaBuffer(png, "image/png", "browser");
const saved = await saveMediaBuffer(
normalized.buffer,
normalized.contentType ?? contentTypeHint,
"browser",
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
);
const filePath = path.resolve(saved.path);
res.json({
ok: true,