feat: add browser snapshot modes

This commit is contained in:
Peter Steinberger
2026-01-15 03:50:48 +00:00
parent 4e48d0a431
commit a6e780b2f6
18 changed files with 430 additions and 141 deletions

View File

@@ -4,7 +4,11 @@ import type express from "express";
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
import { captureScreenshot, snapshotAria } from "../cdp.js";
import { DEFAULT_AI_SNAPSHOT_MAX_CHARS } from "../constants.js";
import {
DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH,
DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS,
DEFAULT_AI_SNAPSHOT_MAX_CHARS,
} from "../constants.js";
import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
@@ -138,14 +142,12 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
const profileCtx = resolveProfileContext(req, res, ctx);
if (!profileCtx) return;
const targetId = typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const mode = req.query.mode === "efficient" ? "efficient" : undefined;
const labels = toBoolean(req.query.labels) ?? undefined;
const explicitFormat =
req.query.format === "aria" ? "aria" : req.query.format === "ai" ? "ai" : undefined;
const format =
req.query.format === "aria"
? "aria"
: req.query.format === "ai"
? "ai"
: (await getPwAiModule())
? "ai"
: "aria";
explicitFormat ?? (mode ? "ai" : (await getPwAiModule()) ? "ai" : "aria");
const limitRaw = typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
const hasMaxChars = Object.hasOwn(req.query, "maxChars");
const maxCharsRaw =
@@ -156,19 +158,34 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
? Math.floor(maxCharsRaw)
: undefined;
const resolvedMaxChars =
format === "ai" ? (hasMaxChars ? maxChars : DEFAULT_AI_SNAPSHOT_MAX_CHARS) : undefined;
const interactive = toBoolean(req.query.interactive);
const compact = toBoolean(req.query.compact);
const depth = toNumber(req.query.depth);
format === "ai"
? hasMaxChars
? maxChars
: mode === "efficient"
? DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS
: DEFAULT_AI_SNAPSHOT_MAX_CHARS
: undefined;
const interactiveRaw = toBoolean(req.query.interactive);
const compactRaw = toBoolean(req.query.compact);
const depthRaw = toNumber(req.query.depth);
const interactive = interactiveRaw ?? (mode === "efficient" ? true : undefined);
const compact = compactRaw ?? (mode === "efficient" ? true : undefined);
const depth =
depthRaw ?? (mode === "efficient" ? DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH : undefined);
const selector = toStringOrEmpty(req.query.selector);
const frameSelector = toStringOrEmpty(req.query.frame);
try {
const tab = await profileCtx.ensureTabAvailable(targetId || undefined);
if ((labels || mode === "efficient") && format === "aria") {
return jsonError(res, 400, "labels/mode=efficient require format=ai");
}
if (format === "ai") {
const pw = await requirePwAi(res, "ai snapshot");
if (!pw) return;
const wantsRoleSnapshot =
labels === true ||
mode === "efficient" ||
interactive === true ||
compact === true ||
depth !== undefined ||
@@ -210,6 +227,39 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
}
throw err;
});
if (labels) {
const labeled = await pw.screenshotWithLabelsViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
refs: "refs" in snap ? snap.refs : {},
type: "png",
});
const normalized = await normalizeBrowserScreenshot(labeled.buffer, {
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
});
await ensureMediaDir();
const saved = await saveMediaBuffer(
normalized.buffer,
normalized.contentType ?? "image/png",
"browser",
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
);
const imageType = normalized.contentType?.includes("jpeg") ? "jpeg" : "png";
return res.json({
ok: true,
format,
targetId: tab.targetId,
url: tab.url,
labels: true,
labelsCount: labeled.labels,
labelsSkipped: labeled.skipped,
imagePath: path.resolve(saved.path),
imageType,
...snap,
});
}
return res.json({
ok: true,
format,