feat: add browser snapshot modes
This commit is contained in:
@@ -37,6 +37,7 @@ const BROWSER_TOOL_ACTIONS = [
|
||||
// Browser target names as a readonly literal tuple ("sandbox", "host", "custom").
// NOTE(review): the consumer of this list is outside the visible hunk — confirm where it is used.
const BROWSER_TARGETS = ["sandbox", "host", "custom"] as const;
|
||||
|
||||
// Snapshot output formats; passed to optionalStringEnum to build the optional `format`
// field of BrowserToolSchema.
const BROWSER_SNAPSHOT_FORMATS = ["aria", "ai"] as const;
|
||||
// Snapshot presets; passed to optionalStringEnum to build the optional `mode` field of
// BrowserToolSchema. "efficient" is currently the only preset.
const BROWSER_SNAPSHOT_MODES = ["efficient"] as const;
|
||||
|
||||
// Image encodings as a readonly literal tuple ("png", "jpeg").
// NOTE(review): the consumer of this list is outside the visible hunk — confirm where it is used.
const BROWSER_IMAGE_TYPES = ["png", "jpeg"] as const;
|
||||
|
||||
@@ -87,12 +88,14 @@ export const BrowserToolSchema = Type.Object({
|
||||
targetId: Type.Optional(Type.String()),
|
||||
limit: Type.Optional(Type.Number()),
|
||||
maxChars: Type.Optional(Type.Number()),
|
||||
mode: optionalStringEnum(BROWSER_SNAPSHOT_MODES),
|
||||
format: optionalStringEnum(BROWSER_SNAPSHOT_FORMATS),
|
||||
interactive: Type.Optional(Type.Boolean()),
|
||||
compact: Type.Optional(Type.Boolean()),
|
||||
depth: Type.Optional(Type.Number()),
|
||||
selector: Type.Optional(Type.String()),
|
||||
frame: Type.Optional(Type.String()),
|
||||
labels: Type.Optional(Type.Boolean()),
|
||||
fullPage: Type.Optional(Type.Boolean()),
|
||||
ref: Type.Optional(Type.String()),
|
||||
element: Type.Optional(Type.String()),
|
||||
|
||||
@@ -182,6 +182,8 @@ export function createBrowserTool(opts?: {
|
||||
params.format === "ai" || params.format === "aria"
|
||||
? (params.format as "ai" | "aria")
|
||||
: "ai";
|
||||
const mode = params.mode === "efficient" ? "efficient" : undefined;
|
||||
const labels = typeof params.labels === "boolean" ? params.labels : undefined;
|
||||
const hasMaxChars = Object.hasOwn(params, "maxChars");
|
||||
const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
|
||||
const limit =
|
||||
@@ -195,7 +197,13 @@ export function createBrowserTool(opts?: {
|
||||
? Math.floor(params.maxChars)
|
||||
: undefined;
|
||||
const resolvedMaxChars =
|
||||
format === "ai" ? (hasMaxChars ? maxChars : DEFAULT_AI_SNAPSHOT_MAX_CHARS) : undefined;
|
||||
format === "ai"
|
||||
? hasMaxChars
|
||||
? maxChars
|
||||
: mode === "efficient"
|
||||
? undefined
|
||||
: DEFAULT_AI_SNAPSHOT_MAX_CHARS
|
||||
: undefined;
|
||||
const interactive =
|
||||
typeof params.interactive === "boolean" ? params.interactive : undefined;
|
||||
const compact = typeof params.compact === "boolean" ? params.compact : undefined;
|
||||
@@ -215,9 +223,19 @@ export function createBrowserTool(opts?: {
|
||||
depth,
|
||||
selector,
|
||||
frame,
|
||||
labels,
|
||||
mode,
|
||||
profile,
|
||||
});
|
||||
if (snapshot.format === "ai") {
|
||||
if (labels && snapshot.imagePath) {
|
||||
return await imageResultFromFile({
|
||||
label: "browser:snapshot",
|
||||
path: snapshot.imagePath,
|
||||
extraText: snapshot.snapshot,
|
||||
details: snapshot,
|
||||
});
|
||||
}
|
||||
return {
|
||||
content: [{ type: "text", text: snapshot.snapshot }],
|
||||
details: snapshot,
|
||||
|
||||
@@ -79,6 +79,11 @@ export type SnapshotResult =
|
||||
refs: number;
|
||||
interactive: number;
|
||||
};
|
||||
labels?: boolean;
|
||||
labelsCount?: number;
|
||||
labelsSkipped?: number;
|
||||
imagePath?: string;
|
||||
imageType?: "png" | "jpeg";
|
||||
};
|
||||
|
||||
export function resolveBrowserControlUrl(overrideUrl?: string) {
|
||||
@@ -264,6 +269,8 @@ export async function browserSnapshot(
|
||||
depth?: number;
|
||||
selector?: string;
|
||||
frame?: string;
|
||||
labels?: boolean;
|
||||
mode?: "efficient";
|
||||
profile?: string;
|
||||
},
|
||||
): Promise<SnapshotResult> {
|
||||
@@ -280,6 +287,8 @@ export async function browserSnapshot(
|
||||
q.set("depth", String(opts.depth));
|
||||
if (opts.selector?.trim()) q.set("selector", opts.selector.trim());
|
||||
if (opts.frame?.trim()) q.set("frame", opts.frame.trim());
|
||||
if (opts.labels === true) q.set("labels", "1");
|
||||
if (opts.mode) q.set("mode", opts.mode);
|
||||
if (opts.profile) q.set("profile", opts.profile);
|
||||
return await fetchBrowserJson<SnapshotResult>(`${baseUrl}/snapshot?${q.toString()}`, {
|
||||
timeoutMs: 20000,
|
||||
|
||||
@@ -3,3 +3,5 @@ export const DEFAULT_CLAWD_BROWSER_CONTROL_URL = "http://127.0.0.1:18791";
|
||||
/**
 * Default browser color as a hex RGB string.
 * NOTE(review): where this color is applied is not visible here — confirm against callers.
 */
export const DEFAULT_CLAWD_BROWSER_COLOR = "#FF4500";
|
||||
/**
 * Default browser profile name.
 * NOTE(review): presumably used when no profile is specified — confirm against callers.
 */
export const DEFAULT_CLAWD_BROWSER_PROFILE_NAME = "clawd";
|
||||
/**
 * Default `maxChars` for "ai"-format snapshots, used when the caller passes no `maxChars`
 * and the "efficient" mode is not requested.
 */
export const DEFAULT_AI_SNAPSHOT_MAX_CHARS = 80_000;
|
||||
/**
 * Default `maxChars` the snapshot route applies to "ai"-format snapshots when
 * `mode=efficient` is requested and the query carries no explicit `maxChars`.
 */
export const DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS = 10_000;
|
||||
/**
 * Default snapshot `depth` the snapshot route applies when `mode=efficient` is requested
 * and the query carries no explicit `depth`.
 */
export const DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH = 6;
|
||||
|
||||
@@ -42,6 +42,7 @@ export {
|
||||
setTimezoneViaPlaywright,
|
||||
snapshotAiViaPlaywright,
|
||||
snapshotRoleViaPlaywright,
|
||||
screenshotWithLabelsViaPlaywright,
|
||||
storageClearViaPlaywright,
|
||||
storageGetViaPlaywright,
|
||||
storageSetViaPlaywright,
|
||||
|
||||
@@ -347,6 +347,132 @@ export async function takeScreenshotViaPlaywright(opts: {
|
||||
return { buffer };
|
||||
}
|
||||
|
||||
/**
 * Captures a viewport screenshot with a temporary overlay that outlines each
 * referenced element and tags it with its ref id.
 *
 * For every key of `opts.refs` the element's bounding box is looked up via
 * `refLocator(page, ref).boundingBox()`. Elements that have no box, that lie
 * entirely outside the viewport, whose lookup throws, or that exceed the
 * `maxLabels` budget are counted as skipped. The overlay is injected into the
 * page just before the screenshot and is always removed afterwards, even when
 * the screenshot fails.
 *
 * @param opts.cdpUrl    Passed through to `getPageForTargetId`.
 * @param opts.targetId  Passed through to `getPageForTargetId`.
 * @param opts.refs      Map keyed by ref id; only the keys are used here.
 * @param opts.maxLabels Maximum number of labels to draw. Finite numbers are
 *                       floored and clamped to at least 1; anything else
 *                       falls back to 150.
 * @param opts.type      Screenshot encoding, defaults to "png".
 * @returns The screenshot buffer, the number of labels drawn, and the number
 *          of refs that were skipped.
 */
export async function screenshotWithLabelsViaPlaywright(opts: {
  cdpUrl: string;
  targetId?: string;
  refs: Record<string, { role: string; name?: string; nth?: number }>;
  maxLabels?: number;
  type?: "png" | "jpeg";
}): Promise<{ buffer: Buffer; labels: number; skipped: number }> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const type = opts.type ?? "png";
  const maxLabels =
    typeof opts.maxLabels === "number" && Number.isFinite(opts.maxLabels)
      ? Math.max(1, Math.floor(opts.maxLabels))
      : 150;

  // Only the viewport size is needed: Playwright reports bounding boxes
  // relative to the main-frame viewport, so no scroll offset is involved.
  const viewport = await page.evaluate(() => ({
    width: window.innerWidth || 0,
    height: window.innerHeight || 0,
  }));

  const refs = Object.keys(opts.refs ?? {});
  const boxes: Array<{ ref: string; x: number; y: number; w: number; h: number }> = [];
  let skipped = 0;

  for (const ref of refs) {
    // Once the label budget is used up, every remaining ref counts as skipped.
    if (boxes.length >= maxLabels) {
      skipped += 1;
      continue;
    }
    try {
      const box = await refLocator(page, ref).boundingBox();
      if (!box) {
        skipped += 1;
        continue;
      }
      const x0 = box.x;
      const y0 = box.y;
      const x1 = box.x + box.width;
      const y1 = box.y + box.height;
      // boundingBox() is already viewport-relative, so the visible area is
      // [0, width] x [0, height]. Comparing against scrollX/scrollY (document
      // coordinates) would wrongly drop visible elements on scrolled pages.
      if (x1 < 0 || x0 > viewport.width || y1 < 0 || y0 > viewport.height) {
        skipped += 1;
        continue;
      }
      boxes.push({
        ref,
        // The overlay root is position: fixed at (0, 0), so viewport-relative
        // coordinates are used as-is.
        x: x0,
        y: y0,
        w: Math.max(1, box.width),
        h: Math.max(1, box.height),
      });
    } catch {
      // Best-effort: a ref that cannot be resolved is skipped, not fatal.
      skipped += 1;
    }
  }

  try {
    if (boxes.length > 0) {
      await page.evaluate((labels) => {
        // Remove any overlay left behind by a previous run.
        const existing = document.querySelectorAll("[data-clawdbot-labels]");
        existing.forEach((el) => el.remove());

        const root = document.createElement("div");
        root.setAttribute("data-clawdbot-labels", "1");
        root.style.position = "fixed";
        root.style.left = "0";
        root.style.top = "0";
        root.style.zIndex = "2147483647";
        root.style.pointerEvents = "none";
        root.style.fontFamily =
          '"SF Mono","SFMono-Regular",Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace';

        const clamp = (value: number, min: number, max: number) =>
          Math.min(max, Math.max(min, value));

        for (const label of labels) {
          // Outline around the element.
          const box = document.createElement("div");
          box.setAttribute("data-clawdbot-labels", "1");
          box.style.position = "absolute";
          box.style.left = `${label.x}px`;
          box.style.top = `${label.y}px`;
          box.style.width = `${label.w}px`;
          box.style.height = `${label.h}px`;
          box.style.border = "2px solid #ffb020";
          box.style.boxSizing = "border-box";

          // Ref tag placed 18px above the outline, clamped so it never goes
          // above the top edge of the overlay.
          const tag = document.createElement("div");
          tag.setAttribute("data-clawdbot-labels", "1");
          tag.textContent = label.ref;
          tag.style.position = "absolute";
          tag.style.left = `${label.x}px`;
          tag.style.top = `${clamp(label.y - 18, 0, 20000)}px`;
          tag.style.background = "#ffb020";
          tag.style.color = "#1a1a1a";
          tag.style.fontSize = "12px";
          tag.style.lineHeight = "14px";
          tag.style.padding = "1px 4px";
          tag.style.borderRadius = "3px";
          tag.style.boxShadow = "0 1px 2px rgba(0,0,0,0.35)";
          tag.style.whiteSpace = "nowrap";

          root.appendChild(box);
          root.appendChild(tag);
        }

        document.documentElement.appendChild(root);
      }, boxes);
    }

    const buffer = await page.screenshot({ type });
    return { buffer, labels: boxes.length, skipped };
  } finally {
    // Always strip the overlay; cleanup failures are deliberately ignored so
    // they cannot mask the screenshot result or its error.
    await page
      .evaluate(() => {
        const existing = document.querySelectorAll("[data-clawdbot-labels]");
        existing.forEach((el) => el.remove());
      })
      .catch(() => {});
  }
}
|
||||
|
||||
export async function setInputFilesViaPlaywright(opts: {
|
||||
cdpUrl: string;
|
||||
targetId?: string;
|
||||
|
||||
@@ -4,7 +4,11 @@ import type express from "express";
|
||||
|
||||
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
|
||||
import { captureScreenshot, snapshotAria } from "../cdp.js";
|
||||
import { DEFAULT_AI_SNAPSHOT_MAX_CHARS } from "../constants.js";
|
||||
import {
|
||||
DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH,
|
||||
DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS,
|
||||
DEFAULT_AI_SNAPSHOT_MAX_CHARS,
|
||||
} from "../constants.js";
|
||||
import {
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
|
||||
@@ -138,14 +142,12 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
const profileCtx = resolveProfileContext(req, res, ctx);
|
||||
if (!profileCtx) return;
|
||||
const targetId = typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
|
||||
const mode = req.query.mode === "efficient" ? "efficient" : undefined;
|
||||
const labels = toBoolean(req.query.labels) ?? undefined;
|
||||
const explicitFormat =
|
||||
req.query.format === "aria" ? "aria" : req.query.format === "ai" ? "ai" : undefined;
|
||||
const format =
|
||||
req.query.format === "aria"
|
||||
? "aria"
|
||||
: req.query.format === "ai"
|
||||
? "ai"
|
||||
: (await getPwAiModule())
|
||||
? "ai"
|
||||
: "aria";
|
||||
explicitFormat ?? (mode ? "ai" : (await getPwAiModule()) ? "ai" : "aria");
|
||||
const limitRaw = typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
|
||||
const hasMaxChars = Object.hasOwn(req.query, "maxChars");
|
||||
const maxCharsRaw =
|
||||
@@ -156,19 +158,34 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
? Math.floor(maxCharsRaw)
|
||||
: undefined;
|
||||
const resolvedMaxChars =
|
||||
format === "ai" ? (hasMaxChars ? maxChars : DEFAULT_AI_SNAPSHOT_MAX_CHARS) : undefined;
|
||||
const interactive = toBoolean(req.query.interactive);
|
||||
const compact = toBoolean(req.query.compact);
|
||||
const depth = toNumber(req.query.depth);
|
||||
format === "ai"
|
||||
? hasMaxChars
|
||||
? maxChars
|
||||
: mode === "efficient"
|
||||
? DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS
|
||||
: DEFAULT_AI_SNAPSHOT_MAX_CHARS
|
||||
: undefined;
|
||||
const interactiveRaw = toBoolean(req.query.interactive);
|
||||
const compactRaw = toBoolean(req.query.compact);
|
||||
const depthRaw = toNumber(req.query.depth);
|
||||
const interactive = interactiveRaw ?? (mode === "efficient" ? true : undefined);
|
||||
const compact = compactRaw ?? (mode === "efficient" ? true : undefined);
|
||||
const depth =
|
||||
depthRaw ?? (mode === "efficient" ? DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH : undefined);
|
||||
const selector = toStringOrEmpty(req.query.selector);
|
||||
const frameSelector = toStringOrEmpty(req.query.frame);
|
||||
|
||||
try {
|
||||
const tab = await profileCtx.ensureTabAvailable(targetId || undefined);
|
||||
if ((labels || mode === "efficient") && format === "aria") {
|
||||
return jsonError(res, 400, "labels/mode=efficient require format=ai");
|
||||
}
|
||||
if (format === "ai") {
|
||||
const pw = await requirePwAi(res, "ai snapshot");
|
||||
if (!pw) return;
|
||||
const wantsRoleSnapshot =
|
||||
labels === true ||
|
||||
mode === "efficient" ||
|
||||
interactive === true ||
|
||||
compact === true ||
|
||||
depth !== undefined ||
|
||||
@@ -210,6 +227,39 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
}
|
||||
throw err;
|
||||
});
|
||||
if (labels) {
|
||||
const labeled = await pw.screenshotWithLabelsViaPlaywright({
|
||||
cdpUrl: profileCtx.profile.cdpUrl,
|
||||
targetId: tab.targetId,
|
||||
refs: "refs" in snap ? snap.refs : {},
|
||||
type: "png",
|
||||
});
|
||||
const normalized = await normalizeBrowserScreenshot(labeled.buffer, {
|
||||
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
|
||||
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
|
||||
});
|
||||
await ensureMediaDir();
|
||||
const saved = await saveMediaBuffer(
|
||||
normalized.buffer,
|
||||
normalized.contentType ?? "image/png",
|
||||
"browser",
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
|
||||
);
|
||||
const imageType = normalized.contentType?.includes("jpeg") ? "jpeg" : "png";
|
||||
return res.json({
|
||||
ok: true,
|
||||
format,
|
||||
targetId: tab.targetId,
|
||||
url: tab.url,
|
||||
labels: true,
|
||||
labelsCount: labeled.labels,
|
||||
labelsSkipped: labeled.skipped,
|
||||
imagePath: path.resolve(saved.path),
|
||||
imageType,
|
||||
...snap,
|
||||
});
|
||||
}
|
||||
|
||||
return res.json({
|
||||
ok: true,
|
||||
format,
|
||||
|
||||
@@ -11,6 +11,8 @@ export const browserCoreExamples = [
|
||||
"clawdbot browser screenshot --ref 12",
|
||||
"clawdbot browser snapshot",
|
||||
"clawdbot browser snapshot --format aria --limit 200",
|
||||
"clawdbot browser snapshot --efficient",
|
||||
"clawdbot browser snapshot --labels",
|
||||
];
|
||||
|
||||
export const browserActionExamples = [
|
||||
|
||||
@@ -48,17 +48,22 @@ export function registerBrowserInspectCommands(
|
||||
.option("--format <aria|ai>", "Snapshot format (default: ai)", "ai")
|
||||
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
||||
.option("--limit <n>", "Max nodes (default: 500/800)", (v: string) => Number(v))
|
||||
.option("--mode <efficient>", "Snapshot preset (efficient)")
|
||||
.option("--efficient", "Use the efficient snapshot preset", false)
|
||||
.option("--interactive", "Role snapshot: interactive elements only", false)
|
||||
.option("--compact", "Role snapshot: compact output", false)
|
||||
.option("--depth <n>", "Role snapshot: max depth", (v: string) => Number(v))
|
||||
.option("--selector <sel>", "Role snapshot: scope to CSS selector")
|
||||
.option("--frame <sel>", "Role snapshot: scope to an iframe selector")
|
||||
.option("--labels", "Include viewport label overlay screenshot", false)
|
||||
.option("--out <path>", "Write snapshot to a file")
|
||||
.action(async (opts, cmd) => {
|
||||
const parent = parentOpts(cmd);
|
||||
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
||||
const profile = parent?.browserProfile;
|
||||
const format = opts.format === "aria" ? "aria" : "ai";
|
||||
const mode =
|
||||
opts.efficient === true || opts.mode === "efficient" ? "efficient" : undefined;
|
||||
try {
|
||||
const result = await browserSnapshot(baseUrl, {
|
||||
format,
|
||||
@@ -69,6 +74,8 @@ export function registerBrowserInspectCommands(
|
||||
depth: Number.isFinite(opts.depth) ? opts.depth : undefined,
|
||||
selector: opts.selector?.trim() || undefined,
|
||||
frame: opts.frame?.trim() || undefined,
|
||||
labels: Boolean(opts.labels) || undefined,
|
||||
mode,
|
||||
profile,
|
||||
});
|
||||
|
||||
@@ -81,9 +88,24 @@ export function registerBrowserInspectCommands(
|
||||
await fs.writeFile(opts.out, payload, "utf8");
|
||||
}
|
||||
if (parent?.json) {
|
||||
defaultRuntime.log(JSON.stringify({ ok: true, out: opts.out }, null, 2));
|
||||
defaultRuntime.log(
|
||||
JSON.stringify(
|
||||
{
|
||||
ok: true,
|
||||
out: opts.out,
|
||||
...(result.format === "ai" && result.imagePath
|
||||
? { imagePath: result.imagePath }
|
||||
: {}),
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
);
|
||||
} else {
|
||||
defaultRuntime.log(opts.out);
|
||||
if (result.format === "ai" && result.imagePath) {
|
||||
defaultRuntime.log(`MEDIA:${result.imagePath}`);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -95,6 +117,9 @@ export function registerBrowserInspectCommands(
|
||||
|
||||
if (result.format === "ai") {
|
||||
defaultRuntime.log(result.snapshot);
|
||||
if (result.imagePath) {
|
||||
defaultRuntime.log(`MEDIA:${result.imagePath}`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user