feat(browser): add snapshot refs=aria mode
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
- Browser: ship a built-in `chrome` profile for extension relay and start the relay automatically when running locally.
|
||||
- Browser: default `browser.defaultProfile` to `chrome` (existing Chrome takeover mode).
|
||||
- Browser: add `clawdbot browser extension install/path` and copy extension path to clipboard.
|
||||
- Browser: add `snapshot refs=aria` (Playwright aria-ref ids) for self-resolving refs across `snapshot` → `act`.
|
||||
- Control UI: show raw any-map entries in config views; move Docs link into the left nav.
|
||||
|
||||
#### Plugins
|
||||
|
||||
@@ -39,6 +39,7 @@ const BROWSER_TARGETS = ["sandbox", "host", "custom"] as const;
|
||||
|
||||
const BROWSER_SNAPSHOT_FORMATS = ["aria", "ai"] as const;
|
||||
const BROWSER_SNAPSHOT_MODES = ["efficient"] as const;
|
||||
const BROWSER_SNAPSHOT_REFS = ["role", "aria"] as const;
|
||||
|
||||
const BROWSER_IMAGE_TYPES = ["png", "jpeg"] as const;
|
||||
|
||||
@@ -91,6 +92,7 @@ export const BrowserToolSchema = Type.Object({
|
||||
maxChars: Type.Optional(Type.Number()),
|
||||
mode: optionalStringEnum(BROWSER_SNAPSHOT_MODES),
|
||||
format: optionalStringEnum(BROWSER_SNAPSHOT_FORMATS),
|
||||
refs: optionalStringEnum(BROWSER_SNAPSHOT_REFS),
|
||||
interactive: Type.Optional(Type.Boolean()),
|
||||
compact: Type.Optional(Type.Boolean()),
|
||||
depth: Type.Optional(Type.Number()),
|
||||
|
||||
@@ -121,6 +121,19 @@ describe("browser tool snapshot maxChars", () => {
|
||||
|
||||
expect(browserClientMocks.browserProfiles).toHaveBeenCalledWith("http://127.0.0.1:18791");
|
||||
});
|
||||
|
||||
// The `refs` option given to the tool's snapshot action must reach the browser client unchanged.
it("passes refs mode through to browser snapshot", async () => {
  const browserTool = createBrowserTool();

  await browserTool.execute?.(null, { action: "snapshot", format: "ai", refs: "aria" });

  // Only format + refs are pinned; any other snapshot options are left unconstrained.
  const expectedSnapshotOptions = expect.objectContaining({ format: "ai", refs: "aria" });
  const snapshotMock = browserClientMocks.browserSnapshot;
  expect(snapshotMock).toHaveBeenCalledWith("http://127.0.0.1:18791", expectedSnapshotOptions);
});
|
||||
});
|
||||
|
||||
describe("browser tool snapshot labels", () => {
|
||||
|
||||
@@ -128,6 +128,7 @@ export function createBrowserTool(opts?: {
|
||||
'Profiles: use profile="chrome" for Chrome extension relay takeover (your existing Chrome tabs). Use profile="clawd" for the isolated clawd-managed browser.',
|
||||
"Chrome extension relay needs an attached tab: user must click the Clawdbot Browser Relay toolbar icon on the tab (badge ON). If no tab is connected, ask them to attach it.",
|
||||
"When using refs from snapshot (e.g. e12), keep the same tab: prefer passing targetId from the snapshot response into subsequent actions (act/click/type/etc).",
|
||||
'For stable, self-resolving refs across calls, use snapshot with refs="aria" (Playwright aria-ref ids). Default refs="role" are role+name-based.',
|
||||
"Use snapshot+act for UI automation. Avoid act:wait by default; use only in exceptional cases when no reliable UI state exists.",
|
||||
`target selects browser location (sandbox|host|custom). Default: ${targetDefault}.`,
|
||||
"controlUrl implies target=custom (remote control server).",
|
||||
@@ -190,6 +191,7 @@ export function createBrowserTool(opts?: {
|
||||
: "ai";
|
||||
const mode = params.mode === "efficient" ? "efficient" : undefined;
|
||||
const labels = typeof params.labels === "boolean" ? params.labels : undefined;
|
||||
const refs = params.refs === "aria" || params.refs === "role" ? params.refs : undefined;
|
||||
const hasMaxChars = Object.hasOwn(params, "maxChars");
|
||||
const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
|
||||
const limit =
|
||||
@@ -224,6 +226,7 @@ export function createBrowserTool(opts?: {
|
||||
targetId,
|
||||
limit,
|
||||
...(typeof resolvedMaxChars === "number" ? { maxChars: resolvedMaxChars } : {}),
|
||||
refs,
|
||||
interactive,
|
||||
compact,
|
||||
depth,
|
||||
|
||||
@@ -118,6 +118,36 @@ describe("browser client", () => {
|
||||
expect(parsed.searchParams.get("mode")).toBe("efficient");
|
||||
});
|
||||
|
||||
// browserSnapshot must forward `refs: "aria"` as a `refs=aria` query parameter on the snapshot URL.
it("adds refs=aria to snapshots when requested", async () => {
  const requestedUrls: string[] = [];

  // Stand-in for global fetch: records each URL and answers with a minimal successful snapshot.
  const fetchStub = vi.fn(async (url: string) => {
    requestedUrls.push(url);
    const response = {
      ok: true,
      json: async () => ({
        ok: true,
        format: "ai",
        targetId: "t1",
        url: "https://x",
        snapshot: "ok",
      }),
    };
    return response as unknown as Response;
  });
  vi.stubGlobal("fetch", fetchStub);

  await browserSnapshot("http://127.0.0.1:18791", { format: "ai", refs: "aria" });

  const snapshotUrl = requestedUrls.find((url) => url.includes("/snapshot?"));
  expect(snapshotUrl).toBeTruthy();

  const { searchParams } = new URL(snapshotUrl as string);
  expect(searchParams.get("refs")).toBe("aria");
});
|
||||
|
||||
it("uses the expected endpoints + methods for common calls", async () => {
|
||||
const calls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
|
||||
|
||||
@@ -270,6 +270,7 @@ export async function browserSnapshot(
|
||||
targetId?: string;
|
||||
limit?: number;
|
||||
maxChars?: number;
|
||||
refs?: "role" | "aria";
|
||||
interactive?: boolean;
|
||||
compact?: boolean;
|
||||
depth?: number;
|
||||
@@ -287,6 +288,7 @@ export async function browserSnapshot(
|
||||
if (typeof opts.maxChars === "number" && Number.isFinite(opts.maxChars)) {
|
||||
q.set("maxChars", String(opts.maxChars));
|
||||
}
|
||||
if (opts.refs === "aria" || opts.refs === "role") q.set("refs", opts.refs);
|
||||
if (typeof opts.interactive === "boolean") q.set("interactive", String(opts.interactive));
|
||||
if (typeof opts.compact === "boolean") q.set("compact", String(opts.compact));
|
||||
if (typeof opts.depth === "number" && Number.isFinite(opts.depth))
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import {
|
||||
buildRoleSnapshotFromAiSnapshot,
|
||||
buildRoleSnapshotFromAriaSnapshot,
|
||||
getRoleSnapshotStats,
|
||||
parseRoleRef,
|
||||
@@ -67,4 +68,24 @@ describe("pw-role-snapshot", () => {
|
||||
expect(parseRoleRef("12")).toBeNull();
|
||||
expect(parseRoleRef("")).toBeNull();
|
||||
});
|
||||
|
||||
// Interactive mode keeps only interactive nodes and reuses the ref ids already present in the input.
it("preserves Playwright aria-ref ids in ai snapshots", () => {
  const aiSnapshotLines = [
    '- navigation [ref=e1]:',
    '  - link "Home" [ref=e5]',
    '  - heading "Title" [ref=e6]',
    '  - button "Save" [ref=e7] [cursor=pointer]:',
    "    - paragraph: hello",
  ];

  const { snapshot, refs } = buildRoleSnapshotFromAiSnapshot(aiSnapshotLines.join("\n"), {
    interactive: true,
  });

  // Interactive nodes survive with their original ref ids.
  for (const kept of ['[ref=e5]', '- link "Home"', '- button "Save"']) {
    expect(snapshot).toContain(kept);
  }
  // Non-interactive nodes are dropped from the output.
  for (const dropped of ["navigation", "heading"]) {
    expect(snapshot).not.toContain(dropped);
  }

  expect(Object.keys(refs).sort()).toEqual(["e5", "e7"]);
  expect(refs.e5).toMatchObject({ role: "link", name: "Home" });
  expect(refs.e7).toMatchObject({ role: "button", name: "Save" });
});
|
||||
});
|
||||
|
||||
@@ -293,3 +293,75 @@ export function buildRoleSnapshotFromAriaSnapshot(
|
||||
refs,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Extract the ref id from the trailing part of an AI snapshot line.
 *
 * @param suffix - Text following the role/name on a snapshot line, e.g. ` [ref=e7] [cursor=pointer]:`.
 * @returns The first `e<digits>` id found inside a `[ref=...]` marker (matched case-insensitively,
 *   returned exactly as written), or `null` when no such marker is present.
 */
function parseAiSnapshotRef(suffix: string): string | null {
  const found = /\[ref=(e\d+)\]/i.exec(suffix);
  if (found === null) return null;
  return found[1];
}
|
||||
|
||||
/**
 * Matches one node line of an AI snapshot: `<indent>- <role> ["<name>"]<suffix>`.
 * Groups: 1 = indent + dash, 2 = role, 3 = optional quoted name, 4 = rest of the line.
 * The role is `\w+`, so a line whose first token starts with "/" never matches.
 */
const AI_SNAPSHOT_NODE_LINE = /^(\s*-\s*)(\w+)(?:\s+"([^"]*)")?(.*)$/;

/**
 * Build a role snapshot from Playwright's AI snapshot output while preserving Playwright's own
 * aria-ref ids (e.g. ref=e13). This makes the refs self-resolving across calls.
 *
 * Behaviour by option:
 * - `maxDepth`: lines whose `getIndentLevel` exceeds it are dropped in every mode.
 * - `interactive`: only node lines whose role is in `INTERACTIVE_ROLES` AND that carry a
 *   `[ref=eN]` marker are kept; each is re-emitted without its indentation. Yields
 *   `"(no interactive elements)"` when nothing qualifies. `compact` is not consulted in this mode.
 * - otherwise: every line is passed through verbatim (including lines that are not node lines);
 *   with `compact`, unnamed nodes whose role is in `STRUCTURAL_ROLES` are dropped and the result is
 *   run through `compactTree`. Yields `"(empty)"` when there is no output.
 *
 * @param aiSnapshot - Raw AI snapshot text; nullish input is treated as an empty string.
 * @param options - Filtering options (`interactive`, `compact`, `maxDepth`).
 * @returns The filtered snapshot text and a map from each retained ref id to its role (lowercased)
 *   and, when non-empty, its name.
 */
export function buildRoleSnapshotFromAiSnapshot(
  aiSnapshot: string,
  options: RoleSnapshotOptions = {},
): { snapshot: string; refs: RoleRefMap } {
  const lines = String(aiSnapshot ?? "").split("\n");
  const refs: RoleRefMap = {};
  const out: string[] = [];
  const interactiveOnly = Boolean(options.interactive);

  for (const line of lines) {
    const depth = getIndentLevel(line);
    if (options.maxDepth !== undefined && depth > options.maxDepth) continue;

    const match = AI_SNAPSHOT_NODE_LINE.exec(line);
    if (!match) {
      // Non-node lines (text continuations, tokens starting with "/") are only kept in tree mode.
      if (!interactiveOnly) out.push(line);
      continue;
    }

    const [, , roleRaw, name, suffix] = match;
    const role = roleRaw.toLowerCase();
    const ref = parseAiSnapshotRef(suffix);

    if (interactiveOnly) {
      // Flat list: a node is listed only if it is interactive and addressable by ref.
      if (!INTERACTIVE_ROLES.has(role) || !ref) continue;
      refs[ref] = { role, ...(name ? { name } : {}) };
      out.push(`- ${roleRaw}${name ? ` "${name}"` : ""}${suffix}`);
      continue;
    }

    // Compact tree: unnamed structural containers are dropped.
    if (options.compact && STRUCTURAL_ROLES.has(role) && !name) continue;

    if (ref) refs[ref] = { role, ...(name ? { name } : {}) };
    out.push(line);
  }

  if (interactiveOnly) {
    return {
      snapshot: out.join("\n") || "(no interactive elements)",
      refs,
    };
  }

  const tree = out.join("\n") || "(empty)";
  return {
    snapshot: options.compact ? compactTree(tree) : tree,
    refs,
  };
}
|
||||
|
||||
@@ -62,6 +62,16 @@ describe("pw-session refLocator", () => {
|
||||
|
||||
expect(mocks.getByRole).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// With the page state in "aria" mode, an eN ref must be resolved via an `aria-ref=` selector.
it("uses aria-ref locators when refs mode is aria", () => {
  const fake = fakePage();

  // Put the page state into aria mode before resolving the ref.
  const pageState = ensurePageState(fake.page);
  pageState.roleRefsMode = "aria";

  refLocator(fake.page, "e1");

  expect(fake.mocks.locator).toHaveBeenCalledWith("aria-ref=e1");
});
|
||||
});
|
||||
|
||||
describe("pw-session role refs cache", () => {
|
||||
|
||||
@@ -64,9 +64,11 @@ type PageState = {
|
||||
armIdDownload: number;
|
||||
/**
|
||||
* Role-based refs from the last role snapshot (e.g. e1/e2).
|
||||
* In mode "role", these refs are NOT Playwright's `aria-ref` values.
|
||||
* Mode "role" refs are generated from ariaSnapshot and resolved via getByRole.
|
||||
* Mode "aria" refs are Playwright aria-ref ids and resolved via `aria-ref=...`.
|
||||
*/
|
||||
roleRefs?: Record<string, { role: string; name?: string; nth?: number }>;
|
||||
roleRefsMode?: "role" | "aria";
|
||||
roleRefsFrameSelector?: string;
|
||||
};
|
||||
|
||||
@@ -74,6 +76,7 @@ type RoleRefs = NonNullable<PageState["roleRefs"]>;
|
||||
type RoleRefsCacheEntry = {
|
||||
refs: RoleRefs;
|
||||
frameSelector?: string;
|
||||
mode?: NonNullable<PageState["roleRefsMode"]>;
|
||||
};
|
||||
|
||||
type ContextState = {
|
||||
@@ -110,12 +113,14 @@ export function rememberRoleRefsForTarget(opts: {
|
||||
targetId: string;
|
||||
refs: RoleRefs;
|
||||
frameSelector?: string;
|
||||
mode?: NonNullable<PageState["roleRefsMode"]>;
|
||||
}): void {
|
||||
const targetId = opts.targetId.trim();
|
||||
if (!targetId) return;
|
||||
roleRefsByTarget.set(roleRefsKey(opts.cdpUrl, targetId), {
|
||||
refs: opts.refs,
|
||||
...(opts.frameSelector ? { frameSelector: opts.frameSelector } : {}),
|
||||
...(opts.mode ? { mode: opts.mode } : {}),
|
||||
});
|
||||
while (roleRefsByTarget.size > MAX_ROLE_REFS_CACHE) {
|
||||
const first = roleRefsByTarget.keys().next();
|
||||
@@ -137,6 +142,7 @@ export function restoreRoleRefsForTarget(opts: {
|
||||
if (state.roleRefs) return;
|
||||
state.roleRefs = cached.refs;
|
||||
state.roleRefsFrameSelector = cached.frameSelector;
|
||||
state.roleRefsMode = cached.mode;
|
||||
}
|
||||
|
||||
export function ensurePageState(page: Page): PageState {
|
||||
@@ -339,6 +345,12 @@ export function refLocator(page: Page, ref: string) {
|
||||
|
||||
if (/^e\d+$/.test(normalized)) {
|
||||
const state = pageStates.get(page);
|
||||
if (state?.roleRefsMode === "aria") {
|
||||
const scope = state.roleRefsFrameSelector
|
||||
? page.frameLocator(state.roleRefsFrameSelector)
|
||||
: page;
|
||||
return scope.locator(`aria-ref=${normalized}`);
|
||||
}
|
||||
const info = state?.roleRefs?.[normalized];
|
||||
if (!info) {
|
||||
throw new Error(
|
||||
|
||||
@@ -265,6 +265,7 @@ export async function scrollIntoViewViaPlaywright(opts: {
|
||||
}): Promise<void> {
|
||||
const page = await getPageForTargetId(opts);
|
||||
ensurePageState(page);
|
||||
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
|
||||
const timeout = normalizeTimeoutMs(opts.timeoutMs, 20_000);
|
||||
|
||||
const ref = requireRef(opts.ref);
|
||||
@@ -340,6 +341,7 @@ export async function takeScreenshotViaPlaywright(opts: {
|
||||
}): Promise<{ buffer: Buffer }> {
|
||||
const page = await getPageForTargetId(opts);
|
||||
ensurePageState(page);
|
||||
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
|
||||
const type = opts.type ?? "png";
|
||||
if (opts.ref) {
|
||||
if (opts.fullPage) throw new Error("fullPage is not supported for element screenshots");
|
||||
@@ -369,6 +371,7 @@ export async function screenshotWithLabelsViaPlaywright(opts: {
|
||||
}): Promise<{ buffer: Buffer; labels: number; skipped: number }> {
|
||||
const page = await getPageForTargetId(opts);
|
||||
ensurePageState(page);
|
||||
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
|
||||
const type = opts.type ?? "png";
|
||||
const maxLabels =
|
||||
typeof opts.maxLabels === "number" && Number.isFinite(opts.maxLabels)
|
||||
@@ -495,6 +498,7 @@ export async function setInputFilesViaPlaywright(opts: {
|
||||
}): Promise<void> {
|
||||
const page = await getPageForTargetId(opts);
|
||||
ensurePageState(page);
|
||||
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
|
||||
if (!opts.paths.length) throw new Error("paths are required");
|
||||
const inputRef = typeof opts.inputRef === "string" ? opts.inputRef.trim() : "";
|
||||
const element = typeof opts.element === "string" ? opts.element.trim() : "";
|
||||
|
||||
@@ -2,6 +2,7 @@ import type { Page } from "playwright-core";
|
||||
|
||||
import { type AriaSnapshotNode, formatAriaSnapshot, type RawAXNode } from "./cdp.js";
|
||||
import {
|
||||
buildRoleSnapshotFromAiSnapshot,
|
||||
buildRoleSnapshotFromAriaSnapshot,
|
||||
getRoleSnapshotStats,
|
||||
type RoleSnapshotOptions,
|
||||
@@ -76,6 +77,7 @@ export async function snapshotRoleViaPlaywright(opts: {
|
||||
targetId?: string;
|
||||
selector?: string;
|
||||
frameSelector?: string;
|
||||
refsMode?: "role" | "aria";
|
||||
options?: RoleSnapshotOptions;
|
||||
}): Promise<{
|
||||
snapshot: string;
|
||||
@@ -88,6 +90,37 @@ export async function snapshotRoleViaPlaywright(opts: {
|
||||
});
|
||||
const state = ensurePageState(page);
|
||||
|
||||
if (opts.refsMode === "aria") {
|
||||
if (opts.selector?.trim() || opts.frameSelector?.trim()) {
|
||||
throw new Error("refs=aria does not support selector/frame snapshots yet.");
|
||||
}
|
||||
const maybe = page as unknown as WithSnapshotForAI;
|
||||
if (!maybe._snapshotForAI) {
|
||||
throw new Error("refs=aria requires Playwright _snapshotForAI support.");
|
||||
}
|
||||
const result = await maybe._snapshotForAI({
|
||||
timeout: 5000,
|
||||
track: "response",
|
||||
});
|
||||
const built = buildRoleSnapshotFromAiSnapshot(String(result?.full ?? ""), opts.options);
|
||||
state.roleRefs = built.refs;
|
||||
state.roleRefsFrameSelector = undefined;
|
||||
state.roleRefsMode = "aria";
|
||||
if (opts.targetId) {
|
||||
rememberRoleRefsForTarget({
|
||||
cdpUrl: opts.cdpUrl,
|
||||
targetId: opts.targetId,
|
||||
refs: built.refs,
|
||||
mode: "aria",
|
||||
});
|
||||
}
|
||||
return {
|
||||
snapshot: built.snapshot,
|
||||
refs: built.refs,
|
||||
stats: getRoleSnapshotStats(built.snapshot, built.refs),
|
||||
};
|
||||
}
|
||||
|
||||
const frameSelector = opts.frameSelector?.trim() || "";
|
||||
const selector = opts.selector?.trim() || "";
|
||||
const locator = frameSelector
|
||||
@@ -102,12 +135,14 @@ export async function snapshotRoleViaPlaywright(opts: {
|
||||
const built = buildRoleSnapshotFromAriaSnapshot(String(ariaSnapshot ?? ""), opts.options);
|
||||
state.roleRefs = built.refs;
|
||||
state.roleRefsFrameSelector = frameSelector || undefined;
|
||||
state.roleRefsMode = "role";
|
||||
if (opts.targetId) {
|
||||
rememberRoleRefsForTarget({
|
||||
cdpUrl: opts.cdpUrl,
|
||||
targetId: opts.targetId,
|
||||
refs: built.refs,
|
||||
frameSelector: frameSelector || undefined,
|
||||
mode: "role",
|
||||
});
|
||||
}
|
||||
return {
|
||||
|
||||
@@ -169,6 +169,8 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
const interactiveRaw = toBoolean(req.query.interactive);
|
||||
const compactRaw = toBoolean(req.query.compact);
|
||||
const depthRaw = toNumber(req.query.depth);
|
||||
const refsModeRaw = toStringOrEmpty(req.query.refs).trim();
|
||||
const refsMode = refsModeRaw === "aria" ? "aria" : refsModeRaw === "role" ? "role" : undefined;
|
||||
const interactive = interactiveRaw ?? (mode === "efficient" ? true : undefined);
|
||||
const compact = compactRaw ?? (mode === "efficient" ? true : undefined);
|
||||
const depth =
|
||||
@@ -199,6 +201,7 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
targetId: tab.targetId,
|
||||
selector: selector.trim() || undefined,
|
||||
frameSelector: frameSelector.trim() || undefined,
|
||||
refsMode,
|
||||
options: {
|
||||
interactive: interactive ?? undefined,
|
||||
compact: compact ?? undefined,
|
||||
@@ -219,6 +222,7 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
targetId: tab.targetId,
|
||||
selector: selector.trim() || undefined,
|
||||
frameSelector: frameSelector.trim() || undefined,
|
||||
refsMode,
|
||||
options: {
|
||||
interactive: interactive ?? undefined,
|
||||
compact: compact ?? undefined,
|
||||
|
||||
Reference in New Issue
Block a user