feat: role snapshot refs for browser

This commit is contained in:
Peter Steinberger
2026-01-12 08:36:20 +00:00
parent 6a7b812513
commit fadad6e061
10 changed files with 480 additions and 19 deletions

View File

@@ -250,6 +250,10 @@ export async function browserSnapshot(
targetId?: string;
limit?: number;
maxChars?: number;
interactive?: boolean;
compact?: boolean;
depth?: number;
selector?: string;
profile?: string;
},
): Promise<SnapshotResult> {
@@ -260,6 +264,12 @@ export async function browserSnapshot(
if (typeof opts.maxChars === "number" && Number.isFinite(opts.maxChars)) {
q.set("maxChars", String(opts.maxChars));
}
if (typeof opts.interactive === "boolean")
q.set("interactive", String(opts.interactive));
if (typeof opts.compact === "boolean") q.set("compact", String(opts.compact));
if (typeof opts.depth === "number" && Number.isFinite(opts.depth))
q.set("depth", String(opts.depth));
if (opts.selector?.trim()) q.set("selector", opts.selector.trim());
if (opts.profile) q.set("profile", opts.profile);
return await fetchBrowserJson<SnapshotResult>(
`${baseUrl}/snapshot?${q.toString()}`,

View File

@@ -24,6 +24,7 @@ export {
selectOptionViaPlaywright,
setInputFilesViaPlaywright,
snapshotAiViaPlaywright,
snapshotRoleViaPlaywright,
takeScreenshotViaPlaywright,
typeViaPlaywright,
waitForViaPlaywright,

View File

@@ -0,0 +1,45 @@
import { describe, expect, it } from "vitest";
import { buildRoleSnapshotFromAriaSnapshot } from "./pw-role-snapshot.js";
// Fixtures mimic Playwright aria snapshots: one "- role" entry per line, with
// children indented by two spaces per nesting level. The builder derives depth
// from that indentation, so the nested fixture lines must keep it.
describe("pw-role-snapshot", () => {
  it("adds refs for interactive elements", () => {
    const aria = [
      '- heading "Example" [level=1]',
      "- paragraph: hello",
      '- button "Submit"',
      "  - generic",
      '- link "Learn more"',
    ].join("\n");
    const res = buildRoleSnapshotFromAriaSnapshot(aria, { interactive: true });
    expect(res.snapshot).toContain("[ref=e1]");
    expect(res.snapshot).toContain("[ref=e2]");
    expect(res.snapshot).toContain('- button "Submit" [ref=e1]');
    expect(res.snapshot).toContain('- link "Learn more" [ref=e2]');
    // Only interactive roles get refs in interactive mode; the heading does not.
    expect(Object.keys(res.refs)).toEqual(["e1", "e2"]);
    expect(res.refs.e1).toMatchObject({ role: "button", name: "Submit" });
    expect(res.refs.e2).toMatchObject({ role: "link", name: "Learn more" });
  });

  it("uses nth only when duplicates exist", () => {
    const aria = ['- button "OK"', '- button "OK"', '- button "Cancel"'].join(
      "\n",
    );
    const res = buildRoleSnapshotFromAriaSnapshot(aria);
    expect(res.snapshot).toContain("[ref=e1]");
    expect(res.snapshot).toContain("[ref=e2] [nth=1]");
    // Duplicated role+name pairs keep their index; unique ones have it removed.
    expect(res.refs.e1?.nth).toBe(0);
    expect(res.refs.e2?.nth).toBe(1);
    expect(res.refs.e3?.nth).toBeUndefined();
  });

  it("respects maxDepth", () => {
    // Depth 0, 1 and 2 respectively (two spaces per level).
    const aria = ['- region "Main"', "  - group", '    - button "Deep"'].join(
      "\n",
    );
    const res = buildRoleSnapshotFromAriaSnapshot(aria, { maxDepth: 1 });
    expect(res.snapshot).toContain('- region "Main"');
    expect(res.snapshot).toContain("  - group");
    expect(res.snapshot).not.toContain("button");
  });
});

View File

@@ -0,0 +1,281 @@
/**
 * Locator description recorded for one snapshot ref (e.g. `e1`).
 * The snapshot builder stores one entry per line that received a `[ref=…]` tag.
 */
export type RoleRef = {
/** ARIA role as parsed from the snapshot line, lowercased by the builder. */
role: string;
/** Quoted accessible name from the snapshot line, when one was present. */
name?: string;
/** Index used only when role+name duplicates exist. */
nth?: number;
};
/** Lookup from ref id (`e1`, `e2`, …) to the role/name/nth recorded for it. */
export type RoleRefMap = Record<string, RoleRef>;
/** Filters applied while converting an aria snapshot into a role snapshot. */
export type RoleSnapshotOptions = {
/** Only include interactive elements (buttons, links, inputs, etc.). */
interactive?: boolean;
/**
 * Maximum depth to include (0 = root only).
 * Depth is derived from a line's leading whitespace, two characters per level.
 */
maxDepth?: number;
/** Remove unnamed structural elements and empty branches. */
compact?: boolean;
};
/**
 * Roles treated as interactive. Lines with these roles always receive a ref,
 * and they are the only lines kept when the `interactive` option is set.
 */
const INTERACTIVE_ROLES = new Set([
"button",
"link",
"textbox",
"checkbox",
"radio",
"combobox",
"listbox",
"menuitem",
"menuitemcheckbox",
"menuitemradio",
"option",
"searchbox",
"slider",
"spinbutton",
"switch",
"tab",
"treeitem",
]);
/**
 * Content/landmark roles. Lines with these roles receive a ref only when they
 * carry a quoted name.
 */
const CONTENT_ROLES = new Set([
"heading",
"cell",
"gridcell",
"columnheader",
"rowheader",
"listitem",
"article",
"region",
"main",
"navigation",
]);
/**
 * Container/layout roles. In compact mode, lines with these roles are dropped
 * when they have no quoted name.
 */
const STRUCTURAL_ROLES = new Set([
"generic",
"group",
"list",
"table",
"row",
"rowgroup",
"grid",
"treegrid",
"menu",
"menubar",
"toolbar",
"tablist",
"tree",
"directory",
"document",
"application",
"presentation",
"none",
]);
/**
 * Nesting depth of a snapshot line: the number of leading whitespace
 * characters divided by two, rounded down.
 */
function getIndentLevel(line: string): number {
  const leading = /^(\s*)/.exec(line);
  if (!leading) return 0;
  const width = leading[1].length;
  return Math.floor(width / 2);
}
/**
 * Bookkeeping for how often each role+name pair occurs in a snapshot and
 * which refs were handed out for it.
 */
type RoleNameTracker = {
  /** Number of occurrences seen so far, per role+name key. */
  counts: Map<string, number>;
  /** Refs assigned so far, per role+name key, in assignment order. */
  refsByKey: Map<string, string[]>;
  /** Builds the `role:name` key; a missing name is treated as empty. */
  getKey: (role: string, name?: string) => string;
  /** Returns the zero-based occurrence index for the pair and advances it. */
  getNextIndex: (role: string, name?: string) => number;
  /** Records that `ref` was assigned to the given role+name pair. */
  trackRef: (role: string, name: string | undefined, ref: string) => void;
  /** Keys that have more than one tracked ref. */
  getDuplicateKeys: () => Set<string>;
};

/**
 * Creates an empty tracker.
 *
 * The methods close over the shared maps and key function instead of going
 * through `this`, so they keep working when destructured or passed around as
 * callbacks.
 */
function createRoleNameTracker(): RoleNameTracker {
  const counts = new Map<string, number>();
  const refsByKey = new Map<string, string[]>();
  const getKey = (role: string, name?: string): string =>
    `${role}:${name ?? ""}`;

  return {
    counts,
    refsByKey,
    getKey,
    getNextIndex(role: string, name?: string): number {
      const key = getKey(role, name);
      const current = counts.get(key) ?? 0;
      counts.set(key, current + 1);
      return current;
    },
    trackRef(role: string, name: string | undefined, ref: string): void {
      const key = getKey(role, name);
      const list = refsByKey.get(key) ?? [];
      list.push(ref);
      refsByKey.set(key, list);
    },
    getDuplicateKeys(): Set<string> {
      const out = new Set<string>();
      for (const [key, refs] of refsByKey) {
        if (refs.length > 1) out.add(key);
      }
      return out;
    },
  };
}
/**
 * Removes the `nth` field from every ref whose role+name pair was tracked
 * only once, so `nth` remains only on refs that share a pair with another ref.
 * Mutates the entries of `refs` in place.
 */
function removeNthFromNonDuplicates(
  refs: RoleRefMap,
  tracker: RoleNameTracker,
) {
  const duplicated = tracker.getDuplicateKeys();
  for (const entry of Object.values(refs)) {
    const pairKey = tracker.getKey(entry.role, entry.name);
    if (duplicated.has(pairKey)) continue;
    delete entry.nth;
  }
}
/**
 * Prunes a rendered snapshot tree.
 *
 * A line is kept when it carries a `[ref=…]` tag, when it contains a colon but
 * does not end with one (after trimming trailing whitespace), or when some
 * more deeply indented line following it, before indentation returns to its
 * own level, carries a `[ref=…]` tag. All other lines are dropped.
 */
function compactTree(tree: string) {
  const rows = tree.split("\n");
  const kept: string[] = [];

  rows.forEach((row, index) => {
    const hasRef = row.includes("[ref=");
    const hasInlineValue = row.includes(":") && !row.trimEnd().endsWith(":");
    if (hasRef || hasInlineValue) {
      kept.push(row);
      return;
    }

    // Scan the descendants (more deeply indented lines) for any ref.
    const ownIndent = getIndentLevel(row);
    let cursor = index + 1;
    while (cursor < rows.length) {
      const candidate = rows[cursor];
      if (getIndentLevel(candidate) <= ownIndent) break;
      if (candidate?.includes("[ref=")) {
        kept.push(row);
        break;
      }
      cursor += 1;
    }
  });

  return kept.join("\n");
}
/**
 * Converts one aria-snapshot line for the non-interactive rendering path.
 *
 * Returns `null` when the line must be omitted (too deep, filtered by the
 * `interactive`/`compact` options), the untouched line when it needs no ref,
 * or the line with ` [ref=…]` (and ` [nth=…]` for repeated role+name pairs)
 * inserted after the role/name. Assigning a ref records it in `refs` and in
 * `tracker`.
 */
function processLine(
  line: string,
  refs: RoleRefMap,
  options: RoleSnapshotOptions,
  tracker: RoleNameTracker,
  nextRef: () => string,
): string | null {
  const { interactive, compact, maxDepth } = options;
  if (maxDepth !== undefined && getIndentLevel(line) > maxDepth) return null;

  // Lines that are not "- role ..." entries pass through unless filtering.
  const passthrough = interactive ? null : line;
  const parsed = /^(\s*-\s*)(\w+)(?:\s+"([^"]*)")?(.*)$/.exec(line);
  if (!parsed) return passthrough;

  const [, prefix, roleRaw, name, suffix] = parsed;
  if (roleRaw.startsWith("/")) return passthrough;

  const role = roleRaw.toLowerCase();
  const interactiveRole = INTERACTIVE_ROLES.has(role);
  if (interactive && !interactiveRole) return null;
  if (compact && !name && STRUCTURAL_ROLES.has(role)) return null;

  // Interactive roles always get a ref; content roles only when named.
  const namedContent = CONTENT_ROLES.has(role) && name;
  if (!interactiveRole && !namedContent) return line;

  const ref = nextRef();
  const nth = tracker.getNextIndex(role, name);
  tracker.trackRef(role, name, ref);
  refs[ref] = { role, name, nth };

  const pieces = [`${prefix}${roleRaw}`];
  if (name) pieces.push(` "${name}"`);
  pieces.push(` [ref=${ref}]`);
  if (nth > 0) pieces.push(` [nth=${nth}]`);
  if (suffix) pieces.push(suffix);
  return pieces.join("");
}
/**
 * Extracts a role ref id (`e` followed by digits) from user input.
 *
 * Surrounding whitespace is ignored, and a single leading `@` or `ref=` prefix
 * is stripped before validation.
 *
 * @param raw - Ref as supplied by the caller, e.g. `e3`, `@e3` or `ref=e3`.
 * @returns The bare ref id, or `null` when the input is not a role ref.
 */
export function parseRoleRef(raw: string): string | null {
  const candidate = raw.trim();
  if (candidate === "") return null;

  let id = candidate;
  if (candidate.startsWith("@")) {
    id = candidate.slice(1);
  } else if (candidate.startsWith("ref=")) {
    id = candidate.slice(4);
  }

  if (!/^e\d+$/.test(id)) return null;
  return id;
}
/**
 * Converts an aria snapshot (one `- role "name" …` entry per line, nested by
 * two-space indentation) into a role snapshot annotated with `[ref=eN]` tags.
 *
 * - With `options.interactive`, the result is a flat list that contains only
 *   interactive-role lines.
 * - Otherwise each line goes through `processLine`, and with `options.compact`
 *   the rendered tree is additionally pruned by `compactTree`.
 *
 * Refs are numbered `e1`, `e2`, … in line order. `nth` is kept on a ref only
 * when another ref shares its role+name pair.
 *
 * @param ariaSnapshot - Snapshot text to annotate.
 * @param options - Filtering options; all default to off.
 * @returns The annotated snapshot text and the ref lookup table. When nothing
 *   remains, the text is `"(no interactive elements)"` in interactive mode
 *   and `"(empty)"` otherwise.
 */
export function buildRoleSnapshotFromAriaSnapshot(
  ariaSnapshot: string,
  options: RoleSnapshotOptions = {},
): { snapshot: string; refs: RoleRefMap } {
  // Accept CRLF input: a trailing "\r" would stop the line regex from matching.
  const lines = ariaSnapshot.split(/\r?\n/);
  const refs: RoleRefMap = {};
  const tracker = createRoleNameTracker();
  let counter = 0;
  const nextRef = () => {
    counter += 1;
    return `e${counter}`;
  };

  if (options.interactive) {
    const result: string[] = [];
    for (const line of lines) {
      const depth = getIndentLevel(line);
      if (options.maxDepth !== undefined && depth > options.maxDepth) continue;
      const match = line.match(/^(\s*-\s*)(\w+)(?:\s+"([^"]*)")?(.*)$/);
      if (!match) continue;
      const [, , roleRaw, name, suffix] = match;
      if (roleRaw.startsWith("/")) continue;
      const role = roleRaw.toLowerCase();
      if (!INTERACTIVE_ROLES.has(role)) continue;
      const ref = nextRef();
      const nth = tracker.getNextIndex(role, name);
      tracker.trackRef(role, name, ref);
      refs[ref] = {
        role,
        name,
        nth,
      };
      // The list is flat, so the original indentation prefix is discarded.
      let enhanced = `- ${roleRaw}`;
      if (name) enhanced += ` "${name}"`;
      enhanced += ` [ref=${ref}]`;
      if (nth > 0) enhanced += ` [nth=${nth}]`;
      // Keep the remainder only when it carries bracketed attributes.
      if (suffix.includes("[")) enhanced += suffix;
      result.push(enhanced);
    }
    removeNthFromNonDuplicates(refs, tracker);
    return {
      snapshot: result.join("\n") || "(no interactive elements)",
      refs,
    };
  }

  const result: string[] = [];
  for (const line of lines) {
    const processed = processLine(line, refs, options, tracker, nextRef);
    if (processed !== null) result.push(processed);
  }
  removeNthFromNonDuplicates(refs, tracker);

  // Apply the placeholder after compaction: compactTree drops any line without
  // a ref or inline value, so a placeholder inserted earlier would be stripped
  // and compact mode would return an empty string.
  const tree = result.join("\n");
  const rendered = options.compact ? compactTree(tree) : tree;
  return {
    snapshot: rendered || "(empty)",
    refs,
  };
}

View File

@@ -39,6 +39,11 @@ type PageState = {
console: BrowserConsoleMessage[];
armIdUpload: number;
armIdDialog: number;
/**
* Role-based refs from the last role snapshot (e.g. e1/e2).
* These refs are NOT Playwright's `aria-ref` values.
*/
roleRefs?: Record<string, { role: string; name?: string; nth?: number }>;
};
const pageStates = new WeakMap<Page, PageState>();
@@ -189,7 +194,27 @@ export async function getPageForTargetId(opts: {
}
/**
 * Resolves a snapshot ref to a Playwright locator on `page`.
 *
 * A leading `@` or `ref=` prefix is stripped first. Refs of the form `eN` are
 * looked up in the page's stored role refs and resolved with `getByRole`
 * (exact name match when a name was recorded, `.nth()` when an index was
 * recorded). Any other ref is passed to the `aria-ref=` selector engine.
 *
 * @throws Error when an `eN` ref is not present in the page's stored role
 *   refs, e.g. because no role snapshot has been taken for this page yet.
 */
export function refLocator(page: Page, ref: string) {
  const normalized = ref.startsWith("@")
    ? ref.slice(1)
    : ref.startsWith("ref=")
      ? ref.slice(4)
      : ref;

  if (/^e\d+$/.test(normalized)) {
    const state = pageStates.get(page);
    const info = state?.roleRefs?.[normalized];
    if (!info) {
      throw new Error(
        `Unknown ref "${normalized}". Run a new snapshot and use a ref from that snapshot.`,
      );
    }
    // NOTE(review): a ref without a name and without nth resolves to every
    // element with that role; confirm callers tolerate a multi-match locator.
    const locator = info.name
      ? page.getByRole(info.role as never, { name: info.name, exact: true })
      : page.getByRole(info.role as never);
    return info.nth !== undefined ? locator.nth(info.nth) : locator;
  }

  return page.locator(`aria-ref=${normalized}`);
}
export async function closePlaywrightBrowserConnection(): Promise<void> {

View File

@@ -1,4 +1,9 @@
import type { BrowserFormField } from "./client-actions-core.js";
import {
buildRoleSnapshotFromAriaSnapshot,
parseRoleRef,
type RoleSnapshotOptions,
} from "./pw-role-snapshot.js";
import {
type BrowserConsoleMessage,
ensurePageState,
@@ -11,7 +16,9 @@ let nextUploadArmId = 0;
let nextDialogArmId = 0;
/**
 * Validates and normalizes a ref argument.
 *
 * Non-string values count as missing. Role refs (`e1`, `@e1`, `ref=e1`) are
 * reduced to their bare id via `parseRoleRef`; any other value is trimmed and
 * has a single leading `@` removed.
 *
 * @throws Error with message "ref is required" when nothing usable remains.
 */
function requireRef(value: unknown): string {
  const raw = typeof value === "string" ? value.trim() : "";
  const roleRef = raw ? parseRoleRef(raw) : null;
  const ref = roleRef ?? (raw.startsWith("@") ? raw.slice(1) : raw);
  if (!ref) throw new Error("ref is required");
  return ref;
}
@@ -55,6 +62,31 @@ export async function snapshotAiViaPlaywright(opts: {
return { snapshot };
}
/**
 * Takes an aria snapshot of the target page (or of the element matched by
 * `selector`, defaulting to `:root`) and converts it into a role snapshot.
 *
 * The refs produced by the conversion are stored on the page state, replacing
 * any refs stored by an earlier role snapshot.
 *
 * @returns The annotated snapshot text.
 */
export async function snapshotRoleViaPlaywright(opts: {
  cdpUrl: string;
  targetId?: string;
  selector?: string;
  options?: RoleSnapshotOptions;
}): Promise<{ snapshot: string }> {
  const { cdpUrl, targetId } = opts;
  const page = await getPageForTargetId({ cdpUrl, targetId });
  const state = ensurePageState(page);

  const scope = opts.selector?.trim();
  const target = page.locator(scope ? scope : ":root");
  const rawSnapshot = await target.ariaSnapshot();

  const { snapshot, refs } = buildRoleSnapshotFromAriaSnapshot(
    String(rawSnapshot ?? ""),
    opts.options,
  );
  state.roleRefs = refs;
  return { snapshot };
}
export async function clickViaPlaywright(opts: {
cdpUrl: string;
targetId?: string;
@@ -95,8 +127,7 @@ export async function hoverViaPlaywright(opts: {
ref: string;
timeoutMs?: number;
}): Promise<void> {
const ref = String(opts.ref ?? "").trim();
if (!ref) throw new Error("ref is required");
const ref = requireRef(opts.ref);
const page = await getPageForTargetId(opts);
ensurePageState(page);
await refLocator(page, ref).hover({
@@ -111,8 +142,8 @@ export async function dragViaPlaywright(opts: {
endRef: string;
timeoutMs?: number;
}): Promise<void> {
const startRef = String(opts.startRef ?? "").trim();
const endRef = String(opts.endRef ?? "").trim();
const startRef = requireRef(opts.startRef);
const endRef = requireRef(opts.endRef);
if (!startRef || !endRef) throw new Error("startRef and endRef are required");
const page = await getPageForTargetId(opts);
ensurePageState(page);
@@ -128,8 +159,7 @@ export async function selectOptionViaPlaywright(opts: {
values: string[];
timeoutMs?: number;
}): Promise<void> {
const ref = String(opts.ref ?? "").trim();
if (!ref) throw new Error("ref is required");
const ref = requireRef(opts.ref);
if (!opts.values?.length) throw new Error("values are required");
const page = await getPageForTargetId(opts);
ensurePageState(page);

View File

@@ -573,17 +573,55 @@ export function registerBrowserAgentRoutes(
maxCharsRaw > 0
? Math.floor(maxCharsRaw)
: undefined;
const interactive = toBoolean(req.query.interactive);
const compact = toBoolean(req.query.compact);
const depth = toNumber(req.query.depth);
const selector = toStringOrEmpty(req.query.selector);
try {
const tab = await profileCtx.ensureTabAvailable(targetId || undefined);
if (format === "ai") {
const pw = await requirePwAi(res, "ai snapshot");
if (!pw) return;
const snap = await pw.snapshotAiViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
...(maxChars ? { maxChars } : {}),
});
const wantsRoleSnapshot =
interactive === true ||
compact === true ||
depth !== undefined ||
Boolean(selector.trim());
const snap = wantsRoleSnapshot
? await pw.snapshotRoleViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
selector: selector.trim() || undefined,
options: {
interactive: interactive ?? undefined,
compact: compact ?? undefined,
maxDepth: depth ?? undefined,
},
})
: await pw
.snapshotAiViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
...(maxChars ? { maxChars } : {}),
})
.catch(async (err) => {
// Public-API fallback when Playwright's private _snapshotForAI is missing.
if (String(err).toLowerCase().includes("_snapshotforai")) {
return await pw.snapshotRoleViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
selector: selector.trim() || undefined,
options: {
interactive: interactive ?? undefined,
compact: compact ?? undefined,
maxDepth: depth ?? undefined,
},
});
}
throw err;
});
return res.json({
ok: true,
format,