feat(browser): add snapshot refs=aria mode

This commit is contained in:
Peter Steinberger
2026-01-15 10:16:33 +00:00
parent 0facc63019
commit 4f1a4ab072
13 changed files with 210 additions and 1 deletions

View File

@@ -118,6 +118,36 @@ describe("browser client", () => {
expect(parsed.searchParams.get("mode")).toBe("efficient");
});
it("adds refs=aria to snapshots when requested", async () => {
const calls: string[] = [];
vi.stubGlobal(
"fetch",
vi.fn(async (url: string) => {
calls.push(url);
return {
ok: true,
json: async () => ({
ok: true,
format: "ai",
targetId: "t1",
url: "https://x",
snapshot: "ok",
}),
} as unknown as Response;
}),
);
await browserSnapshot("http://127.0.0.1:18791", {
format: "ai",
refs: "aria",
});
const snapshotCall = calls.find((url) => url.includes("/snapshot?"));
expect(snapshotCall).toBeTruthy();
const parsed = new URL(snapshotCall as string);
expect(parsed.searchParams.get("refs")).toBe("aria");
});
it("uses the expected endpoints + methods for common calls", async () => {
const calls: Array<{ url: string; init?: RequestInit }> = [];

View File

@@ -270,6 +270,7 @@ export async function browserSnapshot(
targetId?: string;
limit?: number;
maxChars?: number;
refs?: "role" | "aria";
interactive?: boolean;
compact?: boolean;
depth?: number;
@@ -287,6 +288,7 @@ export async function browserSnapshot(
if (typeof opts.maxChars === "number" && Number.isFinite(opts.maxChars)) {
q.set("maxChars", String(opts.maxChars));
}
if (opts.refs === "aria" || opts.refs === "role") q.set("refs", opts.refs);
if (typeof opts.interactive === "boolean") q.set("interactive", String(opts.interactive));
if (typeof opts.compact === "boolean") q.set("compact", String(opts.compact));
if (typeof opts.depth === "number" && Number.isFinite(opts.depth))

View File

@@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import {
buildRoleSnapshotFromAiSnapshot,
buildRoleSnapshotFromAriaSnapshot,
getRoleSnapshotStats,
parseRoleRef,
@@ -67,4 +68,24 @@ describe("pw-role-snapshot", () => {
expect(parseRoleRef("12")).toBeNull();
expect(parseRoleRef("")).toBeNull();
});
it("preserves Playwright aria-ref ids in ai snapshots", () => {
const ai = [
'- navigation [ref=e1]:',
' - link "Home" [ref=e5]',
' - heading "Title" [ref=e6]',
' - button "Save" [ref=e7] [cursor=pointer]:',
" - paragraph: hello",
].join("\n");
const res = buildRoleSnapshotFromAiSnapshot(ai, { interactive: true });
expect(res.snapshot).toContain('[ref=e5]');
expect(res.snapshot).toContain('- link "Home"');
expect(res.snapshot).toContain('- button "Save"');
expect(res.snapshot).not.toContain("navigation");
expect(res.snapshot).not.toContain("heading");
expect(Object.keys(res.refs).sort()).toEqual(["e5", "e7"]);
expect(res.refs.e5).toMatchObject({ role: "link", name: "Home" });
expect(res.refs.e7).toMatchObject({ role: "button", name: "Save" });
});
});

View File

@@ -293,3 +293,75 @@ export function buildRoleSnapshotFromAriaSnapshot(
refs,
};
}
function parseAiSnapshotRef(suffix: string): string | null {
const match = suffix.match(/\[ref=(e\d+)\]/i);
return match ? match[1] : null;
}
/**
* Build a role snapshot from Playwright's AI snapshot output while preserving Playwright's own
* aria-ref ids (e.g. ref=e13). This makes the refs self-resolving across calls.
*/
export function buildRoleSnapshotFromAiSnapshot(
aiSnapshot: string,
options: RoleSnapshotOptions = {},
): { snapshot: string; refs: RoleRefMap } {
const lines = String(aiSnapshot ?? "").split("\n");
const refs: RoleRefMap = {};
if (options.interactive) {
const out: string[] = [];
for (const line of lines) {
const depth = getIndentLevel(line);
if (options.maxDepth !== undefined && depth > options.maxDepth) continue;
const match = line.match(/^(\s*-\s*)(\w+)(?:\s+"([^"]*)")?(.*)$/);
if (!match) continue;
const [, , roleRaw, name, suffix] = match;
if (roleRaw.startsWith("/")) continue;
const role = roleRaw.toLowerCase();
if (!INTERACTIVE_ROLES.has(role)) continue;
const ref = parseAiSnapshotRef(suffix);
if (!ref) continue;
refs[ref] = { role, ...(name ? { name } : {}) };
out.push(`- ${roleRaw}${name ? ` "${name}"` : ""}${suffix}`);
}
return {
snapshot: out.join("\n") || "(no interactive elements)",
refs,
};
}
const out: string[] = [];
for (const line of lines) {
const depth = getIndentLevel(line);
if (options.maxDepth !== undefined && depth > options.maxDepth) continue;
const match = line.match(/^(\s*-\s*)(\w+)(?:\s+"([^"]*)")?(.*)$/);
if (!match) {
out.push(line);
continue;
}
const [, , roleRaw, name, suffix] = match;
if (roleRaw.startsWith("/")) {
out.push(line);
continue;
}
const role = roleRaw.toLowerCase();
const isStructural = STRUCTURAL_ROLES.has(role);
if (options.compact && isStructural && !name) continue;
const ref = parseAiSnapshotRef(suffix);
if (ref) refs[ref] = { role, ...(name ? { name } : {}) };
out.push(line);
}
const tree = out.join("\n") || "(empty)";
return {
snapshot: options.compact ? compactTree(tree) : tree,
refs,
};
}

View File

@@ -62,6 +62,16 @@ describe("pw-session refLocator", () => {
expect(mocks.getByRole).toHaveBeenCalled();
});
it("uses aria-ref locators when refs mode is aria", () => {
const { page, mocks } = fakePage();
const state = ensurePageState(page);
state.roleRefsMode = "aria";
refLocator(page, "e1");
expect(mocks.locator).toHaveBeenCalledWith("aria-ref=e1");
});
});
describe("pw-session role refs cache", () => {

View File

@@ -64,9 +64,11 @@ type PageState = {
armIdDownload: number;
/**
* Role-based refs from the last role snapshot (e.g. e1/e2).
* These refs are NOT Playwright's `aria-ref` values.
* Mode "role" refs are generated from ariaSnapshot and resolved via getByRole.
* Mode "aria" refs are Playwright aria-ref ids and resolved via `aria-ref=...`.
*/
roleRefs?: Record<string, { role: string; name?: string; nth?: number }>;
roleRefsMode?: "role" | "aria";
roleRefsFrameSelector?: string;
};
@@ -74,6 +76,7 @@ type RoleRefs = NonNullable<PageState["roleRefs"]>;
type RoleRefsCacheEntry = {
refs: RoleRefs;
frameSelector?: string;
mode?: NonNullable<PageState["roleRefsMode"]>;
};
type ContextState = {
@@ -110,12 +113,14 @@ export function rememberRoleRefsForTarget(opts: {
targetId: string;
refs: RoleRefs;
frameSelector?: string;
mode?: NonNullable<PageState["roleRefsMode"]>;
}): void {
const targetId = opts.targetId.trim();
if (!targetId) return;
roleRefsByTarget.set(roleRefsKey(opts.cdpUrl, targetId), {
refs: opts.refs,
...(opts.frameSelector ? { frameSelector: opts.frameSelector } : {}),
...(opts.mode ? { mode: opts.mode } : {}),
});
while (roleRefsByTarget.size > MAX_ROLE_REFS_CACHE) {
const first = roleRefsByTarget.keys().next();
@@ -137,6 +142,7 @@ export function restoreRoleRefsForTarget(opts: {
if (state.roleRefs) return;
state.roleRefs = cached.refs;
state.roleRefsFrameSelector = cached.frameSelector;
state.roleRefsMode = cached.mode;
}
export function ensurePageState(page: Page): PageState {
@@ -339,6 +345,12 @@ export function refLocator(page: Page, ref: string) {
if (/^e\d+$/.test(normalized)) {
const state = pageStates.get(page);
if (state?.roleRefsMode === "aria") {
const scope = state.roleRefsFrameSelector
? page.frameLocator(state.roleRefsFrameSelector)
: page;
return scope.locator(`aria-ref=${normalized}`);
}
const info = state?.roleRefs?.[normalized];
if (!info) {
throw new Error(

View File

@@ -265,6 +265,7 @@ export async function scrollIntoViewViaPlaywright(opts: {
}): Promise<void> {
const page = await getPageForTargetId(opts);
ensurePageState(page);
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
const timeout = normalizeTimeoutMs(opts.timeoutMs, 20_000);
const ref = requireRef(opts.ref);
@@ -340,6 +341,7 @@ export async function takeScreenshotViaPlaywright(opts: {
}): Promise<{ buffer: Buffer }> {
const page = await getPageForTargetId(opts);
ensurePageState(page);
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
const type = opts.type ?? "png";
if (opts.ref) {
if (opts.fullPage) throw new Error("fullPage is not supported for element screenshots");
@@ -369,6 +371,7 @@ export async function screenshotWithLabelsViaPlaywright(opts: {
}): Promise<{ buffer: Buffer; labels: number; skipped: number }> {
const page = await getPageForTargetId(opts);
ensurePageState(page);
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
const type = opts.type ?? "png";
const maxLabels =
typeof opts.maxLabels === "number" && Number.isFinite(opts.maxLabels)
@@ -495,6 +498,7 @@ export async function setInputFilesViaPlaywright(opts: {
}): Promise<void> {
const page = await getPageForTargetId(opts);
ensurePageState(page);
restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
if (!opts.paths.length) throw new Error("paths are required");
const inputRef = typeof opts.inputRef === "string" ? opts.inputRef.trim() : "";
const element = typeof opts.element === "string" ? opts.element.trim() : "";

View File

@@ -2,6 +2,7 @@ import type { Page } from "playwright-core";
import { type AriaSnapshotNode, formatAriaSnapshot, type RawAXNode } from "./cdp.js";
import {
buildRoleSnapshotFromAiSnapshot,
buildRoleSnapshotFromAriaSnapshot,
getRoleSnapshotStats,
type RoleSnapshotOptions,
@@ -76,6 +77,7 @@ export async function snapshotRoleViaPlaywright(opts: {
targetId?: string;
selector?: string;
frameSelector?: string;
refsMode?: "role" | "aria";
options?: RoleSnapshotOptions;
}): Promise<{
snapshot: string;
@@ -88,6 +90,37 @@ export async function snapshotRoleViaPlaywright(opts: {
});
const state = ensurePageState(page);
if (opts.refsMode === "aria") {
if (opts.selector?.trim() || opts.frameSelector?.trim()) {
throw new Error("refs=aria does not support selector/frame snapshots yet.");
}
const maybe = page as unknown as WithSnapshotForAI;
if (!maybe._snapshotForAI) {
throw new Error("refs=aria requires Playwright _snapshotForAI support.");
}
const result = await maybe._snapshotForAI({
timeout: 5000,
track: "response",
});
const built = buildRoleSnapshotFromAiSnapshot(String(result?.full ?? ""), opts.options);
state.roleRefs = built.refs;
state.roleRefsFrameSelector = undefined;
state.roleRefsMode = "aria";
if (opts.targetId) {
rememberRoleRefsForTarget({
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
refs: built.refs,
mode: "aria",
});
}
return {
snapshot: built.snapshot,
refs: built.refs,
stats: getRoleSnapshotStats(built.snapshot, built.refs),
};
}
const frameSelector = opts.frameSelector?.trim() || "";
const selector = opts.selector?.trim() || "";
const locator = frameSelector
@@ -102,12 +135,14 @@ export async function snapshotRoleViaPlaywright(opts: {
const built = buildRoleSnapshotFromAriaSnapshot(String(ariaSnapshot ?? ""), opts.options);
state.roleRefs = built.refs;
state.roleRefsFrameSelector = frameSelector || undefined;
state.roleRefsMode = "role";
if (opts.targetId) {
rememberRoleRefsForTarget({
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
refs: built.refs,
frameSelector: frameSelector || undefined,
mode: "role",
});
}
return {

View File

@@ -169,6 +169,8 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
const interactiveRaw = toBoolean(req.query.interactive);
const compactRaw = toBoolean(req.query.compact);
const depthRaw = toNumber(req.query.depth);
const refsModeRaw = toStringOrEmpty(req.query.refs).trim();
const refsMode = refsModeRaw === "aria" ? "aria" : refsModeRaw === "role" ? "role" : undefined;
const interactive = interactiveRaw ?? (mode === "efficient" ? true : undefined);
const compact = compactRaw ?? (mode === "efficient" ? true : undefined);
const depth =
@@ -199,6 +201,7 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
targetId: tab.targetId,
selector: selector.trim() || undefined,
frameSelector: frameSelector.trim() || undefined,
refsMode,
options: {
interactive: interactive ?? undefined,
compact: compact ?? undefined,
@@ -219,6 +222,7 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
targetId: tab.targetId,
selector: selector.trim() || undefined,
frameSelector: frameSelector.trim() || undefined,
refsMode,
options: {
interactive: interactive ?? undefined,
compact: compact ?? undefined,