feat(browser): add snapshot refs=aria mode

This commit is contained in:
Peter Steinberger
2026-01-15 10:16:33 +00:00
parent 0facc63019
commit 4f1a4ab072
13 changed files with 210 additions and 1 deletion

View File

@@ -39,6 +39,7 @@ const BROWSER_TARGETS = ["sandbox", "host", "custom"] as const;
// Allowed values for the snapshot `format` parameter of the browser tool schema.
const BROWSER_SNAPSHOT_FORMATS = ["aria", "ai"] as const;
// Allowed values for the snapshot `mode` parameter; "efficient" is the only named mode.
const BROWSER_SNAPSHOT_MODES = ["efficient"] as const;
// Allowed values for the snapshot `refs` parameter. Per the tool description,
// "role" (the default) yields role+name-based refs and "aria" yields Playwright aria-ref ids.
const BROWSER_SNAPSHOT_REFS = ["role", "aria"] as const;
// Image type names accepted by the tool.
// NOTE(review): presumably the screenshot output type — usage is not visible in this hunk; confirm.
const BROWSER_IMAGE_TYPES = ["png", "jpeg"] as const;
@@ -91,6 +92,7 @@ export const BrowserToolSchema = Type.Object({
maxChars: Type.Optional(Type.Number()),
mode: optionalStringEnum(BROWSER_SNAPSHOT_MODES),
format: optionalStringEnum(BROWSER_SNAPSHOT_FORMATS),
refs: optionalStringEnum(BROWSER_SNAPSHOT_REFS),
interactive: Type.Optional(Type.Boolean()),
compact: Type.Optional(Type.Boolean()),
depth: Type.Optional(Type.Number()),

View File

@@ -121,6 +121,19 @@ describe("browser tool snapshot maxChars", () => {
expect(browserClientMocks.browserProfiles).toHaveBeenCalledWith("http://127.0.0.1:18791");
});
// Checks that a `refs` value supplied to the tool's snapshot action is handed
// to browserSnapshot alongside the requested format.
it("passes refs mode through to browser snapshot", async () => {
  const browserTool = createBrowserTool();

  await browserTool.execute?.(null, { action: "snapshot", format: "ai", refs: "aria" });

  // Only format and refs are pinned here; any other forwarded options are
  // deliberately left unconstrained by the partial matcher.
  const expectedOptions = expect.objectContaining({ format: "ai", refs: "aria" });
  expect(browserClientMocks.browserSnapshot).toHaveBeenCalledWith(
    "http://127.0.0.1:18791",
    expectedOptions,
  );
});
});
describe("browser tool snapshot labels", () => {

View File

@@ -128,6 +128,7 @@ export function createBrowserTool(opts?: {
'Profiles: use profile="chrome" for Chrome extension relay takeover (your existing Chrome tabs). Use profile="clawd" for the isolated clawd-managed browser.',
"Chrome extension relay needs an attached tab: user must click the Clawdbot Browser Relay toolbar icon on the tab (badge ON). If no tab is connected, ask them to attach it.",
"When using refs from snapshot (e.g. e12), keep the same tab: prefer passing targetId from the snapshot response into subsequent actions (act/click/type/etc).",
'For stable, self-resolving refs across calls, use snapshot with refs="aria" (Playwright aria-ref ids). Default refs="role" are role+name-based.',
"Use snapshot+act for UI automation. Avoid act:wait by default; use only in exceptional cases when no reliable UI state exists.",
`target selects browser location (sandbox|host|custom). Default: ${targetDefault}.`,
"controlUrl implies target=custom (remote control server).",
@@ -190,6 +191,7 @@ export function createBrowserTool(opts?: {
: "ai";
// Only the literal "efficient" is honored; every other value leaves mode unset.
const mode = params.mode === "efficient" ? "efficient" : undefined;
// Non-boolean `labels` values are dropped rather than coerced.
const labels = typeof params.labels === "boolean" ? params.labels : undefined;
// Forward `refs` only when it is one of the two known values ("aria" or "role");
// anything else becomes undefined rather than being passed along as-is.
const refs = params.refs === "aria" || params.refs === "role" ? params.refs : undefined;
// Records whether the caller supplied a maxChars key at all (own property),
// independent of what value it holds.
const hasMaxChars = Object.hasOwn(params, "maxChars");
// String targetIds are trimmed (a whitespace-only id becomes ""); non-strings become undefined.
const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
const limit =
@@ -224,6 +226,7 @@ export function createBrowserTool(opts?: {
targetId,
limit,
...(typeof resolvedMaxChars === "number" ? { maxChars: resolvedMaxChars } : {}),
refs,
interactive,
compact,
depth,