fix(browser): register AI snapshot refs (#1282)

thanks @John-Rood

Co-authored-by: John Rood <62669593+John-Rood@users.noreply.github.com>
This commit is contained in:
Peter Steinberger
2026-01-20 14:12:50 +00:00
parent 710c681283
commit da4b124480
7 changed files with 88 additions and 47 deletions

View File

@@ -33,9 +33,8 @@ Docs: https://docs.clawd.bot
### Fixes
- Gateway: strip inbound envelope headers from chat history messages to keep clients clean.
- UI: prevent double-scroll in Control UI chat by locking chat layout to the viewport. (#1283) — thanks @bradleypriest.
### Fixes
- Config: allow Perplexity as a web_search provider in config validation. (#1230)
- Browser: register AI snapshot refs for act commands. (#1282) — thanks @John-Rood.
## 2026.1.19-2

View File

@@ -1,5 +1,9 @@
import type { Api, Model } from "@mariozechner/pi-ai";
/**
 * Type guard: reports whether `model` uses the `"openai-completions"` API,
 * narrowing it to `Model<"openai-completions">` when it does.
 */
function isOpenAICompletionsModel(model: Model<Api>): model is Model<"openai-completions"> {
  const { api } = model;
  return api === "openai-completions";
}
export function normalizeModelCompat(model: Model<Api>): Model<Api> {
const isOpenAICompletionsModel = (
candidate: Model<Api>,

View File

@@ -84,6 +84,35 @@ describe("pw-ai", () => {
expect(p2.session.detach).toHaveBeenCalledTimes(1);
});
it("registers aria refs from ai snapshots for act commands", async () => {
  const { chromium } = await import("playwright-core");
  const cdpUrl = "http://127.0.0.1:18792";
  const targetId = "T1";

  // AI snapshot text with two elements, each tagged with a [ref=...] marker.
  const snapshotFull = ['- button "OK" [ref=e1]', '- link "Docs" [ref=e2]'].join("\n");
  const tab = createPage({ targetId, snapshotFull });
  const browser = createBrowser([tab.page]);
  const connectOverCDP = chromium.connectOverCDP as unknown as ReturnType<typeof vi.fn>;
  connectOverCDP.mockResolvedValue(browser);

  const mod = await importModule();

  // Taking the snapshot should return the refs parsed from the snapshot text.
  const res = await mod.snapshotAiViaPlaywright({ cdpUrl, targetId });
  expect(res.refs).toMatchObject({
    e1: { role: "button", name: "OK" },
    e2: { role: "link", name: "Docs" },
  });

  // A follow-up click by ref should go through the aria-ref locator.
  await mod.clickViaPlaywright({ cdpUrl, targetId, ref: "e1" });
  expect(tab.locator).toHaveBeenCalledWith("aria-ref=e1");
  expect(tab.click).toHaveBeenCalledTimes(1);
});
it("truncates oversized snapshots", async () => {
const { chromium } = await import("playwright-core");
const longSnapshot = "A".repeat(20);

View File

@@ -130,6 +130,28 @@ export function rememberRoleRefsForTarget(opts: {
}
}
/**
 * Writes a snapshot's role refs onto the state object returned by
 * `ensurePageState` for the page and, when a non-blank `targetId` is supplied,
 * also passes them to `rememberRoleRefsForTarget`.
 *
 * @param opts.page - Page whose state receives the refs.
 * @param opts.cdpUrl - CDP endpoint forwarded to `rememberRoleRefsForTarget`.
 * @param opts.targetId - Optional target id; missing or whitespace-only values skip the remember step.
 * @param opts.refs - Ref map produced by the snapshot.
 * @param opts.frameSelector - Optional frame selector stored alongside the refs.
 * @param opts.mode - Ref mode stored alongside the refs.
 */
export function storeRoleRefsForTarget(opts: {
  page: Page;
  cdpUrl: string;
  targetId?: string;
  refs: RoleRefs;
  frameSelector?: string;
  mode: NonNullable<PageState["roleRefsMode"]>;
}): void {
  const { page, cdpUrl, targetId, refs, frameSelector, mode } = opts;

  // The page state is always updated, regardless of whether a target id exists.
  const pageState = ensurePageState(page);
  pageState.roleRefs = refs;
  pageState.roleRefsFrameSelector = frameSelector;
  pageState.roleRefsMode = mode;

  // Only forward to the per-target store when the id is non-blank.
  if (targetId?.trim()) {
    rememberRoleRefsForTarget({ cdpUrl, targetId, refs, frameSelector, mode });
  }
}
export function restoreRoleRefsForTarget(opts: {
cdpUrl: string;
targetId?: string;

View File

@@ -6,11 +6,12 @@ import {
buildRoleSnapshotFromAriaSnapshot,
getRoleSnapshotStats,
type RoleSnapshotOptions,
type RoleRefMap,
} from "./pw-role-snapshot.js";
import {
ensurePageState,
getPageForTargetId,
rememberRoleRefsForTarget,
storeRoleRefsForTarget,
type WithSnapshotForAI,
} from "./pw-session.js";
@@ -43,7 +44,7 @@ export async function snapshotAiViaPlaywright(opts: {
targetId?: string;
timeoutMs?: number;
maxChars?: number;
}): Promise<{ snapshot: string; truncated?: boolean }> {
}): Promise<{ snapshot: string; truncated?: boolean; refs: RoleRefMap }> {
const page = await getPageForTargetId({
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
@@ -65,11 +66,21 @@ export async function snapshotAiViaPlaywright(opts: {
typeof maxChars === "number" && Number.isFinite(maxChars) && maxChars > 0
? Math.floor(maxChars)
: undefined;
let truncated = false;
if (limit && snapshot.length > limit) {
snapshot = `${snapshot.slice(0, limit)}\n\n[...TRUNCATED - page too large]`;
return { snapshot, truncated: true };
truncated = true;
}
return { snapshot };
const built = buildRoleSnapshotFromAiSnapshot(snapshot);
storeRoleRefsForTarget({
page,
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
refs: built.refs,
mode: "aria",
});
return truncated ? { snapshot, truncated, refs: built.refs } : { snapshot, refs: built.refs };
}
export async function snapshotRoleViaPlaywright(opts: {
@@ -88,7 +99,7 @@ export async function snapshotRoleViaPlaywright(opts: {
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
});
const state = ensurePageState(page);
ensurePageState(page);
if (opts.refsMode === "aria") {
if (opts.selector?.trim() || opts.frameSelector?.trim()) {
@@ -103,17 +114,13 @@ export async function snapshotRoleViaPlaywright(opts: {
track: "response",
});
const built = buildRoleSnapshotFromAiSnapshot(String(result?.full ?? ""), opts.options);
state.roleRefs = built.refs;
state.roleRefsFrameSelector = undefined;
state.roleRefsMode = "aria";
if (opts.targetId) {
rememberRoleRefsForTarget({
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
refs: built.refs,
mode: "aria",
});
}
storeRoleRefsForTarget({
page,
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
refs: built.refs,
mode: "aria",
});
return {
snapshot: built.snapshot,
refs: built.refs,
@@ -133,18 +140,14 @@ export async function snapshotRoleViaPlaywright(opts: {
const ariaSnapshot = await locator.ariaSnapshot();
const built = buildRoleSnapshotFromAriaSnapshot(String(ariaSnapshot ?? ""), opts.options);
state.roleRefs = built.refs;
state.roleRefsFrameSelector = frameSelector || undefined;
state.roleRefsMode = "role";
if (opts.targetId) {
rememberRoleRefsForTarget({
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
refs: built.refs,
frameSelector: frameSelector || undefined,
mode: "role",
});
}
storeRoleRefsForTarget({
page,
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
refs: built.refs,
frameSelector: frameSelector || undefined,
mode: "role",
});
return {
snapshot: built.snapshot,
refs: built.refs,

View File

@@ -9,8 +9,6 @@ import {
DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS,
DEFAULT_AI_SNAPSHOT_MAX_CHARS,
} from "../constants.js";
import { buildRoleSnapshotFromAiSnapshot } from "../pw-role-snapshot.js";
import { rememberRoleRefsForTarget } from "../pw-session.js";
import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
@@ -216,20 +214,6 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
targetId: tab.targetId,
...(typeof resolvedMaxChars === "number" ? { maxChars: resolvedMaxChars } : {}),
})
.then((result) => {
// Extract and register refs from AI snapshot so act commands can resolve them.
// snapshotAiViaPlaywright returns raw text without ref registration.
const parsed = buildRoleSnapshotFromAiSnapshot(result.snapshot);
if (Object.keys(parsed.refs).length > 0) {
rememberRoleRefsForTarget({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
refs: parsed.refs,
mode: "aria",
});
}
return { ...result, refs: parsed.refs };
})
.catch(async (err) => {
// Public-API fallback when Playwright's private _snapshotForAI is missing.
if (String(err).toLowerCase().includes("_snapshotforai")) {

View File

@@ -188,11 +188,11 @@ export async function runTui(opts: TuiOptions) {
password: opts.password,
});
const tui = new TUI(new ProcessTerminal());
const header = new Text("", 1, 0);
const statusContainer = new Container();
const footer = new Text("", 1, 0);
const chatLog = new ChatLog();
const tui = new TUI(new ProcessTerminal());
const editor = new CustomEditor(tui, editorTheme);
const root = new Container();
root.addChild(header);