diff --git a/CHANGELOG.md b/CHANGELOG.md index 85c2b965d..acacb8d7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,9 +33,8 @@ Docs: https://docs.clawd.bot ### Fixes - Gateway: strip inbound envelope headers from chat history messages to keep clients clean. - UI: prevent double-scroll in Control UI chat by locking chat layout to the viewport. (#1283) — thanks @bradleypriest. - -### Fixes - Config: allow Perplexity as a web_search provider in config validation. (#1230) +- Browser: register AI snapshot refs for act commands. (#1282) — thanks @John-Rood. ## 2026.1.19-2 diff --git a/src/agents/model-compat.ts b/src/agents/model-compat.ts index 9ea9497ca..fa0749b3a 100644 --- a/src/agents/model-compat.ts +++ b/src/agents/model-compat.ts @@ -1,5 +1,9 @@ import type { Api, Model } from "@mariozechner/pi-ai"; +function isOpenAICompletionsModel(model: Model): model is Model<"openai-completions"> { + return model.api === "openai-completions"; +} + export function normalizeModelCompat(model: Model): Model { const isOpenAICompletionsModel = ( candidate: Model, diff --git a/src/browser/pw-ai.test.ts b/src/browser/pw-ai.test.ts index 29c33d774..75e52c3dd 100644 --- a/src/browser/pw-ai.test.ts +++ b/src/browser/pw-ai.test.ts @@ -84,6 +84,35 @@ describe("pw-ai", () => { expect(p2.session.detach).toHaveBeenCalledTimes(1); }); + it("registers aria refs from ai snapshots for act commands", async () => { + const { chromium } = await import("playwright-core"); + const snapshot = ['- button "OK" [ref=e1]', '- link "Docs" [ref=e2]'].join("\n"); + const p1 = createPage({ targetId: "T1", snapshotFull: snapshot }); + const browser = createBrowser([p1.page]); + + (chromium.connectOverCDP as unknown as ReturnType).mockResolvedValue(browser); + + const mod = await importModule(); + const res = await mod.snapshotAiViaPlaywright({ + cdpUrl: "http://127.0.0.1:18792", + targetId: "T1", + }); + + expect(res.refs).toMatchObject({ + e1: { role: "button", name: "OK" }, + e2: { role: "link", name: "Docs" }, + }); + + await mod.clickViaPlaywright({ + cdpUrl: "http://127.0.0.1:18792", + targetId: "T1", + ref: "e1", + }); + + expect(p1.locator).toHaveBeenCalledWith("aria-ref=e1"); + expect(p1.click).toHaveBeenCalledTimes(1); + }); + it("truncates oversized snapshots", async () => { const { chromium } = await import("playwright-core"); const longSnapshot = "A".repeat(20); diff --git a/src/browser/pw-session.ts b/src/browser/pw-session.ts index f15d3a918..0c7fa9f48 100644 --- a/src/browser/pw-session.ts +++ b/src/browser/pw-session.ts @@ -130,6 +130,28 @@ export function rememberRoleRefsForTarget(opts: { } } +export function storeRoleRefsForTarget(opts: { + page: Page; + cdpUrl: string; + targetId?: string; + refs: RoleRefs; + frameSelector?: string; + mode: NonNullable; +}): void { + const state = ensurePageState(opts.page); + state.roleRefs = opts.refs; + state.roleRefsFrameSelector = opts.frameSelector; + state.roleRefsMode = opts.mode; + if (!opts.targetId?.trim()) return; + rememberRoleRefsForTarget({ + cdpUrl: opts.cdpUrl, + targetId: opts.targetId, + refs: opts.refs, + frameSelector: opts.frameSelector, + mode: opts.mode, + }); +} + export function restoreRoleRefsForTarget(opts: { cdpUrl: string; targetId?: string; diff --git a/src/browser/pw-tools-core.snapshot.ts b/src/browser/pw-tools-core.snapshot.ts index 5cfa77e91..402513c69 100644 --- a/src/browser/pw-tools-core.snapshot.ts +++ b/src/browser/pw-tools-core.snapshot.ts @@ -6,11 +6,12 @@ import { buildRoleSnapshotFromAriaSnapshot, getRoleSnapshotStats, type RoleSnapshotOptions, + type RoleRefMap, } from "./pw-role-snapshot.js"; import { ensurePageState, getPageForTargetId, - rememberRoleRefsForTarget, + storeRoleRefsForTarget, type WithSnapshotForAI, } from "./pw-session.js"; @@ -43,7 +44,7 @@ export async function snapshotAiViaPlaywright(opts: { targetId?: string; timeoutMs?: number; maxChars?: number; -}): Promise<{ snapshot: string; truncated?: boolean }> { +}): Promise<{ snapshot: string; truncated?: boolean; refs: RoleRefMap }> { const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, @@ -65,11 +66,21 @@ export async function snapshotAiViaPlaywright(opts: { typeof maxChars === "number" && Number.isFinite(maxChars) && maxChars > 0 ? Math.floor(maxChars) : undefined; + let truncated = false; if (limit && snapshot.length > limit) { snapshot = `${snapshot.slice(0, limit)}\n\n[...TRUNCATED - page too large]`; - return { snapshot, truncated: true }; + truncated = true; } - return { snapshot }; + + const built = buildRoleSnapshotFromAiSnapshot(snapshot); + storeRoleRefsForTarget({ + page, + cdpUrl: opts.cdpUrl, + targetId: opts.targetId, + refs: built.refs, + mode: "aria", + }); + return truncated ? { snapshot, truncated, refs: built.refs } : { snapshot, refs: built.refs }; } export async function snapshotRoleViaPlaywright(opts: { @@ -88,7 +99,7 @@ export async function snapshotRoleViaPlaywright(opts: { cdpUrl: opts.cdpUrl, targetId: opts.targetId, }); - const state = ensurePageState(page); + ensurePageState(page); if (opts.refsMode === "aria") { if (opts.selector?.trim() || opts.frameSelector?.trim()) { @@ -103,17 +114,13 @@ export async function snapshotRoleViaPlaywright(opts: { track: "response", }); const built = buildRoleSnapshotFromAiSnapshot(String(result?.full ?? ""), opts.options); - state.roleRefs = built.refs; - state.roleRefsFrameSelector = undefined; - state.roleRefsMode = "aria"; - if (opts.targetId) { - rememberRoleRefsForTarget({ - cdpUrl: opts.cdpUrl, - targetId: opts.targetId, - refs: built.refs, - mode: "aria", - }); - } + storeRoleRefsForTarget({ + page, + cdpUrl: opts.cdpUrl, + targetId: opts.targetId, + refs: built.refs, + mode: "aria", + }); return { snapshot: built.snapshot, refs: built.refs, @@ -133,18 +140,14 @@ export async function snapshotRoleViaPlaywright(opts: { const ariaSnapshot = await locator.ariaSnapshot(); const built = buildRoleSnapshotFromAriaSnapshot(String(ariaSnapshot ?? ""), opts.options); - state.roleRefs = built.refs; - state.roleRefsFrameSelector = frameSelector || undefined; - state.roleRefsMode = "role"; - if (opts.targetId) { - rememberRoleRefsForTarget({ - cdpUrl: opts.cdpUrl, - targetId: opts.targetId, - refs: built.refs, - frameSelector: frameSelector || undefined, - mode: "role", - }); - } + storeRoleRefsForTarget({ + page, + cdpUrl: opts.cdpUrl, + targetId: opts.targetId, + refs: built.refs, + frameSelector: frameSelector || undefined, + mode: "role", + }); return { snapshot: built.snapshot, refs: built.refs, diff --git a/src/browser/routes/agent.snapshot.ts b/src/browser/routes/agent.snapshot.ts index e66647f3f..fdeb7f69e 100644 --- a/src/browser/routes/agent.snapshot.ts +++ b/src/browser/routes/agent.snapshot.ts @@ -9,8 +9,6 @@ import { DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS, DEFAULT_AI_SNAPSHOT_MAX_CHARS, } from "../constants.js"; -import { buildRoleSnapshotFromAiSnapshot } from "../pw-role-snapshot.js"; -import { rememberRoleRefsForTarget } from "../pw-session.js"; import { DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, @@ -216,20 +214,6 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br targetId: tab.targetId, ...(typeof resolvedMaxChars === "number" ? { maxChars: resolvedMaxChars } : {}), }) - .then((result) => { - // Extract and register refs from AI snapshot so act commands can resolve them. - // snapshotAiViaPlaywright returns raw text without ref registration. - const parsed = buildRoleSnapshotFromAiSnapshot(result.snapshot); - if (Object.keys(parsed.refs).length > 0) { - rememberRoleRefsForTarget({ - cdpUrl: profileCtx.profile.cdpUrl, - targetId: tab.targetId, - refs: parsed.refs, - mode: "aria", - }); - } - return { ...result, refs: parsed.refs }; - }) .catch(async (err) => { // Public-API fallback when Playwright's private _snapshotForAI is missing. if (String(err).toLowerCase().includes("_snapshotforai")) { diff --git a/src/tui/tui.ts b/src/tui/tui.ts index f70c8dd8e..a5e6e34d7 100644 --- a/src/tui/tui.ts +++ b/src/tui/tui.ts @@ -188,11 +188,11 @@ export async function runTui(opts: TuiOptions) { password: opts.password, }); + const tui = new TUI(new ProcessTerminal()); const header = new Text("", 1, 0); const statusContainer = new Container(); const footer = new Text("", 1, 0); const chatLog = new ChatLog(); - const tui = new TUI(new ProcessTerminal()); const editor = new CustomEditor(tui, editorTheme); const root = new Container(); root.addChild(header);