From 1236c4dafbfc9a308f6a0fadbb9f31900a1f3597 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 26 Dec 2025 19:02:19 +0000 Subject: [PATCH] refactor: make browser actions ref-only --- CHANGELOG.md | 4 +- .../Sources/Clawdis/MenuContentView.swift | 1 - .../Clawdis/MenuSessionsInjector.swift | 5 + docs/AGENTS.default.md | 2 +- docs/browser.md | 9 +- docs/tools.md | 3 +- src/agents/clawdis-tools.ts | 104 ++++++++++++------ src/browser/client-actions-core.ts | 6 +- src/browser/pw-ai.test.ts | 39 ------- src/browser/pw-tools-core.ts | 32 ++---- src/browser/routes/agent.ts | 27 +++-- src/browser/server.test.ts | 32 +----- src/cli/browser-cli-actions-input.ts | 40 ++----- src/cli/browser-cli-examples.ts | 4 +- src/cli/browser-cli-inspect.ts | 6 +- 15 files changed, 131 insertions(+), 183 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa5e08405..54fa31ce9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## 2.0.0-beta3 — Unreleased (2025-12-26) ### Highlights -- First-class Clawdis tools (browser, canvas, nodes, cron) replace the old `clawdis-*` skills; tool schemas are now injected directly into the agent runtime (including selector-based browser actions). +- First-class Clawdis tools (browser, canvas, nodes, cron) replace the old `clawdis-*` skills; tool schemas are now injected directly into the agent runtime. - Per-session model selection + custom model providers: `models.providers` merges into `~/.clawdis/agent/models.json` (merge/replace modes) for LiteLLM, local OpenAI-compatible servers, Anthropic proxies, etc. - Group chat activation modes: per-group `/activation mention|always` command with status visibility. - Discord bot transport for DMs and guild text channels, with allowlists + mention gating. @@ -17,6 +17,7 @@ - Config refactor: `inbound.*` removed; use top-level `routing` (allowlists + group rules + transcription), `messages` (prefixes/timestamps), and `session` (scoping/store/mainKey). No legacy keys read. 
- Heartbeat config moved to `agent.heartbeat`: set `every: "30m"` (duration string) and optional `model`. `agent.heartbeatMinutes` is removed, and heartbeats are disabled unless `agent.heartbeat.every` is set. - Heartbeats now run via the gateway runner (main session) and deliver to the last used channel by default. WhatsApp reply-heartbeat behavior is removed; use `agent.heartbeat.target`/`to` (or `target: "none"`) to control delivery. +- Browser `act` no longer accepts CSS `selector`; use `snapshot` refs (default `ai`) or `evaluate` as an escape hatch. ### Fixes - Heartbeat replies now strip repeated `HEARTBEAT_OK` tails to avoid accidental “OK OK” spam. @@ -56,6 +57,7 @@ ### macOS app - Update-ready state surfaced in the menu; menu sections regrouped with session submenus. +- Menu bar now shows a dedicated Nodes section under Context with inline rows, overflow submenu, and iconized actions. - Session list polish: sleeping/disconnected/error states, usage bar restored, padding + bar sizing tuned, syncing menu removed, header hidden when disconnected. - Chat UI polish: tool call cards + merged tool results, glass background, tighter composer spacing, visual effect host tweaks. - OAuth storage moved; legacy session syncing metadata removed. 
diff --git a/apps/macos/Sources/Clawdis/MenuContentView.swift b/apps/macos/Sources/Clawdis/MenuContentView.swift index 981431985..08f98bad1 100644 --- a/apps/macos/Sources/Clawdis/MenuContentView.swift +++ b/apps/macos/Sources/Clawdis/MenuContentView.swift @@ -86,7 +86,6 @@ struct MenuContent: View { } label: { Label("Open Dashboard", systemImage: "gauge") } - Divider() Toggle( isOn: Binding( get: { self.browserControlEnabled }, diff --git a/apps/macos/Sources/Clawdis/MenuSessionsInjector.swift b/apps/macos/Sources/Clawdis/MenuSessionsInjector.swift index ca41f359f..563788571 100644 --- a/apps/macos/Sources/Clawdis/MenuSessionsInjector.swift +++ b/apps/macos/Sources/Clawdis/MenuSessionsInjector.swift @@ -187,6 +187,11 @@ final class MenuSessionsInjector: NSObject, NSMenuDelegate { var cursor = insertIndex let entries = self.sortedNodeEntries() + let topSeparator = NSMenuItem.separator() + topSeparator.tag = self.nodesTag + menu.insertItem(topSeparator, at: cursor) + cursor += 1 + let header = self.makeNodesHeaderItem(width: width, count: entries.count) menu.insertItem(header, at: cursor) cursor += 1 diff --git a/docs/AGENTS.default.md b/docs/AGENTS.default.md index 1c1f625b7..4c967fd16 100644 --- a/docs/AGENTS.default.md +++ b/docs/AGENTS.default.md @@ -112,4 +112,4 @@ git commit -m "Add Clawd workspace" - Canvas UI runs full-screen with native overlays. Avoid placing critical controls in the top-left/top-right/bottom edges; add explicit gutters in the layout and don’t rely on safe-area insets. - For browser-driven verification, use `clawdis browser` (tabs/status/screenshot) with the clawd-managed Chrome profile. - For DOM inspection, use `clawdis browser eval|query|dom|snapshot` (and `--json`/`--out` when you need machine output). -- For interactions, use `clawdis browser click|type|hover|drag|select|upload|press|wait|navigate|back|evaluate|run` (click/type accept `--selector`). 
+- For interactions, use `clawdis browser click|type|hover|drag|select|upload|press|wait|navigate|back|evaluate|run` (click/type require snapshot refs; use `evaluate` for CSS selectors). diff --git a/docs/browser.md b/docs/browser.md index 0358ef0b0..8ac4f935d 100644 --- a/docs/browser.md +++ b/docs/browser.md @@ -167,16 +167,14 @@ Inspection: - `clawdis browser screenshot` - `clawdis browser screenshot --full-page` - `clawdis browser screenshot --ref 12` +- `clawdis browser snapshot` - `clawdis browser snapshot --format aria --limit 200` -- `clawdis browser snapshot --format ai` Actions: - `clawdis browser navigate https://example.com` - `clawdis browser resize 1280 720` - `clawdis browser click 12 --double` -- `clawdis browser click --selector 'button.save'` - `clawdis browser type 23 "hello" --submit` -- `clawdis browser type --selector "input[name=q]" "hello"` - `clawdis browser press Enter` - `clawdis browser hover 44` - `clawdis browser drag 10 11` @@ -186,14 +184,15 @@ Actions: - `clawdis browser dialog --accept` - `clawdis browser wait --text "Done"` - `clawdis browser evaluate --fn '(el) => el.textContent' --ref 7` +- `clawdis browser evaluate --fn "document.querySelector('.my-class').click()"` - `clawdis browser console --level error` - `clawdis browser pdf` Notes: - `upload` and `dialog` are **arming** calls; run them before the click/press that triggers the chooser/dialog. - The arm default timeout is **2 minutes** (clamped to max 2 minutes); pass `timeoutMs` if you need shorter. -- `snapshot --format ai` returns AI snapshot markup used for ref-based actions. -- `click`/`type` accept `--selector` to target CSS selectors instead of AI refs. +- `snapshot` defaults to `ai`; `aria` returns an accessibility tree for debugging. +- `click`/`type` require `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector one-offs. 
## Security & privacy notes diff --git a/docs/tools.md b/docs/tools.md index ca470fbe1..002db02a4 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -49,7 +49,8 @@ Core actions: Notes: - Requires `browser.enabled=true` in `~/.clawdis/clawdis.json`. - Uses `browser.controlUrl` unless `controlUrl` is passed explicitly. -- `act` supports CSS selectors for `click`/`type` via `selector` (use `ref` for AI snapshot targets). +- `snapshot` defaults to `ai`; use `aria` for the accessibility tree. +- `act` requires `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector needs. ### `clawdis_canvas` Drive the node Canvas (present, eval, snapshot, A2UI). diff --git a/src/agents/clawdis-tools.ts b/src/agents/clawdis-tools.ts index 941bc89d8..6f4711eb7 100644 --- a/src/agents/clawdis-tools.ts +++ b/src/agents/clawdis-tools.ts @@ -341,40 +341,74 @@ async function resolveNodeId( ); } -const BrowserActSchema = Type.Object({ - kind: Type.Union([ - Type.Literal("click"), - Type.Literal("type"), - Type.Literal("press"), - Type.Literal("hover"), - Type.Literal("drag"), - Type.Literal("select"), - Type.Literal("fill"), - Type.Literal("resize"), - Type.Literal("wait"), - Type.Literal("evaluate"), - Type.Literal("close"), - ]), - ref: Type.Optional(Type.String()), - selector: Type.Optional(Type.String()), - targetId: Type.Optional(Type.String()), - doubleClick: Type.Optional(Type.Boolean()), - button: Type.Optional(Type.String()), - modifiers: Type.Optional(Type.Array(Type.String())), - text: Type.Optional(Type.String()), - submit: Type.Optional(Type.Boolean()), - slowly: Type.Optional(Type.Boolean()), - key: Type.Optional(Type.String()), - startRef: Type.Optional(Type.String()), - endRef: Type.Optional(Type.String()), - values: Type.Optional(Type.Array(Type.String())), - fields: Type.Optional(Type.Array(Type.Record(Type.String(), Type.Unknown()))), - width: Type.Optional(Type.Number()), - height: Type.Optional(Type.Number()), - timeMs: Type.Optional(Type.Number()), - 
textGone: Type.Optional(Type.String()), - fn: Type.Optional(Type.String()), -}); +const BrowserActSchema = Type.Union([ + Type.Object({ + kind: Type.Literal("click"), + ref: Type.String(), + targetId: Type.Optional(Type.String()), + doubleClick: Type.Optional(Type.Boolean()), + button: Type.Optional(Type.String()), + modifiers: Type.Optional(Type.Array(Type.String())), + }), + Type.Object({ + kind: Type.Literal("type"), + ref: Type.String(), + text: Type.String(), + targetId: Type.Optional(Type.String()), + submit: Type.Optional(Type.Boolean()), + slowly: Type.Optional(Type.Boolean()), + }), + Type.Object({ + kind: Type.Literal("press"), + key: Type.String(), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("hover"), + ref: Type.String(), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("drag"), + startRef: Type.String(), + endRef: Type.String(), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("select"), + ref: Type.String(), + values: Type.Array(Type.String()), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("fill"), + fields: Type.Array(Type.Record(Type.String(), Type.Unknown())), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("resize"), + width: Type.Number(), + height: Type.Number(), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("wait"), + timeMs: Type.Optional(Type.Number()), + text: Type.Optional(Type.String()), + textGone: Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("evaluate"), + fn: Type.String(), + ref: Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + kind: Type.Literal("close"), + targetId: Type.Optional(Type.String()), + }), +]); const BrowserToolSchema = Type.Union([ Type.Object({ @@ -514,7 +548,7 @@ function 
createBrowserTool(): AnyAgentTool { const format = params.format === "ai" || params.format === "aria" ? (params.format as "ai" | "aria") - : "aria"; + : "ai"; const targetId = typeof params.targetId === "string" ? params.targetId.trim() diff --git a/src/browser/client-actions-core.ts b/src/browser/client-actions-core.ts index 273a5be6a..117167349 100644 --- a/src/browser/client-actions-core.ts +++ b/src/browser/client-actions-core.ts @@ -14,8 +14,7 @@ export type BrowserFormField = { export type BrowserActRequest = | { kind: "click"; - ref?: string; - selector?: string; + ref: string; targetId?: string; doubleClick?: boolean; button?: string; @@ -23,8 +22,7 @@ export type BrowserActRequest = } | { kind: "type"; - ref?: string; - selector?: string; + ref: string; text: string; targetId?: string; submit?: boolean; diff --git a/src/browser/pw-ai.test.ts b/src/browser/pw-ai.test.ts index 36c43b5ee..6c7beaf28 100644 --- a/src/browser/pw-ai.test.ts +++ b/src/browser/pw-ai.test.ts @@ -111,45 +111,6 @@ describe("pw-ai", () => { expect(p1.click).toHaveBeenCalledTimes(1); }); - it("clicks a css selector when provided", async () => { - const { chromium } = await import("playwright-core"); - const p1 = createPage({ targetId: "T1" }); - const browser = createBrowser([p1.page]); - ( - chromium.connectOverCDP as unknown as ReturnType - ).mockResolvedValue(browser); - - const mod = await importModule(); - await mod.clickViaPlaywright({ - cdpPort: 18792, - targetId: "T1", - selector: "button.save", - }); - - expect(p1.locator).toHaveBeenCalledWith("button.save"); - expect(p1.click).toHaveBeenCalledTimes(1); - }); - - it("types via css selector when provided", async () => { - const { chromium } = await import("playwright-core"); - const p1 = createPage({ targetId: "T1" }); - const browser = createBrowser([p1.page]); - ( - chromium.connectOverCDP as unknown as ReturnType - ).mockResolvedValue(browser); - - const mod = await importModule(); - await mod.typeViaPlaywright({ - cdpPort: 
18792, - targetId: "T1", - selector: "input[name=q]", - text: "hello", - }); - - expect(p1.locator).toHaveBeenCalledWith("input[name=q]"); - expect(p1.fill).toHaveBeenCalledTimes(1); - }); - it("fails with a clear error when _snapshotForAI is missing", async () => { const { chromium } = await import("playwright-core"); const p1 = createPage({ targetId: "T1", hasSnapshotForAI: false }); diff --git a/src/browser/pw-tools-core.ts b/src/browser/pw-tools-core.ts index 89d4a8b60..f5be4a331 100644 --- a/src/browser/pw-tools-core.ts +++ b/src/browser/pw-tools-core.ts @@ -10,18 +10,10 @@ import { let nextUploadArmId = 0; let nextDialogArmId = 0; -type LocatorPage = Parameters[0]; - -function resolveLocator( - page: LocatorPage, - opts: { ref?: string; selector?: string }, -) { - const selector = - typeof opts.selector === "string" ? opts.selector.trim() : ""; - if (selector) return page.locator(selector); - const ref = typeof opts.ref === "string" ? opts.ref.trim() : ""; - if (ref) return refLocator(page, ref); - throw new Error("ref or selector is required"); +function requireRef(value: unknown): string { + const ref = typeof value === "string" ? value.trim() : ""; + if (!ref) throw new Error("ref is required"); + return ref; } export async function snapshotAiViaPlaywright(opts: { @@ -55,8 +47,7 @@ export async function snapshotAiViaPlaywright(opts: { export async function clickViaPlaywright(opts: { cdpPort: number; targetId?: string; - ref?: string; - selector?: string; + ref: string; doubleClick?: boolean; button?: "left" | "right" | "middle"; modifiers?: Array<"Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift">; @@ -67,10 +58,7 @@ export async function clickViaPlaywright(opts: { targetId: opts.targetId, }); ensurePageState(page); - const locator = resolveLocator(page, { - ref: opts.ref, - selector: opts.selector, - }); + const locator = refLocator(page, requireRef(opts.ref)); const timeout = Math.max( 500, Math.min(60_000, Math.floor(opts.timeoutMs ?? 
8000)), @@ -157,8 +145,7 @@ export async function pressKeyViaPlaywright(opts: { export async function typeViaPlaywright(opts: { cdpPort: number; targetId?: string; - ref?: string; - selector?: string; + ref: string; text: string; submit?: boolean; slowly?: boolean; @@ -167,10 +154,7 @@ export async function typeViaPlaywright(opts: { const text = String(opts.text ?? ""); const page = await getPageForTargetId(opts); ensurePageState(page); - const locator = resolveLocator(page, { - ref: opts.ref, - selector: opts.selector, - }); + const locator = refLocator(page, requireRef(opts.ref)); const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 8000)); if (opts.slowly) { await locator.click({ timeout }); diff --git a/src/browser/routes/agent.ts b/src/browser/routes/agent.ts index df89dfb2a..e5151ac45 100644 --- a/src/browser/routes/agent.ts +++ b/src/browser/routes/agent.ts @@ -35,6 +35,16 @@ type ActKind = type ClickButton = "left" | "right" | "middle"; type ClickModifier = "Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift"; +const SELECTOR_UNSUPPORTED_MESSAGE = [ + "Error: 'selector' is not supported. Use 'ref' from snapshot instead.", + "", + "Example workflow:", + "1. snapshot action to get page state with refs", + '2. 
act with ref: "e123" to interact with element', + "", + "This is more reliable for modern SPAs.", +].join("\n"); + function readBody(req: express.Request): Record<string, unknown> { const body = req.body as Record<string, unknown> | undefined; if (!body || typeof body !== "object" || Array.isArray(body)) return {}; @@ -113,6 +123,9 @@ export function registerBrowserAgentRoutes( const body = readBody(req); const kind = toStringOrEmpty(body.kind) as ActKind; const targetId = toStringOrEmpty(body.targetId) || undefined; + if (Object.prototype.hasOwnProperty.call(body, "selector")) { + return jsonError(res, 400, SELECTOR_UNSUPPORTED_MESSAGE); + } if ( kind !== "click" && @@ -139,9 +152,7 @@ export function registerBrowserAgentRoutes( switch (kind) { case "click": { const ref = toStringOrEmpty(body.ref); - const selector = toStringOrEmpty(body.selector); - if (!ref && !selector) - return jsonError(res, 400, "ref or selector is required"); + if (!ref) return jsonError(res, 400, "ref is required"); const doubleClick = toBoolean(body.doubleClick) ?? false; const buttonRaw = toStringOrEmpty(body.button) || ""; const button = buttonRaw ? 
parseClickButton(buttonRaw) : undefined; @@ -171,10 +182,9 @@ export function registerBrowserAgentRoutes( const clickRequest: Parameters[0] = { cdpPort, targetId: tab.targetId, + ref, doubleClick, }; - if (ref) clickRequest.ref = ref; - if (selector) clickRequest.selector = selector; if (button) clickRequest.button = button; if (modifiers) clickRequest.modifiers = modifiers; await pw.clickViaPlaywright(clickRequest); @@ -182,9 +192,7 @@ export function registerBrowserAgentRoutes( } case "type": { const ref = toStringOrEmpty(body.ref); - const selector = toStringOrEmpty(body.selector); - if (!ref && !selector) - return jsonError(res, 400, "ref or selector is required"); + if (!ref) return jsonError(res, 400, "ref is required"); if (typeof body.text !== "string") return jsonError(res, 400, "text is required"); const text = body.text; @@ -193,12 +201,11 @@ export function registerBrowserAgentRoutes( const typeRequest: Parameters[0] = { cdpPort, targetId: tab.targetId, + ref, text, submit, slowly, }; - if (ref) typeRequest.ref = ref; - if (selector) typeRequest.selector = selector; await pw.typeViaPlaywright(typeRequest); return res.json({ ok: true, targetId: tab.targetId }); } diff --git a/src/browser/server.test.ts b/src/browser/server.test.ts index be7624cbc..aa07953cf 100644 --- a/src/browser/server.test.ts +++ b/src/browser/server.test.ts @@ -327,21 +327,17 @@ describe("browser control server", () => { modifiers: ["Shift"], }); - const clickSelector = (await realFetch(`${base}/act`, { + const clickSelector = await realFetch(`${base}/act`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ kind: "click", selector: "button.save", }), - }).then((r) => r.json())) as { ok: boolean }; - expect(clickSelector.ok).toBe(true); - expect(pwMocks.clickViaPlaywright).toHaveBeenNthCalledWith(2, { - cdpPort: testPort + 1, - targetId: "abcd1234", - selector: "button.save", - doubleClick: false, }); + expect(clickSelector.status).toBe(400); + 
const clickSelectorBody = (await clickSelector.json()) as { error?: string }; + expect(clickSelectorBody.error).toMatch(/'selector' is not supported/i); const type = (await realFetch(`${base}/act`, { method: "POST", @@ -358,26 +354,6 @@ describe("browser control server", () => { slowly: false, }); - const typeSelector = (await realFetch(`${base}/act`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - kind: "type", - selector: "input[name=q]", - text: "hello", - submit: true, - }), - }).then((r) => r.json())) as { ok: boolean }; - expect(typeSelector.ok).toBe(true); - expect(pwMocks.typeViaPlaywright).toHaveBeenNthCalledWith(2, { - cdpPort: testPort + 1, - targetId: "abcd1234", - selector: "input[name=q]", - text: "hello", - submit: true, - slowly: false, - }); - const press = (await realFetch(`${base}/act`, { method: "POST", headers: { "Content-Type": "application/json" }, diff --git a/src/cli/browser-cli-actions-input.ts b/src/cli/browser-cli-actions-input.ts index 31b0e13e5..d6f14ac28 100644 --- a/src/cli/browser-cli-actions-input.ts +++ b/src/cli/browser-cli-actions-input.ts @@ -114,9 +114,8 @@ export function registerBrowserActionInputCommands( browser .command("click") - .description("Click an element by ai ref or CSS selector") - .argument("[ref]", "Ref id from ai snapshot") - .option("--selector <css>", "CSS selector (instead of ref)") + .description("Click an element by ref from snapshot") + .argument("<ref>", "Ref id from ai snapshot") .option("--target-id <id>", "CDP target id (or unique prefix)") .option("--double", "Double click", false) .option("--button <left|right|middle>", "Mouse button to use") @@ -124,11 +123,9 @@ export function registerBrowserActionInputCommands( .action(async (ref: string | undefined, opts, cmd) => { const parent = parentOpts(cmd); const baseUrl = resolveBrowserControlUrl(parent?.url); - const selector = - typeof opts.selector === "string" ? opts.selector.trim() : ""; const refValue = typeof ref === "string" ? 
ref.trim() : ""; - if (!selector && !refValue) { - defaultRuntime.error(danger("ref or --selector is required")); + if (!refValue) { + defaultRuntime.error(danger("ref is required")); defaultRuntime.exit(1); return; } @@ -141,8 +138,7 @@ export function registerBrowserActionInputCommands( try { const result = await browserAct(baseUrl, { kind: "click", - ref: refValue || undefined, - selector: selector || undefined, + ref: refValue, targetId: opts.targetId?.trim() || undefined, doubleClick: Boolean(opts.double), button: opts.button?.trim() || undefined, @@ -153,11 +149,7 @@ export function registerBrowserActionInputCommands( return; } const suffix = result.url ? ` on ${result.url}` : ""; - if (selector) { - defaultRuntime.log(`clicked ${selector}${suffix}`); - } else { - defaultRuntime.log(`clicked ref ${refValue}${suffix}`); - } + defaultRuntime.log(`clicked ref ${refValue}${suffix}`); } catch (err) { defaultRuntime.error(danger(String(err))); defaultRuntime.exit(1); @@ -166,29 +158,25 @@ export function registerBrowserActionInputCommands( browser .command("type") - .description("Type into an element by ai ref or CSS selector") - .argument("[ref]", "Ref id from ai snapshot") + .description("Type into an element by ref from snapshot") + .argument("<ref>", "Ref id from ai snapshot") .argument("<text>", "Text to type") - .option("--selector <css>", "CSS selector (instead of ref)") .option("--submit", "Press Enter after typing", false) .option("--slowly", "Type slowly (human-like)", false) .option("--target-id <id>", "CDP target id (or unique prefix)") .action(async (ref: string | undefined, text: string, opts, cmd) => { const parent = parentOpts(cmd); const baseUrl = resolveBrowserControlUrl(parent?.url); - const selector = - typeof opts.selector === "string" ? opts.selector.trim() : ""; const refValue = typeof ref === "string" ? 
ref.trim() : ""; - if (!selector && !refValue) { - defaultRuntime.error(danger("ref or --selector is required")); + if (!refValue) { + defaultRuntime.error(danger("ref is required")); defaultRuntime.exit(1); return; } try { const result = await browserAct(baseUrl, { kind: "type", - ref: refValue || undefined, - selector: selector || undefined, + ref: refValue, text, submit: Boolean(opts.submit), slowly: Boolean(opts.slowly), @@ -198,11 +186,7 @@ export function registerBrowserActionInputCommands( defaultRuntime.log(JSON.stringify(result, null, 2)); return; } - if (selector) { - defaultRuntime.log(`typed into ${selector}`); - } else { - defaultRuntime.log(`typed into ref ${refValue}`); - } + defaultRuntime.log(`typed into ref ${refValue}`); } catch (err) { defaultRuntime.error(danger(String(err))); defaultRuntime.exit(1); diff --git a/src/cli/browser-cli-examples.ts b/src/cli/browser-cli-examples.ts index 04c96cc65..c9476921f 100644 --- a/src/cli/browser-cli-examples.ts +++ b/src/cli/browser-cli-examples.ts @@ -9,17 +9,15 @@ export const browserCoreExamples = [ "clawdis browser screenshot", "clawdis browser screenshot --full-page", "clawdis browser screenshot --ref 12", + "clawdis browser snapshot", "clawdis browser snapshot --format aria --limit 200", - "clawdis browser snapshot --format ai", ]; export const browserActionExamples = [ "clawdis browser navigate https://example.com", "clawdis browser resize 1280 720", "clawdis browser click 12 --double", - "clawdis browser click --selector 'button.save'", 'clawdis browser type 23 "hello" --submit', - 'clawdis browser type --selector "input[name=q]" "hello"', "clawdis browser press Enter", "clawdis browser hover 44", "clawdis browser drag 10 11", diff --git a/src/cli/browser-cli-inspect.ts b/src/cli/browser-cli-inspect.ts index 09aeabf96..52499ef29 100644 --- a/src/cli/browser-cli-inspect.ts +++ b/src/cli/browser-cli-inspect.ts @@ -45,8 +45,8 @@ export function registerBrowserInspectCommands( browser 
.command("snapshot") - .description("Capture an AI-friendly snapshot (aria or ai)") - .option("--format <aria|ai>", "Snapshot format (default: aria)", "aria") + .description("Capture a snapshot (default: ai; aria is the accessibility tree)") + .option("--format <aria|ai>", "Snapshot format (default: ai)", "ai") .option("--target-id <id>", "CDP target id (or unique prefix)") .option("--limit <n>", "Max nodes (default: 500/800)", (v: string) => Number(v), @@ -55,7 +55,7 @@ export function registerBrowserInspectCommands( .action(async (opts, cmd) => { const parent = parentOpts(cmd); const baseUrl = resolveBrowserControlUrl(parent?.url); - const format = opts.format === "ai" ? "ai" : "aria"; + const format = opts.format === "aria" ? "aria" : "ai"; try { const result = await browserSnapshot(baseUrl, { format,