From 7b675864a8e482f0884c6a4737bbbe0ec46de999 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 13 Dec 2025 18:32:29 +0000 Subject: [PATCH] feat(browser): add DOM inspection commands --- apps/macos/Package.resolved | 6 +- .../macos/Sources/ClawdisCLI/BrowserCLI.swift | 230 +++++++++ .../macos/Sources/ClawdisCLI/ClawdisCLI.swift | 6 +- docs/AGENTS.default.md | 1 + docs/mac/browser.md | 15 +- src/browser/cdp.test.ts | 91 +++- src/browser/cdp.ts | 478 +++++++++++++++--- src/browser/client.ts | 154 ++++++ src/browser/server.ts | 187 +++++++ src/cli/program.ts | 234 +++++++++ 10 files changed, 1320 insertions(+), 82 deletions(-) diff --git a/apps/macos/Package.resolved b/apps/macos/Package.resolved index bb3ce2bfc..9c2f71908 100644 --- a/apps/macos/Package.resolved +++ b/apps/macos/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "5de6834e5cb92c45c61a2e6792b780ac231c5741def70f1efa9ec857fa12f8cb", + "originHash" : "d8a19a95c479a3c7cb20aded07bd18cfeda5d85b95284983da83dbee7c941e5c", "pins" : [ { "identity" : "eventsource", @@ -69,8 +69,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-configuration", "state" : { - "branch" : "main", - "revision" : "3528deb75256d7dcbb0d71fa75077caae0a8c749" + "revision" : "3528deb75256d7dcbb0d71fa75077caae0a8c749", + "version" : "1.0.0" } }, { diff --git a/apps/macos/Sources/ClawdisCLI/BrowserCLI.swift b/apps/macos/Sources/ClawdisCLI/BrowserCLI.swift index 01156d811..00f25354a 100644 --- a/apps/macos/Sources/ClawdisCLI/BrowserCLI.swift +++ b/apps/macos/Sources/ClawdisCLI/BrowserCLI.swift @@ -20,6 +20,15 @@ enum BrowserCLI { var overrideURL: String? var fullPage = false var targetId: String? + var awaitPromise = false + var js: String? + var jsFile: String? + var jsStdin = false + var selector: String? + var format: String? + var limit: Int? + var maxChars: Int? + var outPath: String? var rest: [String] = [] while !args.isEmpty { @@ -31,6 +40,24 @@ enum BrowserCLI { fullPage = true case "--target-id": targetId = args.popFirst() + case "--await": + awaitPromise = true + case "--js": + js = args.popFirst() + case "--js-file": + jsFile = args.popFirst() + case "--js-stdin": + jsStdin = true + case "--selector": + selector = args.popFirst() + case "--format": + format = args.popFirst() + case "--limit": + limit = args.popFirst().flatMap(Int.init) + case "--max-chars": + maxChars = args.popFirst().flatMap(Int.init) + case "--out": + outPath = args.popFirst() default: rest.append(arg) } @@ -145,6 +172,133 @@ enum BrowserCLI { } return 0 + case "eval": + if jsStdin, jsFile != nil { + self.printHelp() + return 2 + } + + let code: String = try { + if let jsFile, !jsFile.isEmpty { + return try String(contentsOfFile: jsFile, encoding: .utf8) + } + if jsStdin { + let data = FileHandle.standardInput.readDataToEndOfFile() + return String(data: data, encoding: .utf8) ?? "" + } + if let js, !js.isEmpty { return js } + if !rest.isEmpty { return rest.joined(separator: " ") } + return "" + }() + + if code.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + self.printHelp() + return 2 + } + + let res = try await self.httpJSON( + method: "POST", + url: baseURL.appendingPathComponent("/eval"), + body: [ + "js": code, + "targetId": targetId ?? "", + "await": awaitPromise, + ], + timeoutInterval: 15.0) + + if jsonOutput { + self.printJSON(ok: true, result: res) + } else { + self.printEval(res: res) + } + return 0 + + case "query": + let sel = (selector ?? rest.first ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + if sel.isEmpty { + self.printHelp() + return 2 + } + var url = baseURL.appendingPathComponent("/query") + var items: [URLQueryItem] = [URLQueryItem(name: "selector", value: sel)] + if let targetId, !targetId.isEmpty { + items.append(URLQueryItem(name: "targetId", value: targetId)) + } + if let limit, limit > 0 { + items.append(URLQueryItem(name: "limit", value: String(limit))) + } + url = self.withQuery(url, items: items) + let res = try await self.httpJSON(method: "GET", url: url, timeoutInterval: 15.0) + if jsonOutput || format == "json" { + self.printJSON(ok: true, result: res) + } else { + self.printQuery(res: res) + } + return 0 + + case "dom": + let fmt = (format == "text") ? "text" : "html" + var url = baseURL.appendingPathComponent("/dom") + var items: [URLQueryItem] = [URLQueryItem(name: "format", value: fmt)] + if let targetId, !targetId.isEmpty { + items.append(URLQueryItem(name: "targetId", value: targetId)) + } + if let selector = selector?.trimmingCharacters(in: .whitespacesAndNewlines), !selector.isEmpty { + items.append(URLQueryItem(name: "selector", value: selector)) + } + if let maxChars, maxChars > 0 { + items.append(URLQueryItem(name: "maxChars", value: String(maxChars))) + } + url = self.withQuery(url, items: items) + let res = try await self.httpJSON(method: "GET", url: url, timeoutInterval: 20.0) + let text = (res["text"] as? String) ?? "" + if let out = outPath, !out.isEmpty { + try Data(text.utf8).write(to: URL(fileURLWithPath: out)) + if jsonOutput { + self.printJSON(ok: true, result: ["ok": true, "out": out]) + } else { + print(out) + } + return 0 + } + if jsonOutput { + self.printJSON(ok: true, result: res) + } else { + print(text) + } + return 0 + + case "snapshot": + let fmt = (format == "domSnapshot") ? "domSnapshot" : "aria" + var url = baseURL.appendingPathComponent("/snapshot") + var items: [URLQueryItem] = [URLQueryItem(name: "format", value: fmt)] + if let targetId, !targetId.isEmpty { + items.append(URLQueryItem(name: "targetId", value: targetId)) + } + if let limit, limit > 0 { + items.append(URLQueryItem(name: "limit", value: String(limit))) + } + url = self.withQuery(url, items: items) + let res = try await self.httpJSON(method: "GET", url: url, timeoutInterval: 20.0) + + if let out = outPath, !out.isEmpty { + let data = try JSONSerialization.data(withJSONObject: res, options: [.prettyPrinted]) + try data.write(to: URL(fileURLWithPath: out)) + if jsonOutput { + self.printJSON(ok: true, result: ["ok": true, "out": out]) + } else { + print(out) + } + return 0 + } + + if jsonOutput || fmt == "domSnapshot" { + self.printJSON(ok: true, result: res) + } else { + self.printSnapshotAria(res: res) + } + return 0 + default: self.printHelp() return 2 @@ -295,6 +449,74 @@ enum BrowserCLI { } } + private static func printEval(res: [String: Any]) { + guard let obj = res["result"] as? [String: Any] else { + self.printResult(jsonOutput: false, res: res) + return + } + + if let value = obj["value"] { + if JSONSerialization.isValidJSONObject(value), + let data = try? JSONSerialization.data(withJSONObject: value, options: [.prettyPrinted]), + let text = String(data: data, encoding: .utf8) + { + print(text) + } else { + print(String(describing: value)) + } + return + } + + if let desc = obj["description"] as? String, !desc.isEmpty { + print(desc) + return + } + + self.printResult(jsonOutput: false, res: obj) + } + + private static func printQuery(res: [String: Any]) { + guard let matches = res["matches"] as? [[String: Any]] else { + self.printResult(jsonOutput: false, res: res) + return + } + if matches.isEmpty { + print("No matches.") + return + } + for m in matches { + let index = (m["index"] as? Int) ?? 0 + let tag = (m["tag"] as? String) ?? "" + let id = (m["id"] as? String).map { "#\($0)" } ?? "" + let className = (m["className"] as? String) ?? "" + let classes = className.split(separator: " ").prefix(3).map(String.init) + let cls = classes.isEmpty ? "" : "." + classes.joined(separator: ".") + let head = "\(index). <\(tag)\(id)\(cls)>" + print(head) + if let text = m["text"] as? String, !text.isEmpty { + print(" \(text)") + } + } + } + + private static func printSnapshotAria(res: [String: Any]) { + guard let nodes = res["nodes"] as? [[String: Any]] else { + self.printResult(jsonOutput: false, res: res) + return + } + for n in nodes { + let depth = (n["depth"] as? Int) ?? 0 + let role = (n["role"] as? String) ?? "unknown" + let name = (n["name"] as? String) ?? "" + let value = (n["value"] as? String) ?? "" + let indent = String(repeating: " ", count: min(depth, 20)) + var line = "\(indent)- \(role)" + if !name.isEmpty { line += " \"\(name)\"" } + if !value.isEmpty { line += " = \"\(value)\"" } + print(line) + } + } + #if SWIFT_PACKAGE static func _testFormatTabs(res: [String: Any]) -> [String] { self.formatTabs(res: res) @@ -325,6 +547,14 @@ enum BrowserCLI { clawdis-mac browser focus [--url <...>] clawdis-mac browser close [--url <...>] clawdis-mac browser screenshot [--target-id ] [--full-page] [--url <...>] + clawdis-mac browser eval [] [--js ] [--js-file ] [--js-stdin] + [--target-id ] [--await] [--url <...>] + clawdis-mac browser query [--limit ] [--format ] + [--target-id ] [--url <...>] + clawdis-mac browser dom [--format ] [--selector ] [--max-chars ] + [--out ] [--target-id ] [--url <...>] + clawdis-mac browser snapshot [--format ] [--limit ] [--out ] + [--target-id ] [--url <...>] Notes: - Config defaults come from ~/.clawdis/clawdis.json (browser.enabled, browser.controlUrl). diff --git a/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift b/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift index 97e9df996..3a2e82640 100644 --- a/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift +++ b/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift @@ -412,7 +412,7 @@ struct ClawdisCLI { clawdis-mac canvas snapshot [--out ] [--session ] Browser (clawd): - clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot + clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot|eval|query|dom|snapshot Browser notes: - Uses clawd’s dedicated Chrome/Chromium profile (separate user-data dir). @@ -426,6 +426,10 @@ struct ClawdisCLI { clawdis-mac browser open https://example.com clawdis-mac browser tabs clawdis-mac browser screenshot --full-page + clawdis-mac browser eval \"location.href\" + clawdis-mac browser query \"a\" --limit 5 + clawdis-mac browser dom --format text --max-chars 5000 + clawdis-mac browser snapshot --format aria --limit 200 Output: Default output is text. Use --json for machine-readable output. diff --git a/docs/AGENTS.default.md b/docs/AGENTS.default.md index 23c3ccebd..9a1d711a4 100644 --- a/docs/AGENTS.default.md +++ b/docs/AGENTS.default.md @@ -35,3 +35,4 @@ read_when: - For MCPs, mcporter writes to the home-scope config; re-run installs if you rotate tokens. - Keep heartbeats enabled so the assistant can schedule reminders, monitor inboxes, and trigger camera captures. - For browser-driven verification, use `clawdis browser` (tabs/status/screenshot) with the clawd-managed Chrome profile. +- For DOM inspection, use `clawdis browser eval|query|dom|snapshot` (and `--json`/`--out` when you need machine output). diff --git a/docs/mac/browser.md b/docs/mac/browser.md index e3b56c8aa..bd8dd4f55 100644 --- a/docs/mac/browser.md +++ b/docs/mac/browser.md @@ -109,9 +109,20 @@ Minimum endpoints/methods (names illustrative): - `browser.screenshot` - params: `{ targetId?, fullPage?: false }` → returns a `MEDIA:` attachment URL (via the existing Clawdis media host) +DOM + inspection (v1): +- `browser.eval` + - params: `{ js, targetId?, await?: false }` → returns the CDP `Runtime.evaluate` result (best-effort `returnByValue`) +- `browser.query` + - params: `{ selector, targetId?, limit? }` → returns basic element summaries (tag/id/class/text/value/href/outerHTML) +- `browser.dom` + - params: `{ format: "html"|"text", targetId?, selector?, maxChars? }` → returns a truncated dump (`text` field) +- `browser.snapshot` + - params: `{ format: "aria"|"domSnapshot", targetId?, limit? }` + - `aria`: simplified Accessibility tree with `backendDOMNodeId` when available (future click/type hooks) + - `domSnapshot`: lightweight DOM walk snapshot (tree-ish, bounded by `limit`) + Nice-to-have (later): -- `browser.snapshot.aria` (AI-friendly snapshot with stable refs) -- `browser.click` / `browser.type` / `browser.waitFor` helpers built atop snapshot refs +- `browser.click` / `browser.type` / `browser.waitFor` helpers built atop snapshot refs / backend node ids ### “Is it open or closed?” diff --git a/src/browser/cdp.test.ts b/src/browser/cdp.test.ts index 65836d288..d574690c7 100644 --- a/src/browser/cdp.test.ts +++ b/src/browser/cdp.test.ts @@ -3,7 +3,7 @@ import { createServer } from "node:http"; import { afterEach, describe, expect, it } from "vitest"; import { WebSocketServer } from "ws"; -import { createTargetViaCdp } from "./cdp.js"; +import { createTargetViaCdp, evaluateJavaScript, snapshotAria } from "./cdp.js"; describe("cdp", () => { let httpServer: ReturnType | null = null; @@ -70,4 +70,93 @@ describe("cdp", () => { expect(created.targetId).toBe("TARGET_123"); }); + + it("evaluates javascript via CDP", async () => { + wsServer = new WebSocketServer({ port: 0, host: "127.0.0.1" }); + await new Promise((resolve) => wsServer?.once("listening", resolve)); + const wsPort = (wsServer.address() as { port: number }).port; + + wsServer.on("connection", (socket) => { + socket.on("message", (data) => { + const msg = JSON.parse(String(data)) as { + id?: number; + method?: string; + params?: { expression?: string }; + }; + if (msg.method === "Runtime.enable") { + socket.send(JSON.stringify({ id: msg.id, result: {} })); + return; + } + if (msg.method === "Runtime.evaluate") { + expect(msg.params?.expression).toBe("1+1"); + socket.send( + JSON.stringify({ + id: msg.id, + result: { result: { type: "number", value: 2 } }, + }), + ); + } + }); + }); + + const res = await evaluateJavaScript({ + wsUrl: `ws://127.0.0.1:${wsPort}`, + expression: "1+1", + }); + + expect(res.result.type).toBe("number"); + expect(res.result.value).toBe(2); + }); + + it("captures an aria snapshot via CDP", async () => { + wsServer = new WebSocketServer({ port: 0, host: "127.0.0.1" }); + await new Promise((resolve) => wsServer?.once("listening", resolve)); + const wsPort = (wsServer.address() as { port: number }).port; + + wsServer.on("connection", (socket) => { + socket.on("message", (data) => { + const msg = JSON.parse(String(data)) as { + id?: number; + method?: string; + }; + if (msg.method === "Accessibility.enable") { + socket.send(JSON.stringify({ id: msg.id, result: {} })); + return; + } + if (msg.method === "Accessibility.getFullAXTree") { + socket.send( + JSON.stringify({ + id: msg.id, + result: { + nodes: [ + { + nodeId: "1", + role: { value: "RootWebArea" }, + name: { value: "" }, + childIds: ["2"], + }, + { + nodeId: "2", + role: { value: "button" }, + name: { value: "OK" }, + backendDOMNodeId: 42, + childIds: [], + }, + ], + }, + }), + ); + return; + } + }); + }); + + const snap = await snapshotAria({ wsUrl: `ws://127.0.0.1:${wsPort}` }); + expect(snap.nodes.length).toBe(2); + expect(snap.nodes[0]?.role).toBe("RootWebArea"); + expect(snap.nodes[1]?.role).toBe("button"); + expect(snap.nodes[1]?.name).toBe("OK"); + expect(snap.nodes[1]?.backendDOMNodeId).toBe(42); + expect(snap.nodes[1]?.depth).toBe(1); + }); }); diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index f572f3fd4..adb1dc694 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -78,6 +78,34 @@ async function fetchJson(url: string, timeoutMs = 1500): Promise { } } +async function withCdpSocket( + wsUrl: string, + fn: (send: CdpSendFn) => Promise, +): Promise { + const ws = new WebSocket(wsUrl, { handshakeTimeout: 5000 }); + const { send, closeWithError } = createCdpSender(ws); + + const openPromise = new Promise((resolve, reject) => { + ws.once("open", () => resolve()); + ws.once("error", (err) => reject(err)); + }); + + await openPromise; + + try { + return await fn(send); + } catch (err) { + closeWithError(err instanceof Error ? err : new Error(String(err))); + throw err; + } finally { + try { + ws.close(); + } catch { + // ignore + } + } +} + export async function captureScreenshotPng(opts: { wsUrl: string; fullPage?: boolean; @@ -95,61 +123,43 @@ export async function captureScreenshot(opts: { format?: "png" | "jpeg"; quality?: number; // jpeg only (0..100) }): Promise { - const ws = new WebSocket(opts.wsUrl, { handshakeTimeout: 5000 }); - const { send, closeWithError } = createCdpSender(ws); + return await withCdpSocket(opts.wsUrl, async (send) => { + await send("Page.enable"); - const openPromise = new Promise((resolve, reject) => { - ws.once("open", () => resolve()); - ws.once("error", (err) => reject(err)); - }); - - await openPromise; - - await send("Page.enable"); - - let clip: - | { x: number; y: number; width: number; height: number; scale: number } - | undefined; - if (opts.fullPage) { - const metrics = (await send("Page.getLayoutMetrics")) as { - cssContentSize?: { width?: number; height?: number }; - contentSize?: { width?: number; height?: number }; - }; - const size = metrics?.cssContentSize ?? metrics?.contentSize; - const width = Number(size?.width ?? 0); - const height = Number(size?.height ?? 0); - if (width > 0 && height > 0) { - clip = { x: 0, y: 0, width, height, scale: 1 }; + let clip: + | { x: number; y: number; width: number; height: number; scale: number } + | undefined; + if (opts.fullPage) { + const metrics = (await send("Page.getLayoutMetrics")) as { + cssContentSize?: { width?: number; height?: number }; + contentSize?: { width?: number; height?: number }; + }; + const size = metrics?.cssContentSize ?? metrics?.contentSize; + const width = Number(size?.width ?? 0); + const height = Number(size?.height ?? 0); + if (width > 0 && height > 0) { + clip = { x: 0, y: 0, width, height, scale: 1 }; + } } - } - const format = opts.format ?? "png"; - const quality = - format === "jpeg" - ? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85))) - : undefined; + const format = opts.format ?? "png"; + const quality = + format === "jpeg" + ? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85))) + : undefined; - const result = (await send("Page.captureScreenshot", { - format, - ...(quality !== undefined ? { quality } : {}), - fromSurface: true, - captureBeyondViewport: true, - ...(clip ? { clip } : {}), - })) as { data?: string }; + const result = (await send("Page.captureScreenshot", { + format, + ...(quality !== undefined ? { quality } : {}), + fromSurface: true, + captureBeyondViewport: true, + ...(clip ? { clip } : {}), + })) as { data?: string }; - const base64 = result?.data; - if (!base64) { - closeWithError(new Error("Screenshot failed: missing data")); - throw new Error("Screenshot failed: missing data"); - } - - try { - ws.close(); - } catch { - // ignore - } - - return Buffer.from(base64, "base64"); + const base64 = result?.data; + if (!base64) throw new Error("Screenshot failed: missing data"); + return Buffer.from(base64, "base64"); + }); } export async function createTargetViaCdp(opts: { @@ -163,30 +173,348 @@ export async function createTargetViaCdp(opts: { const wsUrl = String(version?.webSocketDebuggerUrl ?? "").trim(); if (!wsUrl) throw new Error("CDP /json/version missing webSocketDebuggerUrl"); - const ws = new WebSocket(wsUrl, { handshakeTimeout: 5000 }); - const { send, closeWithError } = createCdpSender(ws); - - const openPromise = new Promise((resolve, reject) => { - ws.once("open", () => resolve()); - ws.once("error", (err) => reject(err)); + return await withCdpSocket(wsUrl, async (send) => { + const created = (await send("Target.createTarget", { url: opts.url })) as { + targetId?: string; + }; + const targetId = String(created?.targetId ?? "").trim(); + if (!targetId) + throw new Error("CDP Target.createTarget returned no targetId"); + return { targetId }; }); - - await openPromise; - - const created = (await send("Target.createTarget", { url: opts.url })) as { - targetId?: string; - }; - const targetId = String(created?.targetId ?? "").trim(); - if (!targetId) { - closeWithError(new Error("CDP Target.createTarget returned no targetId")); - throw new Error("CDP Target.createTarget returned no targetId"); - } - - try { - ws.close(); - } catch { - // ignore - } - - return { targetId }; } + +export type CdpRemoteObject = { + type: string; + subtype?: string; + value?: unknown; + description?: string; + unserializableValue?: string; + preview?: unknown; +}; + +export type CdpExceptionDetails = { + text?: string; + lineNumber?: number; + columnNumber?: number; + exception?: CdpRemoteObject; + stackTrace?: unknown; +}; + +export async function evaluateJavaScript(opts: { + wsUrl: string; + expression: string; + awaitPromise?: boolean; + returnByValue?: boolean; +}): Promise<{ + result: CdpRemoteObject; + exceptionDetails?: CdpExceptionDetails; +}> { + return await withCdpSocket(opts.wsUrl, async (send) => { + await send("Runtime.enable").catch(() => {}); + const evaluated = (await send("Runtime.evaluate", { + expression: opts.expression, + awaitPromise: Boolean(opts.awaitPromise), + returnByValue: opts.returnByValue ?? true, + userGesture: true, + includeCommandLineAPI: true, + })) as { + result?: CdpRemoteObject; + exceptionDetails?: CdpExceptionDetails; + }; + + const result = evaluated?.result; + if (!result) throw new Error("CDP Runtime.evaluate returned no result"); + return { result, exceptionDetails: evaluated.exceptionDetails }; + }); +} + +export type AriaSnapshotNode = { + ref: string; + role: string; + name: string; + value?: string; + description?: string; + backendDOMNodeId?: number; + depth: number; +}; + +type RawAXNode = { + nodeId?: string; + role?: { value?: string }; + name?: { value?: string }; + value?: { value?: string }; + description?: { value?: string }; + childIds?: string[]; + backendDOMNodeId?: number; +}; + +function axValue(v: unknown): string { + if (!v || typeof v !== "object") return ""; + const value = (v as { value?: unknown }).value; + return typeof value === "string" ? value : String(value ?? ""); +} + +function formatAriaSnapshot( + nodes: RawAXNode[], + limit: number, +): AriaSnapshotNode[] { + const byId = new Map(); + for (const n of nodes) { + if (n.nodeId) byId.set(n.nodeId, n); + } + + // Heuristic: pick a root-ish node (one that is not referenced as a child), else first. + const referenced = new Set(); + for (const n of nodes) { + for (const c of n.childIds ?? []) referenced.add(c); + } + const root = + nodes.find((n) => n.nodeId && !referenced.has(n.nodeId)) ?? nodes[0]; + if (!root?.nodeId) return []; + + const out: AriaSnapshotNode[] = []; + const stack: Array<{ id: string; depth: number }> = [ + { id: root.nodeId, depth: 0 }, + ]; + while (stack.length && out.length < limit) { + const popped = stack.pop(); + if (!popped) break; + const { id, depth } = popped; + const n = byId.get(id); + if (!n) continue; + const role = axValue(n.role); + const name = axValue(n.name); + const value = axValue(n.value); + const description = axValue(n.description); + const ref = `ax${out.length + 1}`; + out.push({ + ref, + role: role || "unknown", + name: name || "", + ...(value ? { value } : {}), + ...(description ? { description } : {}), + ...(typeof n.backendDOMNodeId === "number" + ? { backendDOMNodeId: n.backendDOMNodeId } + : {}), + depth, + }); + + const children = (n.childIds ?? []).filter((c) => byId.has(c)); + for (let i = children.length - 1; i >= 0; i--) { + const child = children[i]; + if (child) stack.push({ id: child, depth: depth + 1 }); + } + } + + return out; +} + +export async function snapshotAria(opts: { + wsUrl: string; + limit?: number; +}): Promise<{ nodes: AriaSnapshotNode[] }> { + const limit = Math.max(1, Math.min(2000, Math.floor(opts.limit ?? 500))); + return await withCdpSocket(opts.wsUrl, async (send) => { + await send("Accessibility.enable").catch(() => {}); + const res = (await send("Accessibility.getFullAXTree")) as { + nodes?: RawAXNode[]; + }; + const nodes = Array.isArray(res?.nodes) ? res.nodes : []; + return { nodes: formatAriaSnapshot(nodes, limit) }; + }); +} + +export async function snapshotDom(opts: { + wsUrl: string; + limit?: number; + maxTextChars?: number; +}): Promise<{ + nodes: DomSnapshotNode[]; +}> { + const limit = Math.max(1, Math.min(5000, Math.floor(opts.limit ?? 800))); + const maxTextChars = Math.max( + 0, + Math.min(5000, Math.floor(opts.maxTextChars ?? 220)), + ); + + const expression = `(() => { + const maxNodes = ${JSON.stringify(limit)}; + const maxText = ${JSON.stringify(maxTextChars)}; + const nodes = []; + const root = document.documentElement; + if (!root) return { nodes }; + const stack = [{ el: root, depth: 0, parentRef: null }]; + while (stack.length && nodes.length < maxNodes) { + const cur = stack.pop(); + const el = cur.el; + if (!el || el.nodeType !== 1) continue; + const ref = "n" + String(nodes.length + 1); + const tag = (el.tagName || "").toLowerCase(); + const id = el.id ? String(el.id) : undefined; + const className = el.className ? String(el.className).slice(0, 300) : undefined; + const role = el.getAttribute && el.getAttribute("role") ? String(el.getAttribute("role")) : undefined; + const name = el.getAttribute && el.getAttribute("aria-label") ? String(el.getAttribute("aria-label")) : undefined; + let text = ""; + try { text = String(el.innerText || "").trim(); } catch {} + if (maxText && text.length > maxText) text = text.slice(0, maxText) + "…"; + const href = (el.href !== undefined && el.href !== null) ? String(el.href) : undefined; + const type = (el.type !== undefined && el.type !== null) ? String(el.type) : undefined; + const value = (el.value !== undefined && el.value !== null) ? String(el.value).slice(0, 500) : undefined; + nodes.push({ + ref, + parentRef: cur.parentRef, + depth: cur.depth, + tag, + ...(id ? { id } : {}), + ...(className ? { className } : {}), + ...(role ? { role } : {}), + ...(name ? { name } : {}), + ...(text ? { text } : {}), + ...(href ? { href } : {}), + ...(type ? { type } : {}), + ...(value ? { value } : {}), + }); + const children = el.children ? Array.from(el.children) : []; + for (let i = children.length - 1; i >= 0; i--) { + stack.push({ el: children[i], depth: cur.depth + 1, parentRef: ref }); + } + } + return { nodes }; + })()`; + + const evaluated = await evaluateJavaScript({ + wsUrl: opts.wsUrl, + expression, + awaitPromise: true, + returnByValue: true, + }); + const value = evaluated.result?.value as unknown; + if (!value || typeof value !== "object") return { nodes: [] }; + const nodes = (value as { nodes?: unknown }).nodes; + return { nodes: Array.isArray(nodes) ? (nodes as DomSnapshotNode[]) : [] }; +} + +export type DomSnapshotNode = { + ref: string; + parentRef: string | null; + depth: number; + tag: string; + id?: string; + className?: string; + role?: string; + name?: string; + text?: string; + href?: string; + type?: string; + value?: string; +}; + +export async function getDomText(opts: { + wsUrl: string; + format: "html" | "text"; + maxChars?: number; + selector?: string; +}): Promise<{ text: string }> { + const maxChars = Math.max( + 0, + Math.min(5_000_000, Math.floor(opts.maxChars ?? 200_000)), + ); + const selectorExpr = opts.selector ? JSON.stringify(opts.selector) : "null"; + const expression = `(() => { + const fmt = ${JSON.stringify(opts.format)}; + const max = ${JSON.stringify(maxChars)}; + const sel = ${selectorExpr}; + const pick = sel ? document.querySelector(sel) : null; + let out = ""; + if (fmt === "text") { + const el = pick || document.body || document.documentElement; + try { out = String(el && el.innerText ? el.innerText : ""); } catch { out = ""; } + } else { + const el = pick || document.documentElement; + try { out = String(el && el.outerHTML ? el.outerHTML : ""); } catch { out = ""; } + } + if (max && out.length > max) out = out.slice(0, max) + "\\n"; + return out; + })()`; + + const evaluated = await evaluateJavaScript({ + wsUrl: opts.wsUrl, + expression, + awaitPromise: true, + returnByValue: true, + }); + const text = String(evaluated.result?.value ?? ""); + return { text }; +} + +export async function querySelector(opts: { + wsUrl: string; + selector: string; + limit?: number; + maxTextChars?: number; + maxHtmlChars?: number; +}): Promise<{ + matches: QueryMatch[]; +}> { + const limit = Math.max(1, Math.min(200, Math.floor(opts.limit ?? 20))); + const maxText = Math.max( + 0, + Math.min(5000, Math.floor(opts.maxTextChars ?? 500)), + ); + const maxHtml = Math.max( + 0, + Math.min(20000, Math.floor(opts.maxHtmlChars ?? 1500)), + ); + + const expression = `(() => { + const sel = ${JSON.stringify(opts.selector)}; + const lim = ${JSON.stringify(limit)}; + const maxText = ${JSON.stringify(maxText)}; + const maxHtml = ${JSON.stringify(maxHtml)}; + const els = Array.from(document.querySelectorAll(sel)).slice(0, lim); + return els.map((el, i) => { + const tag = (el.tagName || "").toLowerCase(); + const id = el.id ? String(el.id) : undefined; + const className = el.className ? String(el.className).slice(0, 300) : undefined; + let text = ""; + try { text = String(el.innerText || "").trim(); } catch {} + if (maxText && text.length > maxText) text = text.slice(0, maxText) + "…"; + const value = (el.value !== undefined && el.value !== null) ? String(el.value).slice(0, 500) : undefined; + const href = (el.href !== undefined && el.href !== null) ? String(el.href) : undefined; + let outerHTML = ""; + try { outerHTML = String(el.outerHTML || ""); } catch {} + if (maxHtml && outerHTML.length > maxHtml) outerHTML = outerHTML.slice(0, maxHtml) + "…"; + return { + index: i + 1, + tag, + ...(id ? { id } : {}), + ...(className ? { className } : {}), + ...(text ? { text } : {}), + ...(value ? { value } : {}), + ...(href ? { href } : {}), + ...(outerHTML ? { outerHTML } : {}), + }; + }); + })()`; + + const evaluated = await evaluateJavaScript({ + wsUrl: opts.wsUrl, + expression, + awaitPromise: true, + returnByValue: true, + }); + const matches = evaluated.result?.value; + return { matches: Array.isArray(matches) ? (matches as QueryMatch[]) : [] }; +} + +export type QueryMatch = { + index: number; + tag: string; + id?: string; + className?: string; + text?: string; + value?: string; + href?: string; + outerHTML?: string; +}; diff --git a/src/browser/client.ts b/src/browser/client.ts index 64b147031..369496886 100644 --- a/src/browser/client.ts +++ b/src/browser/client.ts @@ -28,6 +28,83 @@ export type ScreenshotResult = { url: string; }; +export type EvalResult = { + ok: true; + targetId: string; + url: string; + result: { + type: string; + subtype?: string; + value?: unknown; + description?: string; + unserializableValue?: string; + preview?: unknown; + }; +}; + +export type QueryResult = { + ok: true; + targetId: string; + url: string; + matches: Array<{ + index: number; + tag: string; + id?: string; + className?: string; + text?: string; + value?: string; + href?: string; + outerHTML?: string; + }>; +}; + +export type DomResult = { + ok: true; + targetId: string; + url: string; + format: "html" | "text"; + text: string; +}; + +export type SnapshotAriaNode = { + ref: string; + role: string; + name: string; + value?: string; + description?: string; + backendDOMNodeId?: number; + depth: number; +}; + +export type SnapshotResult = + | { + ok: true; + format: "aria"; + targetId: string; + url: string; + nodes: SnapshotAriaNode[]; + } + | { + ok: true; + format: "domSnapshot"; + targetId: string; + url: string; + nodes: Array<{ + ref: string; + parentRef: string | null; + depth: number; + tag: string; + id?: string; + className?: string; + role?: string; + name?: string; + text?: string; + href?: string; + type?: string; + value?: string; + }>; + }; + function unwrapCause(err: unknown): unknown { if (!err || typeof err !== "object") return null; const cause = (err as { cause?: unknown }).cause; @@ -172,3 +249,80 @@ export async function browserScreenshot( timeoutMs: 20000, }); } + +export async function browserEval( + baseUrl: string, + opts: { + js: string; + targetId?: string; + awaitPromise?: boolean; + }, +): Promise { + return await fetchJson(`${baseUrl}/eval`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + js: opts.js, + targetId: opts.targetId, + await: Boolean(opts.awaitPromise), + }), + timeoutMs: 15000, + }); +} + +export async function browserQuery( + baseUrl: string, + opts: { + selector: string; + targetId?: string; + limit?: number; + }, +): Promise { + const q = new URLSearchParams(); + q.set("selector", opts.selector); + if (opts.targetId) q.set("targetId", opts.targetId); + if (typeof opts.limit === "number") q.set("limit", String(opts.limit)); + return await fetchJson(`${baseUrl}/query?${q.toString()}`, { + timeoutMs: 15000, + }); +} + +export async function browserDom( + baseUrl: string, + opts: { + format: "html" | "text"; + targetId?: string; + maxChars?: number; + selector?: string; + }, +): Promise { + const q = new URLSearchParams(); + q.set("format", opts.format); + if (opts.targetId) q.set("targetId", opts.targetId); + if (typeof opts.maxChars === "number") + q.set("maxChars", String(opts.maxChars)); + if (opts.selector) q.set("selector", opts.selector); + return await fetchJson(`${baseUrl}/dom?${q.toString()}`, { + timeoutMs: 20000, + }); +} + +export async function browserSnapshot( + baseUrl: string, + opts: { + format: "aria" | "domSnapshot"; + targetId?: string; + limit?: number; + }, +): Promise { + const q = new URLSearchParams(); + q.set("format", opts.format); + if (opts.targetId) q.set("targetId", opts.targetId); + if (typeof opts.limit === "number") q.set("limit", String(opts.limit)); + return await fetchJson( + `${baseUrl}/snapshot?${q.toString()}`, + { + timeoutMs: 20000, + }, + ); +} diff --git a/src/browser/server.ts b/src/browser/server.ts index 77a099c14..7b57718d8 100644 --- a/src/browser/server.ts +++ b/src/browser/server.ts @@ -10,6 +10,11 @@ import { captureScreenshot, captureScreenshotPng, createTargetViaCdp, + evaluateJavaScript, + getDomText, + querySelector, + snapshotAria, + snapshotDom, } from "./cdp.js"; import { isChromeReachable, @@ -178,6 +183,34 @@ async function ensureBrowserAvailable(runtime: RuntimeEnv): Promise { return; } +async function ensureTabAvailable(runtime: RuntimeEnv, targetId?: string) { + if (!state) throw new Error("Browser server not started"); + await ensureBrowserAvailable(runtime); + + const tabs1 = await listTabs(state.cdpPort); + if (tabs1.length === 0) { + await openTab(state.cdpPort, "about:blank"); + } + + const tabs = await listTabs(state.cdpPort); + const chosen = targetId + ? (() => { + const resolved = resolveTargetIdFromTabs(targetId, tabs); + if (!resolved.ok) { + if (resolved.reason === "ambiguous") return "AMBIGUOUS" as const; + return null; + } + return tabs.find((t) => t.targetId === resolved.targetId) ?? null; + })() + : (tabs.at(0) ?? null); + + if (chosen === "AMBIGUOUS") { + throw new Error("ambiguous target id prefix"); + } + if (!chosen?.wsUrl) throw new Error("tab not found"); + return chosen; +} + export async function startBrowserControlServerFromConfig( runtime: RuntimeEnv = defaultRuntime, ): Promise { @@ -374,6 +407,160 @@ export async function startBrowserControlServerFromConfig( } }); + function mapTabError(err: unknown) { + const msg = String(err); + if (msg.includes("ambiguous target id prefix")) { + return { status: 409, message: "ambiguous target id prefix" }; + } + if (msg.includes("tab not found")) { + return { status: 404, message: "tab not found" }; + } + return null; + } + + app.post("/eval", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const js = String((req.body as { js?: unknown })?.js ?? "").trim(); + const targetId = String( + (req.body as { targetId?: unknown })?.targetId ?? "", + ).trim(); + const awaitPromise = Boolean((req.body as { await?: unknown })?.await); + + if (!js) return jsonError(res, 400, "js is required"); + + try { + const tab = await ensureTabAvailable(runtime, targetId || undefined); + const evaluated = await evaluateJavaScript({ + wsUrl: tab.wsUrl ?? "", + expression: js, + awaitPromise, + returnByValue: true, + }); + + if (evaluated.exceptionDetails) { + const msg = + evaluated.exceptionDetails.exception?.description || + evaluated.exceptionDetails.text || + "JavaScript evaluation failed"; + return jsonError(res, 400, msg); + } + + res.json({ + ok: true, + targetId: tab.targetId, + url: tab.url, + result: evaluated.result, + }); + } catch (err) { + const mapped = mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.get("/query", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const selector = + typeof req.query.selector === "string" ? req.query.selector.trim() : ""; + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const limit = + typeof req.query.limit === "string" ? Number(req.query.limit) : undefined; + + if (!selector) return jsonError(res, 400, "selector is required"); + + try { + const tab = await ensureTabAvailable(runtime, targetId || undefined); + const result = await querySelector({ + wsUrl: tab.wsUrl ?? "", + selector, + limit, + }); + res.json({ ok: true, targetId: tab.targetId, url: tab.url, ...result }); + } catch (err) { + const mapped = mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.get("/dom", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const format = req.query.format === "text" ? "text" : "html"; + const selector = + typeof req.query.selector === "string" ? req.query.selector.trim() : ""; + const maxChars = + typeof req.query.maxChars === "string" + ? Number(req.query.maxChars) + : undefined; + + try { + const tab = await ensureTabAvailable(runtime, targetId || undefined); + const result = await getDomText({ + wsUrl: tab.wsUrl ?? "", + format, + maxChars, + selector: selector || undefined, + }); + res.json({ + ok: true, + targetId: tab.targetId, + url: tab.url, + format, + ...result, + }); + } catch (err) { + const mapped = mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.get("/snapshot", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const format = req.query.format === "domSnapshot" ? "domSnapshot" : "aria"; + const limit = + typeof req.query.limit === "string" ? Number(req.query.limit) : undefined; + + try { + const tab = await ensureTabAvailable(runtime, targetId || undefined); + + if (format === "aria") { + const snap = await snapshotAria({ + wsUrl: tab.wsUrl ?? "", + limit, + }); + return res.json({ + ok: true, + format, + targetId: tab.targetId, + url: tab.url, + ...snap, + }); + } + + const snap = await snapshotDom({ + wsUrl: tab.wsUrl ?? "", + limit, + }); + return res.json({ + ok: true, + format, + targetId: tab.targetId, + url: tab.url, + ...snap, + }); + } catch (err) { + const mapped = mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + const port = resolved.controlPort; const server = await new Promise((resolve, reject) => { const s = app.listen(port, "127.0.0.1", () => resolve(s)); diff --git a/src/cli/program.ts b/src/cli/program.ts index a0084284c..ddf0993a4 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -2,9 +2,13 @@ import chalk from "chalk"; import { Command } from "commander"; import { browserCloseTab, + browserDom, + browserEval, browserFocusTab, browserOpenTab, + browserQuery, browserScreenshot, + browserSnapshot, browserStart, browserStatus, browserStop, @@ -433,6 +437,10 @@ Examples: clawdis browser open https://example.com clawdis browser screenshot # emits MEDIA: clawdis browser screenshot --full-page + clawdis browser eval "location.href" + clawdis browser query "a" --limit 5 + clawdis browser dom --format text --max-chars 5000 + clawdis browser snapshot --format aria --limit 200 `, ) .action(() => { @@ -628,5 +636,231 @@ Examples: } }); + browser + .command("eval") + .description("Evaluate JavaScript in the page context") + .argument("[js]", "JavaScript expression (or use --js-file/--js-stdin)") + .option("--target-id ", "CDP target id (or unique prefix)") + .option("--await", "Await promises (Runtime.evaluate awaitPromise)", false) + .option("--js-file ", "Read JavaScript from a file") + .option("--js-stdin", "Read JavaScript from stdin", false) + .action(async (jsArg: string | undefined, opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + + let js = jsArg?.trim() ?? ""; + if (opts.jsFile && opts.jsStdin) { + defaultRuntime.error(danger("Use either --js-file or --js-stdin.")); + defaultRuntime.exit(2); + return; + } + if (opts.jsFile) { + const fs = await import("node:fs/promises"); + js = await fs.readFile(opts.jsFile, "utf8"); + } else if (opts.jsStdin) { + js = await new Promise((resolve, reject) => { + let buf = ""; + process.stdin.setEncoding("utf8"); + process.stdin.on("data", (c) => { + buf += c; + }); + process.stdin.on("end", () => resolve(buf)); + process.stdin.on("error", (e) => reject(e)); + }); + } + + if (!js.trim()) { + defaultRuntime.error( + danger("Missing JavaScript. Pass or use --js-file/--js-stdin."), + ); + defaultRuntime.exit(2); + return; + } + + try { + const result = await browserEval(baseUrl, { + js, + targetId: opts.targetId?.trim() || undefined, + awaitPromise: Boolean(opts.await), + }); + if (parent?.json) { + defaultRuntime.log(JSON.stringify(result, null, 2)); + return; + } + const v = result.result; + if (Object.hasOwn(v, "value")) { + const value = (v as { value?: unknown }).value; + defaultRuntime.log( + typeof value === "string" ? value : JSON.stringify(value, null, 2), + ); + return; + } + defaultRuntime.log(v.description ?? JSON.stringify(v, null, 2)); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("query") + .description("Query elements by CSS selector") + .argument("", "CSS selector") + .option("--target-id ", "CDP target id (or unique prefix)") + .option("--limit ", "Max matches (default: 20)", (v: string) => + Number(v), + ) + .option( + "--format ", + "Text output format (default: text)", + "text", + ) + .action(async (selector: string, opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + const result = await browserQuery(baseUrl, { + selector, + targetId: opts.targetId?.trim() || undefined, + limit: Number.isFinite(opts.limit) ? opts.limit : undefined, + }); + if (parent?.json || opts.format === "json") { + defaultRuntime.log(JSON.stringify(result, null, 2)); + return; + } + if (!result.matches.length) { + defaultRuntime.log("No matches."); + return; + } + defaultRuntime.log( + result.matches + .map((m) => { + const id = m.id ? `#${m.id}` : ""; + const cls = m.className + ? `.${m.className + .split(/\s+/) + .filter(Boolean) + .slice(0, 3) + .join(".")}` + : ""; + const head = `${m.index}. <${m.tag}${id}${cls}>`; + const text = m.text ? `\n ${m.text}` : ""; + return `${head}${text}`; + }) + .join("\n"), + ); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("dom") + .description("Dump DOM (html or text) with truncation") + .option("--format ", "Output format (default: html)", "html") + .option("--target-id ", "CDP target id (or unique prefix)") + .option("--selector ", "Optional CSS selector to scope the dump") + .option( + "--max-chars ", + "Max characters (default: 200000)", + (v: string) => Number(v), + ) + .option("--out ", "Write output to a file") + .action(async (opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + const format = opts.format === "text" ? "text" : "html"; + try { + const result = await browserDom(baseUrl, { + format, + targetId: opts.targetId?.trim() || undefined, + maxChars: Number.isFinite(opts.maxChars) ? opts.maxChars : undefined, + selector: opts.selector?.trim() || undefined, + }); + if (opts.out) { + const fs = await import("node:fs/promises"); + await fs.writeFile(opts.out, result.text, "utf8"); + if (parent?.json) { + defaultRuntime.log( + JSON.stringify({ ok: true, out: opts.out }, null, 2), + ); + } else { + defaultRuntime.log(opts.out); + } + return; + } + if (parent?.json) { + defaultRuntime.log(JSON.stringify(result, null, 2)); + return; + } + defaultRuntime.log(result.text); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("snapshot") + .description("Capture an AI-friendly snapshot (aria or domSnapshot)") + .option( + "--format ", + "Snapshot format (default: aria)", + "aria", + ) + .option("--target-id ", "CDP target id (or unique prefix)") + .option("--limit ", "Max nodes (default: 500/800)", (v: string) => + Number(v), + ) + .option("--out ", "Write JSON snapshot to a file") + .action(async (opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + const format = opts.format === "domSnapshot" ? "domSnapshot" : "aria"; + try { + const result = await browserSnapshot(baseUrl, { + format, + targetId: opts.targetId?.trim() || undefined, + limit: Number.isFinite(opts.limit) ? opts.limit : undefined, + }); + + const payload = JSON.stringify(result, null, 2); + if (opts.out) { + const fs = await import("node:fs/promises"); + await fs.writeFile(opts.out, payload, "utf8"); + if (parent?.json) { + defaultRuntime.log( + JSON.stringify({ ok: true, out: opts.out }, null, 2), + ); + } else { + defaultRuntime.log(opts.out); + } + return; + } + + if (parent?.json || format === "domSnapshot") { + defaultRuntime.log(payload); + return; + } + + // aria text rendering + const nodes = "nodes" in result ? result.nodes : []; + defaultRuntime.log( + nodes + .map((n) => { + const indent = " ".repeat(Math.min(20, n.depth)); + const name = n.name ? ` "${n.name}"` : ""; + const value = n.value ? ` = "${n.value}"` : ""; + return `${indent}- ${n.role}${name}${value}`; + }) + .join("\n"), + ); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + return program; }