feat(browser): add DOM inspection commands

This commit is contained in:
Peter Steinberger
2025-12-13 18:32:29 +00:00
parent 3b853b329f
commit 7b675864a8
10 changed files with 1320 additions and 82 deletions

View File

@@ -1,5 +1,5 @@
{
"originHash" : "5de6834e5cb92c45c61a2e6792b780ac231c5741def70f1efa9ec857fa12f8cb",
"originHash" : "d8a19a95c479a3c7cb20aded07bd18cfeda5d85b95284983da83dbee7c941e5c",
"pins" : [
{
"identity" : "eventsource",
@@ -69,8 +69,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-configuration",
"state" : {
"branch" : "main",
"revision" : "3528deb75256d7dcbb0d71fa75077caae0a8c749"
"revision" : "3528deb75256d7dcbb0d71fa75077caae0a8c749",
"version" : "1.0.0"
}
},
{

View File

@@ -20,6 +20,15 @@ enum BrowserCLI {
var overrideURL: String?
var fullPage = false
var targetId: String?
var awaitPromise = false
var js: String?
var jsFile: String?
var jsStdin = false
var selector: String?
var format: String?
var limit: Int?
var maxChars: Int?
var outPath: String?
var rest: [String] = []
while !args.isEmpty {
@@ -31,6 +40,24 @@ enum BrowserCLI {
fullPage = true
case "--target-id":
targetId = args.popFirst()
case "--await":
awaitPromise = true
case "--js":
js = args.popFirst()
case "--js-file":
jsFile = args.popFirst()
case "--js-stdin":
jsStdin = true
case "--selector":
selector = args.popFirst()
case "--format":
format = args.popFirst()
case "--limit":
limit = args.popFirst().flatMap(Int.init)
case "--max-chars":
maxChars = args.popFirst().flatMap(Int.init)
case "--out":
outPath = args.popFirst()
default:
rest.append(arg)
}
@@ -145,6 +172,133 @@ enum BrowserCLI {
}
return 0
case "eval":
// `eval`: evaluate JavaScript through the control server's POST /eval endpoint.
// --js-stdin and --js-file are mutually exclusive; show usage and exit with 2.
if jsStdin, jsFile != nil {
self.printHelp()
return 2
}
// Resolve the script source. Precedence: --js-file, then stdin, then --js,
// then the remaining positional arguments joined with single spaces.
let code: String = try {
if let jsFile, !jsFile.isEmpty {
return try String(contentsOfFile: jsFile, encoding: .utf8)
}
if jsStdin {
let data = FileHandle.standardInput.readDataToEndOfFile()
// Input that is not valid UTF-8 is treated as empty and rejected below.
return String(data: data, encoding: .utf8) ?? ""
}
if let js, !js.isEmpty { return js }
if !rest.isEmpty { return rest.joined(separator: " ") }
return ""
}()
// Nothing to evaluate (empty or whitespace-only): show usage and exit with 2.
if code.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
self.printHelp()
return 2
}
let res = try await self.httpJSON(
method: "POST",
url: baseURL.appendingPathComponent("/eval"),
body: [
"js": code,
// A missing --target-id is sent as an empty string.
"targetId": targetId ?? "",
"await": awaitPromise,
],
timeoutInterval: 15.0)
if jsonOutput {
self.printJSON(ok: true, result: res)
} else {
self.printEval(res: res)
}
return 0
case "query":
// `query`: list elements matching a CSS selector via GET /query.
// The selector comes from --selector, else from the first positional argument.
let sel = (selector ?? rest.first ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
if sel.isEmpty {
self.printHelp()
return 2
}
var url = baseURL.appendingPathComponent("/query")
var items: [URLQueryItem] = [URLQueryItem(name: "selector", value: sel)]
// Optional parameters are only sent when they carry a usable value.
if let targetId, !targetId.isEmpty {
items.append(URLQueryItem(name: "targetId", value: targetId))
}
if let limit, limit > 0 {
items.append(URLQueryItem(name: "limit", value: String(limit)))
}
url = self.withQuery(url, items: items)
let res = try await self.httpJSON(method: "GET", url: url, timeoutInterval: 15.0)
// `--format json` selects JSON output for this command, like the `jsonOutput` flag.
if jsonOutput || format == "json" {
self.printJSON(ok: true, result: res)
} else {
self.printQuery(res: res)
}
return 0
case "dom":
// `dom`: fetch a dump of the page (or of one element) via GET /dom.
// Any --format value other than "text" is sent as "html".
let fmt = (format == "text") ? "text" : "html"
var url = baseURL.appendingPathComponent("/dom")
var items: [URLQueryItem] = [URLQueryItem(name: "format", value: fmt)]
if let targetId, !targetId.isEmpty {
items.append(URLQueryItem(name: "targetId", value: targetId))
}
if let selector = selector?.trimmingCharacters(in: .whitespacesAndNewlines), !selector.isEmpty {
items.append(URLQueryItem(name: "selector", value: selector))
}
if let maxChars, maxChars > 0 {
items.append(URLQueryItem(name: "maxChars", value: String(maxChars)))
}
url = self.withQuery(url, items: items)
let res = try await self.httpJSON(method: "GET", url: url, timeoutInterval: 20.0)
// The dump is read from the response's `text` field; a missing field yields "".
let text = (res["text"] as? String) ?? ""
// With --out, write the dump to that path and report only the path on stdout.
if let out = outPath, !out.isEmpty {
try Data(text.utf8).write(to: URL(fileURLWithPath: out))
if jsonOutput {
self.printJSON(ok: true, result: ["ok": true, "out": out])
} else {
print(out)
}
return 0
}
if jsonOutput {
self.printJSON(ok: true, result: res)
} else {
print(text)
}
return 0
case "snapshot":
// `snapshot`: fetch a structured page snapshot via GET /snapshot.
// Any --format value other than "domSnapshot" is sent as "aria".
let fmt = (format == "domSnapshot") ? "domSnapshot" : "aria"
var url = baseURL.appendingPathComponent("/snapshot")
var items: [URLQueryItem] = [URLQueryItem(name: "format", value: fmt)]
if let targetId, !targetId.isEmpty {
items.append(URLQueryItem(name: "targetId", value: targetId))
}
if let limit, limit > 0 {
items.append(URLQueryItem(name: "limit", value: String(limit)))
}
url = self.withQuery(url, items: items)
let res = try await self.httpJSON(method: "GET", url: url, timeoutInterval: 20.0)
// With --out, write the whole response as pretty-printed JSON and report only the path.
if let out = outPath, !out.isEmpty {
let data = try JSONSerialization.data(withJSONObject: res, options: [.prettyPrinted])
try data.write(to: URL(fileURLWithPath: out))
if jsonOutput {
self.printJSON(ok: true, result: ["ok": true, "out": out])
} else {
print(out)
}
return 0
}
// Only the aria format has a text renderer; domSnapshot is always printed as JSON.
if jsonOutput || fmt == "domSnapshot" {
self.printJSON(ok: true, result: res)
} else {
self.printSnapshotAria(res: res)
}
return 0
default:
self.printHelp()
return 2
@@ -295,6 +449,74 @@ enum BrowserCLI {
}
}
/// Prints the response of `browser eval` as human-readable text.
///
/// - Parameter res: Decoded JSON body of the `/eval` response.
///
/// Rendering rules, in order:
/// 1. No `result` object: the whole response goes through `printResult`.
/// 2. `result.value` present: `null` prints as `null`, booleans as
///    `true`/`false`, arrays and objects as pretty-printed JSON, and every
///    other scalar through `String(describing:)`.
/// 3. Otherwise a non-empty `result.description` is printed.
/// 4. Otherwise the `result` object goes through `printResult`.
private static func printEval(res: [String: Any]) {
    guard let obj = res["result"] as? [String: Any] else {
        self.printResult(jsonOutput: false, res: res)
        return
    }

    if let value = obj["value"] {
        if value is NSNull {
            // JSON null decodes to NSNull, whose description is "<null>".
            print("null")
        } else if let number = value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
            // JSON booleans decode to CFBoolean-backed NSNumbers, which
            // String(describing:) would render as 1 / 0.
            print(number.boolValue ? "true" : "false")
        } else if JSONSerialization.isValidJSONObject(value),
                  let data = try? JSONSerialization.data(withJSONObject: value, options: [.prettyPrinted]),
                  let text = String(data: data, encoding: .utf8)
        {
            print(text)
        } else {
            // Remaining scalars (strings, numbers) are not valid top-level
            // JSON objects, so print them directly.
            print(String(describing: value))
        }
        return
    }

    if let desc = obj["description"] as? String, !desc.isEmpty {
        print(desc)
        return
    }

    self.printResult(jsonOutput: false, res: obj)
}
/// Prints the response of `browser query` as a numbered list of matches.
///
/// - Parameter res: Decoded JSON body of the `/query` response.
///
/// Each match is printed as `<index>. <tag#id.class1.class2.class3>`, with at
/// most three class names, followed by its text on the next line when the
/// text is non-empty. A response without a `matches` array is handed to
/// `printResult` unchanged.
private static func printQuery(res: [String: Any]) {
    guard let matches = res["matches"] as? [[String: Any]] else {
        self.printResult(jsonOutput: false, res: res)
        return
    }
    if matches.isEmpty {
        print("No matches.")
        return
    }
    for m in matches {
        let index = (m["index"] as? Int) ?? 0
        let tag = (m["tag"] as? String) ?? ""
        // An empty id must not produce a dangling "#".
        let id = (m["id"] as? String).map { $0.isEmpty ? "" : "#\($0)" } ?? ""
        let className = (m["className"] as? String) ?? ""
        // HTML class lists may be separated by any whitespace (tabs and
        // newlines included), not only by single spaces.
        let classes = className.split(whereSeparator: \.isWhitespace).prefix(3).map(String.init)
        let cls = classes.isEmpty ? "" : "." + classes.joined(separator: ".")
        let head = "\(index). <\(tag)\(id)\(cls)>"
        print(head)
        if let text = m["text"] as? String, !text.isEmpty {
            print(" \(text)")
        }
    }
}
/// Prints an `aria` snapshot response as an indented outline.
///
/// - Parameter res: Decoded JSON body of the `/snapshot` response.
///
/// Every node becomes one line: `- <role> "<name>" = "<value>"`, where the
/// name and value parts are omitted when empty. Indentation follows the
/// node's `depth`, clamped to the range 0...20. A response without a `nodes`
/// array is handed to `printResult` unchanged.
private static func printSnapshotAria(res: [String: Any]) {
    guard let nodes = res["nodes"] as? [[String: Any]] else {
        self.printResult(jsonOutput: false, res: res)
        return
    }
    for n in nodes {
        let depth = (n["depth"] as? Int) ?? 0
        let role = (n["role"] as? String) ?? "unknown"
        let name = (n["name"] as? String) ?? ""
        let value = (n["value"] as? String) ?? ""
        // `String(repeating:count:)` traps on a negative count, so clamp the
        // server-provided depth on both sides.
        let indent = String(repeating: " ", count: max(0, min(depth, 20)))
        var line = "\(indent)- \(role)"
        if !name.isEmpty { line += " \"\(name)\"" }
        if !value.isEmpty { line += " = \"\(value)\"" }
        print(line)
    }
}
#if SWIFT_PACKAGE
static func _testFormatTabs(res: [String: Any]) -> [String] {
self.formatTabs(res: res)
@@ -325,6 +547,14 @@ enum BrowserCLI {
clawdis-mac browser focus <targetId> [--url <...>]
clawdis-mac browser close <targetId> [--url <...>]
clawdis-mac browser screenshot [--target-id <id>] [--full-page] [--url <...>]
clawdis-mac browser eval [<js>] [--js <js>] [--js-file <path>] [--js-stdin]
[--target-id <id>] [--await] [--url <...>]
clawdis-mac browser query <selector> [--limit <n>] [--format <text|json>]
[--target-id <id>] [--url <...>]
clawdis-mac browser dom [--format <html|text>] [--selector <css>] [--max-chars <n>]
[--out <path>] [--target-id <id>] [--url <...>]
clawdis-mac browser snapshot [--format <aria|domSnapshot>] [--limit <n>] [--out <path>]
[--target-id <id>] [--url <...>]
Notes:
- Config defaults come from ~/.clawdis/clawdis.json (browser.enabled, browser.controlUrl).

View File

@@ -412,7 +412,7 @@ struct ClawdisCLI {
clawdis-mac canvas snapshot [--out <path>] [--session <key>]
Browser (clawd):
clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot
clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot|eval|query|dom|snapshot
Browser notes:
- Uses clawds dedicated Chrome/Chromium profile (separate user-data dir).
@@ -426,6 +426,10 @@ struct ClawdisCLI {
clawdis-mac browser open https://example.com
clawdis-mac browser tabs
clawdis-mac browser screenshot --full-page
clawdis-mac browser eval \"location.href\"
clawdis-mac browser query \"a\" --limit 5
clawdis-mac browser dom --format text --max-chars 5000
clawdis-mac browser snapshot --format aria --limit 200
Output:
Default output is text. Use --json for machine-readable output.

View File

@@ -35,3 +35,4 @@ read_when:
- For MCPs, mcporter writes to the home-scope config; re-run installs if you rotate tokens.
- Keep heartbeats enabled so the assistant can schedule reminders, monitor inboxes, and trigger camera captures.
- For browser-driven verification, use `clawdis browser` (tabs/status/screenshot) with the clawd-managed Chrome profile.
- For DOM inspection, use `clawdis browser eval|query|dom|snapshot` (and `--json`/`--out` when you need machine output).

View File

@@ -109,9 +109,20 @@ Minimum endpoints/methods (names illustrative):
- `browser.screenshot`
- params: `{ targetId?, fullPage?: false }` → returns a `MEDIA:` attachment URL (via the existing Clawdis media host)
DOM + inspection (v1):
- `browser.eval`
- params: `{ js, targetId?, await?: false }` → returns the CDP `Runtime.evaluate` result (best-effort `returnByValue`)
- `browser.query`
- params: `{ selector, targetId?, limit? }` → returns basic element summaries (tag/id/class/text/value/href/outerHTML)
- `browser.dom`
- params: `{ format: "html"|"text", targetId?, selector?, maxChars? }` → returns a truncated dump (`text` field)
- `browser.snapshot`
- params: `{ format: "aria"|"domSnapshot", targetId?, limit? }`
- `aria`: simplified Accessibility tree with `backendDOMNodeId` when available (future click/type hooks)
- `domSnapshot`: lightweight DOM walk snapshot (tree-ish, bounded by `limit`)
Nice-to-have (later):
- `browser.snapshot.aria` (AI-friendly snapshot with stable refs)
- `browser.click` / `browser.type` / `browser.waitFor` helpers built atop snapshot refs
- `browser.click` / `browser.type` / `browser.waitFor` helpers built atop snapshot refs / backend node ids
### “Is it open or closed?”

View File

@@ -3,7 +3,7 @@ import { createServer } from "node:http";
import { afterEach, describe, expect, it } from "vitest";
import { WebSocketServer } from "ws";
import { createTargetViaCdp } from "./cdp.js";
import { createTargetViaCdp, evaluateJavaScript, snapshotAria } from "./cdp.js";
describe("cdp", () => {
let httpServer: ReturnType<typeof createServer> | null = null;
@@ -70,4 +70,93 @@ describe("cdp", () => {
expect(created.targetId).toBe("TARGET_123");
});
// Verifies evaluateJavaScript against a local fake CDP endpoint: the expression
// must arrive unchanged and the RemoteObject in the reply must be returned as-is.
it("evaluates javascript via CDP", async () => {
wsServer = new WebSocketServer({ port: 0, host: "127.0.0.1" });
await new Promise<void>((resolve) => wsServer?.once("listening", resolve));
// Port 0 requests an ephemeral port; read back the one actually bound.
const wsPort = (wsServer.address() as { port: number }).port;
wsServer.on("connection", (socket) => {
socket.on("message", (data) => {
const msg = JSON.parse(String(data)) as {
id?: number;
method?: string;
params?: { expression?: string };
};
// Acknowledge the Runtime.enable call that precedes evaluation.
if (msg.method === "Runtime.enable") {
socket.send(JSON.stringify({ id: msg.id, result: {} }));
return;
}
if (msg.method === "Runtime.evaluate") {
expect(msg.params?.expression).toBe("1+1");
socket.send(
JSON.stringify({
id: msg.id,
result: { result: { type: "number", value: 2 } },
}),
);
}
});
});
const res = await evaluateJavaScript({
wsUrl: `ws://127.0.0.1:${wsPort}`,
expression: "1+1",
});
expect(res.result.type).toBe("number");
expect(res.result.value).toBe(2);
});
// Verifies snapshotAria against a local fake CDP endpoint that serves a two-node
// accessibility tree (a root with one button child).
it("captures an aria snapshot via CDP", async () => {
wsServer = new WebSocketServer({ port: 0, host: "127.0.0.1" });
await new Promise<void>((resolve) => wsServer?.once("listening", resolve));
// Port 0 requests an ephemeral port; read back the one actually bound.
const wsPort = (wsServer.address() as { port: number }).port;
wsServer.on("connection", (socket) => {
socket.on("message", (data) => {
const msg = JSON.parse(String(data)) as {
id?: number;
method?: string;
};
if (msg.method === "Accessibility.enable") {
socket.send(JSON.stringify({ id: msg.id, result: {} }));
return;
}
if (msg.method === "Accessibility.getFullAXTree") {
socket.send(
JSON.stringify({
id: msg.id,
result: {
nodes: [
{
nodeId: "1",
role: { value: "RootWebArea" },
name: { value: "" },
childIds: ["2"],
},
{
nodeId: "2",
role: { value: "button" },
name: { value: "OK" },
backendDOMNodeId: 42,
childIds: [],
},
],
},
}),
);
return;
}
});
});
const snap = await snapshotAria({ wsUrl: `ws://127.0.0.1:${wsPort}` });
// Nodes come back flattened in pre-order, with the child one level below the root.
expect(snap.nodes.length).toBe(2);
expect(snap.nodes[0]?.role).toBe("RootWebArea");
expect(snap.nodes[1]?.role).toBe("button");
expect(snap.nodes[1]?.name).toBe("OK");
expect(snap.nodes[1]?.backendDOMNodeId).toBe(42);
expect(snap.nodes[1]?.depth).toBe(1);
});
});

View File

@@ -78,6 +78,34 @@ async function fetchJson<T>(url: string, timeoutMs = 1500): Promise<T> {
}
}
/**
 * Opens a WebSocket to `wsUrl`, runs `fn` with a CDP `send` function bound to
 * that socket, and closes the socket afterwards.
 *
 * @param wsUrl WebSocket debugger URL to connect to.
 * @param fn Callback that issues CDP commands through `send`.
 * @returns Whatever `fn` resolves to.
 * @throws The connection error if the socket emits `error` before `open`, or
 *   whatever `fn` throws (rethrown after `closeWithError` is called).
 */
async function withCdpSocket<T>(
wsUrl: string,
fn: (send: CdpSendFn) => Promise<T>,
): Promise<T> {
// The handshake is given up after 5 seconds.
const ws = new WebSocket(wsUrl, { handshakeTimeout: 5000 });
const { send, closeWithError } = createCdpSender(ws);
// Register both listeners before awaiting so neither event can be missed.
const openPromise = new Promise<void>((resolve, reject) => {
ws.once("open", () => resolve());
ws.once("error", (err) => reject(err));
});
await openPromise;
try {
return await fn(send);
} catch (err) {
// NOTE(review): closeWithError presumably fails any in-flight requests — confirm in createCdpSender.
closeWithError(err instanceof Error ? err : new Error(String(err)));
throw err;
} finally {
// Always release the socket; a failure while closing is deliberately ignored.
try {
ws.close();
} catch {
// ignore
}
}
}
export async function captureScreenshotPng(opts: {
wsUrl: string;
fullPage?: boolean;
@@ -95,61 +123,43 @@ export async function captureScreenshot(opts: {
format?: "png" | "jpeg";
quality?: number; // jpeg only (0..100)
}): Promise<Buffer> {
const ws = new WebSocket(opts.wsUrl, { handshakeTimeout: 5000 });
const { send, closeWithError } = createCdpSender(ws);
return await withCdpSocket(opts.wsUrl, async (send) => {
await send("Page.enable");
const openPromise = new Promise<void>((resolve, reject) => {
ws.once("open", () => resolve());
ws.once("error", (err) => reject(err));
});
await openPromise;
await send("Page.enable");
let clip:
| { x: number; y: number; width: number; height: number; scale: number }
| undefined;
if (opts.fullPage) {
const metrics = (await send("Page.getLayoutMetrics")) as {
cssContentSize?: { width?: number; height?: number };
contentSize?: { width?: number; height?: number };
};
const size = metrics?.cssContentSize ?? metrics?.contentSize;
const width = Number(size?.width ?? 0);
const height = Number(size?.height ?? 0);
if (width > 0 && height > 0) {
clip = { x: 0, y: 0, width, height, scale: 1 };
let clip:
| { x: number; y: number; width: number; height: number; scale: number }
| undefined;
if (opts.fullPage) {
const metrics = (await send("Page.getLayoutMetrics")) as {
cssContentSize?: { width?: number; height?: number };
contentSize?: { width?: number; height?: number };
};
const size = metrics?.cssContentSize ?? metrics?.contentSize;
const width = Number(size?.width ?? 0);
const height = Number(size?.height ?? 0);
if (width > 0 && height > 0) {
clip = { x: 0, y: 0, width, height, scale: 1 };
}
}
}
const format = opts.format ?? "png";
const quality =
format === "jpeg"
? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85)))
: undefined;
const format = opts.format ?? "png";
const quality =
format === "jpeg"
? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85)))
: undefined;
const result = (await send("Page.captureScreenshot", {
format,
...(quality !== undefined ? { quality } : {}),
fromSurface: true,
captureBeyondViewport: true,
...(clip ? { clip } : {}),
})) as { data?: string };
const result = (await send("Page.captureScreenshot", {
format,
...(quality !== undefined ? { quality } : {}),
fromSurface: true,
captureBeyondViewport: true,
...(clip ? { clip } : {}),
})) as { data?: string };
const base64 = result?.data;
if (!base64) {
closeWithError(new Error("Screenshot failed: missing data"));
throw new Error("Screenshot failed: missing data");
}
try {
ws.close();
} catch {
// ignore
}
return Buffer.from(base64, "base64");
const base64 = result?.data;
if (!base64) throw new Error("Screenshot failed: missing data");
return Buffer.from(base64, "base64");
});
}
export async function createTargetViaCdp(opts: {
@@ -163,30 +173,348 @@ export async function createTargetViaCdp(opts: {
const wsUrl = String(version?.webSocketDebuggerUrl ?? "").trim();
if (!wsUrl) throw new Error("CDP /json/version missing webSocketDebuggerUrl");
const ws = new WebSocket(wsUrl, { handshakeTimeout: 5000 });
const { send, closeWithError } = createCdpSender(ws);
const openPromise = new Promise<void>((resolve, reject) => {
ws.once("open", () => resolve());
ws.once("error", (err) => reject(err));
return await withCdpSocket(wsUrl, async (send) => {
const created = (await send("Target.createTarget", { url: opts.url })) as {
targetId?: string;
};
const targetId = String(created?.targetId ?? "").trim();
if (!targetId)
throw new Error("CDP Target.createTarget returned no targetId");
return { targetId };
});
await openPromise;
const created = (await send("Target.createTarget", { url: opts.url })) as {
targetId?: string;
};
const targetId = String(created?.targetId ?? "").trim();
if (!targetId) {
closeWithError(new Error("CDP Target.createTarget returned no targetId"));
throw new Error("CDP Target.createTarget returned no targetId");
}
try {
ws.close();
} catch {
// ignore
}
return { targetId };
}
/** Shape of the `result` object read from a CDP `Runtime.evaluate` reply. */
export type CdpRemoteObject = {
type: string;
subtype?: string;
// By-value payload; evaluateJavaScript requests `returnByValue` by default.
value?: unknown;
description?: string;
unserializableValue?: string;
preview?: unknown;
};
/** Shape of the `exceptionDetails` object read from a CDP `Runtime.evaluate` reply. */
export type CdpExceptionDetails = {
text?: string;
lineNumber?: number;
columnNumber?: number;
exception?: CdpRemoteObject;
stackTrace?: unknown;
};
/**
 * Evaluates a JavaScript expression in the page behind `opts.wsUrl` using the
 * CDP `Runtime.evaluate` command.
 *
 * @param opts.wsUrl WebSocket debugger URL of the target.
 * @param opts.expression Source text to evaluate.
 * @param opts.awaitPromise Forwarded as `awaitPromise`; defaults to false.
 * @param opts.returnByValue Forwarded as `returnByValue`; defaults to true.
 * @returns The reply's `result` plus its `exceptionDetails`, if any. Exceptions
 *   raised by the script are returned, not thrown.
 * @throws Error when the reply carries no `result`.
 */
export async function evaluateJavaScript(opts: {
  wsUrl: string;
  expression: string;
  awaitPromise?: boolean;
  returnByValue?: boolean;
}): Promise<{
  result: CdpRemoteObject;
  exceptionDetails?: CdpExceptionDetails;
}> {
  const params = {
    expression: opts.expression,
    awaitPromise: Boolean(opts.awaitPromise),
    returnByValue: opts.returnByValue ?? true,
    userGesture: true,
    includeCommandLineAPI: true,
  };

  return await withCdpSocket(opts.wsUrl, async (send) => {
    // Enabling the Runtime domain is best-effort; a failure here is ignored.
    await send("Runtime.enable").catch(() => {});

    const reply = (await send("Runtime.evaluate", params)) as {
      result?: CdpRemoteObject;
      exceptionDetails?: CdpExceptionDetails;
    };
    if (!reply?.result) {
      throw new Error("CDP Runtime.evaluate returned no result");
    }
    return { result: reply.result, exceptionDetails: reply.exceptionDetails };
  });
}
/** One flattened accessibility-tree node as produced by formatAriaSnapshot. */
export type AriaSnapshotNode = {
// "ax" followed by the node's 1-based position in the output list.
ref: string;
// "unknown" when the raw node has no role value.
role: string;
// Empty string when the raw node has no name value.
name: string;
// Present only when non-empty.
value?: string;
// Present only when non-empty.
description?: string;
// Present only when the raw node carries a numeric backendDOMNodeId.
backendDOMNodeId?: number;
// Distance from the chosen root node; the root itself has depth 0.
depth: number;
};
/**
 * Subset of the node fields read from the `nodes` array of an
 * `Accessibility.getFullAXTree` reply. Every field is optional.
 */
type RawAXNode = {
nodeId?: string;
role?: { value?: string };
name?: { value?: string };
value?: { value?: string };
description?: { value?: string };
// Ids of child nodes; resolved against `nodeId` when the tree is flattened.
childIds?: string[];
backendDOMNodeId?: number;
};
/**
 * Extracts the `value` of a CDP accessibility value wrapper as a string.
 * Non-objects and missing values yield ""; non-string values are stringified.
 */
function axValue(v: unknown): string {
  if (!v || typeof v !== "object") return "";
  const value = (v as { value?: unknown }).value;
  return typeof value === "string" ? value : String(value ?? "");
}

/**
 * Flattens a raw accessibility tree into a pre-order list of at most `limit`
 * nodes, each annotated with its depth below the root.
 *
 * The root is the first node that no other node lists as a child, falling
 * back to the first node. Every node is emitted at most once, so malformed
 * input with cyclic or shared `childIds` cannot repeat entries until `limit`
 * is reached.
 *
 * @param nodes Raw nodes as returned by `Accessibility.getFullAXTree`.
 * @param limit Maximum number of nodes to emit.
 * @returns Flattened nodes in pre-order; empty when no root with an id exists.
 */
function formatAriaSnapshot(
  nodes: RawAXNode[],
  limit: number,
): AriaSnapshotNode[] {
  const byId = new Map<string, RawAXNode>();
  const referenced = new Set<string>();
  for (const n of nodes) {
    if (n.nodeId) byId.set(n.nodeId, n);
    for (const c of n.childIds ?? []) referenced.add(c);
  }

  // Heuristic: pick a root-ish node (one that is not referenced as a child), else first.
  const root =
    nodes.find((n) => n.nodeId && !referenced.has(n.nodeId)) ?? nodes[0];
  if (!root?.nodeId) return [];

  const out: AriaSnapshotNode[] = [];
  const visited = new Set<string>();
  const stack: Array<{ id: string; depth: number }> = [
    { id: root.nodeId, depth: 0 },
  ];
  while (stack.length && out.length < limit) {
    const popped = stack.pop();
    if (!popped) break;
    const { id, depth } = popped;
    // A node reachable through several parents (or a cycle) is emitted once.
    if (visited.has(id)) continue;
    const n = byId.get(id);
    if (!n) continue;
    visited.add(id);

    const role = axValue(n.role);
    const name = axValue(n.name);
    const value = axValue(n.value);
    const description = axValue(n.description);
    const ref = `ax${out.length + 1}`;
    out.push({
      ref,
      role: role || "unknown",
      name: name || "",
      ...(value ? { value } : {}),
      ...(description ? { description } : {}),
      ...(typeof n.backendDOMNodeId === "number"
        ? { backendDOMNodeId: n.backendDOMNodeId }
        : {}),
      depth,
    });

    // Push children in reverse so they are popped in document order.
    const children = (n.childIds ?? []).filter(
      (c) => byId.has(c) && !visited.has(c),
    );
    for (let i = children.length - 1; i >= 0; i--) {
      const child = children[i];
      if (child) stack.push({ id: child, depth: depth + 1 });
    }
  }
  return out;
}
/**
 * Captures the page's accessibility tree via CDP and returns it flattened.
 *
 * @param opts.wsUrl WebSocket debugger URL of the target.
 * @param opts.limit Maximum number of nodes, clamped to 1..2000. Defaults to
 *   500; a missing or non-finite value also falls back to 500.
 * @returns The flattened nodes produced by `formatAriaSnapshot`.
 */
export async function snapshotAria(opts: {
  wsUrl: string;
  limit?: number;
}): Promise<{ nodes: AriaSnapshotNode[] }> {
  // NaN would survive Math.min/Math.max and make the traversal emit nothing.
  const requested = Number.isFinite(opts.limit) ? (opts.limit as number) : 500;
  const limit = Math.max(1, Math.min(2000, Math.floor(requested)));
  return await withCdpSocket(opts.wsUrl, async (send) => {
    // Enabling the Accessibility domain is best-effort; a failure here is ignored.
    await send("Accessibility.enable").catch(() => {});
    const res = (await send("Accessibility.getFullAXTree")) as {
      nodes?: RawAXNode[];
    };
    const nodes = Array.isArray(res?.nodes) ? res.nodes : [];
    return { nodes: formatAriaSnapshot(nodes, limit) };
  });
}
/**
 * Walks the page's element tree in document order (inside the page, via
 * `Runtime.evaluate`) and returns a bounded, flattened snapshot.
 *
 * @param opts.wsUrl WebSocket debugger URL of the target.
 * @param opts.limit Maximum number of elements, clamped to 1..5000. Defaults
 *   to 800; a non-finite value also falls back to 800.
 * @param opts.maxTextChars Per-element text cap, clamped to 0..5000, where 0
 *   disables truncation. Defaults to 220; non-finite values fall back to 220.
 * @returns The collected nodes, or an empty list when the evaluation does not
 *   yield an object with a `nodes` array.
 */
export async function snapshotDom(opts: {
  wsUrl: string;
  limit?: number;
  maxTextChars?: number;
}): Promise<{
  nodes: DomSnapshotNode[];
}> {
  // NaN would be serialized as `null` into the page script and stop the walk
  // before the first element, so non-finite inputs fall back to the defaults.
  const requestedLimit = Number.isFinite(opts.limit)
    ? (opts.limit as number)
    : 800;
  const requestedText = Number.isFinite(opts.maxTextChars)
    ? (opts.maxTextChars as number)
    : 220;
  const limit = Math.max(1, Math.min(5000, Math.floor(requestedLimit)));
  const maxTextChars = Math.max(0, Math.min(5000, Math.floor(requestedText)));

  const expression = `(() => {
    const maxNodes = ${JSON.stringify(limit)};
    const maxText = ${JSON.stringify(maxTextChars)};
    const nodes = [];
    const root = document.documentElement;
    if (!root) return { nodes };
    const stack = [{ el: root, depth: 0, parentRef: null }];
    while (stack.length && nodes.length < maxNodes) {
      const cur = stack.pop();
      const el = cur.el;
      if (!el || el.nodeType !== 1) continue;
      const ref = "n" + String(nodes.length + 1);
      const tag = (el.tagName || "").toLowerCase();
      const id = el.id ? String(el.id) : undefined;
      // SVG elements expose className as an SVGAnimatedString; read the attribute instead.
      const rawClass = typeof el.className === "string"
        ? el.className
        : (el.getAttribute ? el.getAttribute("class") : "");
      const className = rawClass ? String(rawClass).slice(0, 300) : undefined;
      const role = el.getAttribute && el.getAttribute("role") ? String(el.getAttribute("role")) : undefined;
      const name = el.getAttribute && el.getAttribute("aria-label") ? String(el.getAttribute("aria-label")) : undefined;
      let text = "";
      try { text = String(el.innerText || "").trim(); } catch {}
      if (maxText && text.length > maxText) text = text.slice(0, maxText) + "…";
      const href = (el.href !== undefined && el.href !== null) ? String(el.href) : undefined;
      const type = (el.type !== undefined && el.type !== null) ? String(el.type) : undefined;
      const value = (el.value !== undefined && el.value !== null) ? String(el.value).slice(0, 500) : undefined;
      nodes.push({
        ref,
        parentRef: cur.parentRef,
        depth: cur.depth,
        tag,
        ...(id ? { id } : {}),
        ...(className ? { className } : {}),
        ...(role ? { role } : {}),
        ...(name ? { name } : {}),
        ...(text ? { text } : {}),
        ...(href ? { href } : {}),
        ...(type ? { type } : {}),
        ...(value ? { value } : {}),
      });
      const children = el.children ? Array.from(el.children) : [];
      for (let i = children.length - 1; i >= 0; i--) {
        stack.push({ el: children[i], depth: cur.depth + 1, parentRef: ref });
      }
    }
    return { nodes };
  })()`;

  const evaluated = await evaluateJavaScript({
    wsUrl: opts.wsUrl,
    expression,
    awaitPromise: true,
    returnByValue: true,
  });
  const value = evaluated.result?.value as unknown;
  if (!value || typeof value !== "object") return { nodes: [] };
  const nodes = (value as { nodes?: unknown }).nodes;
  return { nodes: Array.isArray(nodes) ? (nodes as DomSnapshotNode[]) : [] };
}
/** One element in the flattened DOM walk returned by snapshotDom. */
export type DomSnapshotNode = {
// "n" followed by the element's 1-based position in the output list.
ref: string;
// `ref` of the parent element; null for the document element.
parentRef: string | null;
// Distance from the document element, which has depth 0.
depth: number;
// Lower-cased tag name.
tag: string;
// The optional fields below are omitted when empty.
id?: string;
// Capped at 300 characters.
className?: string;
// Value of the `role` attribute.
role?: string;
// Value of the `aria-label` attribute.
name?: string;
// Trimmed innerText, truncated with a trailing "…" when over the text cap.
text?: string;
href?: string;
type?: string;
// Capped at 500 characters.
value?: string;
};
/**
 * Returns the page's (or one element's) `outerHTML` or `innerText`.
 *
 * @param opts.wsUrl WebSocket debugger URL of the target.
 * @param opts.format "html" dumps `outerHTML`; "text" dumps `innerText`.
 * @param opts.maxChars Output cap, clamped to 0..5,000,000, where 0 disables
 *   truncation. Defaults to 200,000; non-finite values fall back to 200,000.
 * @param opts.selector Optional CSS selector. When it matches nothing the
 *   whole document (html) or body (text) is used instead.
 * @returns The dump in `text`, with a truncation marker appended when cut.
 * @throws Error when the page-side script raises, e.g. for an invalid selector.
 */
export async function getDomText(opts: {
  wsUrl: string;
  format: "html" | "text";
  maxChars?: number;
  selector?: string;
}): Promise<{ text: string }> {
  // NaN would be serialized as `null` into the page script and silently
  // disable truncation, so non-finite inputs fall back to the default cap.
  const requestedMax = Number.isFinite(opts.maxChars)
    ? (opts.maxChars as number)
    : 200_000;
  const maxChars = Math.max(0, Math.min(5_000_000, Math.floor(requestedMax)));
  const selectorExpr = opts.selector ? JSON.stringify(opts.selector) : "null";

  const expression = `(() => {
    const fmt = ${JSON.stringify(opts.format)};
    const max = ${JSON.stringify(maxChars)};
    const sel = ${selectorExpr};
    const pick = sel ? document.querySelector(sel) : null;
    let out = "";
    if (fmt === "text") {
      const el = pick || document.body || document.documentElement;
      try { out = String(el && el.innerText ? el.innerText : ""); } catch { out = ""; }
    } else {
      const el = pick || document.documentElement;
      try { out = String(el && el.outerHTML ? el.outerHTML : ""); } catch { out = ""; }
    }
    if (max && out.length > max) out = out.slice(0, max) + "\\n<!-- …truncated… -->";
    return out;
  })()`;

  const evaluated = await evaluateJavaScript({
    wsUrl: opts.wsUrl,
    expression,
    awaitPromise: true,
    returnByValue: true,
  });
  // Surface page-side failures instead of reporting them as an empty dump.
  if (evaluated.exceptionDetails) {
    throw new Error(
      evaluated.exceptionDetails.exception?.description ||
        evaluated.exceptionDetails.text ||
        "DOM extraction failed",
    );
  }
  const text = String(evaluated.result?.value ?? "");
  return { text };
}
/**
 * Runs `document.querySelectorAll` in the page and summarizes the matches.
 *
 * @param opts.wsUrl WebSocket debugger URL of the target.
 * @param opts.selector CSS selector to match.
 * @param opts.limit Maximum number of matches, clamped to 1..200 (default 20).
 * @param opts.maxTextChars Per-match text cap, clamped to 0..5000 (default
 *   500); 0 disables truncation.
 * @param opts.maxHtmlChars Per-match outerHTML cap, clamped to 0..20000
 *   (default 1500); 0 disables truncation.
 *   Non-finite numeric options fall back to their defaults.
 * @returns Match summaries with 1-based `index`, in document order.
 * @throws Error when the page-side script raises, e.g. for an invalid selector.
 */
export async function querySelector(opts: {
  wsUrl: string;
  selector: string;
  limit?: number;
  maxTextChars?: number;
  maxHtmlChars?: number;
}): Promise<{
  matches: QueryMatch[];
}> {
  // NaN would be serialized as `null` into the page script; for the limit that
  // turns into slice(0, null) and therefore zero matches.
  const finiteOr = (value: number | undefined, fallback: number): number =>
    Number.isFinite(value) ? (value as number) : fallback;
  const limit = Math.max(
    1,
    Math.min(200, Math.floor(finiteOr(opts.limit, 20))),
  );
  const maxText = Math.max(
    0,
    Math.min(5000, Math.floor(finiteOr(opts.maxTextChars, 500))),
  );
  const maxHtml = Math.max(
    0,
    Math.min(20000, Math.floor(finiteOr(opts.maxHtmlChars, 1500))),
  );

  const expression = `(() => {
    const sel = ${JSON.stringify(opts.selector)};
    const lim = ${JSON.stringify(limit)};
    const maxText = ${JSON.stringify(maxText)};
    const maxHtml = ${JSON.stringify(maxHtml)};
    const els = Array.from(document.querySelectorAll(sel)).slice(0, lim);
    return els.map((el, i) => {
      const tag = (el.tagName || "").toLowerCase();
      const id = el.id ? String(el.id) : undefined;
      // SVG elements expose className as an SVGAnimatedString; read the attribute instead.
      const rawClass = typeof el.className === "string"
        ? el.className
        : (el.getAttribute ? el.getAttribute("class") : "");
      const className = rawClass ? String(rawClass).slice(0, 300) : undefined;
      let text = "";
      try { text = String(el.innerText || "").trim(); } catch {}
      if (maxText && text.length > maxText) text = text.slice(0, maxText) + "…";
      const value = (el.value !== undefined && el.value !== null) ? String(el.value).slice(0, 500) : undefined;
      const href = (el.href !== undefined && el.href !== null) ? String(el.href) : undefined;
      let outerHTML = "";
      try { outerHTML = String(el.outerHTML || ""); } catch {}
      if (maxHtml && outerHTML.length > maxHtml) outerHTML = outerHTML.slice(0, maxHtml) + "…";
      return {
        index: i + 1,
        tag,
        ...(id ? { id } : {}),
        ...(className ? { className } : {}),
        ...(text ? { text } : {}),
        ...(value ? { value } : {}),
        ...(href ? { href } : {}),
        ...(outerHTML ? { outerHTML } : {}),
      };
    });
  })()`;

  const evaluated = await evaluateJavaScript({
    wsUrl: opts.wsUrl,
    expression,
    awaitPromise: true,
    returnByValue: true,
  });
  // An invalid selector makes querySelectorAll throw; report that instead of
  // presenting it as "no matches".
  if (evaluated.exceptionDetails) {
    throw new Error(
      evaluated.exceptionDetails.exception?.description ||
        evaluated.exceptionDetails.text ||
        "selector query failed",
    );
  }
  const matches = evaluated.result?.value;
  return { matches: Array.isArray(matches) ? (matches as QueryMatch[]) : [] };
}
/** Summary of one element matched by querySelector. */
export type QueryMatch = {
// 1-based position among the returned matches.
index: number;
// Lower-cased tag name.
tag: string;
// The optional fields below are omitted when empty.
id?: string;
// Capped at 300 characters.
className?: string;
// Trimmed innerText, truncated with a trailing "…" when over the text cap.
text?: string;
// Capped at 500 characters.
value?: string;
href?: string;
// Truncated with a trailing "…" when over the HTML cap.
outerHTML?: string;
};

View File

@@ -28,6 +28,83 @@ export type ScreenshotResult = {
url: string;
};
/** Success body of `POST /eval`, as requested by browserEval. */
export type EvalResult = {
ok: true;
targetId: string;
url: string;
// The evaluation's result object, passed through from the server.
result: {
type: string;
subtype?: string;
value?: unknown;
description?: string;
unserializableValue?: string;
preview?: unknown;
};
};
/** Success body of `GET /query`, as requested by browserQuery. */
export type QueryResult = {
ok: true;
targetId: string;
url: string;
// One entry per matched element.
matches: Array<{
index: number;
tag: string;
id?: string;
className?: string;
text?: string;
value?: string;
href?: string;
outerHTML?: string;
}>;
};
/** Success body of `GET /dom`, as requested by browserDom. */
export type DomResult = {
ok: true;
targetId: string;
url: string;
format: "html" | "text";
// The dump itself, for either format.
text: string;
};
/** One node of an `aria` snapshot as seen by HTTP clients. */
export type SnapshotAriaNode = {
ref: string;
role: string;
name: string;
value?: string;
description?: string;
backendDOMNodeId?: number;
depth: number;
};
/**
 * Success body of `GET /snapshot`, as requested by browserSnapshot.
 * Discriminated by `format`, which decides the shape of `nodes`.
 */
export type SnapshotResult =
| {
ok: true;
format: "aria";
targetId: string;
url: string;
nodes: SnapshotAriaNode[];
}
| {
ok: true;
format: "domSnapshot";
targetId: string;
url: string;
nodes: Array<{
ref: string;
parentRef: string | null;
depth: number;
tag: string;
id?: string;
className?: string;
role?: string;
name?: string;
text?: string;
href?: string;
type?: string;
value?: string;
}>;
};
function unwrapCause(err: unknown): unknown {
if (!err || typeof err !== "object") return null;
const cause = (err as { cause?: unknown }).cause;
@@ -172,3 +249,80 @@ export async function browserScreenshot(
timeoutMs: 20000,
});
}
/**
 * Asks the browser control server to evaluate JavaScript in a tab.
 *
 * @param baseUrl Base URL of the control server.
 * @param opts.js Source text to evaluate.
 * @param opts.targetId Optional tab id to evaluate in.
 * @param opts.awaitPromise Sent as `await`, coerced to a boolean.
 * @returns The parsed `/eval` response (15 s request timeout).
 */
export async function browserEval(
  baseUrl: string,
  opts: {
    js: string;
    targetId?: string;
    awaitPromise?: boolean;
  },
): Promise<EvalResult> {
  const payload = {
    js: opts.js,
    targetId: opts.targetId,
    await: Boolean(opts.awaitPromise),
  };
  const endpoint = `${baseUrl}/eval`;
  return await fetchJson<EvalResult>(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
    timeoutMs: 15000,
  });
}
/**
 * Asks the browser control server for the elements matching a CSS selector.
 *
 * @param baseUrl Base URL of the control server.
 * @param opts.selector CSS selector; always sent.
 * @param opts.targetId Optional tab id; sent only when non-empty.
 * @param opts.limit Optional match cap; sent only when it is a number.
 * @returns The parsed `/query` response (15 s request timeout).
 */
export async function browserQuery(
  baseUrl: string,
  opts: {
    selector: string;
    targetId?: string;
    limit?: number;
  },
): Promise<QueryResult> {
  const { selector, targetId, limit } = opts;
  const params = new URLSearchParams();
  params.append("selector", selector);
  if (targetId) {
    params.append("targetId", targetId);
  }
  if (typeof limit === "number") {
    params.append("limit", String(limit));
  }
  const endpoint = `${baseUrl}/query?${params.toString()}`;
  return await fetchJson<QueryResult>(endpoint, { timeoutMs: 15000 });
}
/**
 * Asks the browser control server for an HTML or text dump of a tab.
 *
 * @param baseUrl Base URL of the control server.
 * @param opts.format "html" or "text"; always sent.
 * @param opts.targetId Optional tab id; sent only when non-empty.
 * @param opts.maxChars Optional output cap; sent only when it is a number.
 * @param opts.selector Optional CSS selector; sent only when non-empty.
 * @returns The parsed `/dom` response (20 s request timeout).
 */
export async function browserDom(
  baseUrl: string,
  opts: {
    format: "html" | "text";
    targetId?: string;
    maxChars?: number;
    selector?: string;
  },
): Promise<DomResult> {
  const { format, targetId, maxChars, selector } = opts;
  const params = new URLSearchParams();
  params.append("format", format);
  if (targetId) {
    params.append("targetId", targetId);
  }
  if (typeof maxChars === "number") {
    params.append("maxChars", String(maxChars));
  }
  if (selector) {
    params.append("selector", selector);
  }
  const endpoint = `${baseUrl}/dom?${params.toString()}`;
  return await fetchJson<DomResult>(endpoint, { timeoutMs: 20000 });
}
/**
 * Asks the browser control server for a structured snapshot of a tab.
 *
 * @param baseUrl Base URL of the control server.
 * @param opts.format "aria" or "domSnapshot"; always sent.
 * @param opts.targetId Optional tab id; sent only when non-empty.
 * @param opts.limit Optional node cap; sent only when it is a number.
 * @returns The parsed `/snapshot` response (20 s request timeout).
 */
export async function browserSnapshot(
  baseUrl: string,
  opts: {
    format: "aria" | "domSnapshot";
    targetId?: string;
    limit?: number;
  },
): Promise<SnapshotResult> {
  const { format, targetId, limit } = opts;
  const params = new URLSearchParams();
  params.append("format", format);
  if (targetId) {
    params.append("targetId", targetId);
  }
  if (typeof limit === "number") {
    params.append("limit", String(limit));
  }
  const endpoint = `${baseUrl}/snapshot?${params.toString()}`;
  return await fetchJson<SnapshotResult>(endpoint, { timeoutMs: 20000 });
}

View File

@@ -10,6 +10,11 @@ import {
captureScreenshot,
captureScreenshotPng,
createTargetViaCdp,
evaluateJavaScript,
getDomText,
querySelector,
snapshotAria,
snapshotDom,
} from "./cdp.js";
import {
isChromeReachable,
@@ -178,6 +183,34 @@ async function ensureBrowserAvailable(runtime: RuntimeEnv): Promise<void> {
return;
}
/**
 * Makes sure the browser is available and returns the tab to operate on.
 *
 * When no tab exists, an `about:blank` tab is opened first. Without a
 * `targetId` the first listed tab is chosen; otherwise the id is resolved
 * through `resolveTargetIdFromTabs`.
 *
 * @param runtime Runtime environment passed on to `ensureBrowserAvailable`.
 * @param targetId Optional tab id to resolve.
 * @returns The chosen tab, which is guaranteed to have a `wsUrl`.
 * @throws Error("Browser server not started") when there is no server state.
 * @throws Error("ambiguous target id prefix") when the id resolves ambiguously.
 * @throws Error("tab not found") when nothing resolves or the tab has no `wsUrl`.
 */
async function ensureTabAvailable(runtime: RuntimeEnv, targetId?: string) {
  if (!state) throw new Error("Browser server not started");
  await ensureBrowserAvailable(runtime);

  // List once; only re-list when a blank tab had to be opened.
  let tabs = await listTabs(state.cdpPort);
  if (tabs.length === 0) {
    await openTab(state.cdpPort, "about:blank");
    tabs = await listTabs(state.cdpPort);
  }

  let chosen = tabs.at(0) ?? null;
  if (targetId) {
    const resolved = resolveTargetIdFromTabs(targetId, tabs);
    if (!resolved.ok) {
      if (resolved.reason === "ambiguous") {
        throw new Error("ambiguous target id prefix");
      }
      throw new Error("tab not found");
    }
    chosen = tabs.find((t) => t.targetId === resolved.targetId) ?? null;
  }

  if (!chosen?.wsUrl) throw new Error("tab not found");
  return chosen;
}
export async function startBrowserControlServerFromConfig(
runtime: RuntimeEnv = defaultRuntime,
): Promise<BrowserServerState | null> {
@@ -374,6 +407,160 @@ export async function startBrowserControlServerFromConfig(
}
});
/**
 * Translates a tab-resolution failure into an HTTP status and message.
 *
 * The error is stringified and searched for the known failure texts; the
 * ambiguity check takes precedence over the not-found check.
 *
 * @param err Any thrown value.
 * @returns `{ status, message }` for a recognized failure, otherwise null.
 */
function mapTabError(err: unknown) {
  const text = String(err);
  const known = [
    { status: 409, message: "ambiguous target id prefix" },
    { status: 404, message: "tab not found" },
  ];
  for (const candidate of known) {
    if (text.includes(candidate.message)) {
      return { status: candidate.status, message: candidate.message };
    }
  }
  return null;
}
// POST /eval — evaluate JavaScript in a tab.
// Body: { js, targetId?, await? }. Replies { ok, targetId, url, result } on success,
// 400 for a missing script or a script exception, 404/409 for tab-resolution
// failures, 503 when the server state is missing, and 500 otherwise.
app.post("/eval", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const js = String((req.body as { js?: unknown })?.js ?? "").trim();
const targetId = String(
(req.body as { targetId?: unknown })?.targetId ?? "",
).trim();
const awaitPromise = Boolean((req.body as { await?: unknown })?.await);
if (!js) return jsonError(res, 400, "js is required");
try {
// An empty targetId means "no preference" and is passed on as undefined.
const tab = await ensureTabAvailable(runtime, targetId || undefined);
const evaluated = await evaluateJavaScript({
wsUrl: tab.wsUrl ?? "",
expression: js,
awaitPromise,
returnByValue: true,
});
// A script-level exception is the caller's error, so report it as 400.
if (evaluated.exceptionDetails) {
const msg =
evaluated.exceptionDetails.exception?.description ||
evaluated.exceptionDetails.text ||
"JavaScript evaluation failed";
return jsonError(res, 400, msg);
}
res.json({
ok: true,
targetId: tab.targetId,
url: tab.url,
result: evaluated.result,
});
} catch (err) {
// Tab-resolution failures get their own status codes; everything else is a 500.
const mapped = mapTabError(err);
if (mapped) return jsonError(res, mapped.status, mapped.message);
jsonError(res, 500, String(err));
}
});
// GET /query — list elements matching a CSS selector in a tab.
// Query parameters: `selector` (required), `targetId` (optional),
// `limit` (optional, numeric).
// Responds 503 when the server state is unset, 400 when `selector` is empty,
// the status from mapTabError for tab lookup failures, and 500 otherwise.
app.get("/query", async (req, res) => {
  if (!state) return jsonError(res, 503, "browser server not started");
  const selector =
    typeof req.query.selector === "string" ? req.query.selector.trim() : "";
  const targetId =
    typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
  // Number("abc") is NaN; drop non-finite values instead of forwarding them,
  // so a malformed ?limit= behaves like an omitted one.
  const parsedLimit =
    typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
  const limit = Number.isFinite(parsedLimit) ? parsedLimit : undefined;
  if (!selector) return jsonError(res, 400, "selector is required");
  try {
    const tab = await ensureTabAvailable(runtime, targetId || undefined);
    const result = await querySelector({
      wsUrl: tab.wsUrl ?? "",
      selector,
      limit,
    });
    res.json({ ok: true, targetId: tab.targetId, url: tab.url, ...result });
  } catch (err) {
    const mapped = mapTabError(err);
    if (mapped) return jsonError(res, mapped.status, mapped.message);
    jsonError(res, 500, String(err));
  }
});
// GET /dom — dump a tab's DOM as HTML or text.
// Query parameters: `format` ("text"; anything else means "html"),
// `targetId` (optional), `selector` (optional), `maxChars` (optional, numeric).
// Responds 503 when the server state is unset, the status from mapTabError
// for tab lookup failures, and 500 otherwise.
app.get("/dom", async (req, res) => {
  if (!state) return jsonError(res, 503, "browser server not started");
  const targetId =
    typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
  const format = req.query.format === "text" ? "text" : "html";
  const selector =
    typeof req.query.selector === "string" ? req.query.selector.trim() : "";
  // Number("abc") is NaN; drop non-finite values instead of forwarding them,
  // so a malformed ?maxChars= behaves like an omitted one.
  const parsedMaxChars =
    typeof req.query.maxChars === "string"
      ? Number(req.query.maxChars)
      : undefined;
  const maxChars = Number.isFinite(parsedMaxChars) ? parsedMaxChars : undefined;
  try {
    const tab = await ensureTabAvailable(runtime, targetId || undefined);
    const result = await getDomText({
      wsUrl: tab.wsUrl ?? "",
      format,
      maxChars,
      selector: selector || undefined,
    });
    res.json({
      ok: true,
      targetId: tab.targetId,
      url: tab.url,
      format,
      ...result,
    });
  } catch (err) {
    const mapped = mapTabError(err);
    if (mapped) return jsonError(res, mapped.status, mapped.message);
    jsonError(res, 500, String(err));
  }
});
// GET /snapshot — capture a snapshot of a tab.
// Query parameters: `format` ("domSnapshot"; anything else means "aria"),
// `targetId` (optional), `limit` (optional, numeric).
// Responds 503 when the server state is unset, the status from mapTabError
// for tab lookup failures, and 500 otherwise.
app.get("/snapshot", async (req, res) => {
  if (!state) return jsonError(res, 503, "browser server not started");
  const targetId =
    typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
  const format = req.query.format === "domSnapshot" ? "domSnapshot" : "aria";
  // Number("abc") is NaN; drop non-finite values instead of forwarding them,
  // so a malformed ?limit= behaves like an omitted one.
  const parsedLimit =
    typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
  const limit = Number.isFinite(parsedLimit) ? parsedLimit : undefined;
  try {
    const tab = await ensureTabAvailable(runtime, targetId || undefined);
    const wsUrl = tab.wsUrl ?? "";
    // Both formats share the same response envelope; only the capture differs.
    const snap =
      format === "aria"
        ? await snapshotAria({ wsUrl, limit })
        : await snapshotDom({ wsUrl, limit });
    return res.json({
      ok: true,
      format,
      targetId: tab.targetId,
      url: tab.url,
      ...snap,
    });
  } catch (err) {
    const mapped = mapTabError(err);
    if (mapped) return jsonError(res, mapped.status, mapped.message);
    jsonError(res, 500, String(err));
  }
});
const port = resolved.controlPort;
const server = await new Promise<Server>((resolve, reject) => {
const s = app.listen(port, "127.0.0.1", () => resolve(s));

View File

@@ -2,9 +2,13 @@ import chalk from "chalk";
import { Command } from "commander";
import {
browserCloseTab,
browserDom,
browserEval,
browserFocusTab,
browserOpenTab,
browserQuery,
browserScreenshot,
browserSnapshot,
browserStart,
browserStatus,
browserStop,
@@ -433,6 +437,10 @@ Examples:
clawdis browser open https://example.com
clawdis browser screenshot # emits MEDIA:<path>
clawdis browser screenshot <targetId> --full-page
clawdis browser eval "location.href"
clawdis browser query "a" --limit 5
clawdis browser dom --format text --max-chars 5000
clawdis browser snapshot --format aria --limit 200
`,
)
.action(() => {
@@ -628,5 +636,231 @@ Examples:
}
});
// `browser eval` — evaluate JavaScript in the page and print the result.
// The script comes from the positional argument, --js-file, or --js-stdin
// (the two flags are mutually exclusive). Exits 2 on usage errors and 1 when
// the request fails.
browser
  .command("eval")
  .description("Evaluate JavaScript in the page context")
  .argument("[js]", "JavaScript expression (or use --js-file/--js-stdin)")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--await", "Await promises (Runtime.evaluate awaitPromise)", false)
  .option("--js-file <path>", "Read JavaScript from a file")
  .option("--js-stdin", "Read JavaScript from stdin", false)
  .action(async (jsArg: string | undefined, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);

    if (opts.jsFile && opts.jsStdin) {
      defaultRuntime.error(danger("Use either --js-file or --js-stdin."));
      defaultRuntime.exit(2);
      return;
    }

    // Source precedence: --js-file, then --js-stdin, then the positional arg.
    let js: string;
    if (opts.jsFile) {
      const { readFile } = await import("node:fs/promises");
      js = await readFile(opts.jsFile, "utf8");
    } else if (opts.jsStdin) {
      js = await new Promise<string>((resolve, reject) => {
        let received = "";
        process.stdin.setEncoding("utf8");
        process.stdin.on("data", (chunk) => {
          received = `${received}${chunk}`;
        });
        process.stdin.on("end", () => resolve(received));
        process.stdin.on("error", (e) => reject(e));
      });
    } else {
      js = jsArg?.trim() ?? "";
    }

    if (!js.trim()) {
      defaultRuntime.error(
        danger("Missing JavaScript. Pass <js> or use --js-file/--js-stdin."),
      );
      defaultRuntime.exit(2);
      return;
    }

    try {
      const result = await browserEval(baseUrl, {
        js,
        targetId: opts.targetId?.trim() || undefined,
        awaitPromise: Boolean(opts.await),
      });
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify(result, null, 2));
        return;
      }

      const remote = result.result;
      if (!Object.hasOwn(remote, "value")) {
        // No by-value payload: show the description, else the raw object.
        defaultRuntime.log(
          remote.description ?? JSON.stringify(remote, null, 2),
        );
        return;
      }
      // Strings are printed raw; everything else is pretty-printed JSON.
      const { value } = remote as { value?: unknown };
      defaultRuntime.log(
        typeof value === "string" ? value : JSON.stringify(value, null, 2),
      );
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser query <selector>` — list elements matching a CSS selector.
// Prints JSON when the parent --json flag or --format json is set; otherwise
// one line per match ("<index>. <tag#id.class>") followed by its text, if any.
// Exits 1 when the request fails.
browser
  .command("query")
  .description("Query elements by CSS selector")
  .argument("<selector>", "CSS selector")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--limit <n>", "Max matches (default: 20)", (raw: string) =>
    Number(raw),
  )
  .option(
    "--format <text|json>",
    "Text output format (default: text)",
    "text",
  )
  .action(async (selector: string, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      // A non-numeric --limit parses to NaN and is sent as "not specified".
      const limit = Number.isFinite(opts.limit) ? opts.limit : undefined;
      const result = await browserQuery(baseUrl, {
        selector,
        targetId: opts.targetId?.trim() || undefined,
        limit,
      });

      const wantsJson = parent?.json || opts.format === "json";
      if (wantsJson) {
        defaultRuntime.log(JSON.stringify(result, null, 2));
        return;
      }
      if (!result.matches.length) {
        defaultRuntime.log("No matches.");
        return;
      }

      const lines: string[] = [];
      for (const match of result.matches) {
        const idPart = match.id ? `#${match.id}` : "";
        let classPart = "";
        if (match.className) {
          // Show at most the first three class names.
          const shown = match.className
            .split(/\s+/)
            .filter(Boolean)
            .slice(0, 3);
          classPart = `.${shown.join(".")}`;
        }
        const header = `${match.index}. <${match.tag}${idPart}${classPart}>`;
        const body = match.text ? `\n ${match.text}` : "";
        lines.push(`${header}${body}`);
      }
      defaultRuntime.log(lines.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser dom` — dump the page DOM as HTML or text.
// With --out the dump text is written to that file and the path is reported
// (as JSON when the parent --json flag is set); without it the dump, or the
// full JSON result, is printed. Exits 1 when the request or write fails.
browser
  .command("dom")
  .description("Dump DOM (html or text) with truncation")
  .option("--format <html|text>", "Output format (default: html)", "html")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--selector <css>", "Optional CSS selector to scope the dump")
  .option(
    "--max-chars <n>",
    "Max characters (default: 200000)",
    (raw: string) => Number(raw),
  )
  .option("--out <path>", "Write output to a file")
  .action(async (opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    // Anything other than "text" is treated as "html".
    const format = opts.format === "text" ? "text" : "html";
    try {
      const maxChars = Number.isFinite(opts.maxChars)
        ? opts.maxChars
        : undefined;
      const result = await browserDom(baseUrl, {
        format,
        targetId: opts.targetId?.trim() || undefined,
        maxChars,
        selector: opts.selector?.trim() || undefined,
      });

      if (opts.out) {
        const { writeFile } = await import("node:fs/promises");
        await writeFile(opts.out, result.text, "utf8");
        const report = parent?.json
          ? JSON.stringify({ ok: true, out: opts.out }, null, 2)
          : opts.out;
        defaultRuntime.log(report);
        return;
      }

      const output = parent?.json
        ? JSON.stringify(result, null, 2)
        : result.text;
      defaultRuntime.log(output);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser snapshot` — capture an aria or domSnapshot snapshot of the page.
// With --out the JSON snapshot is written to that file and the path is
// reported. Otherwise JSON is printed for the parent --json flag or the
// domSnapshot format, and aria nodes are rendered as an indented outline.
// Exits 1 when the request or write fails.
browser
  .command("snapshot")
  .description("Capture an AI-friendly snapshot (aria or domSnapshot)")
  .option(
    "--format <aria|domSnapshot>",
    "Snapshot format (default: aria)",
    "aria",
  )
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--limit <n>", "Max nodes (default: 500/800)", (raw: string) =>
    Number(raw),
  )
  .option("--out <path>", "Write JSON snapshot to a file")
  .action(async (opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    // Anything other than "domSnapshot" is treated as "aria".
    const format = opts.format === "domSnapshot" ? "domSnapshot" : "aria";
    try {
      const limit = Number.isFinite(opts.limit) ? opts.limit : undefined;
      const result = await browserSnapshot(baseUrl, {
        format,
        targetId: opts.targetId?.trim() || undefined,
        limit,
      });
      const payload = JSON.stringify(result, null, 2);

      if (opts.out) {
        const { writeFile } = await import("node:fs/promises");
        await writeFile(opts.out, payload, "utf8");
        const report = parent?.json
          ? JSON.stringify({ ok: true, out: opts.out }, null, 2)
          : opts.out;
        defaultRuntime.log(report);
        return;
      }

      if (parent?.json || format === "domSnapshot") {
        defaultRuntime.log(payload);
        return;
      }

      // aria text rendering: one "- role" line per node, indented by depth
      // (capped at 20 repeats).
      const nodes = "nodes" in result ? result.nodes : [];
      const lines: string[] = [];
      for (const node of nodes) {
        const pad = " ".repeat(Math.min(20, node.depth));
        const namePart = node.name ? ` "${node.name}"` : "";
        const valuePart = node.value ? ` = "${node.value}"` : "";
        lines.push(`${pad}- ${node.role}${namePart}${valuePart}`);
      }
      defaultRuntime.log(lines.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
return program;
}