feat(browser): add MCP tool dispatch

This commit is contained in:
Peter Steinberger
2025-12-19 23:57:26 +00:00
parent 0ac7a93c28
commit fa54950d2e
19 changed files with 2991 additions and 1243 deletions

View File

@@ -21,6 +21,11 @@ export type BrowserTab = {
type?: string;
};
/**
 * Successful response of a browser tool call.
 * `ok` is always the literal `true`; every other key is tool-specific and
 * typed as `unknown`, so callers must narrow before use.
 */
export type BrowserToolResponse = {
ok: true;
[key: string]: unknown;
};
export type ScreenshotResult = {
ok: true;
path: string;
@@ -354,3 +359,23 @@ export async function browserClickRef(
},
);
}
/**
 * Dispatches a named browser tool by POSTing a JSON payload to
 * `${baseUrl}/tool`.
 *
 * @param baseUrl - Base URL the `/tool` path is appended to.
 * @param opts - Tool `name`, optional `args` (sent as `{}` when absent) and
 *   optional `targetId`.
 * @returns Whatever `fetchJson` resolves with, typed as a tool response.
 */
export async function browserTool(
  baseUrl: string,
  opts: {
    name: string;
    args?: Record<string, unknown>;
    targetId?: string;
  },
): Promise<BrowserToolResponse> {
  const payload = {
    name: opts.name,
    args: opts.args ?? {},
    targetId: opts.targetId,
  };
  const response = await fetchJson<BrowserToolResponse>(`${baseUrl}/tool`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
    timeoutMs: 20000,
  });
  return response;
}

View File

@@ -28,11 +28,13 @@ function createPage(opts: {
};
const click = vi.fn().mockResolvedValue(undefined);
const locator = vi.fn().mockReturnValue({ click });
const dblclick = vi.fn().mockResolvedValue(undefined);
const locator = vi.fn().mockReturnValue({ click, dblclick });
const page = {
context: () => context,
locator,
on: vi.fn(),
...(opts.hasSnapshotForAI === false
? {}
: {
@@ -48,6 +50,7 @@ function createPage(opts: {
function createBrowser(pages: unknown[]) {
const ctx = {
pages: () => pages,
on: vi.fn(),
};
return {
contexts: () => [ctx],

View File

@@ -1,153 +1,47 @@
import type { Browser, Page } from "playwright-core";
import { chromium } from "playwright-core";
export {
type BrowserConsoleMessage,
type BrowserNetworkRequest,
closePlaywrightBrowserConnection,
ensurePageState,
getPageForTargetId,
refLocator,
type WithSnapshotForAI,
} from "./pw-session.js";
type SnapshotForAIResult = { full: string; incremental?: string };
type SnapshotForAIOptions = { timeout?: number; track?: string };
export {
clickRefViaPlaywright,
clickViaPlaywright,
closePageViaPlaywright,
dragViaPlaywright,
evaluateViaPlaywright,
fileUploadViaPlaywright,
fillFormViaPlaywright,
handleDialogViaPlaywright,
hoverViaPlaywright,
navigateBackViaPlaywright,
navigateViaPlaywright,
pdfViaPlaywright,
pressKeyViaPlaywright,
resizeViewportViaPlaywright,
runCodeViaPlaywright,
selectOptionViaPlaywright,
snapshotAiViaPlaywright,
takeScreenshotViaPlaywright,
typeViaPlaywright,
waitForViaPlaywright,
} from "./pw-tools-core.js";
type WithSnapshotForAI = {
_snapshotForAI?: (
options?: SnapshotForAIOptions,
) => Promise<SnapshotForAIResult>;
};
type TargetInfoResponse = {
targetInfo?: {
targetId?: string;
};
};
type ConnectedBrowser = {
browser: Browser;
endpoint: string;
};
let cached: ConnectedBrowser | null = null;
let connecting: Promise<ConnectedBrowser> | null = null;
/** Builds the loopback HTTP endpoint for the given CDP port. */
function endpointForCdpPort(cdpPort: number) {
  const host = "http://127.0.0.1";
  return `${host}:${cdpPort}`;
}
/**
 * Returns a CDP connection to `endpoint`, reusing the cached connection or
 * joining an in-flight attempt when one exists.
 *
 * An in-flight attempt is only accepted when it resolves to the requested
 * endpoint. Previously any caller arriving while a connect was pending
 * received that connection even if it targeted a different endpoint.
 *
 * @throws Whatever `connectOverCDP` (or a joined in-flight attempt) rejects with.
 */
async function connectBrowser(endpoint: string): Promise<ConnectedBrowser> {
  for (;;) {
    if (cached?.endpoint === endpoint) return cached;
    if (!connecting) break;
    // Join the pending attempt; loop again if it was for another endpoint.
    const joined = await connecting;
    if (joined.endpoint === endpoint) return joined;
  }
  // No await between the loop exit and this assignment, so at most one
  // attempt is ever in flight.
  connecting = chromium
    .connectOverCDP(endpoint, { timeout: 5000 })
    .then((browser) => {
      const connected: ConnectedBrowser = { browser, endpoint };
      cached = connected;
      browser.on("disconnected", () => {
        // Only drop the cache entry if it still refers to this browser.
        if (cached?.browser === browser) cached = null;
      });
      return connected;
    })
    .finally(() => {
      connecting = null;
    });
  return await connecting;
}
/** Gathers the pages of every context of `browser` into one flat array. */
async function getAllPages(browser: Browser): Promise<Page[]> {
  const collected: Page[] = [];
  for (const context of browser.contexts()) {
    collected.push(...context.pages());
  }
  return collected;
}
/**
 * Looks up the CDP target id of `page` via `Target.getTargetInfo`.
 *
 * @returns The trimmed target id, or `null` when the response carries none.
 */
async function pageTargetId(page: Page): Promise<string | null> {
  const cdp = await page.context().newCDPSession(page);
  try {
    const response = (await cdp.send(
      "Target.getTargetInfo",
    )) as TargetInfoResponse;
    const trimmed = String(response?.targetInfo?.targetId ?? "").trim();
    return trimmed ? trimmed : null;
  } finally {
    // Detach is best-effort; a failure here must not mask the result.
    await cdp.detach().catch(() => {});
  }
}
/**
 * Scans the browser's pages one at a time and returns the first whose CDP
 * target id equals `targetId`; pages whose id lookup fails are skipped.
 *
 * @returns The matching page, or `null` when none matches.
 */
async function findPageByTargetId(
  browser: Browser,
  targetId: string,
): Promise<Page | null> {
  for (const candidate of await getAllPages(browser)) {
    let candidateId: string | null;
    try {
      candidateId = await pageTargetId(candidate);
    } catch {
      candidateId = null;
    }
    if (candidateId && candidateId === targetId) return candidate;
  }
  return null;
}
/**
 * Resolves the page to operate on for a CDP port.
 *
 * Without a `targetId` the first available page is returned; with one, the
 * page whose CDP target id matches.
 *
 * @throws Error when the browser has no pages, or when `targetId` matches none.
 */
async function getPageForTargetId(opts: {
  cdpPort: number;
  targetId?: string;
}): Promise<Page> {
  const { browser } = await connectBrowser(endpointForCdpPort(opts.cdpPort));
  const pages = await getAllPages(browser);
  if (pages.length === 0) {
    throw new Error("No pages available in the connected browser.");
  }
  if (opts.targetId) {
    const match = await findPageByTargetId(browser, opts.targetId);
    if (match) return match;
    throw new Error("tab not found");
  }
  return pages[0];
}
/**
 * Captures an AI-oriented snapshot of the target page through Playwright's
 * `_snapshotForAI` hook.
 *
 * `timeoutMs` defaults to 5000 and is floored and clamped to [500, 60000].
 *
 * @returns The `full` snapshot text (empty string when absent).
 * @throws Error when the page object does not expose `_snapshotForAI`.
 */
export async function snapshotAiViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  timeoutMs?: number;
}): Promise<{ snapshot: string }> {
  const { cdpPort, targetId } = opts;
  const page = await getPageForTargetId({ cdpPort, targetId });
  const snapshotCapable = page as unknown as WithSnapshotForAI;
  if (snapshotCapable._snapshotForAI) {
    const requestedMs = Math.floor(opts.timeoutMs ?? 5000);
    // Called as a method so `this` stays bound to the page object.
    const result = await snapshotCapable._snapshotForAI({
      timeout: Math.max(500, Math.min(60_000, requestedMs)),
      track: "response",
    });
    return { snapshot: String(result?.full ?? "") };
  }
  throw new Error(
    "Playwright _snapshotForAI is not available. Upgrade playwright-core.",
  );
}
/**
 * Clicks the element addressed by an `aria-ref` reference.
 *
 * `timeoutMs` defaults to 8000 and is floored and clamped to [500, 60000].
 *
 * @throws Error("ref is required") when `ref` is blank.
 */
export async function clickRefViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  timeoutMs?: number;
}): Promise<void> {
  const ref = String(opts.ref ?? "").trim();
  if (ref.length === 0) throw new Error("ref is required");
  const { cdpPort, targetId } = opts;
  const page = await getPageForTargetId({ cdpPort, targetId });
  const requestedMs = Math.floor(opts.timeoutMs ?? 8000);
  const timeout = Math.max(500, Math.min(60_000, requestedMs));
  await page.locator(`aria-ref=${ref}`).click({ timeout });
}
/**
 * Drops the cached browser connection and closes it.
 * The cache is cleared before closing; close errors are ignored.
 */
export async function closePlaywrightBrowserConnection(): Promise<void> {
  const previous = cached;
  cached = null;
  if (previous) {
    await previous.browser.close().catch(() => {});
  }
}
export {
generateLocatorForRef,
getConsoleMessagesViaPlaywright,
getNetworkRequestsViaPlaywright,
mouseClickViaPlaywright,
mouseDragViaPlaywright,
mouseMoveViaPlaywright,
startTracingViaPlaywright,
stopTracingViaPlaywright,
verifyElementVisibleViaPlaywright,
verifyListVisibleViaPlaywright,
verifyTextVisibleViaPlaywright,
verifyValueViaPlaywright,
} from "./pw-tools-observe.js";

218
src/browser/pw-session.ts Normal file
View File

@@ -0,0 +1,218 @@
import type {
Browser,
BrowserContext,
ConsoleMessage,
Page,
Request,
} from "playwright-core";
import { chromium } from "playwright-core";
/**
 * One captured console message of a page.
 * `timestamp` is a string; `location` is optional and every field within it
 * is optional as well.
 */
export type BrowserConsoleMessage = {
type: string;
text: string;
timestamp: string;
location?: { url?: string; lineNumber?: number; columnNumber?: number };
};
/**
 * One captured network request of a page.
 * Only `url`, `method` and `timestamp` are required; the response-related
 * fields (`status`, `ok`, `fromCache`) and `failureText` are optional.
 */
export type BrowserNetworkRequest = {
url: string;
method: string;
resourceType?: string;
status?: number;
ok?: boolean;
fromCache?: boolean;
failureText?: string;
timestamp: string;
};
type SnapshotForAIResult = { full: string; incremental?: string };
type SnapshotForAIOptions = { timeout?: number; track?: string };
export type WithSnapshotForAI = {
_snapshotForAI?: (
options?: SnapshotForAIOptions,
) => Promise<SnapshotForAIResult>;
};
type TargetInfoResponse = {
targetInfo?: {
targetId?: string;
};
};
type ConnectedBrowser = {
browser: Browser;
endpoint: string;
};
type PageState = {
console: BrowserConsoleMessage[];
network: BrowserNetworkRequest[];
requestMap: Map<Request, BrowserNetworkRequest>;
};
// Captured console/network state per page (weakly keyed by the Page object).
const pageStates = new WeakMap<Page, PageState>();
// Contexts that have already been wired up for observation.
const observedContexts = new WeakSet<BrowserContext>();
// Pages that already have the capture listeners attached.
const observedPages = new WeakSet<Page>();
// Upper bounds for the per-page console and network logs.
const MAX_CONSOLE_MESSAGES = 500;
const MAX_NETWORK_REQUESTS = 1000;
// Last established browser connection, or null when none is cached.
let cached: ConnectedBrowser | null = null;
// In-flight connection attempt, or null when no connect is pending.
let connecting: Promise<ConnectedBrowser> | null = null;
/** Builds the loopback HTTP endpoint for the given CDP port. */
function endpointForCdpPort(cdpPort: number) {
  const host = "http://127.0.0.1";
  return `${host}:${cdpPort}`;
}
/**
 * Returns the capture state for `page`, creating it and attaching the
 * console/network listeners the first time the page is seen.
 *
 * - Console messages and requests are appended to bounded logs; once a log
 *   exceeds its maximum the oldest entry is dropped.
 * - In-flight requests are tracked in `requestMap` until they finish or fail,
 *   at which point the logged entry is updated in place.
 * - `fromCache` is filled from `response.fromServiceWorker()`.
 * - When the page closes, its state and observed marker are removed.
 */
export function ensurePageState(page: Page): PageState {
  const known = pageStates.get(page);
  if (known) return known;

  const state: PageState = {
    console: [],
    network: [],
    requestMap: new Map(),
  };
  pageStates.set(page, state);
  if (observedPages.has(page)) return state;
  observedPages.add(page);

  const onConsole = (msg: ConsoleMessage) => {
    const logged: BrowserConsoleMessage = {
      type: msg.type(),
      text: msg.text(),
      timestamp: new Date().toISOString(),
      location: msg.location(),
    };
    state.console.push(logged);
    if (state.console.length > MAX_CONSOLE_MESSAGES) state.console.shift();
  };

  const onRequest = (req: Request) => {
    const logged: BrowserNetworkRequest = {
      url: req.url(),
      method: req.method(),
      resourceType: req.resourceType(),
      timestamp: new Date().toISOString(),
    };
    state.network.push(logged);
    state.requestMap.set(req, logged);
    if (state.network.length > MAX_NETWORK_REQUESTS) state.network.shift();
  };

  const onRequestFinished = async (req: Request) => {
    const logged = state.requestMap.get(req);
    if (!logged) return;
    // A failed response lookup is treated as "no response details".
    const response = await req.response().catch(() => null);
    if (response) {
      logged.status = response.status();
      logged.ok = response.ok();
      logged.fromCache = response.fromServiceWorker();
    }
    state.requestMap.delete(req);
  };

  const onRequestFailed = (req: Request) => {
    const logged = state.requestMap.get(req);
    if (!logged) return;
    logged.failureText = req.failure()?.errorText;
    state.requestMap.delete(req);
  };

  const onClose = () => {
    pageStates.delete(page);
    observedPages.delete(page);
  };

  page.on("console", onConsole);
  page.on("request", onRequest);
  page.on("requestfinished", onRequestFinished);
  page.on("requestfailed", onRequestFailed);
  page.on("close", onClose);
  return state;
}
/**
 * Starts observing a browser context once: existing pages get capture state
 * immediately, and pages opened later get it via the "page" event.
 */
function observeContext(context: BrowserContext) {
  if (!observedContexts.has(context)) {
    observedContexts.add(context);
    context.pages().forEach((page) => {
      ensurePageState(page);
    });
    context.on("page", (page) => ensurePageState(page));
  }
}
/** Starts observing every context the browser currently has. */
function observeBrowser(browser: Browser) {
  browser.contexts().forEach((context) => {
    observeContext(context);
  });
}
/**
 * Returns a CDP connection to `endpoint`, reusing the cached connection or
 * joining an in-flight attempt when one exists. A fresh connection has its
 * existing contexts observed and is evicted from the cache on disconnect.
 *
 * An in-flight attempt is only accepted when it resolves to the requested
 * endpoint. Previously any caller arriving while a connect was pending
 * received that connection even if it targeted a different endpoint.
 *
 * @throws Whatever `connectOverCDP` (or a joined in-flight attempt) rejects with.
 */
async function connectBrowser(endpoint: string): Promise<ConnectedBrowser> {
  for (;;) {
    if (cached?.endpoint === endpoint) return cached;
    if (!connecting) break;
    // Join the pending attempt; loop again if it was for another endpoint.
    const joined = await connecting;
    if (joined.endpoint === endpoint) return joined;
  }
  // No await between the loop exit and this assignment, so at most one
  // attempt is ever in flight.
  connecting = chromium
    .connectOverCDP(endpoint, { timeout: 5000 })
    .then((browser) => {
      const connected: ConnectedBrowser = { browser, endpoint };
      cached = connected;
      observeBrowser(browser);
      browser.on("disconnected", () => {
        // Only drop the cache entry if it still refers to this browser.
        if (cached?.browser === browser) cached = null;
      });
      return connected;
    })
    .finally(() => {
      connecting = null;
    });
  return await connecting;
}
/** Gathers the pages of every context of `browser` into one flat array. */
async function getAllPages(browser: Browser): Promise<Page[]> {
  const collected: Page[] = [];
  for (const context of browser.contexts()) {
    collected.push(...context.pages());
  }
  return collected;
}
/**
 * Looks up the CDP target id of `page` via `Target.getTargetInfo`.
 *
 * @returns The trimmed target id, or `null` when the response carries none.
 */
async function pageTargetId(page: Page): Promise<string | null> {
  const cdp = await page.context().newCDPSession(page);
  try {
    const response = (await cdp.send(
      "Target.getTargetInfo",
    )) as TargetInfoResponse;
    const trimmed = String(response?.targetInfo?.targetId ?? "").trim();
    return trimmed ? trimmed : null;
  } finally {
    // Detach is best-effort; a failure here must not mask the result.
    await cdp.detach().catch(() => {});
  }
}
/**
 * Scans the browser's pages one at a time and returns the first whose CDP
 * target id equals `targetId`; pages whose id lookup fails are skipped.
 *
 * @returns The matching page, or `null` when none matches.
 */
async function findPageByTargetId(
  browser: Browser,
  targetId: string,
): Promise<Page | null> {
  for (const candidate of await getAllPages(browser)) {
    let candidateId: string | null;
    try {
      candidateId = await pageTargetId(candidate);
    } catch {
      candidateId = null;
    }
    if (candidateId && candidateId === targetId) return candidate;
  }
  return null;
}
/**
 * Resolves the page to operate on for a CDP port.
 *
 * Without a `targetId` the first available page is returned; with one, the
 * page whose CDP target id matches.
 *
 * @throws Error when the browser has no pages, or when `targetId` matches none.
 */
export async function getPageForTargetId(opts: {
  cdpPort: number;
  targetId?: string;
}): Promise<Page> {
  const { browser } = await connectBrowser(endpointForCdpPort(opts.cdpPort));
  const pages = await getAllPages(browser);
  if (pages.length === 0) {
    throw new Error("No pages available in the connected browser.");
  }
  if (opts.targetId) {
    const match = await findPageByTargetId(browser, opts.targetId);
    if (match) return match;
    throw new Error("tab not found");
  }
  return pages[0];
}
/** Creates a locator for the `aria-ref=<ref>` selector on `page`. */
export function refLocator(page: Page, ref: string) {
  const selector = `aria-ref=${ref}`;
  return page.locator(selector);
}
/**
 * Drops the cached browser connection and closes it.
 * The cache is cleared before closing; close errors are ignored.
 */
export async function closePlaywrightBrowserConnection(): Promise<void> {
  const previous = cached;
  cached = null;
  if (previous) {
    await previous.browser.close().catch(() => {});
  }
}

View File

@@ -0,0 +1,393 @@
import type { Page } from "playwright-core";
import {
ensurePageState,
getPageForTargetId,
refLocator,
type WithSnapshotForAI,
} from "./pw-session.js";
/**
 * Captures an AI-oriented snapshot of the target page through Playwright's
 * `_snapshotForAI` hook.
 *
 * `timeoutMs` defaults to 5000 and is floored and clamped to [500, 60000].
 *
 * @returns The `full` snapshot text (empty string when absent).
 * @throws Error when the page object does not expose `_snapshotForAI`.
 */
export async function snapshotAiViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  timeoutMs?: number;
}): Promise<{ snapshot: string }> {
  const { cdpPort, targetId } = opts;
  const page = await getPageForTargetId({ cdpPort, targetId });
  ensurePageState(page);
  const snapshotCapable = page as unknown as WithSnapshotForAI;
  if (snapshotCapable._snapshotForAI) {
    const requestedMs = Math.floor(opts.timeoutMs ?? 5000);
    // Called as a method so `this` stays bound to the page object.
    const result = await snapshotCapable._snapshotForAI({
      timeout: Math.max(500, Math.min(60_000, requestedMs)),
      track: "response",
    });
    return { snapshot: String(result?.full ?? "") };
  }
  throw new Error(
    "Playwright _snapshotForAI is not available. Upgrade playwright-core.",
  );
}
/**
 * Ref-only click entry point; forwards the options object unchanged to
 * `clickViaPlaywright`.
 */
export async function clickRefViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  timeoutMs?: number;
}): Promise<void> {
  const forwarded = opts;
  await clickViaPlaywright(forwarded);
}
/**
 * Clicks (or double-clicks) the element addressed by `ref`.
 *
 * `button` and `modifiers` are passed through to Playwright; `timeoutMs`
 * defaults to 8000 and is floored and clamped to [500, 60000].
 *
 * @throws Error("ref is required") when `ref` is blank.
 */
export async function clickViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  doubleClick?: boolean;
  button?: "left" | "right" | "middle";
  modifiers?: Array<"Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift">;
  timeoutMs?: number;
}): Promise<void> {
  const ref = String(opts.ref ?? "").trim();
  if (ref.length === 0) throw new Error("ref is required");
  const { cdpPort, targetId } = opts;
  const page = await getPageForTargetId({ cdpPort, targetId });
  ensurePageState(page);
  const target = refLocator(page, ref);
  const requestedMs = Math.floor(opts.timeoutMs ?? 8000);
  const clickOptions = {
    timeout: Math.max(500, Math.min(60_000, requestedMs)),
    button: opts.button,
    modifiers: opts.modifiers,
  };
  await (opts.doubleClick
    ? target.dblclick(clickOptions)
    : target.click(clickOptions));
}
/**
 * Hovers over the element addressed by `ref`.
 * `timeoutMs` defaults to 8000 and is clamped to [500, 60000].
 *
 * @throws Error("ref is required") when `ref` is blank.
 */
export async function hoverViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  timeoutMs?: number;
}): Promise<void> {
  const ref = String(opts.ref ?? "").trim();
  if (ref.length === 0) throw new Error("ref is required");
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const requestedMs = opts.timeoutMs ?? 8000;
  const timeout = Math.max(500, Math.min(60_000, requestedMs));
  const target = refLocator(page, ref);
  await target.hover({ timeout });
}
/**
 * Drags the element at `startRef` onto the element at `endRef`.
 * `timeoutMs` defaults to 8000 and is clamped to [500, 60000].
 *
 * @throws Error when either ref is blank.
 */
export async function dragViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  startRef: string;
  endRef: string;
  timeoutMs?: number;
}): Promise<void> {
  const startRef = String(opts.startRef ?? "").trim();
  const endRef = String(opts.endRef ?? "").trim();
  if (!(startRef && endRef)) {
    throw new Error("startRef and endRef are required");
  }
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const requestedMs = opts.timeoutMs ?? 8000;
  const timeout = Math.max(500, Math.min(60_000, requestedMs));
  const source = refLocator(page, startRef);
  const destination = refLocator(page, endRef);
  await source.dragTo(destination, { timeout });
}
/**
 * Selects the given option values on the element addressed by `ref`.
 * `timeoutMs` defaults to 8000 and is clamped to [500, 60000].
 *
 * @throws Error when `ref` is blank or `values` is missing/empty.
 */
export async function selectOptionViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  values: string[];
  timeoutMs?: number;
}): Promise<void> {
  const ref = String(opts.ref ?? "").trim();
  if (ref.length === 0) throw new Error("ref is required");
  if (!opts.values?.length) throw new Error("values are required");
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const requestedMs = opts.timeoutMs ?? 8000;
  const timeout = Math.max(500, Math.min(60_000, requestedMs));
  const target = refLocator(page, ref);
  await target.selectOption(opts.values, { timeout });
}
/**
 * Presses `key` on the page keyboard.
 * `delayMs` defaults to 0, is floored, and never goes below 0.
 *
 * @throws Error("key is required") when `key` is blank.
 */
export async function pressKeyViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  key: string;
  delayMs?: number;
}): Promise<void> {
  const key = String(opts.key ?? "").trim();
  if (key.length === 0) throw new Error("key is required");
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const requestedDelay = Math.floor(opts.delayMs ?? 0);
  const delay = Math.max(0, requestedDelay);
  await page.keyboard.press(key, { delay });
}
/**
 * Enters `text` into the element addressed by `ref`.
 *
 * With `slowly`, the element is clicked and the text typed with a 75 ms
 * per-key delay; otherwise the field is filled in one step. With `submit`,
 * Enter is pressed afterwards. `timeoutMs` defaults to 8000 and is clamped
 * to [500, 60000].
 *
 * @throws Error("ref is required") when `ref` is blank.
 */
export async function typeViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  text: string;
  submit?: boolean;
  slowly?: boolean;
  timeoutMs?: number;
}): Promise<void> {
  const ref = String(opts.ref ?? "").trim();
  if (ref.length === 0) throw new Error("ref is required");
  const text = String(opts.text ?? "");
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const target = refLocator(page, ref);
  const requestedMs = opts.timeoutMs ?? 8000;
  const timeout = Math.max(500, Math.min(60_000, requestedMs));
  if (!opts.slowly) {
    await target.fill(text, { timeout });
  } else {
    await target.click({ timeout });
    await target.type(text, { timeout, delay: 75 });
  }
  if (opts.submit) await target.press("Enter", { timeout });
}
/**
 * Fills several form fields in order.
 *
 * Each field record supplies `ref`, `type` and `value`; records with a blank
 * `ref` or `type` are skipped. Checkbox and radio fields are checked exactly
 * when the stringified value is "true"; every other type is filled with the
 * stringified value.
 */
export async function fillFormViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  fields: Array<Record<string, unknown>>;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  for (const field of opts.fields) {
    const ref = String(field.ref ?? "").trim();
    const type = String(field.type ?? "").trim();
    const value = String(field.value ?? "");
    if (ref && type) {
      const target = refLocator(page, ref);
      const isToggle = type === "checkbox" || type === "radio";
      if (isToggle) {
        await target.setChecked(value === "true");
      } else {
        await target.fill(value);
      }
    }
  }
}
/**
 * Evaluates caller-supplied function source against the page, or against the
 * element addressed by `ref` when one is given.
 *
 * `fn` must be the source text of a function expression: it is spliced into
 * `const fn = <source>; return fn(...)` and compiled with `new Function`.
 * With `ref`, the function receives the element as its only argument.
 *
 * NOTE(review): this runs arbitrary caller-provided code; callers must treat
 * `fn` as trusted input.
 *
 * @returns Whatever the evaluated function returns.
 * @throws Error("function is required") when `fn` is blank.
 */
export async function evaluateViaPlaywright(opts: {
cdpPort: number;
targetId?: string;
fn: string;
ref?: string;
}): Promise<unknown> {
const fnText = String(opts.fn ?? "").trim();
if (!fnText) throw new Error("function is required");
const page = await getPageForTargetId(opts);
ensurePageState(page);
if (opts.ref) {
const locator = refLocator(page, opts.ref);
// The callback is handed to Playwright's evaluate and receives `fnText` as
// its argument, so it only uses its own parameters (no outer variables).
return await locator.evaluate((el, fnBody) => {
const runner = new Function(
"element",
`"use strict"; const fn = ${fnBody}; return fn(element);`,
) as (element: Element) => unknown;
return runner(el as Element);
}, fnText);
}
// Page-level variant: same wrapping, but the function is called with no arguments.
return await page.evaluate((fnBody) => {
const runner = new Function(
`"use strict"; const fn = ${fnBody}; return fn();`,
) as () => unknown;
return runner();
}, fnText);
}
/**
 * Waits for the page to open a file chooser and answers it.
 *
 * With `paths`, the files are set on the chooser. Without paths the chooser
 * is cancelled when the object offers a `cancel` method.
 *
 * `cancel` is looked up at runtime instead of being called unconditionally:
 * Playwright's documented FileChooser API does not list a `cancel()` method,
 * so the unconditional call could throw a TypeError on the empty-paths path.
 * NOTE(review): when `cancel` is absent the chooser is simply left
 * unanswered; confirm that is acceptable for callers.
 *
 * `timeoutMs` defaults to 10000 and is clamped to [500, 60000].
 */
export async function fileUploadViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  paths?: string[];
  timeoutMs?: number;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 10_000));
  const fileChooser = await page.waitForEvent("filechooser", { timeout });
  if (opts.paths?.length) {
    await fileChooser.setFiles(opts.paths);
    return;
  }
  const cancel: unknown = Reflect.get(fileChooser, "cancel");
  if (typeof cancel === "function") {
    await cancel.call(fileChooser);
  }
}
/**
 * Waits for the next dialog on the page and accepts or dismisses it.
 *
 * When accepting, `promptText` is passed to the dialog. `timeoutMs` defaults
 * to 10000 and is clamped to [500, 60000].
 *
 * @returns The dialog's message and type, read before it is answered.
 */
export async function handleDialogViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  accept: boolean;
  promptText?: string;
  timeoutMs?: number;
}): Promise<{ message: string; type: string }> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const requestedMs = opts.timeoutMs ?? 10_000;
  const timeout = Math.max(500, Math.min(60_000, requestedMs));
  const dialog = await page.waitForEvent("dialog", { timeout });
  const details = { message: dialog.message(), type: dialog.type() };
  if (!opts.accept) {
    await dialog.dismiss();
  } else {
    await dialog.accept(opts.promptText);
  }
  return details;
}
/**
 * Navigates the page to `url`.
 * `timeoutMs` defaults to 20000 and is clamped to [1000, 120000].
 *
 * @returns The page URL after navigation.
 * @throws Error("url is required") when `url` is blank.
 */
export async function navigateViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  url: string;
  timeoutMs?: number;
}): Promise<{ url: string }> {
  const destination = String(opts.url ?? "").trim();
  if (destination.length === 0) throw new Error("url is required");
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const requestedMs = opts.timeoutMs ?? 20_000;
  const timeout = Math.max(1000, Math.min(120_000, requestedMs));
  await page.goto(destination, { timeout });
  return { url: page.url() };
}
/**
 * Navigates the page one step back in history.
 * `timeoutMs` defaults to 20000 and is clamped to [1000, 120000].
 *
 * @returns The page URL after going back.
 */
export async function navigateBackViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  timeoutMs?: number;
}): Promise<{ url: string }> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const requestedMs = opts.timeoutMs ?? 20_000;
  const timeout = Math.max(1000, Math.min(120_000, requestedMs));
  await page.goBack({ timeout });
  return { url: page.url() };
}
/**
 * Waits on up to three conditions, in this order:
 * 1. `time` — a fixed pause in seconds (negative values count as 0);
 * 2. `text` — until the first match of the text is visible;
 * 3. `textGone` — until the first match of the text is hidden.
 *
 * The text waits share `timeoutMs`, which defaults to 20000 and is clamped
 * to [500, 120000].
 */
export async function waitForViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  time?: number;
  text?: string;
  textGone?: string;
  timeoutMs?: number;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const { time, text, textGone } = opts;
  const timeout = Math.max(500, Math.min(120_000, opts.timeoutMs ?? 20_000));
  if (typeof time === "number" && Number.isFinite(time)) {
    const seconds = Math.max(0, time);
    await page.waitForTimeout(seconds * 1000);
  }
  if (text) {
    const appearing = page.getByText(text).first();
    await appearing.waitFor({ state: "visible", timeout });
  }
  if (textGone) {
    const disappearing = page.getByText(textGone).first();
    await disappearing.waitFor({ state: "hidden", timeout });
  }
}
/**
 * Compiles `code` with `new Function` in this process, expects it to
 * evaluate to a function, and calls that function with the Playwright page.
 *
 * Because the code runs in-process with access to the page object, callers
 * must only pass trusted code.
 *
 * @returns The awaited result of the supplied function.
 * @throws Error when `code` is blank or does not evaluate to a function.
 */
export async function runCodeViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  code: string;
}): Promise<unknown> {
  const code = String(opts.code ?? "").trim();
  if (code.length === 0) throw new Error("code is required");
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const factory = new Function(`return (${code});`);
  const candidate: unknown = factory();
  if (typeof candidate !== "function") {
    throw new Error("code is not a function");
  }
  const userFn = candidate as (page: Page) => unknown;
  return await userFn(page);
}
/**
 * Takes a screenshot of the page, or of a single element when `ref` is set.
 *
 * `type` defaults to "png". `fullPage` only applies to page screenshots.
 *
 * @returns The encoded image bytes.
 * @throws Error when `fullPage` is combined with `ref`.
 */
export async function takeScreenshotViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref?: string;
  element?: string;
  fullPage?: boolean;
  type?: "png" | "jpeg";
}): Promise<{ buffer: Buffer }> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const type = opts.type ?? "png";
  if (!opts.ref) {
    const pageShot = await page.screenshot({
      type,
      fullPage: Boolean(opts.fullPage),
    });
    return { buffer: pageShot };
  }
  if (opts.fullPage) {
    throw new Error("fullPage is not supported for element screenshots");
  }
  const elementShot = await refLocator(page, opts.ref).screenshot({ type });
  return { buffer: elementShot };
}
/**
 * Resizes the page viewport.
 * Width and height are floored and forced to at least 1 pixel.
 */
export async function resizeViewportViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  width: number;
  height: number;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const width = Math.max(1, Math.floor(opts.width));
  const height = Math.max(1, Math.floor(opts.height));
  await page.setViewportSize({ width, height });
}
/** Closes the page resolved from `cdpPort` / `targetId`. */
export async function closePageViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
}): Promise<void> {
  const doomed = await getPageForTargetId(opts);
  ensurePageState(doomed);
  await doomed.close();
}
/**
 * Renders the page to PDF with background graphics enabled.
 *
 * @returns The PDF bytes.
 */
export async function pdfViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
}): Promise<{ buffer: Buffer }> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  return { buffer: await page.pdf({ printBackground: true }) };
}

View File

@@ -0,0 +1,200 @@
import crypto from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import {
type BrowserConsoleMessage,
type BrowserNetworkRequest,
ensurePageState,
getPageForTargetId,
refLocator,
} from "./pw-session.js";
const STATIC_RESOURCE_TYPES = new Set(["image", "font", "stylesheet", "media"]);
const tracingContexts = new WeakSet<unknown>();
/**
 * Ranks a console level for filtering: error=3, warning=2, debug=0, and
 * everything else (including "info", "log" and unknown levels) = 1.
 */
function consolePriority(level: string) {
  if (level === "error") return 3;
  if (level === "warning") return 2;
  if (level === "debug") return 0;
  return 1;
}
/**
 * Starts Playwright tracing (screenshots, snapshots and sources) on the
 * context that owns the target page, and records that context as tracing.
 *
 * @throws Error("Tracing already started") when the context is already recorded.
 */
export async function startTracingViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const context = page.context();
  if (tracingContexts.has(context)) {
    throw new Error("Tracing already started");
  }
  const traceOptions = { screenshots: true, snapshots: true, sources: true };
  await context.tracing.start(traceOptions);
  // Recorded only after a successful start.
  tracingContexts.add(context);
}
/**
 * Stops tracing on the context that owns the target page and returns the
 * trace archive.
 *
 * The trace is written to a uniquely named zip in the OS temp directory,
 * read into memory, and the temp file is removed. Removal happens in a
 * `finally` block so the file is not left behind when stopping the trace or
 * reading the file fails.
 *
 * @returns The trace zip bytes.
 * @throws Error("Tracing not started") when the context is not recorded as tracing.
 */
export async function stopTracingViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
}): Promise<{ buffer: Buffer }> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const context = page.context();
  if (!tracingContexts.has(context)) throw new Error("Tracing not started");
  const fileName = `clawd-trace-${crypto.randomUUID()}.zip`;
  const filePath = path.join(os.tmpdir(), fileName);
  try {
    await context.tracing.stop({ path: filePath });
    // Cleared only after a successful stop, as before.
    tracingContexts.delete(context);
    const buffer = await fs.readFile(filePath);
    return { buffer };
  } finally {
    // Best-effort cleanup; `force` tolerates a file that was never created.
    await fs.rm(filePath, { force: true }).catch(() => {});
  }
}
/**
 * Returns the console messages captured for the target page.
 *
 * Without `level`, a copy of the whole log is returned. With `level`, only
 * messages whose priority is at least that of `level` are returned.
 */
export async function getConsoleMessagesViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  level?: string;
}): Promise<BrowserConsoleMessage[]> {
  const page = await getPageForTargetId(opts);
  const { console: captured } = ensurePageState(page);
  if (opts.level) {
    const threshold = consolePriority(opts.level);
    return captured.filter((entry) => consolePriority(entry.type) >= threshold);
  }
  return [...captured];
}
/**
 * Returns the network requests captured for the target page.
 *
 * With `includeStatic`, a copy of the whole log is returned. Otherwise
 * requests whose resource type is in `STATIC_RESOURCE_TYPES` are left out;
 * requests without a resource type are kept.
 */
export async function getNetworkRequestsViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  includeStatic?: boolean;
}): Promise<BrowserNetworkRequest[]> {
  const page = await getPageForTargetId(opts);
  const { network: captured } = ensurePageState(page);
  if (opts.includeStatic) return [...captured];
  return captured.filter(
    (entry) =>
      !(entry.resourceType && STATIC_RESOURCE_TYPES.has(entry.resourceType)),
  );
}
/** Moves the mouse pointer to the coordinates (`x`, `y`). */
export async function mouseMoveViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  x: number;
  y: number;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const { x, y } = opts;
  await page.mouse.move(x, y);
}
/**
 * Clicks at the coordinates (`x`, `y`).
 * `button` is passed through to Playwright as given (possibly undefined).
 */
export async function mouseClickViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  x: number;
  y: number;
  button?: "left" | "right" | "middle";
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const { x, y, button } = opts;
  await page.mouse.click(x, y, { button });
}
/**
 * Performs a coordinate-based drag: move to the start point, press the
 * button, move to the end point, release.
 */
export async function mouseDragViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  startX: number;
  startY: number;
  endX: number;
  endY: number;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const { startX, startY, endX, endY } = opts;
  const pointer = page.mouse;
  await pointer.move(startX, startY);
  await pointer.down();
  await pointer.move(endX, endY);
  await pointer.up();
}
/**
 * Asserts that an element with the given role and accessible name exists and
 * that the first match is visible.
 *
 * @throws Error("element not found") when nothing matches.
 * @throws Error("element not visible") when the first match is not visible.
 */
export async function verifyElementVisibleViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  role: string;
  accessibleName: string;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const matches = page.getByRole(opts.role as never, {
    name: opts.accessibleName,
  });
  const matchCount = await matches.count();
  if (matchCount === 0) throw new Error("element not found");
  const visible = await matches.first().isVisible();
  if (!visible) throw new Error("element not visible");
}
/**
 * Asserts that at least one visible element on the page matches `text`.
 *
 * @throws Error("text not found") when no visible match exists.
 */
export async function verifyTextVisibleViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  text: string;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const visibleMatches = page.getByText(opts.text).filter({ visible: true });
  const matchCount = await visibleMatches.count();
  if (matchCount === 0) throw new Error("text not found");
}
/**
 * Asserts that every string in `items` matches some text inside the element
 * addressed by `ref`. Items are checked in order; the first miss throws.
 *
 * @throws Error(`item "<item>" not found`) for the first item with no match.
 */
export async function verifyListVisibleViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  items: string[];
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const container = refLocator(page, opts.ref);
  for (const item of opts.items) {
    const matchCount = await container.getByText(item).count();
    if (matchCount === 0) {
      throw new Error(`item "${item}" not found`);
    }
  }
}
/**
 * Asserts the current value of the element addressed by `ref`.
 *
 * For `type` "checkbox" or "radio", the checked state is compared with
 * `value === "true"`; for any other type the input value must equal `value`.
 *
 * @throws Error(`expected <value>, got <actual>`) on mismatch.
 */
export async function verifyValueViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  type: string;
  value: string;
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const target = refLocator(page, opts.ref);
  const isToggle = opts.type === "checkbox" || opts.type === "radio";
  if (!isToggle) {
    const actual = await target.inputValue();
    if (actual !== opts.value) {
      throw new Error(`expected ${opts.value}, got ${actual}`);
    }
    return;
  }
  const checked = await target.isChecked();
  const shouldBeChecked = opts.value === "true";
  if (checked !== shouldBeChecked) {
    throw new Error(`expected ${opts.value}, got ${String(checked)}`);
  }
}
/** Returns the locator expression text `locator('aria-ref=<ref>')` for a ref. */
export function generateLocatorForRef(ref: string) {
  const selector = "aria-ref=" + ref;
  return "locator('" + selector + "')";
}

View File

@@ -0,0 +1,50 @@
import type express from "express";
import type { BrowserRouteContext } from "../server-context.js";
import { jsonError } from "./utils.js";
/**
 * Registers the basic browser control routes:
 * - `GET /` — status summary (503 when the server state is unavailable);
 * - `POST /start` — ensures a browser is available;
 * - `POST /stop` — stops the running browser and reports whether one stopped.
 *
 * Every handler answers failures with a JSON error. The status handler now
 * also guards the reachability probe and response building: an `await` that
 * rejects outside a try/catch in an async handler is not forwarded to
 * Express 4's error handling and would leave the request without a response.
 */
export function registerBrowserBasicRoutes(
  app: express.Express,
  ctx: BrowserRouteContext,
) {
  app.get("/", async (_req, res) => {
    let current: ReturnType<typeof ctx.state>;
    try {
      current = ctx.state();
    } catch {
      return jsonError(res, 503, "browser server not started");
    }
    try {
      const reachable = await ctx.isReachable(300);
      res.json({
        enabled: current.resolved.enabled,
        controlUrl: current.resolved.controlUrl,
        running: reachable,
        pid: current.running?.pid ?? null,
        cdpPort: current.cdpPort,
        chosenBrowser: current.running?.exe.kind ?? null,
        userDataDir: current.running?.userDataDir ?? null,
        color: current.resolved.color,
        headless: current.resolved.headless,
        attachOnly: current.resolved.attachOnly,
      });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });

  app.post("/start", async (_req, res) => {
    try {
      await ctx.ensureBrowserAvailable();
      res.json({ ok: true });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });

  app.post("/stop", async (_req, res) => {
    try {
      const result = await ctx.stopRunningBrowser();
      res.json({ ok: true, stopped: result.stopped });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
}

View File

@@ -0,0 +1,17 @@
import type express from "express";
import type { BrowserRouteContext } from "../server-context.js";
import { registerBrowserBasicRoutes } from "./basic.js";
import { registerBrowserInspectRoutes } from "./inspect.js";
import { registerBrowserTabRoutes } from "./tabs.js";
import { registerBrowserToolRoutes } from "./tool.js";
/**
 * Registers all browser route groups on `app`, in this order:
 * basic, tab, inspect, tool.
 */
export function registerBrowserRoutes(
  app: express.Express,
  ctx: BrowserRouteContext,
) {
  const registrars = [
    registerBrowserBasicRoutes,
    registerBrowserTabRoutes,
    registerBrowserInspectRoutes,
    registerBrowserToolRoutes,
  ];
  for (const register of registrars) {
    register(app, ctx);
  }
}

View File

@@ -0,0 +1,307 @@
import path from "node:path";
import type express from "express";
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
import {
captureScreenshot,
captureScreenshotPng,
evaluateJavaScript,
getDomText,
querySelector,
snapshotAria,
snapshotDom,
} from "../cdp.js";
import {
snapshotAiViaPlaywright,
takeScreenshotViaPlaywright,
} from "../pw-ai.js";
import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
normalizeBrowserScreenshot,
} from "../screenshot.js";
import type { BrowserRouteContext } from "../server-context.js";
import { jsonError, toBoolean, toStringOrEmpty } from "./utils.js";
export function registerBrowserInspectRoutes(
app: express.Express,
ctx: BrowserRouteContext,
) {
app.get("/screenshot", async (req, res) => {
const targetId =
typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const fullPage =
req.query.fullPage === "true" || req.query.fullPage === "1";
try {
const tab = await ctx.ensureTabAvailable(targetId || undefined);
let shot: Buffer<ArrayBufferLike> = Buffer.alloc(0);
let contentTypeHint: "image/jpeg" | "image/png" = "image/jpeg";
try {
shot = await captureScreenshot({
wsUrl: tab.wsUrl ?? "",
fullPage,
format: "jpeg",
quality: 85,
});
} catch {
contentTypeHint = "image/png";
shot = await captureScreenshotPng({
wsUrl: tab.wsUrl ?? "",
fullPage,
});
}
const normalized = await normalizeBrowserScreenshot(shot, {
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
});
await ensureMediaDir();
const saved = await saveMediaBuffer(
normalized.buffer,
normalized.contentType ?? contentTypeHint,
"browser",
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
);
const filePath = path.resolve(saved.path);
res.json({
ok: true,
path: filePath,
targetId: tab.targetId,
url: tab.url,
});
} catch (err) {
const mapped = ctx.mapTabError(err);
if (mapped) return jsonError(res, mapped.status, mapped.message);
jsonError(res, 500, String(err));
}
});
// POST /screenshot: capture the page (or a referenced element) through
// Playwright, pass it through normalizeBrowserScreenshot with the default
// size limits, store it via saveMediaBuffer, and reply with the absolute path.
app.post("/screenshot", async (req, res) => {
  const payload = req.body as Record<string, unknown>;
  const requestedTarget = toStringOrEmpty(payload?.targetId);
  const fullPage = toBoolean(payload?.fullPage) ?? false;
  const ref = toStringOrEmpty(payload?.ref);
  const element = toStringOrEmpty(payload?.element);
  // Anything other than an explicit "jpeg" falls back to PNG.
  const type = payload?.type === "jpeg" ? "jpeg" : "png";
  const filename = toStringOrEmpty(payload?.filename);

  try {
    const tab = await ctx.ensureTabAvailable(requestedTarget || undefined);
    const { buffer } = await takeScreenshotViaPlaywright({
      cdpPort: ctx.state().cdpPort,
      targetId: tab.targetId,
      ref,
      element,
      fullPage,
      type,
    });
    const limits = {
      maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
      maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
    };
    const normalized = await normalizeBrowserScreenshot(buffer, limits);
    await ensureMediaDir();
    const saved = await saveMediaBuffer(
      normalized.buffer,
      // Fall back to the requested format when normalization reports no type.
      normalized.contentType ?? `image/${type}`,
      "browser",
      DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
    );
    res.json({
      ok: true,
      path: path.resolve(saved.path),
      targetId: tab.targetId,
      url: tab.url,
      filename: filename || undefined,
    });
  } catch (err) {
    // Known tab-resolution failures get their dedicated status; the rest are 500s.
    const known = ctx.mapTabError(err);
    if (known) {
      jsonError(res, known.status, known.message);
      return;
    }
    jsonError(res, 500, String(err));
  }
});
// POST /eval: evaluate a JavaScript expression in the selected tab and return
// the result by value. Responds 400 when `js` is missing or when the
// evaluation reports exception details.
app.post("/eval", async (req, res) => {
  const body = req.body as {
    js?: unknown;
    targetId?: unknown;
    await?: unknown;
  };
  const js = toStringOrEmpty(body?.js);
  const targetId = toStringOrEmpty(body?.targetId);
  // Parse string flags ("false", "0", "no") the same way the other routes do;
  // plain truthiness would treat every non-empty string as true. Values that
  // toBoolean does not recognize keep the previous truthiness semantics.
  const rawAwait = body?.await;
  const awaitPromise = toBoolean(rawAwait) ?? Boolean(rawAwait);
  if (!js) return jsonError(res, 400, "js is required");
  try {
    const tab = await ctx.ensureTabAvailable(targetId || undefined);
    const evaluated = await evaluateJavaScript({
      wsUrl: tab.wsUrl ?? "",
      expression: js,
      awaitPromise,
      returnByValue: true,
    });
    if (evaluated.exceptionDetails) {
      // Prefer the most descriptive message the evaluation result offers.
      const msg =
        evaluated.exceptionDetails.exception?.description ||
        evaluated.exceptionDetails.text ||
        "JavaScript evaluation failed";
      return jsonError(res, 400, msg);
    }
    res.json({
      ok: true,
      targetId: tab.targetId,
      url: tab.url,
      result: evaluated.result,
    });
  } catch (err) {
    const mapped = ctx.mapTabError(err);
    if (mapped) return jsonError(res, mapped.status, mapped.message);
    jsonError(res, 500, String(err));
  }
});
// GET /query: run a selector query against the selected tab.
// `selector` is mandatory; `targetId` and `limit` are optional query params.
app.get("/query", async (req, res) => {
  const {
    selector: rawSelector,
    targetId: rawTargetId,
    limit: rawLimit,
  } = req.query;
  const selector = typeof rawSelector === "string" ? rawSelector.trim() : "";
  const targetId = typeof rawTargetId === "string" ? rawTargetId.trim() : "";
  const limit = typeof rawLimit === "string" ? Number(rawLimit) : undefined;

  if (!selector) return jsonError(res, 400, "selector is required");

  try {
    const tab = await ctx.ensureTabAvailable(targetId || undefined);
    const matches = await querySelector({
      wsUrl: tab.wsUrl ?? "",
      selector,
      limit,
    });
    res.json({ ok: true, targetId: tab.targetId, url: tab.url, ...matches });
  } catch (err) {
    const known = ctx.mapTabError(err);
    if (known) {
      return jsonError(res, known.status, known.message);
    }
    jsonError(res, 500, String(err));
  }
});
// GET /dom: return the tab's DOM content as "html" (default) or "text",
// optionally scoped to a selector and limited to `maxChars`.
app.get("/dom", async (req, res) => {
  const query = req.query;
  const targetId =
    typeof query.targetId === "string" ? query.targetId.trim() : "";
  // Only an explicit "text" switches away from the HTML default.
  const format = query.format === "text" ? "text" : "html";
  const selector =
    typeof query.selector === "string" ? query.selector.trim() : "";
  const maxChars =
    typeof query.maxChars === "string" ? Number(query.maxChars) : undefined;

  try {
    const tab = await ctx.ensureTabAvailable(targetId || undefined);
    const dom = await getDomText({
      wsUrl: tab.wsUrl ?? "",
      format,
      maxChars,
      selector: selector || undefined,
    });
    res.json({
      ok: true,
      targetId: tab.targetId,
      url: tab.url,
      format,
      ...dom,
    });
  } catch (err) {
    const known = ctx.mapTabError(err);
    if (known) {
      return jsonError(res, known.status, known.message);
    }
    jsonError(res, 500, String(err));
  }
});
// GET /snapshot: produce a snapshot of the selected tab in one of three
// formats: "ai" (via Playwright), "domSnapshot", or "aria" (the default for
// any other value).
app.get("/snapshot", async (req, res) => {
  const targetId =
    typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
  const requestedFormat = req.query.format;
  const format =
    requestedFormat === "domSnapshot" || requestedFormat === "ai"
      ? requestedFormat
      : "aria";
  const limit =
    typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;

  try {
    const tab = await ctx.ensureTabAvailable(targetId || undefined);
    // Each format has its own backend; the response envelope is shared.
    const snap =
      format === "ai"
        ? await snapshotAiViaPlaywright({
            cdpPort: ctx.state().cdpPort,
            targetId: tab.targetId,
          })
        : format === "aria"
          ? await snapshotAria({ wsUrl: tab.wsUrl ?? "", limit })
          : await snapshotDom({ wsUrl: tab.wsUrl ?? "", limit });
    return res.json({
      ok: true,
      format,
      targetId: tab.targetId,
      url: tab.url,
      ...snap,
    });
  } catch (err) {
    const known = ctx.mapTabError(err);
    if (known) {
      return jsonError(res, known.status, known.message);
    }
    jsonError(res, 500, String(err));
  }
});
// POST /click: click the element identified by `ref` in the selected tab.
app.post("/click", async (req, res) => {
  const payload = req.body as { ref?: unknown; targetId?: unknown };
  const ref = toStringOrEmpty(payload?.ref);
  const requestedTarget = toStringOrEmpty(payload?.targetId);

  if (!ref) return jsonError(res, 400, "ref is required");

  try {
    const tab = await ctx.ensureTabAvailable(requestedTarget || undefined);
    const { cdpPort } = ctx.state();
    await clickViaPlaywright({ cdpPort, targetId: tab.targetId, ref });
    res.json({ ok: true, targetId: tab.targetId, url: tab.url });
  } catch (err) {
    const known = ctx.mapTabError(err);
    if (known) {
      return jsonError(res, known.status, known.message);
    }
    jsonError(res, 500, String(err));
  }
});
}

108
src/browser/routes/tabs.ts Normal file
View File

@@ -0,0 +1,108 @@
import type express from "express";
import type { BrowserRouteContext } from "../server-context.js";
import { jsonError, toNumber, toStringOrEmpty } from "./utils.js";
/**
 * Registers the tab-management routes on the given Express app.
 *
 * Routes:
 * - `GET /tabs` — list tabs (`running: false` with no tabs when unreachable).
 * - `POST /tabs/open` — open `url` in a new tab, launching the browser if needed.
 * - `POST /tabs/focus` — activate the tab identified by `targetId`.
 * - `DELETE /tabs/:targetId` — close the tab identified by `targetId`.
 * - `POST /tabs/action` — index-based `list` / `new` / `close` / `select`.
 *
 * @param app Express application to attach the routes to.
 * @param ctx Browser route context providing tab and browser operations.
 */
export function registerBrowserTabRoutes(
  app: express.Express,
  ctx: BrowserRouteContext,
) {
  app.get("/tabs", async (_req, res) => {
    try {
      const reachable = await ctx.isReachable(300);
      if (!reachable)
        return res.json({ running: false, tabs: [] as unknown[] });
      const tabs = await ctx.listTabs();
      res.json({ running: true, tabs });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });

  app.post("/tabs/open", async (req, res) => {
    const url = toStringOrEmpty((req.body as { url?: unknown })?.url);
    if (!url) return jsonError(res, 400, "url is required");
    try {
      await ctx.ensureBrowserAvailable();
      const tab = await ctx.openTab(url);
      res.json(tab);
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });

  app.post("/tabs/focus", async (req, res) => {
    const targetId = toStringOrEmpty(
      (req.body as { targetId?: unknown })?.targetId,
    );
    if (!targetId) return jsonError(res, 400, "targetId is required");
    try {
      if (!(await ctx.isReachable(300)))
        return jsonError(res, 409, "browser not running");
      await ctx.focusTab(targetId);
      res.json({ ok: true });
    } catch (err) {
      const mapped = ctx.mapTabError(err);
      if (mapped) return jsonError(res, mapped.status, mapped.message);
      jsonError(res, 500, String(err));
    }
  });

  app.delete("/tabs/:targetId", async (req, res) => {
    const targetId = toStringOrEmpty(req.params.targetId);
    if (!targetId) return jsonError(res, 400, "targetId is required");
    try {
      if (!(await ctx.isReachable(300)))
        return jsonError(res, 409, "browser not running");
      await ctx.closeTab(targetId);
      res.json({ ok: true });
    } catch (err) {
      const mapped = ctx.mapTabError(err);
      if (mapped) return jsonError(res, mapped.status, mapped.message);
      jsonError(res, 500, String(err));
    }
  });

  app.post("/tabs/action", async (req, res) => {
    const action = toStringOrEmpty((req.body as { action?: unknown })?.action);
    const index = toNumber((req.body as { index?: unknown })?.index);
    try {
      if (action === "list") {
        const reachable = await ctx.isReachable(300);
        if (!reachable) return res.json({ ok: true, tabs: [] as unknown[] });
        const tabs = await ctx.listTabs();
        return res.json({ ok: true, tabs });
      }
      if (action === "new") {
        await ctx.ensureBrowserAvailable();
        const tab = await ctx.openTab("about:blank");
        return res.json({ ok: true, tab });
      }
      if (action === "close") {
        // Match /tabs/focus and DELETE /tabs/:targetId: report a stopped
        // browser as 409 instead of letting listTabs() fail with a generic 500.
        if (!(await ctx.isReachable(300)))
          return jsonError(res, 409, "browser not running");
        const tabs = await ctx.listTabs();
        // Without an index the first listed tab is closed.
        const target = typeof index === "number" ? tabs[index] : tabs.at(0);
        if (!target) return jsonError(res, 404, "tab not found");
        await ctx.closeTab(target.targetId);
        return res.json({ ok: true, targetId: target.targetId });
      }
      if (action === "select") {
        if (typeof index !== "number")
          return jsonError(res, 400, "index is required");
        if (!(await ctx.isReachable(300)))
          return jsonError(res, 409, "browser not running");
        const tabs = await ctx.listTabs();
        const target = tabs[index];
        if (!target) return jsonError(res, 404, "tab not found");
        await ctx.focusTab(target.targetId);
        return res.json({ ok: true, targetId: target.targetId });
      }
      return jsonError(res, 400, "unknown tab action");
    } catch (err) {
      const mapped = ctx.mapTabError(err);
      if (mapped) return jsonError(res, mapped.status, mapped.message);
      jsonError(res, 500, String(err));
    }
  });
}

View File

@@ -0,0 +1,432 @@
import path from "node:path";
import type express from "express";
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
import {
clickViaPlaywright,
closePageViaPlaywright,
dragViaPlaywright,
evaluateViaPlaywright,
fileUploadViaPlaywright,
fillFormViaPlaywright,
handleDialogViaPlaywright,
hoverViaPlaywright,
navigateBackViaPlaywright,
navigateViaPlaywright,
pressKeyViaPlaywright,
resizeViewportViaPlaywright,
runCodeViaPlaywright,
selectOptionViaPlaywright,
snapshotAiViaPlaywright,
takeScreenshotViaPlaywright,
typeViaPlaywright,
waitForViaPlaywright,
} from "../pw-ai.js";
import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
normalizeBrowserScreenshot,
} from "../screenshot.js";
import type { BrowserRouteContext } from "../server-context.js";
import {
jsonError,
toBoolean,
toNumber,
toStringArray,
toStringOrEmpty,
} from "./utils.js";
/** Inputs handed to handleBrowserToolCore for a single tool invocation. */
type ToolCoreParams = {
// Tool name the handler switches on (e.g. "browser_click").
name: string;
// Tool arguments; each case reads and coerces the fields it needs.
args: Record<string, unknown>;
// Requested tab id; an empty string is treated as "no specific tab".
targetId: string;
// CDP port forwarded to the Playwright helper calls.
cdpPort: number;
// Route context used for tab and browser operations.
ctx: BrowserRouteContext;
// Express response the handler writes its JSON reply to.
res: express.Response;
};
/**
 * Dispatches the core set of browser tools by name.
 *
 * Each case validates its arguments, resolves the target tab through
 * `ctx.ensureTabAvailable` where a tab is needed, runs the matching Playwright
 * helper and writes a JSON reply to `res`. Validation failures are answered
 * with a 400 via `jsonError`.
 *
 * @param params Tool name, arguments, requested target id, CDP port, route
 *   context and the Express response to write to.
 * @returns `true` when the tool name was recognized (a response has been
 *   written), `false` when the name is not handled here.
 * @throws Propagates errors from the context and Playwright helpers; this
 *   function does not catch them.
 */
export async function handleBrowserToolCore(
  params: ToolCoreParams,
): Promise<boolean> {
  const { name, args, targetId, cdpPort, ctx, res } = params;
  // An empty target id means "let the context pick a tab".
  const target = targetId || undefined;

  switch (name) {
    case "browser_close": {
      const tab = await ctx.ensureTabAvailable(target);
      await closePageViaPlaywright({ cdpPort, targetId: tab.targetId });
      res.json({ ok: true, targetId: tab.targetId, url: tab.url });
      return true;
    }
    case "browser_resize": {
      const width = toNumber(args.width);
      const height = toNumber(args.height);
      // Zero is rejected together with missing values.
      if (!width || !height) {
        jsonError(res, 400, "width and height are required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      await resizeViewportViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        width,
        height,
      });
      res.json({ ok: true, targetId: tab.targetId, url: tab.url });
      return true;
    }
    case "browser_handle_dialog": {
      const accept = toBoolean(args.accept);
      if (accept === undefined) {
        jsonError(res, 400, "accept is required");
        return true;
      }
      const promptText = toStringOrEmpty(args.promptText) || undefined;
      const tab = await ctx.ensureTabAvailable(target);
      const result = await handleDialogViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        accept,
        promptText,
      });
      res.json({ ok: true, ...result });
      return true;
    }
    case "browser_evaluate": {
      const fn = toStringOrEmpty(args.function);
      if (!fn) {
        jsonError(res, 400, "function is required");
        return true;
      }
      const ref = toStringOrEmpty(args.ref) || undefined;
      const tab = await ctx.ensureTabAvailable(target);
      const result = await evaluateViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        fn,
        ref,
      });
      res.json({ ok: true, result });
      return true;
    }
    case "browser_file_upload": {
      const paths = toStringArray(args.paths) ?? [];
      const tab = await ctx.ensureTabAvailable(target);
      await fileUploadViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        // An empty list is forwarded as "no paths".
        paths: paths.length ? paths : undefined,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    case "browser_fill_form": {
      const fields = Array.isArray(args.fields)
        ? (args.fields as Array<Record<string, unknown>>)
        : null;
      if (!fields?.length) {
        jsonError(res, 400, "fields are required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      await fillFormViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        fields,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    case "browser_install": {
      // Nothing to install; reply with an informational message only.
      res.json({
        ok: true,
        message:
          "clawd browser uses system Chrome/Chromium; no Playwright install needed.",
      });
      return true;
    }
    case "browser_press_key": {
      const key = toStringOrEmpty(args.key);
      if (!key) {
        jsonError(res, 400, "key is required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      await pressKeyViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        key,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    case "browser_type": {
      const ref = toStringOrEmpty(args.ref);
      // Keep string text verbatim: trimming would make it impossible to type
      // leading or trailing whitespace. Non-string values are still coerced.
      const text =
        typeof args.text === "string"
          ? args.text
          : toStringOrEmpty(args.text);
      if (!ref || !text) {
        jsonError(res, 400, "ref and text are required");
        return true;
      }
      const submit = toBoolean(args.submit) ?? false;
      const slowly = toBoolean(args.slowly) ?? false;
      const tab = await ctx.ensureTabAvailable(target);
      await typeViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        ref,
        text,
        submit,
        slowly,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    case "browser_navigate": {
      const url = toStringOrEmpty(args.url);
      if (!url) {
        jsonError(res, 400, "url is required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      const result = await navigateViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        url,
      });
      res.json({ ok: true, targetId: tab.targetId, ...result });
      return true;
    }
    case "browser_navigate_back": {
      const tab = await ctx.ensureTabAvailable(target);
      const result = await navigateBackViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
      });
      res.json({ ok: true, targetId: tab.targetId, ...result });
      return true;
    }
    case "browser_run_code": {
      const code = toStringOrEmpty(args.code);
      if (!code) {
        jsonError(res, 400, "code is required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      const result = await runCodeViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        code,
      });
      res.json({ ok: true, result });
      return true;
    }
    case "browser_take_screenshot": {
      // Anything other than an explicit "jpeg" falls back to PNG.
      const type = args.type === "jpeg" ? "jpeg" : "png";
      const ref = toStringOrEmpty(args.ref) || undefined;
      const fullPage = toBoolean(args.fullPage) ?? false;
      const element = toStringOrEmpty(args.element) || undefined;
      const filename = toStringOrEmpty(args.filename) || undefined;
      const tab = await ctx.ensureTabAvailable(target);
      const snap = await takeScreenshotViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        ref,
        element,
        fullPage,
        type,
      });
      const normalized = await normalizeBrowserScreenshot(snap.buffer, {
        maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
        maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
      });
      await ensureMediaDir();
      const saved = await saveMediaBuffer(
        normalized.buffer,
        normalized.contentType ?? `image/${type}`,
        "browser",
        DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
      );
      res.json({
        ok: true,
        path: path.resolve(saved.path),
        filename,
        targetId: tab.targetId,
        url: tab.url,
      });
      return true;
    }
    case "browser_snapshot": {
      const filename = toStringOrEmpty(args.filename) || undefined;
      const tab = await ctx.ensureTabAvailable(target);
      const snap = await snapshotAiViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
      });
      if (filename) {
        // With a filename the snapshot is stored as a text file and only its
        // path is returned; otherwise the snapshot text is returned inline.
        await ensureMediaDir();
        const saved = await saveMediaBuffer(
          Buffer.from(snap.snapshot, "utf8"),
          "text/plain",
          "browser",
        );
        res.json({
          ok: true,
          path: path.resolve(saved.path),
          filename,
          targetId: tab.targetId,
          url: tab.url,
        });
        return true;
      }
      res.json({
        ok: true,
        snapshot: snap.snapshot,
        targetId: tab.targetId,
        url: tab.url,
      });
      return true;
    }
    case "browser_click": {
      const ref = toStringOrEmpty(args.ref);
      if (!ref) {
        jsonError(res, 400, "ref is required");
        return true;
      }
      const doubleClick = toBoolean(args.doubleClick) ?? false;
      const button = toStringOrEmpty(args.button) || undefined;
      const modifiers = Array.isArray(args.modifiers)
        ? (args.modifiers as string[])
        : undefined;
      const tab = await ctx.ensureTabAvailable(target);
      await clickViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        ref,
        doubleClick,
        button,
        modifiers,
      });
      res.json({ ok: true, targetId: tab.targetId, url: tab.url });
      return true;
    }
    case "browser_drag": {
      const startRef = toStringOrEmpty(args.startRef);
      const endRef = toStringOrEmpty(args.endRef);
      if (!startRef || !endRef) {
        jsonError(res, 400, "startRef and endRef are required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      await dragViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        startRef,
        endRef,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    case "browser_hover": {
      const ref = toStringOrEmpty(args.ref);
      if (!ref) {
        jsonError(res, 400, "ref is required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      await hoverViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        ref,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    case "browser_select_option": {
      const ref = toStringOrEmpty(args.ref);
      const values = toStringArray(args.values);
      if (!ref || !values?.length) {
        jsonError(res, 400, "ref and values are required");
        return true;
      }
      const tab = await ctx.ensureTabAvailable(target);
      await selectOptionViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        ref,
        values,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    case "browser_tabs": {
      const action = toStringOrEmpty(args.action);
      const index = toNumber(args.index);
      if (!action) {
        jsonError(res, 400, "action is required");
        return true;
      }
      if (action === "list") {
        const reachable = await ctx.isReachable(300);
        if (!reachable) {
          res.json({ ok: true, tabs: [] });
          return true;
        }
        const tabs = await ctx.listTabs();
        res.json({ ok: true, tabs });
        return true;
      }
      if (action === "new") {
        await ctx.ensureBrowserAvailable();
        const tab = await ctx.openTab("about:blank");
        res.json({ ok: true, tab });
        return true;
      }
      if (action === "close") {
        // Report a stopped browser explicitly instead of letting listTabs()
        // fail with a generic fetch error.
        if (!(await ctx.isReachable(300))) {
          jsonError(res, 409, "browser not running");
          return true;
        }
        const tabs = await ctx.listTabs();
        // Without an index the first listed tab is closed.
        const targetTab = typeof index === "number" ? tabs[index] : tabs.at(0);
        if (!targetTab) {
          jsonError(res, 404, "tab not found");
          return true;
        }
        await ctx.closeTab(targetTab.targetId);
        res.json({ ok: true, targetId: targetTab.targetId });
        return true;
      }
      if (action === "select") {
        if (typeof index !== "number") {
          jsonError(res, 400, "index is required");
          return true;
        }
        if (!(await ctx.isReachable(300))) {
          jsonError(res, 409, "browser not running");
          return true;
        }
        const tabs = await ctx.listTabs();
        const targetTab = tabs[index];
        if (!targetTab) {
          jsonError(res, 404, "tab not found");
          return true;
        }
        await ctx.focusTab(targetTab.targetId);
        res.json({ ok: true, targetId: targetTab.targetId });
        return true;
      }
      jsonError(res, 400, "unknown tab action");
      return true;
    }
    case "browser_wait_for": {
      const time = toNumber(args.time);
      const text = toStringOrEmpty(args.text) || undefined;
      const textGone = toStringOrEmpty(args.textGone) || undefined;
      const tab = await ctx.ensureTabAvailable(target);
      await waitForViaPlaywright({
        cdpPort,
        targetId: tab.targetId,
        time,
        text,
        textGone,
      });
      res.json({ ok: true, targetId: tab.targetId });
      return true;
    }
    default:
      // Not a core tool; let the caller try other handlers.
      return false;
  }
}

View File

@@ -0,0 +1,262 @@
import path from "node:path";
import type express from "express";
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
import {
generateLocatorForRef,
getConsoleMessagesViaPlaywright,
getNetworkRequestsViaPlaywright,
mouseClickViaPlaywright,
mouseDragViaPlaywright,
mouseMoveViaPlaywright,
pdfViaPlaywright,
startTracingViaPlaywright,
stopTracingViaPlaywright,
verifyElementVisibleViaPlaywright,
verifyListVisibleViaPlaywright,
verifyTextVisibleViaPlaywright,
verifyValueViaPlaywright,
} from "../pw-ai.js";
import type { BrowserRouteContext } from "../server-context.js";
import {
jsonError,
toBoolean,
toNumber,
toStringArray,
toStringOrEmpty,
} from "./utils.js";
/** Inputs handed to handleBrowserToolExtra for a single tool invocation. */
type ToolExtraParams = {
// Tool name the handler switches on (e.g. "browser_pdf_save").
name: string;
// Tool arguments; each case reads and coerces the fields it needs.
args: Record<string, unknown>;
// Requested tab id; an empty string is treated as "no specific tab".
targetId: string;
// CDP port forwarded to the Playwright helper calls.
cdpPort: number;
// Route context used to resolve the target tab.
ctx: BrowserRouteContext;
// Express response the handler writes its JSON reply to.
res: express.Response;
};
/**
 * Dispatches the additional browser tools (console/network inspection, PDF,
 * tracing, verification, coordinate-based mouse actions, locator generation).
 *
 * Each case validates its arguments (answering 400 via jsonError on failure),
 * resolves the target tab where one is needed, calls the matching Playwright
 * helper and writes a JSON reply to `res`.
 *
 * @param params Tool name, arguments, requested target id, CDP port, route
 *   context and the Express response to write to.
 * @returns `true` when the tool name was recognized (a response has been
 *   written), `false` when the name is not handled here.
 * @throws Propagates errors from the context and Playwright helpers; nothing
 *   is caught in this function.
 */
export async function handleBrowserToolExtra(
params: ToolExtraParams,
): Promise<boolean> {
const { name, args, targetId, cdpPort, ctx, res } = params;
// An empty target id means "let the context pick a tab".
const target = targetId || undefined;
switch (name) {
case "browser_console_messages": {
// Optional level argument is forwarded as-is (undefined when empty).
const level = toStringOrEmpty(args.level) || undefined;
const tab = await ctx.ensureTabAvailable(target);
const messages = await getConsoleMessagesViaPlaywright({
cdpPort,
targetId: tab.targetId,
level,
});
res.json({ ok: true, messages, targetId: tab.targetId });
return true;
}
case "browser_network_requests": {
const includeStatic = toBoolean(args.includeStatic) ?? false;
const tab = await ctx.ensureTabAvailable(target);
const requests = await getNetworkRequestsViaPlaywright({
cdpPort,
targetId: tab.targetId,
includeStatic,
});
res.json({ ok: true, requests, targetId: tab.targetId });
return true;
}
case "browser_pdf_save": {
const tab = await ctx.ensureTabAvailable(target);
const pdf = await pdfViaPlaywright({
cdpPort,
targetId: tab.targetId,
});
await ensureMediaDir();
// The buffer's own length is passed as the fourth argument, so the PDF is
// never larger than the limit given to saveMediaBuffer.
const saved = await saveMediaBuffer(
pdf.buffer,
"application/pdf",
"browser",
pdf.buffer.byteLength,
);
res.json({
ok: true,
path: path.resolve(saved.path),
targetId: tab.targetId,
url: tab.url,
});
return true;
}
case "browser_start_tracing": {
const tab = await ctx.ensureTabAvailable(target);
await startTracingViaPlaywright({
cdpPort,
targetId: tab.targetId,
});
res.json({ ok: true });
return true;
}
case "browser_stop_tracing": {
const tab = await ctx.ensureTabAvailable(target);
const trace = await stopTracingViaPlaywright({
cdpPort,
targetId: tab.targetId,
});
await ensureMediaDir();
// Stored as a zip; as with the PDF, the buffer's own length is the limit.
const saved = await saveMediaBuffer(
trace.buffer,
"application/zip",
"browser",
trace.buffer.byteLength,
);
res.json({
ok: true,
path: path.resolve(saved.path),
targetId: tab.targetId,
url: tab.url,
});
return true;
}
case "browser_verify_element_visible": {
const role = toStringOrEmpty(args.role);
const accessibleName = toStringOrEmpty(args.accessibleName);
if (!role || !accessibleName) {
jsonError(res, 400, "role and accessibleName are required");
return true;
}
const tab = await ctx.ensureTabAvailable(target);
await verifyElementVisibleViaPlaywright({
cdpPort,
targetId: tab.targetId,
role,
accessibleName,
});
res.json({ ok: true });
return true;
}
case "browser_verify_text_visible": {
const text = toStringOrEmpty(args.text);
if (!text) {
jsonError(res, 400, "text is required");
return true;
}
const tab = await ctx.ensureTabAvailable(target);
await verifyTextVisibleViaPlaywright({
cdpPort,
targetId: tab.targetId,
text,
});
res.json({ ok: true });
return true;
}
case "browser_verify_list_visible": {
const ref = toStringOrEmpty(args.ref);
const items = toStringArray(args.items);
if (!ref || !items?.length) {
jsonError(res, 400, "ref and items are required");
return true;
}
const tab = await ctx.ensureTabAvailable(target);
await verifyListVisibleViaPlaywright({
cdpPort,
targetId: tab.targetId,
ref,
items,
});
res.json({ ok: true });
return true;
}
case "browser_verify_value": {
const ref = toStringOrEmpty(args.ref);
const type = toStringOrEmpty(args.type);
// value may legitimately be empty, so only ref and type are required.
const value = toStringOrEmpty(args.value);
if (!ref || !type) {
jsonError(res, 400, "ref and type are required");
return true;
}
const tab = await ctx.ensureTabAvailable(target);
await verifyValueViaPlaywright({
cdpPort,
targetId: tab.targetId,
ref,
type,
value,
});
res.json({ ok: true });
return true;
}
case "browser_mouse_move_xy": {
const x = toNumber(args.x);
const y = toNumber(args.y);
// Compare against undefined so a coordinate of 0 is accepted.
if (x === undefined || y === undefined) {
jsonError(res, 400, "x and y are required");
return true;
}
const tab = await ctx.ensureTabAvailable(target);
await mouseMoveViaPlaywright({
cdpPort,
targetId: tab.targetId,
x,
y,
});
res.json({ ok: true });
return true;
}
case "browser_mouse_click_xy": {
const x = toNumber(args.x);
const y = toNumber(args.y);
if (x === undefined || y === undefined) {
jsonError(res, 400, "x and y are required");
return true;
}
const button = toStringOrEmpty(args.button) || undefined;
const tab = await ctx.ensureTabAvailable(target);
await mouseClickViaPlaywright({
cdpPort,
targetId: tab.targetId,
x,
y,
button,
});
res.json({ ok: true });
return true;
}
case "browser_mouse_drag_xy": {
const startX = toNumber(args.startX);
const startY = toNumber(args.startY);
const endX = toNumber(args.endX);
const endY = toNumber(args.endY);
if (
startX === undefined ||
startY === undefined ||
endX === undefined ||
endY === undefined
) {
jsonError(res, 400, "startX, startY, endX, endY are required");
return true;
}
const tab = await ctx.ensureTabAvailable(target);
await mouseDragViaPlaywright({
cdpPort,
targetId: tab.targetId,
startX,
startY,
endX,
endY,
});
res.json({ ok: true });
return true;
}
case "browser_generate_locator": {
const ref = toStringOrEmpty(args.ref);
if (!ref) {
jsonError(res, 400, "ref is required");
return true;
}
// Called synchronously with only the ref; no tab is resolved for this tool.
const locator = generateLocatorForRef(ref);
res.json({ ok: true, locator });
return true;
}
default:
// Not an extra tool; the caller decides how to report the unknown name.
return false;
}
}

View File

@@ -0,0 +1,65 @@
import type express from "express";
import type { BrowserRouteContext } from "../server-context.js";
import { handleBrowserToolCore } from "./tool-core.js";
import { handleBrowserToolExtra } from "./tool-extra.js";
import { jsonError, toStringOrEmpty } from "./utils.js";
/**
 * Untrusted JSON body of a POST /tool request. Every field is `unknown`
 * and is coerced by the route before use.
 */
type ToolRequestBody = {
// Tool name; required by the route after string coercion.
name?: unknown;
// Tool arguments; anything that is not a plain object is replaced by {}.
args?: unknown;
// Optional tab id; the route falls back to args.targetId when this is falsy.
targetId?: unknown;
};
/**
 * Coerces an untrusted `args` payload into a string-keyed record.
 *
 * @param value Raw value taken from the request body.
 * @returns The value itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function toolArgs(value: unknown): Record<string, unknown> {
  const isRecord =
    typeof value === "object" && value !== null && !Array.isArray(value);
  return isRecord ? (value as Record<string, unknown>) : {};
}
/**
 * Registers `POST /tool`, the single entry point that dispatches a named
 * browser tool to the core handler first and the extra handler second.
 *
 * Responds 400 when `name` is missing or unknown, 503 when the browser
 * server state is unavailable, and maps tab errors through `ctx.mapTabError`.
 *
 * @param app Express application to attach the route to.
 * @param ctx Browser route context shared with the tool handlers.
 */
export function registerBrowserToolRoutes(
  app: express.Express,
  ctx: BrowserRouteContext,
) {
  app.post("/tool", async (req, res) => {
    const payload = req.body as ToolRequestBody;
    const name = toStringOrEmpty(payload?.name);
    if (!name) return jsonError(res, 400, "name is required");

    const args = toolArgs(payload?.args);
    // A top-level targetId wins; args.targetId is the fallback.
    const targetId = toStringOrEmpty(payload?.targetId || args?.targetId);

    try {
      let cdpPort: number;
      try {
        // state() throws while the server has not been started.
        cdpPort = ctx.state().cdpPort;
      } catch {
        return jsonError(res, 503, "browser server not started");
      }

      const invocation = { name, args, targetId, cdpPort, ctx, res };
      // Handlers return true once they have written a response.
      for (const handle of [handleBrowserToolCore, handleBrowserToolExtra]) {
        if (await handle(invocation)) return;
      }
      return jsonError(res, 400, "unknown tool name");
    } catch (err) {
      const known = ctx.mapTabError(err);
      if (known) {
        return jsonError(res, known.status, known.message);
      }
      jsonError(res, 500, String(err));
    }
  });
}

View File

@@ -0,0 +1,38 @@
import type express from "express";
/**
 * Sends a JSON error reply of the form `{ error: message }`.
 *
 * @param res Express response to write to.
 * @param status HTTP status code to set.
 * @param message Human-readable error message placed under `error`.
 */
export function jsonError(
  res: express.Response,
  status: number,
  message: string,
) {
  const payload = { error: message };
  res.status(status).json(payload);
}
/**
 * Coerces an untrusted value to a trimmed string.
 *
 * Strings are trimmed; numbers, booleans and bigints are stringified.
 * Everything else (null, undefined, objects, arrays, functions, symbols)
 * yields `""`, so that `String({})`'s "[object Object]" can never slip past
 * an "is required" check as if it were real input.
 *
 * @param value Raw value, typically a field of a parsed JSON body.
 * @returns The trimmed string form, or `""` when there is no usable value.
 */
export function toStringOrEmpty(value: unknown) {
  if (typeof value === "string") return value.trim();
  if (
    typeof value === "number" ||
    typeof value === "boolean" ||
    typeof value === "bigint"
  ) {
    return String(value);
  }
  return "";
}
/**
 * Coerces an untrusted value to a finite number.
 *
 * @param value A number, or a non-blank string parsed with `Number()`.
 * @returns The finite number, or `undefined` for any other input
 *   (including NaN, ±Infinity, blank strings and unparsable text).
 */
export function toNumber(value: unknown) {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    candidate = Number(value);
  } else {
    return undefined;
  }
  return Number.isFinite(candidate) ? candidate : undefined;
}
/**
 * Coerces an untrusted value to a boolean.
 *
 * @param value A boolean, or a string matched case-insensitively (after
 *   trimming) against "true"/"1"/"yes" and "false"/"0"/"no".
 * @returns The boolean, or `undefined` when the value is not recognized.
 */
export function toBoolean(value: unknown) {
  if (typeof value === "boolean") return value;
  if (typeof value !== "string") return undefined;
  switch (value.trim().toLowerCase()) {
    case "true":
    case "1":
    case "yes":
      return true;
    case "false":
    case "0":
    case "no":
      return false;
    default:
      return undefined;
  }
}
/**
 * Coerces an untrusted value to a list of non-empty strings.
 *
 * @param value Expected to be an array; each entry goes through
 *   `toStringOrEmpty` and entries that come out empty are dropped.
 * @returns The remaining strings, or `undefined` when the input is not an
 *   array or nothing survives the filtering.
 */
export function toStringArray(value: unknown): string[] | undefined {
  if (!Array.isArray(value)) return undefined;
  const kept: string[] = [];
  for (const entry of value) {
    const text = toStringOrEmpty(entry);
    if (text) kept.push(text);
  }
  return kept.length > 0 ? kept : undefined;
}

View File

@@ -0,0 +1,272 @@
import type { Server } from "node:http";
import type { RuntimeEnv } from "../runtime.js";
import { createTargetViaCdp } from "./cdp.js";
import {
isChromeReachable,
launchClawdChrome,
type RunningChrome,
stopClawdChrome,
} from "./chrome.js";
import type { ResolvedBrowserConfig } from "./config.js";
import { resolveTargetIdFromTabs } from "./target-id.js";
/** A browser target as listed by the CDP HTTP endpoint, in normalized form. */
export type BrowserTab = {
// Target id (the `id` field of the CDP listing); never empty in listTabs() output.
targetId: string;
// Page title; empty string when the listing omits it.
title: string;
// Current URL; empty string when the listing omits it.
url: string;
// Taken from `webSocketDebuggerUrl`; may be absent.
wsUrl?: string;
// Target type as reported by the listing (e.g. "page").
type?: string;
};
/** State of the browser control server, as returned by BrowserRouteContext.state(). */
export type BrowserServerState = {
// The node:http server instance.
server: Server;
// Port number of the control server — presumably the one `server` listens on; TODO confirm.
port: number;
// Port used to build the http://127.0.0.1:<cdpPort>/json/... CDP endpoint URLs.
cdpPort: number;
// Chrome process launched by this server, or null when none is tracked.
running: RunningChrome | null;
// Resolved browser configuration (e.g. the attachOnly flag).
resolved: ResolvedBrowserConfig;
};
/** Operations the HTTP route handlers use to inspect and drive the browser. */
export type BrowserRouteContext = {
// Returns the current server state; throws when the server has not been started.
state: () => BrowserServerState;
// Ensures a browser is reachable, launching one unless attachOnly is set.
ensureBrowserAvailable: () => Promise<void>;
// Resolves the requested tab (or the first tab when no id is given).
ensureTabAvailable: (targetId?: string) => Promise<BrowserTab>;
// Checks whether the browser answers on the CDP port within the timeout.
isReachable: (timeoutMs?: number) => Promise<boolean>;
// Lists the targets reported by the CDP endpoint.
listTabs: () => Promise<BrowserTab[]>;
// Opens a new tab at the given URL.
openTab: (url: string) => Promise<BrowserTab>;
// Activates the tab matching the given target id.
focusTab: (targetId: string) => Promise<void>;
// Closes the tab matching the given target id.
closeTab: (targetId: string) => Promise<void>;
// Stops the tracked Chrome process; `stopped` is false when none is tracked.
stopRunningBrowser: () => Promise<{ stopped: boolean }>;
// Maps known tab errors to an HTTP status and message; null for anything else.
mapTabError: (err: unknown) => { status: number; message: string } | null;
};
/** Dependencies injected into createBrowserRouteContext. */
type ContextOptions = {
// Runtime environment passed through to launchClawdChrome.
runtime: RuntimeEnv;
// Returns the current server state, or null when the server is not started.
getState: () => BrowserServerState | null;
// Records (or clears, with null) the Chrome process launched by the context.
setRunning: (running: RunningChrome | null) => void;
};
/**
 * Fetches a URL and parses the response body as JSON, aborting the request
 * once `timeoutMs` has elapsed.
 *
 * @param url Absolute URL to request.
 * @param timeoutMs Abort deadline in milliseconds (default 1500).
 * @param init Optional fetch options; its `signal` is replaced by the
 *   internal timeout signal.
 * @returns The parsed body, cast to `T` without runtime validation.
 * @throws Error with message `HTTP <status>` for non-2xx responses; fetch,
 *   abort and JSON parse errors propagate unchanged.
 */
async function fetchJson<T>(
  url: string,
  timeoutMs = 1500,
  init?: RequestInit,
): Promise<T> {
  const ctrl = new AbortController();
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const res = await fetch(url, { ...init, signal: ctrl.signal });
    if (!res.ok) {
      // Release the unread body so the connection is not kept busy until
      // garbage collection; a failure to cancel must not mask the HTTP error.
      await res.body?.cancel().catch(() => {});
      throw new Error(`HTTP ${res.status}`);
    }
    return (await res.json()) as T;
  } finally {
    // Always clear the timer so it cannot fire after completion.
    clearTimeout(t);
  }
}
/**
 * Fetches a URL and only checks that the response status is 2xx, aborting
 * the request once `timeoutMs` has elapsed. The body is discarded.
 *
 * @param url Absolute URL to request.
 * @param timeoutMs Abort deadline in milliseconds (default 1500).
 * @param init Optional fetch options; its `signal` is replaced by the
 *   internal timeout signal.
 * @throws Error with message `HTTP <status>` for non-2xx responses; fetch
 *   and abort errors propagate unchanged.
 */
async function fetchOk(
  url: string,
  timeoutMs = 1500,
  init?: RequestInit,
): Promise<void> {
  const ctrl = new AbortController();
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const res = await fetch(url, { ...init, signal: ctrl.signal });
    // The body is never read here, so cancel it explicitly to release the
    // connection instead of waiting for garbage collection. A failure to
    // cancel must not change the outcome of the status check.
    await res.body?.cancel().catch(() => {});
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
  } finally {
    // Always clear the timer so it cannot fire after completion.
    clearTimeout(t);
  }
}
export function createBrowserRouteContext(
opts: ContextOptions,
): BrowserRouteContext {
const state = () => {
const current = opts.getState();
if (!current) throw new Error("Browser server not started");
return current;
};
const listTabs = async (): Promise<BrowserTab[]> => {
const current = state();
const raw = await fetchJson<
Array<{
id?: string;
title?: string;
url?: string;
webSocketDebuggerUrl?: string;
type?: string;
}>
>(`http://127.0.0.1:${current.cdpPort}/json/list`);
return raw
.map((t) => ({
targetId: t.id ?? "",
title: t.title ?? "",
url: t.url ?? "",
wsUrl: t.webSocketDebuggerUrl,
type: t.type,
}))
.filter((t) => Boolean(t.targetId));
};
const openTab = async (url: string): Promise<BrowserTab> => {
const current = state();
const createdViaCdp = await createTargetViaCdp({
cdpPort: current.cdpPort,
url,
})
.then((r) => r.targetId)
.catch(() => null);
if (createdViaCdp) {
const deadline = Date.now() + 2000;
while (Date.now() < deadline) {
const tabs = await listTabs().catch(() => [] as BrowserTab[]);
const found = tabs.find((t) => t.targetId === createdViaCdp);
if (found) return found;
await new Promise((r) => setTimeout(r, 100));
}
return { targetId: createdViaCdp, title: "", url, type: "page" };
}
const encoded = encodeURIComponent(url);
type CdpTarget = {
id?: string;
title?: string;
url?: string;
webSocketDebuggerUrl?: string;
type?: string;
};
const endpoint = `http://127.0.0.1:${current.cdpPort}/json/new?${encoded}`;
const created = await fetchJson<CdpTarget>(endpoint, 1500, {
method: "PUT",
}).catch(async (err) => {
if (String(err).includes("HTTP 405")) {
return await fetchJson<CdpTarget>(endpoint, 1500);
}
throw err;
});
if (!created.id) throw new Error("Failed to open tab (missing id)");
return {
targetId: created.id,
title: created.title ?? "",
url: created.url ?? url,
wsUrl: created.webSocketDebuggerUrl,
type: created.type,
};
};
const isReachable = async (timeoutMs = 300) => {
const current = state();
return await isChromeReachable(current.cdpPort, timeoutMs);
};
const ensureBrowserAvailable = async (): Promise<void> => {
const current = state();
if (await isReachable()) return;
if (current.resolved.attachOnly) {
throw new Error(
"Browser attachOnly is enabled and no browser is running.",
);
}
const launched = await launchClawdChrome(current.resolved, opts.runtime);
opts.setRunning(launched);
launched.proc.on("exit", () => {
const live = opts.getState();
if (live?.running?.pid === launched.pid) {
opts.setRunning(null);
}
});
};
const ensureTabAvailable = async (targetId?: string): Promise<BrowserTab> => {
await ensureBrowserAvailable();
const tabs1 = await listTabs();
if (tabs1.length === 0) {
await openTab("about:blank");
}
const tabs = await listTabs();
const chosen = targetId
? (() => {
const resolved = resolveTargetIdFromTabs(targetId, tabs);
if (!resolved.ok) {
if (resolved.reason === "ambiguous") return "AMBIGUOUS" as const;
return null;
}
return tabs.find((t) => t.targetId === resolved.targetId) ?? null;
})()
: (tabs.at(0) ?? null);
if (chosen === "AMBIGUOUS") {
throw new Error("ambiguous target id prefix");
}
if (!chosen?.wsUrl) throw new Error("tab not found");
return chosen;
};
/**
 * Activate a tab through the `/json/activate/<id>` HTTP endpoint on the
 * CDP port.
 *
 * @param targetId - Target id passed to `resolveTargetIdFromTabs`.
 * @throws Error("ambiguous target id prefix") or Error("tab not found")
 *   when the id cannot be resolved against the current tab list.
 */
const focusTab = async (targetId: string): Promise<void> => {
  const current = state();
  const resolved = resolveTargetIdFromTabs(targetId, await listTabs());
  if (!resolved.ok) {
    const reason =
      resolved.reason === "ambiguous"
        ? "ambiguous target id prefix"
        : "tab not found";
    throw new Error(reason);
  }
  const activateUrl = `http://127.0.0.1:${current.cdpPort}/json/activate/${resolved.targetId}`;
  await fetchOk(activateUrl);
};
/**
 * Close a tab through the `/json/close/<id>` HTTP endpoint on the CDP port.
 *
 * @param targetId - Target id passed to `resolveTargetIdFromTabs`.
 * @throws Error("ambiguous target id prefix") or Error("tab not found")
 *   when the id cannot be resolved against the current tab list.
 */
const closeTab = async (targetId: string): Promise<void> => {
  const current = state();
  const openTabs = await listTabs();
  const resolved = resolveTargetIdFromTabs(targetId, openTabs);
  if (resolved.ok) {
    await fetchOk(
      `http://127.0.0.1:${current.cdpPort}/json/close/${resolved.targetId}`,
    );
    return;
  }
  if (resolved.reason === "ambiguous") {
    throw new Error("ambiguous target id prefix");
  }
  throw new Error("tab not found");
};
/**
 * Stop the tracked browser process, if there is one.
 *
 * @returns `{ stopped: false }` when no process is tracked, otherwise
 *   `{ stopped: true }` after `stopClawdChrome` resolved and the running
 *   entry was cleared.
 */
const stopRunningBrowser = async (): Promise<{ stopped: boolean }> => {
  const { running } = state();
  if (!running) return { stopped: false };
  await stopClawdChrome(running);
  opts.setRunning(null);
  return { stopped: true };
};
/**
 * Translate a tab-resolution failure into an HTTP status and message.
 *
 * Matching is done on the stringified error, so any value whose text
 * contains one of the known messages is recognised. The ambiguous-prefix
 * message is checked first.
 *
 * @param err - Any thrown value.
 * @returns `{ status, message }` for a known failure, otherwise `null`.
 */
const mapTabError = (err: unknown) => {
  const text = String(err);
  // Order matters: the first entry whose message occurs in the text wins.
  const knownFailures = [
    { status: 409, message: "ambiguous target id prefix" },
    { status: 404, message: "tab not found" },
  ];
  return knownFailures.find((entry) => text.includes(entry.message)) ?? null;
};
return {
state,
ensureBrowserAvailable,
ensureTabAvailable,
isReachable,
listTabs,
openTab,
focusTab,
closeTab,
stopRunningBrowser,
mapTabError,
};
}

View File

@@ -98,8 +98,45 @@ vi.mock("./cdp.js", () => ({
// Replace every export of ./pw-ai.js used by these tests with a vi.fn stub.
// Each stub resolves with a small canned value (or nothing); only
// generateLocatorForRef is synchronous and echoes the ref it was given.
vi.mock("./pw-ai.js", () => ({
clickRefViaPlaywright: vi.fn(async () => {}),
clickViaPlaywright: vi.fn(async () => {}),
closePageViaPlaywright: vi.fn(async () => {}),
closePlaywrightBrowserConnection: vi.fn(async () => {}),
evaluateViaPlaywright: vi.fn(async () => "ok"),
fileUploadViaPlaywright: vi.fn(async () => {}),
fillFormViaPlaywright: vi.fn(async () => {}),
// Synchronous stub: builds the locator string from the ref argument.
generateLocatorForRef: vi.fn((ref: string) => `locator('aria-ref=${ref}')`),
getConsoleMessagesViaPlaywright: vi.fn(async () => []),
getNetworkRequestsViaPlaywright: vi.fn(async () => []),
handleDialogViaPlaywright: vi.fn(async () => ({
message: "ok",
type: "alert",
})),
hoverViaPlaywright: vi.fn(async () => {}),
mouseClickViaPlaywright: vi.fn(async () => {}),
mouseDragViaPlaywright: vi.fn(async () => {}),
mouseMoveViaPlaywright: vi.fn(async () => {}),
navigateBackViaPlaywright: vi.fn(async () => ({ url: "about:blank" })),
navigateViaPlaywright: vi.fn(async () => ({ url: "https://example.com" })),
// Binary-producing helpers resolve with a tiny placeholder buffer.
pdfViaPlaywright: vi.fn(async () => ({ buffer: Buffer.from("pdf") })),
pressKeyViaPlaywright: vi.fn(async () => {}),
resizeViewportViaPlaywright: vi.fn(async () => {}),
runCodeViaPlaywright: vi.fn(async () => "ok"),
selectOptionViaPlaywright: vi.fn(async () => {}),
snapshotAiViaPlaywright: vi.fn(async () => ({ snapshot: "ok" })),
startTracingViaPlaywright: vi.fn(async () => {}),
stopTracingViaPlaywright: vi.fn(async () => ({
buffer: Buffer.from("trace"),
})),
takeScreenshotViaPlaywright: vi.fn(async () => ({
buffer: Buffer.from("png"),
})),
typeViaPlaywright: vi.fn(async () => {}),
verifyElementVisibleViaPlaywright: vi.fn(async () => {}),
verifyListVisibleViaPlaywright: vi.fn(async () => {}),
verifyTextVisibleViaPlaywright: vi.fn(async () => {}),
verifyValueViaPlaywright: vi.fn(async () => {}),
waitForViaPlaywright: vi.fn(async () => {}),
dragViaPlaywright: vi.fn(async () => {}),
}));
vi.mock("../media/store.js", () => ({

View File

@@ -1,221 +1,22 @@
import type { Server } from "node:http";
import path from "node:path";
import express from "express";
import { loadConfig } from "../config/config.js";
import { logError, logInfo, logWarn } from "../logger.js";
import { ensureMediaDir, saveMediaBuffer } from "../media/store.js";
import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import {
captureScreenshot,
captureScreenshotPng,
createTargetViaCdp,
evaluateJavaScript,
getDomText,
querySelector,
snapshotAria,
snapshotDom,
} from "./cdp.js";
import {
isChromeReachable,
launchClawdChrome,
type RunningChrome,
stopClawdChrome,
} from "./chrome.js";
import {
resolveBrowserConfig,
shouldStartLocalBrowserServer,
} from "./config.js";
import { closePlaywrightBrowserConnection } from "./pw-ai.js";
import { registerBrowserRoutes } from "./routes/index.js";
import {
clickRefViaPlaywright,
closePlaywrightBrowserConnection,
snapshotAiViaPlaywright,
} from "./pw-ai.js";
import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
normalizeBrowserScreenshot,
} from "./screenshot.js";
import { resolveTargetIdFromTabs } from "./target-id.js";
/** One browser target as returned by the tab helpers in this module. */
export type BrowserTab = {
// Target id; populated from the `id` field of the CDP target listing.
targetId: string;
// Page title; empty string when the listing has none.
title: string;
// Current URL; empty string when the listing has none.
url: string;
// Populated from `webSocketDebuggerUrl`; may be absent.
wsUrl?: string;
// Target type as reported by the listing (e.g. "page"); may be absent.
type?: string;
};
/** Mutable state kept for the browser control server. */
type BrowserServerState = {
// The node:http server instance.
server: Server;
// NOTE(review): presumably the control server's listen port — confirm.
port: number;
// Port used for the http://127.0.0.1:<cdpPort>/json/* requests.
cdpPort: number;
// The launched Chrome process, or null when none is tracked.
running: RunningChrome | null;
// Result of resolveBrowserConfig.
resolved: ReturnType<typeof resolveBrowserConfig>;
};
type BrowserServerState,
createBrowserRouteContext,
} from "./server-context.js";
let state: BrowserServerState | null = null;
/**
 * Send a JSON error response of the form `{ error: message }`.
 *
 * @param res - Express response to write to.
 * @param status - HTTP status code to set.
 * @param message - Text placed in the `error` field.
 */
function jsonError(res: express.Response, status: number, message: string) {
  const body = { error: message };
  res.status(status).json(body);
}
/**
 * Fetch a URL and parse the response body as JSON.
 *
 * The request is aborted once `timeoutMs` elapses.
 *
 * @param url - URL to request.
 * @param timeoutMs - Abort timeout in milliseconds (defaults to 1500).
 * @param init - Extra fetch options; its `signal` is overridden by the
 *   timeout signal.
 * @returns The parsed body, cast to `T` without validation.
 * @throws Error(`HTTP <status>`) when the response is not ok.
 */
async function fetchJson<T>(
  url: string,
  timeoutMs = 1500,
  init?: RequestInit,
): Promise<T> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, { ...init, signal: controller.signal });
    if (response.ok) {
      return (await response.json()) as T;
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    // Always release the timer, whether the request succeeded or not.
    clearTimeout(timer);
  }
}
/**
 * Fetch a URL and only check that the response status is ok; the body is
 * never read.
 *
 * @param url - URL to request.
 * @param timeoutMs - Abort timeout in milliseconds (defaults to 1500).
 * @param init - Extra fetch options; its `signal` is overridden by the
 *   timeout signal.
 * @throws Error(`HTTP <status>`) when the response is not ok.
 */
async function fetchOk(
  url: string,
  timeoutMs = 1500,
  init?: RequestInit,
): Promise<void> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const { ok, status } = await fetch(url, {
      ...init,
      signal: controller.signal,
    });
    if (!ok) throw new Error(`HTTP ${status}`);
  } finally {
    clearTimeout(timer);
  }
}
/**
 * List the targets reported by `/json/list` on the given CDP port.
 *
 * Entries without an id are dropped; missing titles and URLs become
 * empty strings.
 *
 * @param cdpPort - Port of the Chrome DevTools HTTP endpoint on 127.0.0.1.
 */
async function listTabs(cdpPort: number): Promise<BrowserTab[]> {
  type RawTarget = {
    id?: string;
    title?: string;
    url?: string;
    webSocketDebuggerUrl?: string;
    type?: string;
  };
  const targets = await fetchJson<RawTarget[]>(
    `http://127.0.0.1:${cdpPort}/json/list`,
  );
  const tabs: BrowserTab[] = [];
  for (const target of targets) {
    const targetId = target.id ?? "";
    if (!targetId) continue;
    tabs.push({
      targetId,
      title: target.title ?? "",
      url: target.url ?? "",
      wsUrl: target.webSocketDebuggerUrl,
      type: target.type,
    });
  }
  return tabs;
}
/**
 * Open a new tab at `url`.
 *
 * First tries `createTargetViaCdp`; on success the tab list is polled for
 * up to two seconds so the full tab record can be returned. If that
 * attempt fails, the `/json/new` HTTP endpoint is used instead.
 *
 * @param cdpPort - Port of the Chrome DevTools HTTP endpoint on 127.0.0.1.
 * @param url - URL to open.
 * @throws Error("Failed to open tab (missing id)") when `/json/new`
 *   answers without an id.
 */
async function openTab(cdpPort: number, url: string): Promise<BrowserTab> {
  // Prefer CDP websocket Target.createTarget (more stable across Chrome versions),
  // then fall back to /json/new for older/quirky builds.
  const cdpTargetId = await createTargetViaCdp({ cdpPort, url })
    .then((created) => created.targetId)
    .catch(() => null);

  if (cdpTargetId) {
    const pause = () => new Promise((wake) => setTimeout(wake, 100));
    const giveUpAt = Date.now() + 2000;
    while (Date.now() < giveUpAt) {
      // Listing errors are treated as "not there yet".
      const tabs = await listTabs(cdpPort).catch(() => [] as BrowserTab[]);
      const match = tabs.find((t) => t.targetId === cdpTargetId);
      if (match) return match;
      await pause();
    }
    // The tab never showed up in the listing; return a minimal record.
    return { targetId: cdpTargetId, title: "", url, type: "page" };
  }

  type CdpTarget = {
    id?: string;
    title?: string;
    url?: string;
    webSocketDebuggerUrl?: string;
    type?: string;
  };
  // Chrome changed /json/new to require PUT (older versions allowed GET).
  const endpoint = `http://127.0.0.1:${cdpPort}/json/new?${encodeURIComponent(url)}`;
  let created: CdpTarget;
  try {
    created = await fetchJson<CdpTarget>(endpoint, 1500, { method: "PUT" });
  } catch (err) {
    if (!String(err).includes("HTTP 405")) throw err;
    created = await fetchJson<CdpTarget>(endpoint, 1500);
  }

  if (!created.id) throw new Error("Failed to open tab (missing id)");
  return {
    targetId: created.id,
    title: created.title ?? "",
    url: created.url ?? url,
    wsUrl: created.webSocketDebuggerUrl,
    type: created.type,
  };
}
/**
 * Activate a target through the `/json/activate/<id>` HTTP endpoint.
 *
 * @param cdpPort - Port of the Chrome DevTools HTTP endpoint on 127.0.0.1.
 * @param targetId - Id of the target to activate.
 */
async function activateTab(cdpPort: number, targetId: string): Promise<void> {
  // Chrome returns plain text ("Target activated") with an application/json content-type.
  const activateUrl = `http://127.0.0.1:${cdpPort}/json/activate/${targetId}`;
  await fetchOk(activateUrl);
}
/**
 * Close a target through the `/json/close/<id>` HTTP endpoint.
 *
 * @param cdpPort - Port of the Chrome DevTools HTTP endpoint on 127.0.0.1.
 * @param targetId - Id of the target to close.
 */
async function closeTab(cdpPort: number, targetId: string): Promise<void> {
  // Chrome returns plain text ("Target is closing") with an application/json content-type.
  const closeUrl = `http://127.0.0.1:${cdpPort}/json/close/${targetId}`;
  await fetchOk(closeUrl);
}
/**
 * Make sure Chrome answers on the CDP port, launching it if needed.
 *
 * @param runtime - Runtime environment forwarded to `launchClawdChrome`.
 * @throws Error("Browser server not started") when no server state exists.
 * @throws Error when `attachOnly` is enabled and no browser is reachable.
 */
async function ensureBrowserAvailable(runtime: RuntimeEnv): Promise<void> {
  if (!state) throw new Error("Browser server not started");

  const alreadyUp = await isChromeReachable(state.cdpPort);
  if (alreadyUp) return;

  if (state.resolved.attachOnly) {
    throw new Error("Browser attachOnly is enabled and no browser is running.");
  }

  const chrome = await launchClawdChrome(state.resolved, runtime);
  state.running = chrome;
  // Drop the reference on exit, but only if it still points at this process.
  chrome.proc.on("exit", () => {
    if (state?.running?.pid === chrome.pid) {
      state.running = null;
    }
  });
}
/**
 * Resolve the tab that a request should run against.
 *
 * Ensures the browser is available, opens `about:blank` when no tabs are
 * listed, then picks either the tab matching `targetId` or the first
 * listed tab.
 *
 * @param runtime - Runtime environment forwarded to `ensureBrowserAvailable`.
 * @param targetId - Optional target id passed to `resolveTargetIdFromTabs`.
 * @throws Error("Browser server not started") when no server state exists.
 * @throws Error("ambiguous target id prefix") when resolution is ambiguous.
 * @throws Error("tab not found") when nothing matches or the tab has no
 *   `wsUrl`.
 */
async function ensureTabAvailable(runtime: RuntimeEnv, targetId?: string) {
  if (!state) throw new Error("Browser server not started");
  await ensureBrowserAvailable(runtime);

  const initialTabs = await listTabs(state.cdpPort);
  if (initialTabs.length === 0) {
    await openTab(state.cdpPort, "about:blank");
  }
  // Always re-list so a freshly opened tab is included.
  const tabs = await listTabs(state.cdpPort);

  let chosen: BrowserTab | null;
  if (targetId) {
    const resolved = resolveTargetIdFromTabs(targetId, tabs);
    if (resolved.ok) {
      chosen = tabs.find((t) => t.targetId === resolved.targetId) ?? null;
    } else {
      if (resolved.reason === "ambiguous") {
        throw new Error("ambiguous target id prefix");
      }
      chosen = null;
    }
  } else {
    chosen = tabs.at(0) ?? null;
  }

  if (!chosen?.wsUrl) throw new Error("tab not found");
  return chosen;
}
export async function startBrowserControlServerFromConfig(
runtime: RuntimeEnv = defaultRuntime,
): Promise<BrowserServerState | null> {
@@ -236,378 +37,14 @@ export async function startBrowserControlServerFromConfig(
const app = express();
app.use(express.json({ limit: "1mb" }));
app.get("/", async (_req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const reachable = await isChromeReachable(state.cdpPort, 300);
res.json({
enabled: state.resolved.enabled,
controlUrl: state.resolved.controlUrl,
running: reachable,
pid: state.running?.pid ?? null,
cdpPort: state.cdpPort,
chosenBrowser: state.running?.exe.kind ?? null,
userDataDir: state.running?.userDataDir ?? null,
color: state.resolved.color,
headless: state.resolved.headless,
attachOnly: state.resolved.attachOnly,
});
});
app.post("/start", async (_req, res) => {
try {
await ensureBrowserAvailable(runtime);
res.json({ ok: true });
} catch (err) {
jsonError(res, 500, String(err));
}
});
app.post("/stop", async (_req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
if (!state.running) return res.json({ ok: true, stopped: false });
try {
await stopClawdChrome(state.running);
state.running = null;
res.json({ ok: true, stopped: true });
} catch (err) {
jsonError(res, 500, String(err));
}
});
app.get("/tabs", async (_req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const reachable = await isChromeReachable(state.cdpPort, 300);
if (!reachable)
return res.json({ running: false, tabs: [] as BrowserTab[] });
try {
const tabs = await listTabs(state.cdpPort);
res.json({ running: true, tabs });
} catch (err) {
jsonError(res, 500, String(err));
}
});
app.post("/tabs/open", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const url = String((req.body as { url?: unknown })?.url ?? "").trim();
if (!url) return jsonError(res, 400, "url is required");
try {
await ensureBrowserAvailable(runtime);
const tab = await openTab(state.cdpPort, url);
res.json(tab);
} catch (err) {
jsonError(res, 500, String(err));
}
});
app.post("/tabs/focus", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const targetId = String(
(req.body as { targetId?: unknown })?.targetId ?? "",
).trim();
if (!targetId) return jsonError(res, 400, "targetId is required");
const reachable = await isChromeReachable(state.cdpPort, 300);
if (!reachable) return jsonError(res, 409, "browser not running");
try {
const tabs = await listTabs(state.cdpPort);
const resolved = resolveTargetIdFromTabs(targetId, tabs);
if (!resolved.ok) {
if (resolved.reason === "ambiguous") {
return jsonError(res, 409, "ambiguous target id prefix");
}
return jsonError(res, 404, "tab not found");
}
await activateTab(state.cdpPort, resolved.targetId);
res.json({ ok: true });
} catch (err) {
jsonError(res, 500, String(err));
}
});
app.delete("/tabs/:targetId", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const targetId = String(req.params.targetId ?? "").trim();
if (!targetId) return jsonError(res, 400, "targetId is required");
const reachable = await isChromeReachable(state.cdpPort, 300);
if (!reachable) return jsonError(res, 409, "browser not running");
try {
const tabs = await listTabs(state.cdpPort);
const resolved = resolveTargetIdFromTabs(targetId, tabs);
if (!resolved.ok) {
if (resolved.reason === "ambiguous") {
return jsonError(res, 409, "ambiguous target id prefix");
}
return jsonError(res, 404, "tab not found");
}
await closeTab(state.cdpPort, resolved.targetId);
res.json({ ok: true });
} catch (err) {
jsonError(res, 500, String(err));
}
});
app.get("/screenshot", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const targetId =
typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const fullPage =
req.query.fullPage === "true" || req.query.fullPage === "1";
const reachable = await isChromeReachable(state.cdpPort, 300);
if (!reachable) return jsonError(res, 409, "browser not running");
try {
const tabs = await listTabs(state.cdpPort);
const chosen = targetId
? (() => {
const resolved = resolveTargetIdFromTabs(targetId, tabs);
if (!resolved.ok) {
if (resolved.reason === "ambiguous") {
return "AMBIGUOUS" as const;
}
return null;
}
return tabs.find((t) => t.targetId === resolved.targetId) ?? null;
})()
: (tabs.at(0) ?? null);
if (chosen === "AMBIGUOUS") {
return jsonError(res, 409, "ambiguous target id prefix");
}
if (!chosen?.wsUrl) return jsonError(res, 404, "tab not found");
let shot: Buffer<ArrayBufferLike> = Buffer.alloc(0);
let contentTypeHint: "image/jpeg" | "image/png" = "image/jpeg";
try {
shot = await captureScreenshot({
wsUrl: chosen.wsUrl,
fullPage,
format: "jpeg",
quality: 85,
});
} catch {
contentTypeHint = "image/png";
shot = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage });
}
const normalized = await normalizeBrowserScreenshot(shot, {
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
});
await ensureMediaDir();
const saved = await saveMediaBuffer(
normalized.buffer,
normalized.contentType ?? contentTypeHint,
"browser",
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
);
const filePath = path.resolve(saved.path);
res.json({
ok: true,
path: filePath,
targetId: chosen.targetId,
url: chosen.url,
});
} catch (err) {
jsonError(res, 500, String(err));
}
});
/**
 * Translate a tab-resolution failure into an HTTP status and message.
 *
 * The stringified error is searched for each known message in turn; the
 * ambiguous-prefix message is checked first.
 *
 * @param err - Any thrown value.
 * @returns `{ status, message }` for a known failure, otherwise `null`.
 */
function mapTabError(err: unknown) {
  const text = String(err);
  const knownFailures: Array<[string, number]> = [
    ["ambiguous target id prefix", 409],
    ["tab not found", 404],
  ];
  for (const [message, status] of knownFailures) {
    if (text.includes(message)) return { status, message };
  }
  return null;
}
app.post("/eval", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const js = String((req.body as { js?: unknown })?.js ?? "").trim();
const targetId = String(
(req.body as { targetId?: unknown })?.targetId ?? "",
).trim();
const awaitPromise = Boolean((req.body as { await?: unknown })?.await);
if (!js) return jsonError(res, 400, "js is required");
try {
const tab = await ensureTabAvailable(runtime, targetId || undefined);
const evaluated = await evaluateJavaScript({
wsUrl: tab.wsUrl ?? "",
expression: js,
awaitPromise,
returnByValue: true,
});
if (evaluated.exceptionDetails) {
const msg =
evaluated.exceptionDetails.exception?.description ||
evaluated.exceptionDetails.text ||
"JavaScript evaluation failed";
return jsonError(res, 400, msg);
}
res.json({
ok: true,
targetId: tab.targetId,
url: tab.url,
result: evaluated.result,
});
} catch (err) {
const mapped = mapTabError(err);
if (mapped) return jsonError(res, mapped.status, mapped.message);
jsonError(res, 500, String(err));
}
});
app.get("/query", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const selector =
typeof req.query.selector === "string" ? req.query.selector.trim() : "";
const targetId =
typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const limit =
typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
if (!selector) return jsonError(res, 400, "selector is required");
try {
const tab = await ensureTabAvailable(runtime, targetId || undefined);
const result = await querySelector({
wsUrl: tab.wsUrl ?? "",
selector,
limit,
});
res.json({ ok: true, targetId: tab.targetId, url: tab.url, ...result });
} catch (err) {
const mapped = mapTabError(err);
if (mapped) return jsonError(res, mapped.status, mapped.message);
jsonError(res, 500, String(err));
}
});
app.get("/dom", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const targetId =
typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const format = req.query.format === "text" ? "text" : "html";
const selector =
typeof req.query.selector === "string" ? req.query.selector.trim() : "";
const maxChars =
typeof req.query.maxChars === "string"
? Number(req.query.maxChars)
: undefined;
try {
const tab = await ensureTabAvailable(runtime, targetId || undefined);
const result = await getDomText({
wsUrl: tab.wsUrl ?? "",
format,
maxChars,
selector: selector || undefined,
});
res.json({
ok: true,
targetId: tab.targetId,
url: tab.url,
format,
...result,
});
} catch (err) {
const mapped = mapTabError(err);
if (mapped) return jsonError(res, mapped.status, mapped.message);
jsonError(res, 500, String(err));
}
});
app.get("/snapshot", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const targetId =
typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const format =
req.query.format === "domSnapshot"
? "domSnapshot"
: req.query.format === "ai"
? "ai"
: "aria";
const limit =
typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
try {
const tab = await ensureTabAvailable(runtime, targetId || undefined);
if (format === "ai") {
const snap = await snapshotAiViaPlaywright({
cdpPort: state.cdpPort,
targetId: tab.targetId,
});
return res.json({
ok: true,
format,
targetId: tab.targetId,
url: tab.url,
...snap,
});
}
if (format === "aria") {
const snap = await snapshotAria({
wsUrl: tab.wsUrl ?? "",
limit,
});
return res.json({
ok: true,
format,
targetId: tab.targetId,
url: tab.url,
...snap,
});
}
const snap = await snapshotDom({
wsUrl: tab.wsUrl ?? "",
limit,
});
return res.json({
ok: true,
format,
targetId: tab.targetId,
url: tab.url,
...snap,
});
} catch (err) {
const mapped = mapTabError(err);
if (mapped) return jsonError(res, mapped.status, mapped.message);
jsonError(res, 500, String(err));
}
});
app.post("/click", async (req, res) => {
if (!state) return jsonError(res, 503, "browser server not started");
const ref = String((req.body as { ref?: unknown })?.ref ?? "").trim();
const targetId = String(
(req.body as { targetId?: unknown })?.targetId ?? "",
).trim();
if (!ref) return jsonError(res, 400, "ref is required");
try {
const tab = await ensureTabAvailable(runtime, targetId || undefined);
await clickRefViaPlaywright({
cdpPort: state.cdpPort,
targetId: tab.targetId,
ref,
});
res.json({ ok: true, targetId: tab.targetId, url: tab.url });
} catch (err) {
const mapped = mapTabError(err);
if (mapped) return jsonError(res, mapped.status, mapped.message);
jsonError(res, 500, String(err));
}
const ctx = createBrowserRouteContext({
runtime,
getState: () => state,
setRunning: (running) => {
if (state) state.running = running;
},
});
registerBrowserRoutes(app, ctx);
const port = resolved.controlPort;
const server = await new Promise<Server>((resolve, reject) => {
@@ -616,6 +53,7 @@ export async function startBrowserControlServerFromConfig(
}).catch((err) => {
logError(
`clawd browser server failed to bind 127.0.0.1:${port}: ${String(err)}`,
runtime,
);
return null;
});
@@ -639,19 +77,27 @@ export async function startBrowserControlServerFromConfig(
export async function stopBrowserControlServer(
runtime: RuntimeEnv = defaultRuntime,
) {
if (!state) return;
): Promise<void> {
const current = state;
state = null;
if (!current) return;
const ctx = createBrowserRouteContext({
runtime,
getState: () => state,
setRunning: (running) => {
if (state) state.running = running;
},
});
try {
await closePlaywrightBrowserConnection();
if (current.running) {
await stopClawdChrome(current.running).catch((err) =>
logWarn(`clawd browser stop failed: ${String(err)}`, runtime),
);
}
} catch {
// ignore
await ctx.stopRunningBrowser();
} catch (err) {
logWarn(`clawd browser stop failed: ${String(err)}`, runtime);
}
await new Promise<void>((resolve) => current.server.close(() => resolve()));
await new Promise<void>((resolve) => {
current.server.close(() => resolve());
});
state = null;
await closePlaywrightBrowserConnection();
}

482
src/cli/browser-cli.ts Normal file
View File

@@ -0,0 +1,482 @@
import type { Command } from "commander";
import {
browserClickRef,
browserCloseTab,
browserDom,
browserEval,
browserFocusTab,
browserOpenTab,
browserQuery,
browserScreenshot,
browserSnapshot,
browserStart,
browserStatus,
browserStop,
browserTabs,
browserTool,
resolveBrowserControlUrl,
} from "../browser/client.js";
import { danger, info } from "../globals.js";
import { defaultRuntime } from "../runtime.js";
export function registerBrowserCli(program: Command) {
// Usage examples appended after the generated help of `clawdis browser`.
const browserHelpExamples = `
Examples:
clawdis browser status
clawdis browser start
clawdis browser tabs
clawdis browser open https://example.com
clawdis browser screenshot # emits MEDIA:<path>
clawdis browser screenshot <targetId> --full-page
clawdis browser eval "location.href"
clawdis browser query "a" --limit 5
clawdis browser dom --format text --max-chars 5000
clawdis browser snapshot --format aria --limit 200
clawdis browser snapshot --format ai
clawdis browser click 76
clawdis browser tool browser_file_upload --args '{"paths":["/tmp/file.txt"]}'
`;

// Root `browser` command. `--url` and `--json` are declared here and read
// by the subcommands. Running it without a subcommand is an error.
const browser = program
  .command("browser")
  .description("Manage clawd's dedicated browser (Chrome/Chromium)")
  .option(
    "--url <url>",
    "Override browser control URL (default from ~/.clawdis/clawdis.json)",
  )
  .option("--json", "Output machine-readable JSON", false)
  .addHelpText("after", browserHelpExamples)
  .action(() => {
    const hint = danger('Missing subcommand. Try: "clawdis browser status"');
    defaultRuntime.error(hint);
    defaultRuntime.exit(1);
  });
/**
 * Read the options of a subcommand's parent command.
 *
 * The result is `undefined` at runtime when the command has no parent or
 * the parent has no `opts` function, so callers use optional chaining.
 */
const parentOpts = (cmd: Command) => {
  const inherited = cmd.parent?.opts?.();
  return inherited as { url?: string; json?: boolean };
};
// `browser status`: print the control server's status, either as JSON or
// as a short key/value listing.
browser
  .command("status")
  .description("Show browser status")
  .action(async (_opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    try {
      const status = await browserStatus(controlUrl);
      if (globals?.json) {
        defaultRuntime.log(JSON.stringify(status, null, 2));
        return;
      }
      const summary = [
        `enabled: ${status.enabled}`,
        `running: ${status.running}`,
        `controlUrl: ${status.controlUrl}`,
        `cdpPort: ${status.cdpPort}`,
        `browser: ${status.chosenBrowser ?? "unknown"}`,
        `profileColor: ${status.color}`,
      ];
      defaultRuntime.log(summary.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser start`: ask the control server to start the browser, then
// report the status fetched afterwards.
browser
  .command("start")
  .description("Start the clawd browser (no-op if already running)")
  .action(async (_opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    try {
      await browserStart(controlUrl);
      const status = await browserStatus(controlUrl);
      const output = globals?.json
        ? JSON.stringify(status, null, 2)
        : info(`🦞 clawd browser running: ${status.running}`);
      defaultRuntime.log(output);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser stop`: ask the control server to stop the browser, then report
// the status fetched afterwards.
browser
  .command("stop")
  .description("Stop the clawd browser (best-effort)")
  .action(async (_opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    try {
      await browserStop(controlUrl);
      const status = await browserStatus(controlUrl);
      const output = globals?.json
        ? JSON.stringify(status, null, 2)
        : info(`🦞 clawd browser running: ${status.running}`);
      defaultRuntime.log(output);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser tabs`: list open tabs as JSON or as a numbered listing.
browser
  .command("tabs")
  .description("List open tabs")
  .action(async (_opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    try {
      const tabs = await browserTabs(controlUrl);
      if (globals?.json) {
        defaultRuntime.log(JSON.stringify({ tabs }, null, 2));
        return;
      }
      if (tabs.length === 0) {
        defaultRuntime.log("No tabs (browser closed or no targets).");
        return;
      }
      const listing = tabs.map(
        (t, i) =>
          `${i + 1}. ${t.title || "(untitled)"}\n ${t.url}\n id: ${t.targetId}`,
      );
      defaultRuntime.log(listing.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser open <url>`: open a URL in a new tab.
browser
  .command("open")
  .description("Open a URL in a new tab")
  .argument("<url>", "URL to open")
  // Commander calls the action with (url, options, command). The options
  // slot must be declared, otherwise `cmd` would receive the options object
  // and the parent's --url/--json flags would be silently ignored.
  .action(async (url: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const tab = await browserOpenTab(baseUrl, url);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify(tab, null, 2));
        return;
      }
      defaultRuntime.log(`opened: ${tab.url}\nid: ${tab.targetId}`);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser focus <targetId>`: bring a tab to the foreground.
browser
  .command("focus")
  .description("Focus a tab by target id (or unique prefix)")
  .argument("<targetId>", "Target id or unique prefix")
  // Commander calls the action with (targetId, options, command). The
  // options slot must be declared, otherwise `cmd` would receive the options
  // object and the parent's --url/--json flags would be silently ignored.
  .action(async (targetId: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserFocusTab(baseUrl, targetId);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify({ ok: true }, null, 2));
        return;
      }
      defaultRuntime.log(`focused tab ${targetId}`);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser close <targetId>`: close a tab.
browser
  .command("close")
  .description("Close a tab by target id (or unique prefix)")
  .argument("<targetId>", "Target id or unique prefix")
  // Commander calls the action with (targetId, options, command). The
  // options slot must be declared, otherwise `cmd` would receive the options
  // object and the parent's --url/--json flags would be silently ignored.
  .action(async (targetId: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserCloseTab(baseUrl, targetId);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify({ ok: true }, null, 2));
        return;
      }
      defaultRuntime.log(`closed tab ${targetId}`);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser screenshot [targetId]`: capture a screenshot and print either
// the JSON result or a `MEDIA:<path>` line.
browser
  .command("screenshot")
  .description("Capture a screenshot (MEDIA:<path>)")
  .argument("[targetId]", "CDP target id (or unique prefix)")
  .option("--full-page", "Capture full scrollable page", false)
  .action(async (targetId: string | undefined, opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    try {
      const shot = await browserScreenshot(controlUrl, {
        // A blank or missing id means "let the server pick the tab".
        targetId: targetId?.trim() || undefined,
        fullPage: Boolean(opts.fullPage),
      });
      const output = globals?.json
        ? JSON.stringify(shot, null, 2)
        : `MEDIA:${shot.path}`;
      defaultRuntime.log(output);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser eval <js>`: evaluate JavaScript in a tab and print the result.
browser
  .command("eval")
  .description("Run JavaScript in the active tab")
  .argument("<js>", "JavaScript expression")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--await", "Await promise result", false)
  .action(async (js: string, opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    try {
      const evaluated = await browserEval(controlUrl, {
        js,
        targetId: opts.targetId?.trim() || undefined,
        awaitPromise: Boolean(opts.await),
      });
      // --json prints the whole response, otherwise only the result field.
      const payload = globals?.json ? evaluated : evaluated.result;
      defaultRuntime.log(JSON.stringify(payload, null, 2));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser query <selector>`: print the elements matching a CSS selector.
browser
  .command("query")
  .description("Query selector matches")
  .argument("<selector>", "CSS selector")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--limit <n>", "Max matches (default: 20)", (v: string) =>
    Number(v),
  )
  .action(async (selector: string, opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    try {
      const found = await browserQuery(controlUrl, {
        selector,
        targetId: opts.targetId?.trim() || undefined,
        // A missing or non-numeric --limit is not sent at all.
        limit: Number.isFinite(opts.limit) ? opts.limit : undefined,
      });
      // --json prints the whole response, otherwise only the matches.
      const payload = globals?.json ? found : found.matches;
      defaultRuntime.log(JSON.stringify(payload, null, 2));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser dom`: dump the page DOM as html or text, to stdout or a file.
browser
  .command("dom")
  .description("Dump DOM (html or text) with truncation")
  .option("--format <html|text>", "Output format (default: html)", "html")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--selector <css>", "Optional CSS selector to scope the dump")
  .option(
    "--max-chars <n>",
    "Max characters (default: 200000)",
    (v: string) => Number(v),
  )
  .option("--out <path>", "Write output to a file")
  .action(async (opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    // Anything other than "text" is treated as "html".
    const format = opts.format === "text" ? "text" : "html";
    try {
      const dump = await browserDom(controlUrl, {
        format,
        targetId: opts.targetId?.trim() || undefined,
        maxChars: Number.isFinite(opts.maxChars) ? opts.maxChars : undefined,
        selector: opts.selector?.trim() || undefined,
      });

      if (opts.out) {
        const { writeFile } = await import("node:fs/promises");
        await writeFile(opts.out, dump.text, "utf8");
        // After writing, report the destination path only.
        defaultRuntime.log(
          globals?.json
            ? JSON.stringify({ ok: true, out: opts.out }, null, 2)
            : opts.out,
        );
        return;
      }

      defaultRuntime.log(
        globals?.json ? JSON.stringify(dump, null, 2) : dump.text,
      );
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser snapshot`: capture a snapshot in one of three formats and print
// it or write it to a file.
browser
  .command("snapshot")
  .description("Capture an AI-friendly snapshot (aria, domSnapshot, or ai)")
  .option(
    "--format <aria|domSnapshot|ai>",
    "Snapshot format (default: aria)",
    "aria",
  )
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--limit <n>", "Max nodes (default: 500/800)", (v: string) =>
    Number(v),
  )
  .option("--out <path>", "Write snapshot to a file")
  .action(async (opts, cmd) => {
    const globals = parentOpts(cmd);
    const controlUrl = resolveBrowserControlUrl(globals?.url);
    // Unrecognised format values fall back to "aria".
    let format: "aria" | "domSnapshot" | "ai" = "aria";
    if (opts.format === "domSnapshot") {
      format = "domSnapshot";
    } else if (opts.format === "ai") {
      format = "ai";
    }
    try {
      const snap = await browserSnapshot(controlUrl, {
        format,
        targetId: opts.targetId?.trim() || undefined,
        limit: Number.isFinite(opts.limit) ? opts.limit : undefined,
      });

      if (opts.out) {
        const { writeFile } = await import("node:fs/promises");
        // "ai" snapshots are written as their raw text; the other formats
        // are written as pretty-printed JSON.
        const contents =
          snap.format === "ai" ? snap.snapshot : JSON.stringify(snap, null, 2);
        await writeFile(opts.out, contents, "utf8");
        defaultRuntime.log(
          globals?.json
            ? JSON.stringify({ ok: true, out: opts.out }, null, 2)
            : opts.out,
        );
        return;
      }

      if (globals?.json) {
        defaultRuntime.log(JSON.stringify(snap, null, 2));
        return;
      }
      if (snap.format === "ai") {
        defaultRuntime.log(snap.snapshot);
        return;
      }
      if (snap.format === "domSnapshot") {
        defaultRuntime.log(JSON.stringify(snap, null, 2));
        return;
      }

      // Remaining case: render the node list as an indented outline.
      const nodes = "nodes" in snap ? snap.nodes : [];
      const outline = nodes.map((node) => {
        const pad = " ".repeat(Math.min(20, node.depth));
        const label = node.name ? ` "${node.name}"` : "";
        const current = node.value ? ` = "${node.value}"` : "";
        return `${pad}- ${node.role}${label}${current}`;
      });
      defaultRuntime.log(outline.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser click <ref>`: click the element identified by a ref taken from an
// "ai" snapshot and report the URL contained in the response.
browser
  .command("click")
  .description("Click an element by ref from an ai snapshot (e.g. 76)")
  .argument("<ref>", "Ref id from ai snapshot")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .action(async (ref: string, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const clicked = await browserClickRef(baseUrl, {
        ref,
        targetId: opts.targetId?.trim() || undefined,
      });
      const message = parent?.json
        ? JSON.stringify(clicked, null, 2)
        : `clicked ref ${ref} on ${clicked.url}`;
      defaultRuntime.log(message);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser tool <name>`: forward a tool call (name + JSON arguments) to the
// browser control server and print the response as JSON.
//
// --args must be a JSON object; when omitted, an empty object is sent.
browser
  .command("tool")
  .description("Call a Playwright MCP-style browser tool by name")
  .argument("<name>", "Tool name (browser_*)")
  .option("--args <json>", "JSON arguments for the tool")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .action(async (name: string, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    let args: Record<string, unknown> = {};
    if (opts.args) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(String(opts.args));
      } catch (err) {
        defaultRuntime.error(
          danger(`Invalid JSON for --args: ${String(err)}`),
        );
        defaultRuntime.exit(1);
        // Return explicitly so the tool call is never dispatched after a
        // parse failure, even if exit() returns.
        return;
      }
      // JSON.parse also accepts arrays, numbers, strings and null; only a
      // plain object matches the Record<string, unknown> argument contract.
      if (
        typeof parsed !== "object" ||
        parsed === null ||
        Array.isArray(parsed)
      ) {
        defaultRuntime.error(
          danger("Invalid JSON for --args: expected a JSON object"),
        );
        defaultRuntime.exit(1);
        return;
      }
      args = parsed as Record<string, unknown>;
    }
    try {
      const result = await browserTool(baseUrl, {
        name,
        args,
        targetId: opts.targetId?.trim() || undefined,
      });
      // The output is JSON whether or not --json was passed.
      defaultRuntime.log(JSON.stringify(result, null, 2));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
}

View File

@@ -1,31 +1,16 @@
import chalk from "chalk";
import { Command } from "commander";
import {
browserClickRef,
browserCloseTab,
browserDom,
browserEval,
browserFocusTab,
browserOpenTab,
browserQuery,
browserScreenshot,
browserSnapshot,
browserStart,
browserStatus,
browserStop,
browserTabs,
resolveBrowserControlUrl,
} from "../browser/client.js";
import { agentCommand } from "../commands/agent.js";
import { healthCommand } from "../commands/health.js";
import { sendCommand } from "../commands/send.js";
import { sessionsCommand } from "../commands/sessions.js";
import { setupCommand } from "../commands/setup.js";
import { statusCommand } from "../commands/status.js";
import { danger, info, setVerbose } from "../globals.js";
import { danger, setVerbose } from "../globals.js";
import { loginWeb, logoutWeb } from "../provider-web.js";
import { defaultRuntime } from "../runtime.js";
import { VERSION } from "../version.js";
import { registerBrowserCli } from "./browser-cli.js";
import { registerCanvasCli } from "./canvas-cli.js";
import { registerCronCli } from "./cron-cli.js";
import { createDefaultDeps } from "./deps.js";
@@ -363,493 +348,7 @@ Shows token usage per session when the agent reports it; set inbound.agent.conte
);
});
// Parent `browser` command. It declares the --url and --json options and
// appends usage examples to its help output.
const browser = program
.command("browser")
.description("Manage clawd's dedicated browser (Chrome/Chromium)")
.option(
"--url <url>",
"Override browser control URL (default from ~/.clawdis/clawdis.json)",
)
.option("--json", "Output machine-readable JSON", false)
.addHelpText(
"after",
`
Examples:
clawdis browser status
clawdis browser start
clawdis browser tabs
clawdis browser open https://example.com
clawdis browser screenshot # emits MEDIA:<path>
clawdis browser screenshot <targetId> --full-page
clawdis browser eval "location.href"
clawdis browser query "a" --limit 5
clawdis browser dom --format text --max-chars 5000
clawdis browser snapshot --format aria --limit 200
clawdis browser snapshot --format ai
clawdis browser click 76
`,
)
// The parent command's own action: report a missing subcommand and exit
// with status 1.
.action(() => {
defaultRuntime.error(
danger('Missing subcommand. Try: "clawdis browser status"'),
);
defaultRuntime.exit(1);
});
// Returns the parsed options of a command's parent (typed here as the
// optional `url` and `json` values). The result is undefined at runtime when
// the command has no parent, so callers read it with `?.`.
const parentOpts = (cmd: Command) => {
  const inherited = cmd.parent?.opts?.();
  return inherited as { url?: string; json?: boolean };
};
// `browser status`: print the status reported by the control server, either
// as JSON (--json) or as one "key: value" line per field.
browser
  .command("status")
  .description("Show browser status")
  .action(async (_opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const status = await browserStatus(baseUrl);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify(status, null, 2));
        return;
      }
      const fields = [
        ["enabled", status.enabled],
        ["running", status.running],
        ["controlUrl", status.controlUrl],
        ["cdpPort", status.cdpPort],
        ["browser", status.chosenBrowser ?? "unknown"],
        ["profileColor", status.color],
      ];
      const report = fields
        .map(([label, value]) => `${label}: ${value}`)
        .join("\n");
      defaultRuntime.log(report);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser start`: ask the control server to start the browser, then print
// the status fetched afterwards.
browser
  .command("start")
  .description("Start the clawd browser (no-op if already running)")
  .action(async (_opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserStart(baseUrl);
      const status = await browserStatus(baseUrl);
      if (!parent?.json) {
        defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`));
        return;
      }
      defaultRuntime.log(JSON.stringify(status, null, 2));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser stop`: ask the control server to stop the browser, then print the
// status fetched afterwards.
browser
  .command("stop")
  .description("Stop the clawd browser (best-effort)")
  .action(async (_opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserStop(baseUrl);
      const status = await browserStatus(baseUrl);
      if (!parent?.json) {
        defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`));
        return;
      }
      defaultRuntime.log(JSON.stringify(status, null, 2));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser tabs`: list the tabs returned by the control server, as JSON
// (--json) or as a numbered list with title, URL and target id.
browser
  .command("tabs")
  .description("List open tabs")
  .action(async (_opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const tabs = await browserTabs(baseUrl);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify({ tabs }, null, 2));
        return;
      }
      if (tabs.length === 0) {
        defaultRuntime.log("No tabs (browser closed or no targets).");
        return;
      }
      const entries: string[] = [];
      tabs.forEach((t, i) => {
        entries.push(
          `${i + 1}. ${t.title || "(untitled)"}\n ${t.url}\n id: ${t.targetId}`,
        );
      });
      defaultRuntime.log(entries.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser open <url>`: open a URL in a new tab and print the resulting tab.
browser
  .command("open")
  .description("Open a URL in a new tab")
  .argument("<url>", "URL to open")
  // Commander invokes the handler with (url, options, command). The options
  // slot must be declared so `cmd` is the Command; otherwise the parent's
  // --url/--json values cannot be looked up.
  .action(async (url: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const tab = await browserOpenTab(baseUrl, url);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify(tab, null, 2));
        return;
      }
      defaultRuntime.log(`opened: ${tab.url}\nid: ${tab.targetId}`);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser focus <targetId>`: ask the control server to focus a tab; prints
// `{ ok: true }` with --json, otherwise "ok".
browser
  .command("focus")
  .description("Focus/activate a tab by target id")
  .argument("<targetId>", "CDP target id")
  // Commander invokes the handler with (targetId, options, command). The
  // options slot must be declared so `cmd` is the Command; otherwise the
  // parent's --url/--json values cannot be looked up.
  .action(async (targetId: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserFocusTab(baseUrl, targetId);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify({ ok: true }, null, 2));
        return;
      }
      defaultRuntime.log("ok");
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser close <targetId>`: ask the control server to close a tab; prints
// `{ ok: true }` with --json, otherwise "ok".
browser
  .command("close")
  .description("Close a tab by target id")
  .argument("<targetId>", "CDP target id")
  // Commander invokes the handler with (targetId, options, command). The
  // options slot must be declared so `cmd` is the Command; otherwise the
  // parent's --url/--json values cannot be looked up.
  .action(async (targetId: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserCloseTab(baseUrl, targetId);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify({ ok: true }, null, 2));
        return;
      }
      defaultRuntime.log("ok");
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser screenshot [targetId]`: capture a screenshot and print either the
// JSON response (--json) or a `MEDIA:<path>` line.
browser
  .command("screenshot")
  .description("Capture a screenshot (defaults to first tab)")
  .argument("[targetId]", "CDP target id")
  .option("--full-page", "Capture full page (best-effort)", false)
  .action(async (targetId: string | undefined, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const shot = await browserScreenshot(baseUrl, {
        targetId: targetId?.trim() || undefined,
        fullPage: Boolean(opts.fullPage),
      });
      // The MEDIA: token lets the agent forward the image as an attachment.
      const output = parent?.json
        ? JSON.stringify(shot, null, 2)
        : `MEDIA:${shot.path}`;
      defaultRuntime.log(output);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser eval [js]`: evaluate JavaScript in the page. The script comes from
// the positional argument, --js-file, or --js-stdin (the last two are mutually
// exclusive). Usage errors exit with status 2; I/O and evaluation failures
// exit with status 1.
browser
  .command("eval")
  .description("Evaluate JavaScript in the page context")
  .argument("[js]", "JavaScript expression (or use --js-file/--js-stdin)")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--await", "Await promises (Runtime.evaluate awaitPromise)", false)
  .option("--js-file <path>", "Read JavaScript from a file")
  .option("--js-stdin", "Read JavaScript from stdin", false)
  .action(async (jsArg: string | undefined, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    let js = jsArg?.trim() ?? "";
    if (opts.jsFile && opts.jsStdin) {
      defaultRuntime.error(danger("Use either --js-file or --js-stdin."));
      defaultRuntime.exit(2);
      return;
    }
    // Load the script inside a try/catch so an unreadable file or a stdin
    // error is reported like every other failure instead of rejecting the
    // action handler.
    try {
      if (opts.jsFile) {
        const fs = await import("node:fs/promises");
        js = await fs.readFile(opts.jsFile, "utf8");
      } else if (opts.jsStdin) {
        js = await new Promise<string>((resolve, reject) => {
          let buf = "";
          process.stdin.setEncoding("utf8");
          process.stdin.on("data", (c) => {
            buf += c;
          });
          process.stdin.on("end", () => resolve(buf));
          process.stdin.on("error", (e) => reject(e));
        });
      }
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
      return;
    }
    if (!js.trim()) {
      defaultRuntime.error(
        danger("Missing JavaScript. Pass <js> or use --js-file/--js-stdin."),
      );
      defaultRuntime.exit(2);
      return;
    }
    try {
      const result = await browserEval(baseUrl, {
        js,
        targetId: opts.targetId?.trim() || undefined,
        awaitPromise: Boolean(opts.await),
      });
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify(result, null, 2));
        return;
      }
      const v = result.result;
      // Prefer the concrete value when the result carries one; strings are
      // printed verbatim, everything else as JSON.
      if (Object.hasOwn(v, "value")) {
        const value = (v as { value?: unknown }).value;
        defaultRuntime.log(
          typeof value === "string" ? value : JSON.stringify(value, null, 2),
        );
        return;
      }
      // No value: fall back to the description, then to the raw result.
      defaultRuntime.log(v.description ?? JSON.stringify(v, null, 2));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser query <selector>`: list elements matching a CSS selector. Output
// is JSON with --json or --format json; otherwise one `index. <tag#id.cls>`
// header per match, followed by the match text when present.
browser
  .command("query")
  .description("Query elements by CSS selector")
  .argument("<selector>", "CSS selector")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--limit <n>", "Max matches (default: 20)", (v: string) =>
    Number(v),
  )
  .option(
    "--format <text|json>",
    "Text output format (default: text)",
    "text",
  )
  .action(async (selector: string, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const found = await browserQuery(baseUrl, {
        selector,
        targetId: opts.targetId?.trim() || undefined,
        // A non-numeric --limit parses to NaN and is dropped here.
        limit: Number.isFinite(opts.limit) ? opts.limit : undefined,
      });
      const wantsJson = parent?.json || opts.format === "json";
      if (wantsJson) {
        defaultRuntime.log(JSON.stringify(found, null, 2));
        return;
      }
      if (!found.matches.length) {
        defaultRuntime.log("No matches.");
        return;
      }
      const rendered: string[] = [];
      for (const m of found.matches) {
        const idPart = m.id ? `#${m.id}` : "";
        let classPart = "";
        if (m.className) {
          // Show at most the first three class names.
          const firstThree = m.className
            .split(/\s+/)
            .filter(Boolean)
            .slice(0, 3);
          classPart = `.${firstThree.join(".")}`;
        }
        const header = `${m.index}. <${m.tag}${idPart}${classPart}>`;
        const body = m.text ? `\n ${m.text}` : "";
        rendered.push(`${header}${body}`);
      }
      defaultRuntime.log(rendered.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser dom`: dump the page (or a selector-scoped part of it) as html or
// text, writing to --out when given and printing otherwise.
browser
  .command("dom")
  .description("Dump DOM (html or text) with truncation")
  .option("--format <html|text>", "Output format (default: html)", "html")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--selector <css>", "Optional CSS selector to scope the dump")
  .option(
    "--max-chars <n>",
    "Max characters (default: 200000)",
    (v: string) => Number(v),
  )
  .option("--out <path>", "Write output to a file")
  .action(async (opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    // Anything other than "text" is treated as "html".
    const format = opts.format === "text" ? "text" : "html";
    try {
      const dump = await browserDom(baseUrl, {
        format,
        targetId: opts.targetId?.trim() || undefined,
        // A non-numeric --max-chars parses to NaN and is dropped here.
        maxChars: Number.isFinite(opts.maxChars) ? opts.maxChars : undefined,
        selector: opts.selector?.trim() || undefined,
      });
      if (!opts.out) {
        defaultRuntime.log(
          parent?.json ? JSON.stringify(dump, null, 2) : dump.text,
        );
        return;
      }
      const fs = await import("node:fs/promises");
      await fs.writeFile(opts.out, dump.text, "utf8");
      defaultRuntime.log(
        parent?.json
          ? JSON.stringify({ ok: true, out: opts.out }, null, 2)
          : opts.out,
      );
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser snapshot`: request a page snapshot and either write it to --out or
// print it. "ai" snapshots are emitted as raw text, "domSnapshot" as JSON, and
// "aria" as an indented outline of role/name/value.
browser
  .command("snapshot")
  .description("Capture an AI-friendly snapshot (aria, domSnapshot, or ai)")
  .option(
    "--format <aria|domSnapshot|ai>",
    "Snapshot format (default: aria)",
    "aria",
  )
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option("--limit <n>", "Max nodes (default: 500/800)", (v: string) =>
    Number(v),
  )
  .option("--out <path>", "Write snapshot to a file")
  .action(async (opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    // Any value other than the two explicit alternatives falls back to "aria".
    let format: "aria" | "domSnapshot" | "ai" = "aria";
    if (opts.format === "domSnapshot") {
      format = "domSnapshot";
    } else if (opts.format === "ai") {
      format = "ai";
    }
    try {
      const snap = await browserSnapshot(baseUrl, {
        format,
        targetId: opts.targetId?.trim() || undefined,
        // A non-numeric --limit parses to NaN and is dropped here.
        limit: Number.isFinite(opts.limit) ? opts.limit : undefined,
      });
      const asJson = () => JSON.stringify(snap, null, 2);

      if (opts.out) {
        const fs = await import("node:fs/promises");
        const contents = snap.format === "ai" ? snap.snapshot : asJson();
        await fs.writeFile(opts.out, contents, "utf8");
        defaultRuntime.log(
          parent?.json
            ? JSON.stringify({ ok: true, out: opts.out }, null, 2)
            : opts.out,
        );
        return;
      }

      // --json and the domSnapshot format both print the whole response.
      if (parent?.json || snap.format === "domSnapshot") {
        defaultRuntime.log(asJson());
        return;
      }
      if (snap.format === "ai") {
        defaultRuntime.log(snap.snapshot);
        return;
      }

      // aria text rendering: one outline line per node.
      const nodes = "nodes" in snap ? snap.nodes : [];
      const lines: string[] = [];
      for (const node of nodes) {
        const pad = " ".repeat(Math.min(20, node.depth));
        const label = node.name ? ` "${node.name}"` : "";
        const val = node.value ? ` = "${node.value}"` : "";
        lines.push(`${pad}- ${node.role}${label}${val}`);
      }
      defaultRuntime.log(lines.join("\n"));
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `browser click <ref>`: click the element identified by a ref taken from an
// "ai" snapshot and report the URL contained in the response.
browser
  .command("click")
  .description("Click an element by ref from an ai snapshot (e.g. 76)")
  .argument("<ref>", "Ref id from ai snapshot")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .action(async (ref: string, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const clicked = await browserClickRef(baseUrl, {
        ref,
        targetId: opts.targetId?.trim() || undefined,
      });
      const message = parent?.json
        ? JSON.stringify(clicked, null, 2)
        : `clicked ref ${ref} on ${clicked.url}`;
      defaultRuntime.log(message);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
registerBrowserCli(program);
return program;
}