From fa54950d2ee17bb03c95d6222e77b0db8f98926f Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 19 Dec 2025 23:57:26 +0000 Subject: [PATCH] feat(browser): add MCP tool dispatch --- src/browser/client.ts | 25 ++ src/browser/pw-ai.test.ts | 5 +- src/browser/pw-ai.ts | 196 +++------- src/browser/pw-session.ts | 218 +++++++++++ src/browser/pw-tools-core.ts | 393 ++++++++++++++++++++ src/browser/pw-tools-observe.ts | 200 ++++++++++ src/browser/routes/basic.ts | 50 +++ src/browser/routes/index.ts | 17 + src/browser/routes/inspect.ts | 307 +++++++++++++++ src/browser/routes/tabs.ts | 108 ++++++ src/browser/routes/tool-core.ts | 432 +++++++++++++++++++++ src/browser/routes/tool-extra.ts | 262 +++++++++++++ src/browser/routes/tool.ts | 65 ++++ src/browser/routes/utils.ts | 38 ++ src/browser/server-context.ts | 272 ++++++++++++++ src/browser/server.test.ts | 37 ++ src/browser/server.ts | 620 ++----------------------------- src/cli/browser-cli.ts | 482 ++++++++++++++++++++++++ src/cli/program.ts | 507 +------------------------ 19 files changed, 2991 insertions(+), 1243 deletions(-) create mode 100644 src/browser/pw-session.ts create mode 100644 src/browser/pw-tools-core.ts create mode 100644 src/browser/pw-tools-observe.ts create mode 100644 src/browser/routes/basic.ts create mode 100644 src/browser/routes/index.ts create mode 100644 src/browser/routes/inspect.ts create mode 100644 src/browser/routes/tabs.ts create mode 100644 src/browser/routes/tool-core.ts create mode 100644 src/browser/routes/tool-extra.ts create mode 100644 src/browser/routes/tool.ts create mode 100644 src/browser/routes/utils.ts create mode 100644 src/browser/server-context.ts create mode 100644 src/cli/browser-cli.ts diff --git a/src/browser/client.ts b/src/browser/client.ts index 670fb2b5a..8d766d86a 100644 --- a/src/browser/client.ts +++ b/src/browser/client.ts @@ -21,6 +21,11 @@ export type BrowserTab = { type?: string; }; +export type BrowserToolResponse = { + ok: true; + [key: string]: 
unknown; +}; + export type ScreenshotResult = { ok: true; path: string; @@ -354,3 +359,23 @@ export async function browserClickRef( }, ); } + +export async function browserTool( + baseUrl: string, + opts: { + name: string; + args?: Record; + targetId?: string; + }, +): Promise { + return await fetchJson(`${baseUrl}/tool`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + name: opts.name, + args: opts.args ?? {}, + targetId: opts.targetId, + }), + timeoutMs: 20000, + }); +} diff --git a/src/browser/pw-ai.test.ts b/src/browser/pw-ai.test.ts index 787a8a66f..a3c3eac75 100644 --- a/src/browser/pw-ai.test.ts +++ b/src/browser/pw-ai.test.ts @@ -28,11 +28,13 @@ function createPage(opts: { }; const click = vi.fn().mockResolvedValue(undefined); - const locator = vi.fn().mockReturnValue({ click }); + const dblclick = vi.fn().mockResolvedValue(undefined); + const locator = vi.fn().mockReturnValue({ click, dblclick }); const page = { context: () => context, locator, + on: vi.fn(), ...(opts.hasSnapshotForAI === false ? 
{} : { @@ -48,6 +50,7 @@ function createPage(opts: { function createBrowser(pages: unknown[]) { const ctx = { pages: () => pages, + on: vi.fn(), }; return { contexts: () => [ctx], diff --git a/src/browser/pw-ai.ts b/src/browser/pw-ai.ts index 57d8168dd..ca0f9bada 100644 --- a/src/browser/pw-ai.ts +++ b/src/browser/pw-ai.ts @@ -1,153 +1,47 @@ -import type { Browser, Page } from "playwright-core"; -import { chromium } from "playwright-core"; +export { + type BrowserConsoleMessage, + type BrowserNetworkRequest, + closePlaywrightBrowserConnection, + ensurePageState, + getPageForTargetId, + refLocator, + type WithSnapshotForAI, +} from "./pw-session.js"; -type SnapshotForAIResult = { full: string; incremental?: string }; -type SnapshotForAIOptions = { timeout?: number; track?: string }; +export { + clickRefViaPlaywright, + clickViaPlaywright, + closePageViaPlaywright, + dragViaPlaywright, + evaluateViaPlaywright, + fileUploadViaPlaywright, + fillFormViaPlaywright, + handleDialogViaPlaywright, + hoverViaPlaywright, + navigateBackViaPlaywright, + navigateViaPlaywright, + pdfViaPlaywright, + pressKeyViaPlaywright, + resizeViewportViaPlaywright, + runCodeViaPlaywright, + selectOptionViaPlaywright, + snapshotAiViaPlaywright, + takeScreenshotViaPlaywright, + typeViaPlaywright, + waitForViaPlaywright, +} from "./pw-tools-core.js"; -type WithSnapshotForAI = { - _snapshotForAI?: ( - options?: SnapshotForAIOptions, - ) => Promise; -}; - -type TargetInfoResponse = { - targetInfo?: { - targetId?: string; - }; -}; - -type ConnectedBrowser = { - browser: Browser; - endpoint: string; -}; - -let cached: ConnectedBrowser | null = null; -let connecting: Promise | null = null; - -function endpointForCdpPort(cdpPort: number) { - return `http://127.0.0.1:${cdpPort}`; -} - -async function connectBrowser(endpoint: string): Promise { - if (cached?.endpoint === endpoint) return cached; - if (connecting) return await connecting; - - connecting = chromium - .connectOverCDP(endpoint, { timeout: 
5000 }) - .then((browser) => { - const connected: ConnectedBrowser = { browser, endpoint }; - cached = connected; - browser.on("disconnected", () => { - if (cached?.browser === browser) cached = null; - }); - return connected; - }) - .finally(() => { - connecting = null; - }); - - return await connecting; -} - -async function getAllPages(browser: Browser): Promise { - const contexts = browser.contexts(); - const pages = contexts.flatMap((c) => c.pages()); - return pages; -} - -async function pageTargetId(page: Page): Promise { - const session = await page.context().newCDPSession(page); - try { - const info = (await session.send( - "Target.getTargetInfo", - )) as TargetInfoResponse; - const targetId = String(info?.targetInfo?.targetId ?? "").trim(); - return targetId || null; - } finally { - await session.detach().catch(() => {}); - } -} - -async function findPageByTargetId( - browser: Browser, - targetId: string, -): Promise { - const pages = await getAllPages(browser); - for (const page of pages) { - const tid = await pageTargetId(page).catch(() => null); - if (tid && tid === targetId) return page; - } - return null; -} - -async function getPageForTargetId(opts: { - cdpPort: number; - targetId?: string; -}): Promise { - const endpoint = endpointForCdpPort(opts.cdpPort); - const { browser } = await connectBrowser(endpoint); - const pages = await getAllPages(browser); - if (!pages.length) - throw new Error("No pages available in the connected browser."); - const first = pages[0]; - if (!opts.targetId) return first; - const found = await findPageByTargetId(browser, opts.targetId); - if (!found) throw new Error("tab not found"); - return found; -} - -export async function snapshotAiViaPlaywright(opts: { - cdpPort: number; - targetId?: string; - timeoutMs?: number; -}): Promise<{ snapshot: string }> { - const page = await getPageForTargetId({ - cdpPort: opts.cdpPort, - targetId: opts.targetId, - }); - - const maybe = page as unknown as WithSnapshotForAI; - if 
(!maybe._snapshotForAI) { - throw new Error( - "Playwright _snapshotForAI is not available. Upgrade playwright-core.", - ); - } - - const result = await maybe._snapshotForAI({ - timeout: Math.max( - 500, - Math.min(60_000, Math.floor(opts.timeoutMs ?? 5000)), - ), - track: "response", - }); - return { snapshot: String(result?.full ?? "") }; -} - -export async function clickRefViaPlaywright(opts: { - cdpPort: number; - targetId?: string; - ref: string; - timeoutMs?: number; -}): Promise { - const ref = String(opts.ref ?? "").trim(); - if (!ref) throw new Error("ref is required"); - - const page = await getPageForTargetId({ - cdpPort: opts.cdpPort, - targetId: opts.targetId, - }); - - await page.locator(`aria-ref=${ref}`).click({ - timeout: Math.max( - 500, - Math.min(60_000, Math.floor(opts.timeoutMs ?? 8000)), - ), - }); -} - -export async function closePlaywrightBrowserConnection(): Promise { - const cur = cached; - cached = null; - if (!cur) return; - await cur.browser.close().catch(() => {}); -} +export { + generateLocatorForRef, + getConsoleMessagesViaPlaywright, + getNetworkRequestsViaPlaywright, + mouseClickViaPlaywright, + mouseDragViaPlaywright, + mouseMoveViaPlaywright, + startTracingViaPlaywright, + stopTracingViaPlaywright, + verifyElementVisibleViaPlaywright, + verifyListVisibleViaPlaywright, + verifyTextVisibleViaPlaywright, + verifyValueViaPlaywright, +} from "./pw-tools-observe.js"; diff --git a/src/browser/pw-session.ts b/src/browser/pw-session.ts new file mode 100644 index 000000000..64d9b19f7 --- /dev/null +++ b/src/browser/pw-session.ts @@ -0,0 +1,218 @@ +import type { + Browser, + BrowserContext, + ConsoleMessage, + Page, + Request, +} from "playwright-core"; +import { chromium } from "playwright-core"; + +export type BrowserConsoleMessage = { + type: string; + text: string; + timestamp: string; + location?: { url?: string; lineNumber?: number; columnNumber?: number }; +}; + +export type BrowserNetworkRequest = { + url: string; + method: string; 
+ resourceType?: string; + status?: number; + ok?: boolean; + fromCache?: boolean; + failureText?: string; + timestamp: string; +}; + +type SnapshotForAIResult = { full: string; incremental?: string }; +type SnapshotForAIOptions = { timeout?: number; track?: string }; + +export type WithSnapshotForAI = { + _snapshotForAI?: ( + options?: SnapshotForAIOptions, + ) => Promise; +}; + +type TargetInfoResponse = { + targetInfo?: { + targetId?: string; + }; +}; + +type ConnectedBrowser = { + browser: Browser; + endpoint: string; +}; + +type PageState = { + console: BrowserConsoleMessage[]; + network: BrowserNetworkRequest[]; + requestMap: Map; +}; + +const pageStates = new WeakMap(); +const observedContexts = new WeakSet(); +const observedPages = new WeakSet(); + +const MAX_CONSOLE_MESSAGES = 500; +const MAX_NETWORK_REQUESTS = 1000; + +let cached: ConnectedBrowser | null = null; +let connecting: Promise | null = null; + +function endpointForCdpPort(cdpPort: number) { + return `http://127.0.0.1:${cdpPort}`; +} + +export function ensurePageState(page: Page): PageState { + const existing = pageStates.get(page); + if (existing) return existing; + + const state: PageState = { + console: [], + network: [], + requestMap: new Map(), + }; + pageStates.set(page, state); + + if (!observedPages.has(page)) { + observedPages.add(page); + page.on("console", (msg: ConsoleMessage) => { + const entry: BrowserConsoleMessage = { + type: msg.type(), + text: msg.text(), + timestamp: new Date().toISOString(), + location: msg.location(), + }; + state.console.push(entry); + if (state.console.length > MAX_CONSOLE_MESSAGES) state.console.shift(); + }); + page.on("request", (req: Request) => { + const entry: BrowserNetworkRequest = { + url: req.url(), + method: req.method(), + resourceType: req.resourceType(), + timestamp: new Date().toISOString(), + }; + state.network.push(entry); + state.requestMap.set(req, entry); + if (state.network.length > MAX_NETWORK_REQUESTS) state.network.shift(); + }); + 
page.on("requestfinished", async (req: Request) => { + const entry = state.requestMap.get(req); + if (!entry) return; + const response = await req.response().catch(() => null); + if (response) { + entry.status = response.status(); + entry.ok = response.ok(); + entry.fromCache = response.fromServiceWorker(); + } + state.requestMap.delete(req); + }); + page.on("requestfailed", (req: Request) => { + const entry = state.requestMap.get(req); + if (!entry) return; + entry.failureText = req.failure()?.errorText; + state.requestMap.delete(req); + }); + page.on("close", () => { + pageStates.delete(page); + observedPages.delete(page); + }); + } + + return state; +} + +function observeContext(context: BrowserContext) { + if (observedContexts.has(context)) return; + observedContexts.add(context); + + for (const page of context.pages()) ensurePageState(page); + context.on("page", (page) => ensurePageState(page)); +} + +function observeBrowser(browser: Browser) { + for (const context of browser.contexts()) observeContext(context); +} + +async function connectBrowser(endpoint: string): Promise { + if (cached?.endpoint === endpoint) return cached; + if (connecting) return await connecting; + + connecting = chromium + .connectOverCDP(endpoint, { timeout: 5000 }) + .then((browser) => { + const connected: ConnectedBrowser = { browser, endpoint }; + cached = connected; + observeBrowser(browser); + browser.on("disconnected", () => { + if (cached?.browser === browser) cached = null; + }); + return connected; + }) + .finally(() => { + connecting = null; + }); + + return await connecting; +} + +async function getAllPages(browser: Browser): Promise { + const contexts = browser.contexts(); + const pages = contexts.flatMap((c) => c.pages()); + return pages; +} + +async function pageTargetId(page: Page): Promise { + const session = await page.context().newCDPSession(page); + try { + const info = (await session.send( + "Target.getTargetInfo", + )) as TargetInfoResponse; + const targetId = 
String(info?.targetInfo?.targetId ?? "").trim(); + return targetId || null; + } finally { + await session.detach().catch(() => {}); + } +} + +async function findPageByTargetId( + browser: Browser, + targetId: string, +): Promise { + const pages = await getAllPages(browser); + for (const page of pages) { + const tid = await pageTargetId(page).catch(() => null); + if (tid && tid === targetId) return page; + } + return null; +} + +export async function getPageForTargetId(opts: { + cdpPort: number; + targetId?: string; +}): Promise { + const endpoint = endpointForCdpPort(opts.cdpPort); + const { browser } = await connectBrowser(endpoint); + const pages = await getAllPages(browser); + if (!pages.length) + throw new Error("No pages available in the connected browser."); + const first = pages[0]; + if (!opts.targetId) return first; + const found = await findPageByTargetId(browser, opts.targetId); + if (!found) throw new Error("tab not found"); + return found; +} + +export function refLocator(page: Page, ref: string) { + return page.locator(`aria-ref=${ref}`); +} + +export async function closePlaywrightBrowserConnection(): Promise { + const cur = cached; + cached = null; + if (!cur) return; + await cur.browser.close().catch(() => {}); +} diff --git a/src/browser/pw-tools-core.ts b/src/browser/pw-tools-core.ts new file mode 100644 index 000000000..78c968e0a --- /dev/null +++ b/src/browser/pw-tools-core.ts @@ -0,0 +1,393 @@ +import type { Page } from "playwright-core"; + +import { + ensurePageState, + getPageForTargetId, + refLocator, + type WithSnapshotForAI, +} from "./pw-session.js"; + +export async function snapshotAiViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + timeoutMs?: number; +}): Promise<{ snapshot: string }> { + const page = await getPageForTargetId({ + cdpPort: opts.cdpPort, + targetId: opts.targetId, + }); + ensurePageState(page); + + const maybe = page as unknown as WithSnapshotForAI; + if (!maybe._snapshotForAI) { + throw new Error( + 
"Playwright _snapshotForAI is not available. Upgrade playwright-core.", + ); + } + + const result = await maybe._snapshotForAI({ + timeout: Math.max( + 500, + Math.min(60_000, Math.floor(opts.timeoutMs ?? 5000)), + ), + track: "response", + }); + return { snapshot: String(result?.full ?? "") }; +} + +export async function clickRefViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref: string; + timeoutMs?: number; +}): Promise { + await clickViaPlaywright(opts); +} + +export async function clickViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref: string; + doubleClick?: boolean; + button?: "left" | "right" | "middle"; + modifiers?: Array<"Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift">; + timeoutMs?: number; +}): Promise { + const ref = String(opts.ref ?? "").trim(); + if (!ref) throw new Error("ref is required"); + + const page = await getPageForTargetId({ + cdpPort: opts.cdpPort, + targetId: opts.targetId, + }); + ensurePageState(page); + const locator = refLocator(page, ref); + const timeout = Math.max( + 500, + Math.min(60_000, Math.floor(opts.timeoutMs ?? 8000)), + ); + if (opts.doubleClick) { + await locator.dblclick({ + timeout, + button: opts.button, + modifiers: opts.modifiers, + }); + } else { + await locator.click({ + timeout, + button: opts.button, + modifiers: opts.modifiers, + }); + } +} + +export async function hoverViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref: string; + timeoutMs?: number; +}): Promise { + const ref = String(opts.ref ?? "").trim(); + if (!ref) throw new Error("ref is required"); + const page = await getPageForTargetId(opts); + ensurePageState(page); + await refLocator(page, ref).hover({ + timeout: Math.max(500, Math.min(60_000, opts.timeoutMs ?? 8000)), + }); +} + +export async function dragViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + startRef: string; + endRef: string; + timeoutMs?: number; +}): Promise { + const startRef = String(opts.startRef ?? 
"").trim(); + const endRef = String(opts.endRef ?? "").trim(); + if (!startRef || !endRef) throw new Error("startRef and endRef are required"); + const page = await getPageForTargetId(opts); + ensurePageState(page); + await refLocator(page, startRef).dragTo(refLocator(page, endRef), { + timeout: Math.max(500, Math.min(60_000, opts.timeoutMs ?? 8000)), + }); +} + +export async function selectOptionViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref: string; + values: string[]; + timeoutMs?: number; +}): Promise { + const ref = String(opts.ref ?? "").trim(); + if (!ref) throw new Error("ref is required"); + if (!opts.values?.length) throw new Error("values are required"); + const page = await getPageForTargetId(opts); + ensurePageState(page); + await refLocator(page, ref).selectOption(opts.values, { + timeout: Math.max(500, Math.min(60_000, opts.timeoutMs ?? 8000)), + }); +} + +export async function pressKeyViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + key: string; + delayMs?: number; +}): Promise { + const key = String(opts.key ?? "").trim(); + if (!key) throw new Error("key is required"); + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.keyboard.press(key, { + delay: Math.max(0, Math.floor(opts.delayMs ?? 0)), + }); +} + +export async function typeViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref: string; + text: string; + submit?: boolean; + slowly?: boolean; + timeoutMs?: number; +}): Promise { + const ref = String(opts.ref ?? "").trim(); + if (!ref) throw new Error("ref is required"); + const text = String(opts.text ?? ""); + const page = await getPageForTargetId(opts); + ensurePageState(page); + const locator = refLocator(page, ref); + const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 
8000)); + if (opts.slowly) { + await locator.click({ timeout }); + await locator.type(text, { timeout, delay: 75 }); + } else { + await locator.fill(text, { timeout }); + } + if (opts.submit) { + await locator.press("Enter", { timeout }); + } +} + +export async function fillFormViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + fields: Array>; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + for (const field of opts.fields) { + const ref = String(field.ref ?? "").trim(); + const type = String(field.type ?? "").trim(); + const value = String(field.value ?? ""); + if (!ref || !type) continue; + const locator = refLocator(page, ref); + if (type === "checkbox" || type === "radio") { + await locator.setChecked(value === "true"); + continue; + } + await locator.fill(value); + } +} + +export async function evaluateViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + fn: string; + ref?: string; +}): Promise { + const fnText = String(opts.fn ?? "").trim(); + if (!fnText) throw new Error("function is required"); + const page = await getPageForTargetId(opts); + ensurePageState(page); + if (opts.ref) { + const locator = refLocator(page, opts.ref); + return await locator.evaluate((el, fnBody) => { + const runner = new Function( + "element", + `"use strict"; const fn = ${fnBody}; return fn(element);`, + ) as (element: Element) => unknown; + return runner(el as Element); + }, fnText); + } + return await page.evaluate((fnBody) => { + const runner = new Function( + `"use strict"; const fn = ${fnBody}; return fn();`, + ) as () => unknown; + return runner(); + }, fnText); +} + +export async function fileUploadViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + paths?: string[]; + timeoutMs?: number; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 
10_000)); + const fileChooser = await page.waitForEvent("filechooser", { timeout }); + if (!opts.paths?.length) { + await fileChooser.cancel(); + return; + } + await fileChooser.setFiles(opts.paths); +} + +export async function handleDialogViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + accept: boolean; + promptText?: string; + timeoutMs?: number; +}): Promise<{ message: string; type: string }> { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 10_000)); + const dialog = await page.waitForEvent("dialog", { timeout }); + const message = dialog.message(); + const type = dialog.type(); + if (opts.accept) await dialog.accept(opts.promptText); + else await dialog.dismiss(); + return { message, type }; +} + +export async function navigateViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + url: string; + timeoutMs?: number; +}): Promise<{ url: string }> { + const url = String(opts.url ?? "").trim(); + if (!url) throw new Error("url is required"); + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.goto(url, { + timeout: Math.max(1000, Math.min(120_000, opts.timeoutMs ?? 20_000)), + }); + return { url: page.url() }; +} + +export async function navigateBackViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + timeoutMs?: number; +}): Promise<{ url: string }> { + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.goBack({ + timeout: Math.max(1000, Math.min(120_000, opts.timeoutMs ?? 
20_000)), + }); + return { url: page.url() }; +} + +export async function waitForViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + time?: number; + text?: string; + textGone?: string; + timeoutMs?: number; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + if (typeof opts.time === "number" && Number.isFinite(opts.time)) { + await page.waitForTimeout(Math.max(0, opts.time) * 1000); + } + if (opts.text) { + await page + .getByText(opts.text) + .first() + .waitFor({ + state: "visible", + timeout: Math.max(500, Math.min(120_000, opts.timeoutMs ?? 20_000)), + }); + } + if (opts.textGone) { + await page + .getByText(opts.textGone) + .first() + .waitFor({ + state: "hidden", + timeout: Math.max(500, Math.min(120_000, opts.timeoutMs ?? 20_000)), + }); + } +} + +export async function runCodeViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + code: string; +}): Promise { + const code = String(opts.code ?? "").trim(); + if (!code) throw new Error("code is required"); + const page = await getPageForTargetId(opts); + ensurePageState(page); + const fn = new Function(`return (${code});`)() as + | ((page: Page) => unknown) + | undefined; + if (typeof fn !== "function") throw new Error("code is not a function"); + return await fn(page); +} + +export async function takeScreenshotViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref?: string; + element?: string; + fullPage?: boolean; + type?: "png" | "jpeg"; +}): Promise<{ buffer: Buffer }> { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const type = opts.type ?? 
"png"; + if (opts.ref) { + if (opts.fullPage) + throw new Error("fullPage is not supported for element screenshots"); + const locator = refLocator(page, opts.ref); + const buffer = await locator.screenshot({ type }); + return { buffer }; + } + const buffer = await page.screenshot({ + type, + fullPage: Boolean(opts.fullPage), + }); + return { buffer }; +} + +export async function resizeViewportViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + width: number; + height: number; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.setViewportSize({ + width: Math.max(1, Math.floor(opts.width)), + height: Math.max(1, Math.floor(opts.height)), + }); +} + +export async function closePageViaPlaywright(opts: { + cdpPort: number; + targetId?: string; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.close(); +} + +export async function pdfViaPlaywright(opts: { + cdpPort: number; + targetId?: string; +}): Promise<{ buffer: Buffer }> { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const buffer = await page.pdf({ printBackground: true }); + return { buffer }; +} diff --git a/src/browser/pw-tools-observe.ts b/src/browser/pw-tools-observe.ts new file mode 100644 index 000000000..7d07abf53 --- /dev/null +++ b/src/browser/pw-tools-observe.ts @@ -0,0 +1,200 @@ +import crypto from "node:crypto"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; + +import { + type BrowserConsoleMessage, + type BrowserNetworkRequest, + ensurePageState, + getPageForTargetId, + refLocator, +} from "./pw-session.js"; + +const STATIC_RESOURCE_TYPES = new Set(["image", "font", "stylesheet", "media"]); + +const tracingContexts = new WeakSet(); + +function consolePriority(level: string) { + switch (level) { + case "error": + return 3; + case "warning": + return 2; + case "info": + case "log": + return 1; + case "debug": + return 0; 
+ default: + return 1; + } +} + +export async function startTracingViaPlaywright(opts: { + cdpPort: number; + targetId?: string; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const context = page.context(); + if (tracingContexts.has(context)) throw new Error("Tracing already started"); + await context.tracing.start({ + screenshots: true, + snapshots: true, + sources: true, + }); + tracingContexts.add(context); +} + +export async function stopTracingViaPlaywright(opts: { + cdpPort: number; + targetId?: string; +}): Promise<{ buffer: Buffer }> { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const context = page.context(); + if (!tracingContexts.has(context)) throw new Error("Tracing not started"); + const fileName = `clawd-trace-${crypto.randomUUID()}.zip`; + const filePath = path.join(os.tmpdir(), fileName); + await context.tracing.stop({ path: filePath }); + tracingContexts.delete(context); + const buffer = await fs.readFile(filePath); + await fs.rm(filePath).catch(() => {}); + return { buffer }; +} + +export async function getConsoleMessagesViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + level?: string; +}): Promise { + const page = await getPageForTargetId(opts); + const state = ensurePageState(page); + if (!opts.level) return [...state.console]; + const min = consolePriority(opts.level); + return state.console.filter((msg) => consolePriority(msg.type) >= min); +} + +export async function getNetworkRequestsViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + includeStatic?: boolean; +}): Promise { + const page = await getPageForTargetId(opts); + const state = ensurePageState(page); + if (opts.includeStatic) return [...state.network]; + return state.network.filter( + (req) => !req.resourceType || !STATIC_RESOURCE_TYPES.has(req.resourceType), + ); +} + +export async function mouseMoveViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + x: number; + y: 
number; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.mouse.move(opts.x, opts.y); +} + +export async function mouseClickViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + x: number; + y: number; + button?: "left" | "right" | "middle"; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.mouse.click(opts.x, opts.y, { + button: opts.button, + }); +} + +export async function mouseDragViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + startX: number; + startY: number; + endX: number; + endY: number; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + await page.mouse.move(opts.startX, opts.startY); + await page.mouse.down(); + await page.mouse.move(opts.endX, opts.endY); + await page.mouse.up(); +} + +export async function verifyElementVisibleViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + role: string; + accessibleName: string; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const locator = page.getByRole(opts.role as never, { + name: opts.accessibleName, + }); + if ((await locator.count()) === 0) throw new Error("element not found"); + if (!(await locator.first().isVisible())) + throw new Error("element not visible"); +} + +export async function verifyTextVisibleViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + text: string; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const locator = page.getByText(opts.text).filter({ visible: true }); + if ((await locator.count()) === 0) throw new Error("text not found"); +} + +export async function verifyListVisibleViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref: string; + items: string[]; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const locator = refLocator(page, opts.ref); + for (const item 
of opts.items) { + const itemLocator = locator.getByText(item); + if ((await itemLocator.count()) === 0) + throw new Error(`item "${item}" not found`); + } +} + +export async function verifyValueViaPlaywright(opts: { + cdpPort: number; + targetId?: string; + ref: string; + type: string; + value: string; +}): Promise { + const page = await getPageForTargetId(opts); + ensurePageState(page); + const locator = refLocator(page, opts.ref); + if (opts.type === "checkbox" || opts.type === "radio") { + const checked = await locator.isChecked(); + const expected = opts.value === "true"; + if (checked !== expected) + throw new Error(`expected ${opts.value}, got ${String(checked)}`); + return; + } + const value = await locator.inputValue(); + if (value !== opts.value) + throw new Error(`expected ${opts.value}, got ${value}`); +} + +export function generateLocatorForRef(ref: string) { + return `locator('aria-ref=${ref}')`; +} diff --git a/src/browser/routes/basic.ts b/src/browser/routes/basic.ts new file mode 100644 index 000000000..202d81ee8 --- /dev/null +++ b/src/browser/routes/basic.ts @@ -0,0 +1,50 @@ +import type express from "express"; + +import type { BrowserRouteContext } from "../server-context.js"; +import { jsonError } from "./utils.js"; + +export function registerBrowserBasicRoutes( + app: express.Express, + ctx: BrowserRouteContext, +) { + app.get("/", async (_req, res) => { + let current: ReturnType; + try { + current = ctx.state(); + } catch { + return jsonError(res, 503, "browser server not started"); + } + + const reachable = await ctx.isReachable(300); + res.json({ + enabled: current.resolved.enabled, + controlUrl: current.resolved.controlUrl, + running: reachable, + pid: current.running?.pid ?? null, + cdpPort: current.cdpPort, + chosenBrowser: current.running?.exe.kind ?? null, + userDataDir: current.running?.userDataDir ?? 
null, + color: current.resolved.color, + headless: current.resolved.headless, + attachOnly: current.resolved.attachOnly, + }); + }); + + app.post("/start", async (_req, res) => { + try { + await ctx.ensureBrowserAvailable(); + res.json({ ok: true }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.post("/stop", async (_req, res) => { + try { + const result = await ctx.stopRunningBrowser(); + res.json({ ok: true, stopped: result.stopped }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); +} diff --git a/src/browser/routes/index.ts b/src/browser/routes/index.ts new file mode 100644 index 000000000..4f1bf5dd2 --- /dev/null +++ b/src/browser/routes/index.ts @@ -0,0 +1,17 @@ +import type express from "express"; + +import type { BrowserRouteContext } from "../server-context.js"; +import { registerBrowserBasicRoutes } from "./basic.js"; +import { registerBrowserInspectRoutes } from "./inspect.js"; +import { registerBrowserTabRoutes } from "./tabs.js"; +import { registerBrowserToolRoutes } from "./tool.js"; + +export function registerBrowserRoutes( + app: express.Express, + ctx: BrowserRouteContext, +) { + registerBrowserBasicRoutes(app, ctx); + registerBrowserTabRoutes(app, ctx); + registerBrowserInspectRoutes(app, ctx); + registerBrowserToolRoutes(app, ctx); +} diff --git a/src/browser/routes/inspect.ts b/src/browser/routes/inspect.ts new file mode 100644 index 000000000..f803e65dd --- /dev/null +++ b/src/browser/routes/inspect.ts @@ -0,0 +1,307 @@ +import path from "node:path"; + +import type express from "express"; +import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js"; +import { + captureScreenshot, + captureScreenshotPng, + evaluateJavaScript, + getDomText, + querySelector, + snapshotAria, + snapshotDom, +} from "../cdp.js"; +import { + snapshotAiViaPlaywright, + takeScreenshotViaPlaywright, +} from "../pw-ai.js"; +import { + DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, + 
normalizeBrowserScreenshot, +} from "../screenshot.js"; +import type { BrowserRouteContext } from "../server-context.js"; +import { jsonError, toBoolean, toStringOrEmpty } from "./utils.js"; + +export function registerBrowserInspectRoutes( + app: express.Express, + ctx: BrowserRouteContext, +) { + app.get("/screenshot", async (req, res) => { + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const fullPage = + req.query.fullPage === "true" || req.query.fullPage === "1"; + + try { + const tab = await ctx.ensureTabAvailable(targetId || undefined); + + let shot: Buffer = Buffer.alloc(0); + let contentTypeHint: "image/jpeg" | "image/png" = "image/jpeg"; + try { + shot = await captureScreenshot({ + wsUrl: tab.wsUrl ?? "", + fullPage, + format: "jpeg", + quality: 85, + }); + } catch { + contentTypeHint = "image/png"; + shot = await captureScreenshotPng({ + wsUrl: tab.wsUrl ?? "", + fullPage, + }); + } + + const normalized = await normalizeBrowserScreenshot(shot, { + maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, + maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + }); + await ensureMediaDir(); + const saved = await saveMediaBuffer( + normalized.buffer, + normalized.contentType ?? contentTypeHint, + "browser", + DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + ); + const filePath = path.resolve(saved.path); + res.json({ + ok: true, + path: filePath, + targetId: tab.targetId, + url: tab.url, + }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.post("/screenshot", async (req, res) => { + const body = req.body as Record<string, unknown>; + const targetId = toStringOrEmpty(body?.targetId); + const fullPage = toBoolean(body?.fullPage) ?? false; + const ref = toStringOrEmpty(body?.ref); + const element = toStringOrEmpty(body?.element); + const type = body?.type === "jpeg" ? 
"jpeg" : "png"; + const filename = toStringOrEmpty(body?.filename); + + try { + const tab = await ctx.ensureTabAvailable(targetId || undefined); + const snap = await takeScreenshotViaPlaywright({ + cdpPort: ctx.state().cdpPort, + targetId: tab.targetId, + ref, + element, + fullPage, + type, + }); + const buffer = snap.buffer; + const normalized = await normalizeBrowserScreenshot(buffer, { + maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, + maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + }); + await ensureMediaDir(); + const saved = await saveMediaBuffer( + normalized.buffer, + normalized.contentType ?? `image/${type}`, + "browser", + DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + ); + const filePath = path.resolve(saved.path); + res.json({ + ok: true, + path: filePath, + targetId: tab.targetId, + url: tab.url, + filename: filename || undefined, + }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.post("/eval", async (req, res) => { + const js = toStringOrEmpty((req.body as { js?: unknown })?.js); + const targetId = toStringOrEmpty( + (req.body as { targetId?: unknown })?.targetId, + ); + const awaitPromise = Boolean((req.body as { await?: unknown })?.await); + + if (!js) return jsonError(res, 400, "js is required"); + + try { + const tab = await ctx.ensureTabAvailable(targetId || undefined); + const evaluated = await evaluateJavaScript({ + wsUrl: tab.wsUrl ?? 
"", + expression: js, + awaitPromise, + returnByValue: true, + }); + + if (evaluated.exceptionDetails) { + const msg = + evaluated.exceptionDetails.exception?.description || + evaluated.exceptionDetails.text || + "JavaScript evaluation failed"; + return jsonError(res, 400, msg); + } + + res.json({ + ok: true, + targetId: tab.targetId, + url: tab.url, + result: evaluated.result, + }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.get("/query", async (req, res) => { + const selector = + typeof req.query.selector === "string" ? req.query.selector.trim() : ""; + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const limit = + typeof req.query.limit === "string" ? Number(req.query.limit) : undefined; + + if (!selector) return jsonError(res, 400, "selector is required"); + + try { + const tab = await ctx.ensureTabAvailable(targetId || undefined); + const result = await querySelector({ + wsUrl: tab.wsUrl ?? "", + selector, + limit, + }); + res.json({ ok: true, targetId: tab.targetId, url: tab.url, ...result }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.get("/dom", async (req, res) => { + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const format = req.query.format === "text" ? "text" : "html"; + const selector = + typeof req.query.selector === "string" ? req.query.selector.trim() : ""; + const maxChars = + typeof req.query.maxChars === "string" + ? Number(req.query.maxChars) + : undefined; + + try { + const tab = await ctx.ensureTabAvailable(targetId || undefined); + const result = await getDomText({ + wsUrl: tab.wsUrl ?? 
"", + format, + maxChars, + selector: selector || undefined, + }); + res.json({ + ok: true, + targetId: tab.targetId, + url: tab.url, + format, + ...result, + }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.get("/snapshot", async (req, res) => { + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const format = + req.query.format === "domSnapshot" + ? "domSnapshot" + : req.query.format === "ai" + ? "ai" + : "aria"; + const limit = + typeof req.query.limit === "string" ? Number(req.query.limit) : undefined; + + try { + const tab = await ctx.ensureTabAvailable(targetId || undefined); + + if (format === "ai") { + const snap = await snapshotAiViaPlaywright({ + cdpPort: ctx.state().cdpPort, + targetId: tab.targetId, + }); + return res.json({ + ok: true, + format, + targetId: tab.targetId, + url: tab.url, + ...snap, + }); + } + + if (format === "aria") { + const snap = await snapshotAria({ + wsUrl: tab.wsUrl ?? "", + limit, + }); + return res.json({ + ok: true, + format, + targetId: tab.targetId, + url: tab.url, + ...snap, + }); + } + + const snap = await snapshotDom({ + wsUrl: tab.wsUrl ?? 
"", + limit, + }); + return res.json({ + ok: true, + format, + targetId: tab.targetId, + url: tab.url, + ...snap, + }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.post("/click", async (req, res) => { + const ref = toStringOrEmpty((req.body as { ref?: unknown })?.ref); + const targetId = toStringOrEmpty( + (req.body as { targetId?: unknown })?.targetId, + ); + + if (!ref) return jsonError(res, 400, "ref is required"); + + try { + const tab = await ctx.ensureTabAvailable(targetId || undefined); + await clickViaPlaywright({ + cdpPort: ctx.state().cdpPort, + targetId: tab.targetId, + ref, + }); + res.json({ ok: true, targetId: tab.targetId, url: tab.url }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); +} diff --git a/src/browser/routes/tabs.ts b/src/browser/routes/tabs.ts new file mode 100644 index 000000000..ef786afa6 --- /dev/null +++ b/src/browser/routes/tabs.ts @@ -0,0 +1,108 @@ +import type express from "express"; + +import type { BrowserRouteContext } from "../server-context.js"; +import { jsonError, toNumber, toStringOrEmpty } from "./utils.js"; + +export function registerBrowserTabRoutes( + app: express.Express, + ctx: BrowserRouteContext, +) { + app.get("/tabs", async (_req, res) => { + try { + const reachable = await ctx.isReachable(300); + if (!reachable) + return res.json({ running: false, tabs: [] as unknown[] }); + const tabs = await ctx.listTabs(); + res.json({ running: true, tabs }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.post("/tabs/open", async (req, res) => { + const url = toStringOrEmpty((req.body as { url?: unknown })?.url); + if (!url) return jsonError(res, 400, "url is required"); + try { + await ctx.ensureBrowserAvailable(); + const tab = await 
ctx.openTab(url); + res.json(tab); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.post("/tabs/focus", async (req, res) => { + const targetId = toStringOrEmpty( + (req.body as { targetId?: unknown })?.targetId, + ); + if (!targetId) return jsonError(res, 400, "targetId is required"); + try { + if (!(await ctx.isReachable(300))) + return jsonError(res, 409, "browser not running"); + await ctx.focusTab(targetId); + res.json({ ok: true }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.delete("/tabs/:targetId", async (req, res) => { + const targetId = toStringOrEmpty(req.params.targetId); + if (!targetId) return jsonError(res, 400, "targetId is required"); + try { + if (!(await ctx.isReachable(300))) + return jsonError(res, 409, "browser not running"); + await ctx.closeTab(targetId); + res.json({ ok: true }); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); + + app.post("/tabs/action", async (req, res) => { + const action = toStringOrEmpty((req.body as { action?: unknown })?.action); + const index = toNumber((req.body as { index?: unknown })?.index); + try { + if (action === "list") { + const reachable = await ctx.isReachable(300); + if (!reachable) return res.json({ ok: true, tabs: [] as unknown[] }); + const tabs = await ctx.listTabs(); + return res.json({ ok: true, tabs }); + } + + if (action === "new") { + await ctx.ensureBrowserAvailable(); + const tab = await ctx.openTab("about:blank"); + return res.json({ ok: true, tab }); + } + + if (action === "close") { + const tabs = await ctx.listTabs(); + const target = typeof index === "number" ? 
tabs[index] : tabs.at(0); + if (!target) return jsonError(res, 404, "tab not found"); + await ctx.closeTab(target.targetId); + return res.json({ ok: true, targetId: target.targetId }); + } + + if (action === "select") { + if (typeof index !== "number") + return jsonError(res, 400, "index is required"); + const tabs = await ctx.listTabs(); + const target = tabs[index]; + if (!target) return jsonError(res, 404, "tab not found"); + await ctx.focusTab(target.targetId); + return res.json({ ok: true, targetId: target.targetId }); + } + + return jsonError(res, 400, "unknown tab action"); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); +} diff --git a/src/browser/routes/tool-core.ts b/src/browser/routes/tool-core.ts new file mode 100644 index 000000000..d47469496 --- /dev/null +++ b/src/browser/routes/tool-core.ts @@ -0,0 +1,432 @@ +import path from "node:path"; + +import type express from "express"; + +import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js"; +import { + clickViaPlaywright, + closePageViaPlaywright, + dragViaPlaywright, + evaluateViaPlaywright, + fileUploadViaPlaywright, + fillFormViaPlaywright, + handleDialogViaPlaywright, + hoverViaPlaywright, + navigateBackViaPlaywright, + navigateViaPlaywright, + pressKeyViaPlaywright, + resizeViewportViaPlaywright, + runCodeViaPlaywright, + selectOptionViaPlaywright, + snapshotAiViaPlaywright, + takeScreenshotViaPlaywright, + typeViaPlaywright, + waitForViaPlaywright, +} from "../pw-ai.js"; +import { + DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, + normalizeBrowserScreenshot, +} from "../screenshot.js"; +import type { BrowserRouteContext } from "../server-context.js"; +import { + jsonError, + toBoolean, + toNumber, + toStringArray, + toStringOrEmpty, +} from "./utils.js"; + +type ToolCoreParams = { + name: string; + args: Record<string, unknown>; + targetId: string; + 
cdpPort: number; + ctx: BrowserRouteContext; + res: express.Response; +}; + +export async function handleBrowserToolCore( + params: ToolCoreParams, +): Promise<boolean> { + const { name, args, targetId, cdpPort, ctx, res } = params; + const target = targetId || undefined; + + switch (name) { + case "browser_close": { + const tab = await ctx.ensureTabAvailable(target); + await closePageViaPlaywright({ cdpPort, targetId: tab.targetId }); + res.json({ ok: true, targetId: tab.targetId, url: tab.url }); + return true; + } + case "browser_resize": { + const width = toNumber(args.width); + const height = toNumber(args.height); + if (!width || !height) { + jsonError(res, 400, "width and height are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await resizeViewportViaPlaywright({ + cdpPort, + targetId: tab.targetId, + width, + height, + }); + res.json({ ok: true, targetId: tab.targetId, url: tab.url }); + return true; + } + case "browser_handle_dialog": { + const accept = toBoolean(args.accept); + if (accept === undefined) { + jsonError(res, 400, "accept is required"); + return true; + } + const promptText = toStringOrEmpty(args.promptText) || undefined; + const tab = await ctx.ensureTabAvailable(target); + const result = await handleDialogViaPlaywright({ + cdpPort, + targetId: tab.targetId, + accept, + promptText, + }); + res.json({ ok: true, ...result }); + return true; + } + case "browser_evaluate": { + const fn = toStringOrEmpty(args.function); + if (!fn) { + jsonError(res, 400, "function is required"); + return true; + } + const ref = toStringOrEmpty(args.ref) || undefined; + const tab = await ctx.ensureTabAvailable(target); + const result = await evaluateViaPlaywright({ + cdpPort, + targetId: tab.targetId, + fn, + ref, + }); + res.json({ ok: true, result }); + return true; + } + case "browser_file_upload": { + const paths = toStringArray(args.paths) ?? 
[]; + const tab = await ctx.ensureTabAvailable(target); + await fileUploadViaPlaywright({ + cdpPort, + targetId: tab.targetId, + paths: paths.length ? paths : undefined, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + case "browser_fill_form": { + const fields = Array.isArray(args.fields) + ? (args.fields as Array<Record<string, unknown>>) + : null; + if (!fields?.length) { + jsonError(res, 400, "fields are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await fillFormViaPlaywright({ + cdpPort, + targetId: tab.targetId, + fields, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + case "browser_install": { + res.json({ + ok: true, + message: + "clawd browser uses system Chrome/Chromium; no Playwright install needed.", + }); + return true; + } + case "browser_press_key": { + const key = toStringOrEmpty(args.key); + if (!key) { + jsonError(res, 400, "key is required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await pressKeyViaPlaywright({ + cdpPort, + targetId: tab.targetId, + key, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + case "browser_type": { + const ref = toStringOrEmpty(args.ref); + const text = toStringOrEmpty(args.text); + if (!ref || !text) { + jsonError(res, 400, "ref and text are required"); + return true; + } + const submit = toBoolean(args.submit) ?? false; + const slowly = toBoolean(args.slowly) ?? 
false; + const tab = await ctx.ensureTabAvailable(target); + await typeViaPlaywright({ + cdpPort, + targetId: tab.targetId, + ref, + text, + submit, + slowly, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + case "browser_navigate": { + const url = toStringOrEmpty(args.url); + if (!url) { + jsonError(res, 400, "url is required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + const result = await navigateViaPlaywright({ + cdpPort, + targetId: tab.targetId, + url, + }); + res.json({ ok: true, targetId: tab.targetId, ...result }); + return true; + } + case "browser_navigate_back": { + const tab = await ctx.ensureTabAvailable(target); + const result = await navigateBackViaPlaywright({ + cdpPort, + targetId: tab.targetId, + }); + res.json({ ok: true, targetId: tab.targetId, ...result }); + return true; + } + case "browser_run_code": { + const code = toStringOrEmpty(args.code); + if (!code) { + jsonError(res, 400, "code is required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + const result = await runCodeViaPlaywright({ + cdpPort, + targetId: tab.targetId, + code, + }); + res.json({ ok: true, result }); + return true; + } + case "browser_take_screenshot": { + const type = args.type === "jpeg" ? "jpeg" : "png"; + const ref = toStringOrEmpty(args.ref) || undefined; + const fullPage = toBoolean(args.fullPage) ?? 
false; + const element = toStringOrEmpty(args.element) || undefined; + const filename = toStringOrEmpty(args.filename) || undefined; + const tab = await ctx.ensureTabAvailable(target); + const snap = await takeScreenshotViaPlaywright({ + cdpPort, + targetId: tab.targetId, + ref, + element, + fullPage, + type, + }); + const normalized = await normalizeBrowserScreenshot(snap.buffer, { + maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, + maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + }); + await ensureMediaDir(); + const saved = await saveMediaBuffer( + normalized.buffer, + normalized.contentType ?? `image/${type}`, + "browser", + DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, + ); + res.json({ + ok: true, + path: path.resolve(saved.path), + filename, + targetId: tab.targetId, + url: tab.url, + }); + return true; + } + case "browser_snapshot": { + const filename = toStringOrEmpty(args.filename) || undefined; + const tab = await ctx.ensureTabAvailable(target); + const snap = await snapshotAiViaPlaywright({ + cdpPort, + targetId: tab.targetId, + }); + if (filename) { + await ensureMediaDir(); + const saved = await saveMediaBuffer( + Buffer.from(snap.snapshot, "utf8"), + "text/plain", + "browser", + ); + res.json({ + ok: true, + path: path.resolve(saved.path), + filename, + targetId: tab.targetId, + url: tab.url, + }); + return true; + } + res.json({ + ok: true, + snapshot: snap.snapshot, + targetId: tab.targetId, + url: tab.url, + }); + return true; + } + case "browser_click": { + const ref = toStringOrEmpty(args.ref); + if (!ref) { + jsonError(res, 400, "ref is required"); + return true; + } + const doubleClick = toBoolean(args.doubleClick) ?? false; + const button = toStringOrEmpty(args.button) || undefined; + const modifiers = Array.isArray(args.modifiers) + ? 
(args.modifiers as string[]) + : undefined; + const tab = await ctx.ensureTabAvailable(target); + await clickViaPlaywright({ + cdpPort, + targetId: tab.targetId, + ref, + doubleClick, + button, + modifiers, + }); + res.json({ ok: true, targetId: tab.targetId, url: tab.url }); + return true; + } + case "browser_drag": { + const startRef = toStringOrEmpty(args.startRef); + const endRef = toStringOrEmpty(args.endRef); + if (!startRef || !endRef) { + jsonError(res, 400, "startRef and endRef are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await dragViaPlaywright({ + cdpPort, + targetId: tab.targetId, + startRef, + endRef, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + case "browser_hover": { + const ref = toStringOrEmpty(args.ref); + if (!ref) { + jsonError(res, 400, "ref is required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await hoverViaPlaywright({ + cdpPort, + targetId: tab.targetId, + ref, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + case "browser_select_option": { + const ref = toStringOrEmpty(args.ref); + const values = toStringArray(args.values); + if (!ref || !values?.length) { + jsonError(res, 400, "ref and values are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await selectOptionViaPlaywright({ + cdpPort, + targetId: tab.targetId, + ref, + values, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + case "browser_tabs": { + const action = toStringOrEmpty(args.action); + const index = toNumber(args.index); + if (!action) { + jsonError(res, 400, "action is required"); + return true; + } + if (action === "list") { + const reachable = await ctx.isReachable(300); + if (!reachable) { + res.json({ ok: true, tabs: [] }); + return true; + } + const tabs = await ctx.listTabs(); + res.json({ ok: true, tabs }); + return true; + } + if (action === "new") { + await 
ctx.ensureBrowserAvailable(); + const tab = await ctx.openTab("about:blank"); + res.json({ ok: true, tab }); + return true; + } + if (action === "close") { + const tabs = await ctx.listTabs(); + const targetTab = typeof index === "number" ? tabs[index] : tabs.at(0); + if (!targetTab) { + jsonError(res, 404, "tab not found"); + return true; + } + await ctx.closeTab(targetTab.targetId); + res.json({ ok: true, targetId: targetTab.targetId }); + return true; + } + if (action === "select") { + if (typeof index !== "number") { + jsonError(res, 400, "index is required"); + return true; + } + const tabs = await ctx.listTabs(); + const targetTab = tabs[index]; + if (!targetTab) { + jsonError(res, 404, "tab not found"); + return true; + } + await ctx.focusTab(targetTab.targetId); + res.json({ ok: true, targetId: targetTab.targetId }); + return true; + } + jsonError(res, 400, "unknown tab action"); + return true; + } + case "browser_wait_for": { + const time = toNumber(args.time); + const text = toStringOrEmpty(args.text) || undefined; + const textGone = toStringOrEmpty(args.textGone) || undefined; + const tab = await ctx.ensureTabAvailable(target); + await waitForViaPlaywright({ + cdpPort, + targetId: tab.targetId, + time, + text, + textGone, + }); + res.json({ ok: true, targetId: tab.targetId }); + return true; + } + default: + return false; + } +} diff --git a/src/browser/routes/tool-extra.ts b/src/browser/routes/tool-extra.ts new file mode 100644 index 000000000..fe353016c --- /dev/null +++ b/src/browser/routes/tool-extra.ts @@ -0,0 +1,262 @@ +import path from "node:path"; + +import type express from "express"; + +import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js"; +import { + generateLocatorForRef, + getConsoleMessagesViaPlaywright, + getNetworkRequestsViaPlaywright, + mouseClickViaPlaywright, + mouseDragViaPlaywright, + mouseMoveViaPlaywright, + pdfViaPlaywright, + startTracingViaPlaywright, + stopTracingViaPlaywright, + 
verifyElementVisibleViaPlaywright, + verifyListVisibleViaPlaywright, + verifyTextVisibleViaPlaywright, + verifyValueViaPlaywright, +} from "../pw-ai.js"; +import type { BrowserRouteContext } from "../server-context.js"; +import { + jsonError, + toBoolean, + toNumber, + toStringArray, + toStringOrEmpty, +} from "./utils.js"; + +type ToolExtraParams = { + name: string; + args: Record<string, unknown>; + targetId: string; + cdpPort: number; + ctx: BrowserRouteContext; + res: express.Response; +}; + +export async function handleBrowserToolExtra( + params: ToolExtraParams, +): Promise<boolean> { + const { name, args, targetId, cdpPort, ctx, res } = params; + const target = targetId || undefined; + + switch (name) { + case "browser_console_messages": { + const level = toStringOrEmpty(args.level) || undefined; + const tab = await ctx.ensureTabAvailable(target); + const messages = await getConsoleMessagesViaPlaywright({ + cdpPort, + targetId: tab.targetId, + level, + }); + res.json({ ok: true, messages, targetId: tab.targetId }); + return true; + } + case "browser_network_requests": { + const includeStatic = toBoolean(args.includeStatic) ?? 
false; + const tab = await ctx.ensureTabAvailable(target); + const requests = await getNetworkRequestsViaPlaywright({ + cdpPort, + targetId: tab.targetId, + includeStatic, + }); + res.json({ ok: true, requests, targetId: tab.targetId }); + return true; + } + case "browser_pdf_save": { + const tab = await ctx.ensureTabAvailable(target); + const pdf = await pdfViaPlaywright({ + cdpPort, + targetId: tab.targetId, + }); + await ensureMediaDir(); + const saved = await saveMediaBuffer( + pdf.buffer, + "application/pdf", + "browser", + pdf.buffer.byteLength, + ); + res.json({ + ok: true, + path: path.resolve(saved.path), + targetId: tab.targetId, + url: tab.url, + }); + return true; + } + case "browser_start_tracing": { + const tab = await ctx.ensureTabAvailable(target); + await startTracingViaPlaywright({ + cdpPort, + targetId: tab.targetId, + }); + res.json({ ok: true }); + return true; + } + case "browser_stop_tracing": { + const tab = await ctx.ensureTabAvailable(target); + const trace = await stopTracingViaPlaywright({ + cdpPort, + targetId: tab.targetId, + }); + await ensureMediaDir(); + const saved = await saveMediaBuffer( + trace.buffer, + "application/zip", + "browser", + trace.buffer.byteLength, + ); + res.json({ + ok: true, + path: path.resolve(saved.path), + targetId: tab.targetId, + url: tab.url, + }); + return true; + } + case "browser_verify_element_visible": { + const role = toStringOrEmpty(args.role); + const accessibleName = toStringOrEmpty(args.accessibleName); + if (!role || !accessibleName) { + jsonError(res, 400, "role and accessibleName are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await verifyElementVisibleViaPlaywright({ + cdpPort, + targetId: tab.targetId, + role, + accessibleName, + }); + res.json({ ok: true }); + return true; + } + case "browser_verify_text_visible": { + const text = toStringOrEmpty(args.text); + if (!text) { + jsonError(res, 400, "text is required"); + return true; + } + const tab = 
await ctx.ensureTabAvailable(target); + await verifyTextVisibleViaPlaywright({ + cdpPort, + targetId: tab.targetId, + text, + }); + res.json({ ok: true }); + return true; + } + case "browser_verify_list_visible": { + const ref = toStringOrEmpty(args.ref); + const items = toStringArray(args.items); + if (!ref || !items?.length) { + jsonError(res, 400, "ref and items are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await verifyListVisibleViaPlaywright({ + cdpPort, + targetId: tab.targetId, + ref, + items, + }); + res.json({ ok: true }); + return true; + } + case "browser_verify_value": { + const ref = toStringOrEmpty(args.ref); + const type = toStringOrEmpty(args.type); + const value = toStringOrEmpty(args.value); + if (!ref || !type) { + jsonError(res, 400, "ref and type are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await verifyValueViaPlaywright({ + cdpPort, + targetId: tab.targetId, + ref, + type, + value, + }); + res.json({ ok: true }); + return true; + } + case "browser_mouse_move_xy": { + const x = toNumber(args.x); + const y = toNumber(args.y); + if (x === undefined || y === undefined) { + jsonError(res, 400, "x and y are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await mouseMoveViaPlaywright({ + cdpPort, + targetId: tab.targetId, + x, + y, + }); + res.json({ ok: true }); + return true; + } + case "browser_mouse_click_xy": { + const x = toNumber(args.x); + const y = toNumber(args.y); + if (x === undefined || y === undefined) { + jsonError(res, 400, "x and y are required"); + return true; + } + const button = toStringOrEmpty(args.button) || undefined; + const tab = await ctx.ensureTabAvailable(target); + await mouseClickViaPlaywright({ + cdpPort, + targetId: tab.targetId, + x, + y, + button, + }); + res.json({ ok: true }); + return true; + } + case "browser_mouse_drag_xy": { + const startX = toNumber(args.startX); + const startY = 
toNumber(args.startY); + const endX = toNumber(args.endX); + const endY = toNumber(args.endY); + if ( + startX === undefined || + startY === undefined || + endX === undefined || + endY === undefined + ) { + jsonError(res, 400, "startX, startY, endX, endY are required"); + return true; + } + const tab = await ctx.ensureTabAvailable(target); + await mouseDragViaPlaywright({ + cdpPort, + targetId: tab.targetId, + startX, + startY, + endX, + endY, + }); + res.json({ ok: true }); + return true; + } + case "browser_generate_locator": { + const ref = toStringOrEmpty(args.ref); + if (!ref) { + jsonError(res, 400, "ref is required"); + return true; + } + const locator = generateLocatorForRef(ref); + res.json({ ok: true, locator }); + return true; + } + default: + return false; + } +} diff --git a/src/browser/routes/tool.ts b/src/browser/routes/tool.ts new file mode 100644 index 000000000..fb095ba4c --- /dev/null +++ b/src/browser/routes/tool.ts @@ -0,0 +1,65 @@ +import type express from "express"; + +import type { BrowserRouteContext } from "../server-context.js"; +import { handleBrowserToolCore } from "./tool-core.js"; +import { handleBrowserToolExtra } from "./tool-extra.js"; +import { jsonError, toStringOrEmpty } from "./utils.js"; + +type ToolRequestBody = { + name?: unknown; + args?: unknown; + targetId?: unknown; +}; + +function toolArgs(value: unknown): Record<string, unknown> { + if (!value || typeof value !== "object" || Array.isArray(value)) return {}; + return value as Record<string, unknown>; +} + +export function registerBrowserToolRoutes( + app: express.Express, + ctx: BrowserRouteContext, +) { + app.post("/tool", async (req, res) => { + const body = req.body as ToolRequestBody; + const name = toStringOrEmpty(body?.name); + if (!name) return jsonError(res, 400, "name is required"); + const args = toolArgs(body?.args); + const targetId = toStringOrEmpty(body?.targetId || args?.targetId); + + try { + let cdpPort: number; + try { + cdpPort = ctx.state().cdpPort; + } catch { + return 
jsonError(res, 503, "browser server not started"); + } + + const handledCore = await handleBrowserToolCore({ + name, + args, + targetId, + cdpPort, + ctx, + res, + }); + if (handledCore) return; + + const handledExtra = await handleBrowserToolExtra({ + name, + args, + targetId, + cdpPort, + ctx, + res, + }); + if (handledExtra) return; + + return jsonError(res, 400, "unknown tool name"); + } catch (err) { + const mapped = ctx.mapTabError(err); + if (mapped) return jsonError(res, mapped.status, mapped.message); + jsonError(res, 500, String(err)); + } + }); +} diff --git a/src/browser/routes/utils.ts b/src/browser/routes/utils.ts new file mode 100644 index 000000000..bc393b05c --- /dev/null +++ b/src/browser/routes/utils.ts @@ -0,0 +1,38 @@ +import type express from "express"; + +export function jsonError( + res: express.Response, + status: number, + message: string, +) { + res.status(status).json({ error: message }); +} + +export function toStringOrEmpty(value: unknown) { + return typeof value === "string" ? value.trim() : String(value ?? "").trim(); +} + +export function toNumber(value: unknown) { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string" && value.trim()) { + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : undefined; + } + return undefined; +} + +export function toBoolean(value: unknown) { + if (typeof value === "boolean") return value; + if (typeof value === "string") { + const v = value.trim().toLowerCase(); + if (v === "true" || v === "1" || v === "yes") return true; + if (v === "false" || v === "0" || v === "no") return false; + } + return undefined; +} + +export function toStringArray(value: unknown): string[] | undefined { + if (!Array.isArray(value)) return undefined; + const strings = value.map((v) => toStringOrEmpty(v)).filter(Boolean); + return strings.length ? 
strings : undefined; +} diff --git a/src/browser/server-context.ts b/src/browser/server-context.ts new file mode 100644 index 000000000..c95f34406 --- /dev/null +++ b/src/browser/server-context.ts @@ -0,0 +1,272 @@ +import type { Server } from "node:http"; + +import type { RuntimeEnv } from "../runtime.js"; +import { createTargetViaCdp } from "./cdp.js"; +import { + isChromeReachable, + launchClawdChrome, + type RunningChrome, + stopClawdChrome, +} from "./chrome.js"; +import type { ResolvedBrowserConfig } from "./config.js"; +import { resolveTargetIdFromTabs } from "./target-id.js"; + +export type BrowserTab = { + targetId: string; + title: string; + url: string; + wsUrl?: string; + type?: string; +}; + +export type BrowserServerState = { + server: Server; + port: number; + cdpPort: number; + running: RunningChrome | null; + resolved: ResolvedBrowserConfig; +}; + +export type BrowserRouteContext = { + state: () => BrowserServerState; + ensureBrowserAvailable: () => Promise; + ensureTabAvailable: (targetId?: string) => Promise; + isReachable: (timeoutMs?: number) => Promise; + listTabs: () => Promise; + openTab: (url: string) => Promise; + focusTab: (targetId: string) => Promise; + closeTab: (targetId: string) => Promise; + stopRunningBrowser: () => Promise<{ stopped: boolean }>; + mapTabError: (err: unknown) => { status: number; message: string } | null; +}; + +type ContextOptions = { + runtime: RuntimeEnv; + getState: () => BrowserServerState | null; + setRunning: (running: RunningChrome | null) => void; +}; + +async function fetchJson( + url: string, + timeoutMs = 1500, + init?: RequestInit, +): Promise { + const ctrl = new AbortController(); + const t = setTimeout(() => ctrl.abort(), timeoutMs); + try { + const res = await fetch(url, { ...init, signal: ctrl.signal }); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + return (await res.json()) as T; + } finally { + clearTimeout(t); + } +} + +async function fetchOk( + url: string, + timeoutMs = 1500, + 
init?: RequestInit, +): Promise { + const ctrl = new AbortController(); + const t = setTimeout(() => ctrl.abort(), timeoutMs); + try { + const res = await fetch(url, { ...init, signal: ctrl.signal }); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + } finally { + clearTimeout(t); + } +} + +export function createBrowserRouteContext( + opts: ContextOptions, +): BrowserRouteContext { + const state = () => { + const current = opts.getState(); + if (!current) throw new Error("Browser server not started"); + return current; + }; + + const listTabs = async (): Promise => { + const current = state(); + const raw = await fetchJson< + Array<{ + id?: string; + title?: string; + url?: string; + webSocketDebuggerUrl?: string; + type?: string; + }> + >(`http://127.0.0.1:${current.cdpPort}/json/list`); + return raw + .map((t) => ({ + targetId: t.id ?? "", + title: t.title ?? "", + url: t.url ?? "", + wsUrl: t.webSocketDebuggerUrl, + type: t.type, + })) + .filter((t) => Boolean(t.targetId)); + }; + + const openTab = async (url: string): Promise => { + const current = state(); + const createdViaCdp = await createTargetViaCdp({ + cdpPort: current.cdpPort, + url, + }) + .then((r) => r.targetId) + .catch(() => null); + + if (createdViaCdp) { + const deadline = Date.now() + 2000; + while (Date.now() < deadline) { + const tabs = await listTabs().catch(() => [] as BrowserTab[]); + const found = tabs.find((t) => t.targetId === createdViaCdp); + if (found) return found; + await new Promise((r) => setTimeout(r, 100)); + } + return { targetId: createdViaCdp, title: "", url, type: "page" }; + } + + const encoded = encodeURIComponent(url); + + type CdpTarget = { + id?: string; + title?: string; + url?: string; + webSocketDebuggerUrl?: string; + type?: string; + }; + + const endpoint = `http://127.0.0.1:${current.cdpPort}/json/new?${encoded}`; + const created = await fetchJson(endpoint, 1500, { + method: "PUT", + }).catch(async (err) => { + if (String(err).includes("HTTP 405")) { + 
return await fetchJson(endpoint, 1500); + } + throw err; + }); + + if (!created.id) throw new Error("Failed to open tab (missing id)"); + return { + targetId: created.id, + title: created.title ?? "", + url: created.url ?? url, + wsUrl: created.webSocketDebuggerUrl, + type: created.type, + }; + }; + + const isReachable = async (timeoutMs = 300) => { + const current = state(); + return await isChromeReachable(current.cdpPort, timeoutMs); + }; + + const ensureBrowserAvailable = async (): Promise => { + const current = state(); + if (await isReachable()) return; + if (current.resolved.attachOnly) { + throw new Error( + "Browser attachOnly is enabled and no browser is running.", + ); + } + + const launched = await launchClawdChrome(current.resolved, opts.runtime); + opts.setRunning(launched); + launched.proc.on("exit", () => { + const live = opts.getState(); + if (live?.running?.pid === launched.pid) { + opts.setRunning(null); + } + }); + }; + + const ensureTabAvailable = async (targetId?: string): Promise => { + await ensureBrowserAvailable(); + const tabs1 = await listTabs(); + if (tabs1.length === 0) { + await openTab("about:blank"); + } + + const tabs = await listTabs(); + const chosen = targetId + ? (() => { + const resolved = resolveTargetIdFromTabs(targetId, tabs); + if (!resolved.ok) { + if (resolved.reason === "ambiguous") return "AMBIGUOUS" as const; + return null; + } + return tabs.find((t) => t.targetId === resolved.targetId) ?? null; + })() + : (tabs.at(0) ?? 
null); + + if (chosen === "AMBIGUOUS") { + throw new Error("ambiguous target id prefix"); + } + if (!chosen?.wsUrl) throw new Error("tab not found"); + return chosen; + }; + + const focusTab = async (targetId: string): Promise => { + const current = state(); + const tabs = await listTabs(); + const resolved = resolveTargetIdFromTabs(targetId, tabs); + if (!resolved.ok) { + if (resolved.reason === "ambiguous") { + throw new Error("ambiguous target id prefix"); + } + throw new Error("tab not found"); + } + await fetchOk( + `http://127.0.0.1:${current.cdpPort}/json/activate/${resolved.targetId}`, + ); + }; + + const closeTab = async (targetId: string): Promise => { + const current = state(); + const tabs = await listTabs(); + const resolved = resolveTargetIdFromTabs(targetId, tabs); + if (!resolved.ok) { + if (resolved.reason === "ambiguous") { + throw new Error("ambiguous target id prefix"); + } + throw new Error("tab not found"); + } + await fetchOk( + `http://127.0.0.1:${current.cdpPort}/json/close/${resolved.targetId}`, + ); + }; + + const stopRunningBrowser = async (): Promise<{ stopped: boolean }> => { + const current = state(); + if (!current.running) return { stopped: false }; + await stopClawdChrome(current.running); + opts.setRunning(null); + return { stopped: true }; + }; + + const mapTabError = (err: unknown) => { + const msg = String(err); + if (msg.includes("ambiguous target id prefix")) { + return { status: 409, message: "ambiguous target id prefix" }; + } + if (msg.includes("tab not found")) { + return { status: 404, message: "tab not found" }; + } + return null; + }; + + return { + state, + ensureBrowserAvailable, + ensureTabAvailable, + isReachable, + listTabs, + openTab, + focusTab, + closeTab, + stopRunningBrowser, + mapTabError, + }; +} diff --git a/src/browser/server.test.ts b/src/browser/server.test.ts index 55773520d..4e7062225 100644 --- a/src/browser/server.test.ts +++ b/src/browser/server.test.ts @@ -98,8 +98,45 @@ vi.mock("./cdp.js", () => 
({ vi.mock("./pw-ai.js", () => ({ clickRefViaPlaywright: vi.fn(async () => {}), + clickViaPlaywright: vi.fn(async () => {}), + closePageViaPlaywright: vi.fn(async () => {}), closePlaywrightBrowserConnection: vi.fn(async () => {}), + evaluateViaPlaywright: vi.fn(async () => "ok"), + fileUploadViaPlaywright: vi.fn(async () => {}), + fillFormViaPlaywright: vi.fn(async () => {}), + generateLocatorForRef: vi.fn((ref: string) => `locator('aria-ref=${ref}')`), + getConsoleMessagesViaPlaywright: vi.fn(async () => []), + getNetworkRequestsViaPlaywright: vi.fn(async () => []), + handleDialogViaPlaywright: vi.fn(async () => ({ + message: "ok", + type: "alert", + })), + hoverViaPlaywright: vi.fn(async () => {}), + mouseClickViaPlaywright: vi.fn(async () => {}), + mouseDragViaPlaywright: vi.fn(async () => {}), + mouseMoveViaPlaywright: vi.fn(async () => {}), + navigateBackViaPlaywright: vi.fn(async () => ({ url: "about:blank" })), + navigateViaPlaywright: vi.fn(async () => ({ url: "https://example.com" })), + pdfViaPlaywright: vi.fn(async () => ({ buffer: Buffer.from("pdf") })), + pressKeyViaPlaywright: vi.fn(async () => {}), + resizeViewportViaPlaywright: vi.fn(async () => {}), + runCodeViaPlaywright: vi.fn(async () => "ok"), + selectOptionViaPlaywright: vi.fn(async () => {}), snapshotAiViaPlaywright: vi.fn(async () => ({ snapshot: "ok" })), + startTracingViaPlaywright: vi.fn(async () => {}), + stopTracingViaPlaywright: vi.fn(async () => ({ + buffer: Buffer.from("trace"), + })), + takeScreenshotViaPlaywright: vi.fn(async () => ({ + buffer: Buffer.from("png"), + })), + typeViaPlaywright: vi.fn(async () => {}), + verifyElementVisibleViaPlaywright: vi.fn(async () => {}), + verifyListVisibleViaPlaywright: vi.fn(async () => {}), + verifyTextVisibleViaPlaywright: vi.fn(async () => {}), + verifyValueViaPlaywright: vi.fn(async () => {}), + waitForViaPlaywright: vi.fn(async () => {}), + dragViaPlaywright: vi.fn(async () => {}), })); vi.mock("../media/store.js", () => ({ diff --git 
a/src/browser/server.ts b/src/browser/server.ts index 2158c4eca..9dfd3a934 100644 --- a/src/browser/server.ts +++ b/src/browser/server.ts @@ -1,221 +1,22 @@ import type { Server } from "node:http"; -import path from "node:path"; import express from "express"; import { loadConfig } from "../config/config.js"; import { logError, logInfo, logWarn } from "../logger.js"; -import { ensureMediaDir, saveMediaBuffer } from "../media/store.js"; import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; -import { - captureScreenshot, - captureScreenshotPng, - createTargetViaCdp, - evaluateJavaScript, - getDomText, - querySelector, - snapshotAria, - snapshotDom, -} from "./cdp.js"; -import { - isChromeReachable, - launchClawdChrome, - type RunningChrome, - stopClawdChrome, -} from "./chrome.js"; import { resolveBrowserConfig, shouldStartLocalBrowserServer, } from "./config.js"; +import { closePlaywrightBrowserConnection } from "./pw-ai.js"; +import { registerBrowserRoutes } from "./routes/index.js"; import { - clickRefViaPlaywright, - closePlaywrightBrowserConnection, - snapshotAiViaPlaywright, -} from "./pw-ai.js"; -import { - DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, - DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, - normalizeBrowserScreenshot, -} from "./screenshot.js"; -import { resolveTargetIdFromTabs } from "./target-id.js"; - -export type BrowserTab = { - targetId: string; - title: string; - url: string; - wsUrl?: string; - type?: string; -}; - -type BrowserServerState = { - server: Server; - port: number; - cdpPort: number; - running: RunningChrome | null; - resolved: ReturnType; -}; + type BrowserServerState, + createBrowserRouteContext, +} from "./server-context.js"; let state: BrowserServerState | null = null; -function jsonError(res: express.Response, status: number, message: string) { - res.status(status).json({ error: message }); -} - -async function fetchJson( - url: string, - timeoutMs = 1500, - init?: RequestInit, -): Promise { - const ctrl = new AbortController(); - 
const t = setTimeout(() => ctrl.abort(), timeoutMs); - try { - const res = await fetch(url, { ...init, signal: ctrl.signal }); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - return (await res.json()) as T; - } finally { - clearTimeout(t); - } -} - -async function fetchOk( - url: string, - timeoutMs = 1500, - init?: RequestInit, -): Promise { - const ctrl = new AbortController(); - const t = setTimeout(() => ctrl.abort(), timeoutMs); - try { - const res = await fetch(url, { ...init, signal: ctrl.signal }); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - } finally { - clearTimeout(t); - } -} - -async function listTabs(cdpPort: number): Promise { - const raw = await fetchJson< - Array<{ - id?: string; - title?: string; - url?: string; - webSocketDebuggerUrl?: string; - type?: string; - }> - >(`http://127.0.0.1:${cdpPort}/json/list`); - return raw - .map((t) => ({ - targetId: t.id ?? "", - title: t.title ?? "", - url: t.url ?? "", - wsUrl: t.webSocketDebuggerUrl, - type: t.type, - })) - .filter((t) => Boolean(t.targetId)); -} - -async function openTab(cdpPort: number, url: string): Promise { - // Prefer CDP websocket Target.createTarget (more stable across Chrome versions), - // then fall back to /json/new for older/quirky builds. 
- const createdViaCdp = await createTargetViaCdp({ cdpPort, url }) - .then((r) => r.targetId) - .catch(() => null); - - if (createdViaCdp) { - const deadline = Date.now() + 2000; - while (Date.now() < deadline) { - const tabs = await listTabs(cdpPort).catch(() => [] as BrowserTab[]); - const found = tabs.find((t) => t.targetId === createdViaCdp); - if (found) return found; - await new Promise((r) => setTimeout(r, 100)); - } - return { targetId: createdViaCdp, title: "", url, type: "page" }; - } - - const encoded = encodeURIComponent(url); - - type CdpTarget = { - id?: string; - title?: string; - url?: string; - webSocketDebuggerUrl?: string; - type?: string; - }; - - // Chrome changed /json/new to require PUT (older versions allowed GET). - const endpoint = `http://127.0.0.1:${cdpPort}/json/new?${encoded}`; - const created = await fetchJson(endpoint, 1500, { - method: "PUT", - }).catch(async (err) => { - if (String(err).includes("HTTP 405")) { - return await fetchJson(endpoint, 1500); - } - throw err; - }); - - if (!created.id) throw new Error("Failed to open tab (missing id)"); - return { - targetId: created.id, - title: created.title ?? "", - url: created.url ?? url, - wsUrl: created.webSocketDebuggerUrl, - type: created.type, - }; -} - -async function activateTab(cdpPort: number, targetId: string): Promise { - // Chrome returns plain text ("Target activated") with an application/json content-type. - await fetchOk(`http://127.0.0.1:${cdpPort}/json/activate/${targetId}`); -} - -async function closeTab(cdpPort: number, targetId: string): Promise { - // Chrome returns plain text ("Target is closing") with an application/json content-type. 
- await fetchOk(`http://127.0.0.1:${cdpPort}/json/close/${targetId}`); -} - -async function ensureBrowserAvailable(runtime: RuntimeEnv): Promise { - if (!state) throw new Error("Browser server not started"); - if (await isChromeReachable(state.cdpPort)) return; - if (state.resolved.attachOnly) { - throw new Error("Browser attachOnly is enabled and no browser is running."); - } - - const launched = await launchClawdChrome(state.resolved, runtime); - state.running = launched; - launched.proc.on("exit", () => { - if (state?.running?.pid === launched.pid) { - state.running = null; - } - }); - return; -} - -async function ensureTabAvailable(runtime: RuntimeEnv, targetId?: string) { - if (!state) throw new Error("Browser server not started"); - await ensureBrowserAvailable(runtime); - - const tabs1 = await listTabs(state.cdpPort); - if (tabs1.length === 0) { - await openTab(state.cdpPort, "about:blank"); - } - - const tabs = await listTabs(state.cdpPort); - const chosen = targetId - ? (() => { - const resolved = resolveTargetIdFromTabs(targetId, tabs); - if (!resolved.ok) { - if (resolved.reason === "ambiguous") return "AMBIGUOUS" as const; - return null; - } - return tabs.find((t) => t.targetId === resolved.targetId) ?? null; - })() - : (tabs.at(0) ?? 
null); - - if (chosen === "AMBIGUOUS") { - throw new Error("ambiguous target id prefix"); - } - if (!chosen?.wsUrl) throw new Error("tab not found"); - return chosen; -} - export async function startBrowserControlServerFromConfig( runtime: RuntimeEnv = defaultRuntime, ): Promise { @@ -236,378 +37,14 @@ export async function startBrowserControlServerFromConfig( const app = express(); app.use(express.json({ limit: "1mb" })); - app.get("/", async (_req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const reachable = await isChromeReachable(state.cdpPort, 300); - res.json({ - enabled: state.resolved.enabled, - controlUrl: state.resolved.controlUrl, - running: reachable, - pid: state.running?.pid ?? null, - cdpPort: state.cdpPort, - chosenBrowser: state.running?.exe.kind ?? null, - userDataDir: state.running?.userDataDir ?? null, - color: state.resolved.color, - headless: state.resolved.headless, - attachOnly: state.resolved.attachOnly, - }); - }); - - app.post("/start", async (_req, res) => { - try { - await ensureBrowserAvailable(runtime); - res.json({ ok: true }); - } catch (err) { - jsonError(res, 500, String(err)); - } - }); - - app.post("/stop", async (_req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - if (!state.running) return res.json({ ok: true, stopped: false }); - try { - await stopClawdChrome(state.running); - state.running = null; - res.json({ ok: true, stopped: true }); - } catch (err) { - jsonError(res, 500, String(err)); - } - }); - - app.get("/tabs", async (_req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const reachable = await isChromeReachable(state.cdpPort, 300); - if (!reachable) - return res.json({ running: false, tabs: [] as BrowserTab[] }); - try { - const tabs = await listTabs(state.cdpPort); - res.json({ running: true, tabs }); - } catch (err) { - jsonError(res, 500, String(err)); - } - }); - - app.post("/tabs/open", async 
(req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const url = String((req.body as { url?: unknown })?.url ?? "").trim(); - if (!url) return jsonError(res, 400, "url is required"); - try { - await ensureBrowserAvailable(runtime); - const tab = await openTab(state.cdpPort, url); - res.json(tab); - } catch (err) { - jsonError(res, 500, String(err)); - } - }); - - app.post("/tabs/focus", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const targetId = String( - (req.body as { targetId?: unknown })?.targetId ?? "", - ).trim(); - if (!targetId) return jsonError(res, 400, "targetId is required"); - const reachable = await isChromeReachable(state.cdpPort, 300); - if (!reachable) return jsonError(res, 409, "browser not running"); - try { - const tabs = await listTabs(state.cdpPort); - const resolved = resolveTargetIdFromTabs(targetId, tabs); - if (!resolved.ok) { - if (resolved.reason === "ambiguous") { - return jsonError(res, 409, "ambiguous target id prefix"); - } - return jsonError(res, 404, "tab not found"); - } - await activateTab(state.cdpPort, resolved.targetId); - res.json({ ok: true }); - } catch (err) { - jsonError(res, 500, String(err)); - } - }); - - app.delete("/tabs/:targetId", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const targetId = String(req.params.targetId ?? 
"").trim(); - if (!targetId) return jsonError(res, 400, "targetId is required"); - const reachable = await isChromeReachable(state.cdpPort, 300); - if (!reachable) return jsonError(res, 409, "browser not running"); - try { - const tabs = await listTabs(state.cdpPort); - const resolved = resolveTargetIdFromTabs(targetId, tabs); - if (!resolved.ok) { - if (resolved.reason === "ambiguous") { - return jsonError(res, 409, "ambiguous target id prefix"); - } - return jsonError(res, 404, "tab not found"); - } - await closeTab(state.cdpPort, resolved.targetId); - res.json({ ok: true }); - } catch (err) { - jsonError(res, 500, String(err)); - } - }); - - app.get("/screenshot", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const targetId = - typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; - const fullPage = - req.query.fullPage === "true" || req.query.fullPage === "1"; - - const reachable = await isChromeReachable(state.cdpPort, 300); - if (!reachable) return jsonError(res, 409, "browser not running"); - - try { - const tabs = await listTabs(state.cdpPort); - const chosen = targetId - ? (() => { - const resolved = resolveTargetIdFromTabs(targetId, tabs); - if (!resolved.ok) { - if (resolved.reason === "ambiguous") { - return "AMBIGUOUS" as const; - } - return null; - } - return tabs.find((t) => t.targetId === resolved.targetId) ?? null; - })() - : (tabs.at(0) ?? 
null); - if (chosen === "AMBIGUOUS") { - return jsonError(res, 409, "ambiguous target id prefix"); - } - if (!chosen?.wsUrl) return jsonError(res, 404, "tab not found"); - - let shot: Buffer = Buffer.alloc(0); - let contentTypeHint: "image/jpeg" | "image/png" = "image/jpeg"; - try { - shot = await captureScreenshot({ - wsUrl: chosen.wsUrl, - fullPage, - format: "jpeg", - quality: 85, - }); - } catch { - contentTypeHint = "image/png"; - shot = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage }); - } - - const normalized = await normalizeBrowserScreenshot(shot, { - maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, - maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, - }); - await ensureMediaDir(); - const saved = await saveMediaBuffer( - normalized.buffer, - normalized.contentType ?? contentTypeHint, - "browser", - DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, - ); - const filePath = path.resolve(saved.path); - res.json({ - ok: true, - path: filePath, - targetId: chosen.targetId, - url: chosen.url, - }); - } catch (err) { - jsonError(res, 500, String(err)); - } - }); - - function mapTabError(err: unknown) { - const msg = String(err); - if (msg.includes("ambiguous target id prefix")) { - return { status: 409, message: "ambiguous target id prefix" }; - } - if (msg.includes("tab not found")) { - return { status: 404, message: "tab not found" }; - } - return null; - } - - app.post("/eval", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const js = String((req.body as { js?: unknown })?.js ?? "").trim(); - const targetId = String( - (req.body as { targetId?: unknown })?.targetId ?? "", - ).trim(); - const awaitPromise = Boolean((req.body as { await?: unknown })?.await); - - if (!js) return jsonError(res, 400, "js is required"); - - try { - const tab = await ensureTabAvailable(runtime, targetId || undefined); - const evaluated = await evaluateJavaScript({ - wsUrl: tab.wsUrl ?? 
"", - expression: js, - awaitPromise, - returnByValue: true, - }); - - if (evaluated.exceptionDetails) { - const msg = - evaluated.exceptionDetails.exception?.description || - evaluated.exceptionDetails.text || - "JavaScript evaluation failed"; - return jsonError(res, 400, msg); - } - - res.json({ - ok: true, - targetId: tab.targetId, - url: tab.url, - result: evaluated.result, - }); - } catch (err) { - const mapped = mapTabError(err); - if (mapped) return jsonError(res, mapped.status, mapped.message); - jsonError(res, 500, String(err)); - } - }); - - app.get("/query", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const selector = - typeof req.query.selector === "string" ? req.query.selector.trim() : ""; - const targetId = - typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; - const limit = - typeof req.query.limit === "string" ? Number(req.query.limit) : undefined; - - if (!selector) return jsonError(res, 400, "selector is required"); - - try { - const tab = await ensureTabAvailable(runtime, targetId || undefined); - const result = await querySelector({ - wsUrl: tab.wsUrl ?? "", - selector, - limit, - }); - res.json({ ok: true, targetId: tab.targetId, url: tab.url, ...result }); - } catch (err) { - const mapped = mapTabError(err); - if (mapped) return jsonError(res, mapped.status, mapped.message); - jsonError(res, 500, String(err)); - } - }); - - app.get("/dom", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const targetId = - typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; - const format = req.query.format === "text" ? "text" : "html"; - const selector = - typeof req.query.selector === "string" ? req.query.selector.trim() : ""; - const maxChars = - typeof req.query.maxChars === "string" - ? 
Number(req.query.maxChars) - : undefined; - - try { - const tab = await ensureTabAvailable(runtime, targetId || undefined); - const result = await getDomText({ - wsUrl: tab.wsUrl ?? "", - format, - maxChars, - selector: selector || undefined, - }); - res.json({ - ok: true, - targetId: tab.targetId, - url: tab.url, - format, - ...result, - }); - } catch (err) { - const mapped = mapTabError(err); - if (mapped) return jsonError(res, mapped.status, mapped.message); - jsonError(res, 500, String(err)); - } - }); - - app.get("/snapshot", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const targetId = - typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; - const format = - req.query.format === "domSnapshot" - ? "domSnapshot" - : req.query.format === "ai" - ? "ai" - : "aria"; - const limit = - typeof req.query.limit === "string" ? Number(req.query.limit) : undefined; - - try { - const tab = await ensureTabAvailable(runtime, targetId || undefined); - - if (format === "ai") { - const snap = await snapshotAiViaPlaywright({ - cdpPort: state.cdpPort, - targetId: tab.targetId, - }); - return res.json({ - ok: true, - format, - targetId: tab.targetId, - url: tab.url, - ...snap, - }); - } - - if (format === "aria") { - const snap = await snapshotAria({ - wsUrl: tab.wsUrl ?? "", - limit, - }); - return res.json({ - ok: true, - format, - targetId: tab.targetId, - url: tab.url, - ...snap, - }); - } - - const snap = await snapshotDom({ - wsUrl: tab.wsUrl ?? "", - limit, - }); - return res.json({ - ok: true, - format, - targetId: tab.targetId, - url: tab.url, - ...snap, - }); - } catch (err) { - const mapped = mapTabError(err); - if (mapped) return jsonError(res, mapped.status, mapped.message); - jsonError(res, 500, String(err)); - } - }); - - app.post("/click", async (req, res) => { - if (!state) return jsonError(res, 503, "browser server not started"); - const ref = String((req.body as { ref?: unknown })?.ref ?? 
"").trim(); - const targetId = String( - (req.body as { targetId?: unknown })?.targetId ?? "", - ).trim(); - - if (!ref) return jsonError(res, 400, "ref is required"); - - try { - const tab = await ensureTabAvailable(runtime, targetId || undefined); - await clickRefViaPlaywright({ - cdpPort: state.cdpPort, - targetId: tab.targetId, - ref, - }); - res.json({ ok: true, targetId: tab.targetId, url: tab.url }); - } catch (err) { - const mapped = mapTabError(err); - if (mapped) return jsonError(res, mapped.status, mapped.message); - jsonError(res, 500, String(err)); - } + const ctx = createBrowserRouteContext({ + runtime, + getState: () => state, + setRunning: (running) => { + if (state) state.running = running; + }, }); + registerBrowserRoutes(app, ctx); const port = resolved.controlPort; const server = await new Promise((resolve, reject) => { @@ -616,6 +53,7 @@ export async function startBrowserControlServerFromConfig( }).catch((err) => { logError( `clawd browser server failed to bind 127.0.0.1:${port}: ${String(err)}`, + runtime, ); return null; }); @@ -639,19 +77,27 @@ export async function startBrowserControlServerFromConfig( export async function stopBrowserControlServer( runtime: RuntimeEnv = defaultRuntime, -) { - if (!state) return; +): Promise { const current = state; - state = null; + if (!current) return; + + const ctx = createBrowserRouteContext({ + runtime, + getState: () => state, + setRunning: (running) => { + if (state) state.running = running; + }, + }); + try { - await closePlaywrightBrowserConnection(); - if (current.running) { - await stopClawdChrome(current.running).catch((err) => - logWarn(`clawd browser stop failed: ${String(err)}`, runtime), - ); - } - } catch { - // ignore + await ctx.stopRunningBrowser(); + } catch (err) { + logWarn(`clawd browser stop failed: ${String(err)}`, runtime); } - await new Promise((resolve) => current.server.close(() => resolve())); + + await new Promise((resolve) => { + current.server.close(() => resolve()); + }); 
/** Options declared on the parent `browser` command and shared by all subcommands. */
type BrowserParentOpts = { url?: string; json?: boolean };

/** Reads the parent command's options (`--url`, `--json`) from a subcommand instance. */
function browserParentOpts(cmd: Command): BrowserParentOpts | undefined {
  return cmd.parent?.opts?.() as BrowserParentOpts | undefined;
}

/** Pretty-prints a value as JSON on the runtime's standard output. */
function logJson(value: unknown): void {
  defaultRuntime.log(JSON.stringify(value, null, 2));
}

/** Reports a failed browser request and exits with status 1. */
function failBrowserCommand(err: unknown): void {
  defaultRuntime.error(danger(String(err)));
  defaultRuntime.exit(1);
}

/** Returns the trimmed string, or `undefined` when it is missing, blank or not a string. */
function trimmedOrUndefined(raw: unknown): string | undefined {
  return typeof raw === "string" ? raw.trim() || undefined : undefined;
}

/** Returns the value when it is a finite number, otherwise `undefined` (e.g. for `NaN`). */
function finiteOrUndefined(raw: unknown): number | undefined {
  return typeof raw === "number" && Number.isFinite(raw) ? raw : undefined;
}

/** Announces that output was written to `out`, as JSON or as the bare path. */
function reportOutFile(json: boolean | undefined, out: string): void {
  if (json) {
    logJson({ ok: true, out });
  } else {
    defaultRuntime.log(out);
  }
}

/**
 * Parses the `--args` value of `browser tool`.
 *
 * @param raw - Raw option value; `undefined` or an empty string means "no arguments".
 * @returns The parsed arguments object (`{}` when nothing was passed).
 * @throws Error when the value is not valid JSON or is not a JSON object.
 */
function parseToolArgs(raw: unknown): Record<string, unknown> {
  if (!raw) return {};
  let parsed: unknown;
  try {
    parsed = JSON.parse(String(raw));
  } catch (err) {
    throw new Error(`Invalid JSON for --args: ${String(err)}`);
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("Invalid JSON for --args: value must be a JSON object");
  }
  return parsed as Record<string, unknown>;
}

/**
 * Registers the `browser` command group (status, start, stop, tabs, open,
 * focus, close, screenshot, eval, query, dom, snapshot, click, tool) on the
 * given commander program.
 *
 * Every subcommand resolves the control URL from the parent `--url` option and
 * prints JSON when the parent `--json` flag is set. Failures are reported via
 * `defaultRuntime.error` followed by `defaultRuntime.exit(1)`.
 *
 * Commander invokes action handlers as `(...declaredArgs, options, command)`,
 * so every handler below takes the options object before the command, even
 * when the subcommand declares no options of its own.
 *
 * Value placeholders (`<url>`, `<id>`, ...) are required in option flags:
 * without one commander treats the option as a boolean flag.
 *
 * @param program - Root commander program to attach the `browser` command to.
 */
export function registerBrowserCli(program: Command): void {
  const browser = program
    .command("browser")
    .description("Manage clawd's dedicated browser (Chrome/Chromium)")
    .option(
      "--url <url>",
      "Override browser control URL (default from ~/.clawdis/clawdis.json)",
    )
    .option("--json", "Output machine-readable JSON", false)
    .addHelpText(
      "after",
      `
Examples:
  clawdis browser status
  clawdis browser start
  clawdis browser tabs
  clawdis browser open https://example.com
  clawdis browser screenshot # emits MEDIA:<path>
  clawdis browser screenshot <targetId> --full-page
  clawdis browser eval "location.href"
  clawdis browser query "a" --limit 5
  clawdis browser dom --format text --max-chars 5000
  clawdis browser snapshot --format aria --limit 200
  clawdis browser snapshot --format ai
  clawdis browser click 76
  clawdis browser tool browser_file_upload --args '{"paths":["/tmp/file.txt"]}'
`,
    )
    .action(() => {
      defaultRuntime.error(
        danger('Missing subcommand. Try: "clawdis browser status"'),
      );
      defaultRuntime.exit(1);
    });

  browser
    .command("status")
    .description("Show browser status")
    .action(async (_opts: unknown, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        const status = await browserStatus(baseUrl);
        if (parent?.json) {
          logJson(status);
          return;
        }
        defaultRuntime.log(
          [
            `enabled: ${status.enabled}`,
            `running: ${status.running}`,
            `controlUrl: ${status.controlUrl}`,
            `cdpPort: ${status.cdpPort}`,
            `browser: ${status.chosenBrowser ?? "unknown"}`,
            `profileColor: ${status.color}`,
          ].join("\n"),
        );
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("start")
    .description("Start the clawd browser (no-op if already running)")
    .action(async (_opts: unknown, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        await browserStart(baseUrl);
        const status = await browserStatus(baseUrl);
        if (parent?.json) {
          logJson(status);
          return;
        }
        defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`));
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("stop")
    .description("Stop the clawd browser (best-effort)")
    .action(async (_opts: unknown, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        await browserStop(baseUrl);
        const status = await browserStatus(baseUrl);
        if (parent?.json) {
          logJson(status);
          return;
        }
        defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`));
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("tabs")
    .description("List open tabs")
    .action(async (_opts: unknown, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        const tabs = await browserTabs(baseUrl);
        if (parent?.json) {
          logJson({ tabs });
          return;
        }
        if (tabs.length === 0) {
          defaultRuntime.log("No tabs (browser closed or no targets).");
          return;
        }
        defaultRuntime.log(
          tabs
            .map(
              (t, i) =>
                `${i + 1}. ${t.title || "(untitled)"}\n ${t.url}\n id: ${t.targetId}`,
            )
            .join("\n"),
        );
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("open")
    .description("Open a URL in a new tab")
    .argument("<url>", "URL to open")
    // The options object sits between the argument and the command instance.
    .action(async (url: string, _opts: unknown, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        const tab = await browserOpenTab(baseUrl, url);
        if (parent?.json) {
          logJson(tab);
          return;
        }
        defaultRuntime.log(`opened: ${tab.url}\nid: ${tab.targetId}`);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("focus")
    .description("Focus a tab by target id (or unique prefix)")
    .argument("<targetId>", "Target id or unique prefix")
    .action(async (targetId: string, _opts: unknown, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        await browserFocusTab(baseUrl, targetId);
        if (parent?.json) {
          logJson({ ok: true });
          return;
        }
        defaultRuntime.log(`focused tab ${targetId}`);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("close")
    .description("Close a tab by target id (or unique prefix)")
    .argument("<targetId>", "Target id or unique prefix")
    .action(async (targetId: string, _opts: unknown, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        await browserCloseTab(baseUrl, targetId);
        if (parent?.json) {
          logJson({ ok: true });
          return;
        }
        defaultRuntime.log(`closed tab ${targetId}`);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("screenshot")
    .description("Capture a screenshot (MEDIA:<path>)")
    .argument("[targetId]", "CDP target id (or unique prefix)")
    .option("--full-page", "Capture full scrollable page", false)
    .action(async (targetId: string | undefined, opts, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        const result = await browserScreenshot(baseUrl, {
          targetId: trimmedOrUndefined(targetId),
          fullPage: Boolean(opts.fullPage),
        });
        if (parent?.json) {
          logJson(result);
          return;
        }
        defaultRuntime.log(`MEDIA:${result.path}`);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("eval")
    .description("Run JavaScript in the active tab")
    .argument("<js>", "JavaScript expression")
    .option("--target-id <id>", "CDP target id (or unique prefix)")
    .option("--await", "Await promise result", false)
    .action(async (js: string, opts, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        const result = await browserEval(baseUrl, {
          js,
          targetId: trimmedOrUndefined(opts.targetId),
          awaitPromise: Boolean(opts.await),
        });
        if (parent?.json) {
          logJson(result);
          return;
        }
        logJson(result.result);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("query")
    .description("Query selector matches")
    .argument("<selector>", "CSS selector")
    .option("--target-id <id>", "CDP target id (or unique prefix)")
    .option("--limit <n>", "Max matches (default: 20)", (v: string) =>
      Number(v),
    )
    .action(async (selector: string, opts, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        const result = await browserQuery(baseUrl, {
          selector,
          targetId: trimmedOrUndefined(opts.targetId),
          limit: finiteOrUndefined(opts.limit),
        });
        if (parent?.json) {
          logJson(result);
          return;
        }
        logJson(result.matches);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("dom")
    .description("Dump DOM (html or text) with truncation")
    .option("--format <html|text>", "Output format (default: html)", "html")
    .option("--target-id <id>", "CDP target id (or unique prefix)")
    .option("--selector <css>", "Optional CSS selector to scope the dump")
    .option(
      "--max-chars <n>",
      "Max characters (default: 200000)",
      (v: string) => Number(v),
    )
    .option("--out <path>", "Write output to a file")
    .action(async (opts, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      // Anything other than "text" falls back to the default html dump.
      const format = opts.format === "text" ? "text" : "html";
      try {
        const result = await browserDom(baseUrl, {
          format,
          targetId: trimmedOrUndefined(opts.targetId),
          maxChars: finiteOrUndefined(opts.maxChars),
          selector: trimmedOrUndefined(opts.selector),
        });
        if (opts.out) {
          const fs = await import("node:fs/promises");
          await fs.writeFile(opts.out, result.text, "utf8");
          reportOutFile(parent?.json, opts.out);
          return;
        }
        if (parent?.json) {
          logJson(result);
          return;
        }
        defaultRuntime.log(result.text);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("snapshot")
    .description("Capture an AI-friendly snapshot (aria, domSnapshot, or ai)")
    .option(
      "--format <aria|domSnapshot|ai>",
      "Snapshot format (default: aria)",
      "aria",
    )
    .option("--target-id <id>", "CDP target id (or unique prefix)")
    .option("--limit <n>", "Max nodes (default: 500/800)", (v: string) =>
      Number(v),
    )
    .option("--out <path>", "Write snapshot to a file")
    .action(async (opts, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      // Unknown formats fall back to the default aria snapshot.
      const format =
        opts.format === "domSnapshot"
          ? "domSnapshot"
          : opts.format === "ai"
            ? "ai"
            : "aria";
      try {
        const result = await browserSnapshot(baseUrl, {
          format,
          targetId: trimmedOrUndefined(opts.targetId),
          limit: finiteOrUndefined(opts.limit),
        });

        if (opts.out) {
          const fs = await import("node:fs/promises");
          // The ai snapshot is written as its raw text; other formats as JSON.
          const payload =
            result.format === "ai"
              ? result.snapshot
              : JSON.stringify(result, null, 2);
          await fs.writeFile(opts.out, payload, "utf8");
          reportOutFile(parent?.json, opts.out);
          return;
        }

        if (parent?.json) {
          logJson(result);
          return;
        }

        if (result.format === "ai") {
          defaultRuntime.log(result.snapshot);
          return;
        }

        if (result.format === "domSnapshot") {
          logJson(result);
          return;
        }

        // aria: render one line per node, indented by depth (capped at 20).
        const nodes = "nodes" in result ? result.nodes : [];
        defaultRuntime.log(
          nodes
            .map((n) => {
              const indent = " ".repeat(Math.min(20, n.depth));
              const name = n.name ? ` "${n.name}"` : "";
              const value = n.value ? ` = "${n.value}"` : "";
              return `${indent}- ${n.role}${name}${value}`;
            })
            .join("\n"),
        );
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("click")
    .description("Click an element by ref from an ai snapshot (e.g. 76)")
    .argument("<ref>", "Ref id from ai snapshot")
    .option("--target-id <id>", "CDP target id (or unique prefix)")
    .action(async (ref: string, opts, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      try {
        const result = await browserClickRef(baseUrl, {
          ref,
          targetId: trimmedOrUndefined(opts.targetId),
        });
        if (parent?.json) {
          logJson(result);
          return;
        }
        defaultRuntime.log(`clicked ref ${ref} on ${result.url}`);
      } catch (err) {
        failBrowserCommand(err);
      }
    });

  browser
    .command("tool")
    .description("Call a Playwright MCP-style browser tool by name")
    .argument("<name>", "Tool name (browser_*)")
    .option("--args <json>", "JSON arguments for the tool")
    .option("--target-id <id>", "CDP target id (or unique prefix)")
    .action(async (name: string, opts, cmd: Command) => {
      const parent = browserParentOpts(cmd);
      const baseUrl = resolveBrowserControlUrl(parent?.url);
      let args: Record<string, unknown>;
      try {
        args = parseToolArgs(opts.args);
      } catch (err) {
        defaultRuntime.error(
          danger(err instanceof Error ? err.message : String(err)),
        );
        defaultRuntime.exit(1);
        // exit() may be a non-terminating stub; never send a request built
        // from arguments that failed to parse.
        return;
      }
      try {
        const result = await browserTool(baseUrl, {
          name,
          args,
          targetId: trimmedOrUndefined(opts.targetId),
        });
        // Tool responses have no text rendering; always print JSON.
        logJson(result);
      } catch (err) {
        failBrowserCommand(err);
      }
    });
}
browserStart, - browserStatus, - browserStop, - browserTabs, - resolveBrowserControlUrl, -} from "../browser/client.js"; import { agentCommand } from "../commands/agent.js"; import { healthCommand } from "../commands/health.js"; import { sendCommand } from "../commands/send.js"; import { sessionsCommand } from "../commands/sessions.js"; import { setupCommand } from "../commands/setup.js"; import { statusCommand } from "../commands/status.js"; -import { danger, info, setVerbose } from "../globals.js"; +import { danger, setVerbose } from "../globals.js"; import { loginWeb, logoutWeb } from "../provider-web.js"; import { defaultRuntime } from "../runtime.js"; import { VERSION } from "../version.js"; +import { registerBrowserCli } from "./browser-cli.js"; import { registerCanvasCli } from "./canvas-cli.js"; import { registerCronCli } from "./cron-cli.js"; import { createDefaultDeps } from "./deps.js"; @@ -363,493 +348,7 @@ Shows token usage per session when the agent reports it; set inbound.agent.conte ); }); - const browser = program - .command("browser") - .description("Manage clawd's dedicated browser (Chrome/Chromium)") - .option( - "--url ", - "Override browser control URL (default from ~/.clawdis/clawdis.json)", - ) - .option("--json", "Output machine-readable JSON", false) - .addHelpText( - "after", - ` -Examples: - clawdis browser status - clawdis browser start - clawdis browser tabs - clawdis browser open https://example.com - clawdis browser screenshot # emits MEDIA: - clawdis browser screenshot --full-page - clawdis browser eval "location.href" - clawdis browser query "a" --limit 5 - clawdis browser dom --format text --max-chars 5000 - clawdis browser snapshot --format aria --limit 200 - clawdis browser snapshot --format ai - clawdis browser click 76 -`, - ) - .action(() => { - defaultRuntime.error( - danger('Missing subcommand. 
Try: "clawdis browser status"'), - ); - defaultRuntime.exit(1); - }); - - const parentOpts = (cmd: Command) => - cmd.parent?.opts?.() as { url?: string; json?: boolean }; - - browser - .command("status") - .description("Show browser status") - .action(async (_opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - const status = await browserStatus(baseUrl); - if (parent?.json) { - defaultRuntime.log(JSON.stringify(status, null, 2)); - return; - } - defaultRuntime.log( - [ - `enabled: ${status.enabled}`, - `running: ${status.running}`, - `controlUrl: ${status.controlUrl}`, - `cdpPort: ${status.cdpPort}`, - `browser: ${status.chosenBrowser ?? "unknown"}`, - `profileColor: ${status.color}`, - ].join("\n"), - ); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("start") - .description("Start the clawd browser (no-op if already running)") - .action(async (_opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - await browserStart(baseUrl); - const status = await browserStatus(baseUrl); - if (parent?.json) { - defaultRuntime.log(JSON.stringify(status, null, 2)); - return; - } - defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`)); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("stop") - .description("Stop the clawd browser (best-effort)") - .action(async (_opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - await browserStop(baseUrl); - const status = await browserStatus(baseUrl); - if (parent?.json) { - defaultRuntime.log(JSON.stringify(status, null, 2)); - return; - } - defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`)); - } catch (err) { - defaultRuntime.error(danger(String(err))); - 
defaultRuntime.exit(1); - } - }); - - browser - .command("tabs") - .description("List open tabs") - .action(async (_opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - const tabs = await browserTabs(baseUrl); - if (parent?.json) { - defaultRuntime.log(JSON.stringify({ tabs }, null, 2)); - return; - } - if (tabs.length === 0) { - defaultRuntime.log("No tabs (browser closed or no targets)."); - return; - } - defaultRuntime.log( - tabs - .map( - (t, i) => - `${i + 1}. ${t.title || "(untitled)"}\n ${t.url}\n id: ${t.targetId}`, - ) - .join("\n"), - ); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("open") - .description("Open a URL in a new tab") - .argument("", "URL to open") - .action(async (url: string, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - const tab = await browserOpenTab(baseUrl, url); - if (parent?.json) { - defaultRuntime.log(JSON.stringify(tab, null, 2)); - return; - } - defaultRuntime.log(`opened: ${tab.url}\nid: ${tab.targetId}`); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("focus") - .description("Focus/activate a tab by target id") - .argument("", "CDP target id") - .action(async (targetId: string, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - await browserFocusTab(baseUrl, targetId); - if (parent?.json) { - defaultRuntime.log(JSON.stringify({ ok: true }, null, 2)); - return; - } - defaultRuntime.log("ok"); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("close") - .description("Close a tab by target id") - .argument("", "CDP target id") - .action(async (targetId: string, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = 
resolveBrowserControlUrl(parent?.url); - try { - await browserCloseTab(baseUrl, targetId); - if (parent?.json) { - defaultRuntime.log(JSON.stringify({ ok: true }, null, 2)); - return; - } - defaultRuntime.log("ok"); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("screenshot") - .description("Capture a screenshot (defaults to first tab)") - .argument("[targetId]", "CDP target id") - .option("--full-page", "Capture full page (best-effort)", false) - .action(async (targetId: string | undefined, opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - const result = await browserScreenshot(baseUrl, { - targetId: targetId?.trim() || undefined, - fullPage: Boolean(opts.fullPage), - }); - if (parent?.json) { - defaultRuntime.log(JSON.stringify(result, null, 2)); - return; - } - // Print MEDIA: token so the agent can forward the image as an attachment. - defaultRuntime.log(`MEDIA:${result.path}`); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("eval") - .description("Evaluate JavaScript in the page context") - .argument("[js]", "JavaScript expression (or use --js-file/--js-stdin)") - .option("--target-id ", "CDP target id (or unique prefix)") - .option("--await", "Await promises (Runtime.evaluate awaitPromise)", false) - .option("--js-file ", "Read JavaScript from a file") - .option("--js-stdin", "Read JavaScript from stdin", false) - .action(async (jsArg: string | undefined, opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - - let js = jsArg?.trim() ?? 
""; - if (opts.jsFile && opts.jsStdin) { - defaultRuntime.error(danger("Use either --js-file or --js-stdin.")); - defaultRuntime.exit(2); - return; - } - if (opts.jsFile) { - const fs = await import("node:fs/promises"); - js = await fs.readFile(opts.jsFile, "utf8"); - } else if (opts.jsStdin) { - js = await new Promise((resolve, reject) => { - let buf = ""; - process.stdin.setEncoding("utf8"); - process.stdin.on("data", (c) => { - buf += c; - }); - process.stdin.on("end", () => resolve(buf)); - process.stdin.on("error", (e) => reject(e)); - }); - } - - if (!js.trim()) { - defaultRuntime.error( - danger("Missing JavaScript. Pass or use --js-file/--js-stdin."), - ); - defaultRuntime.exit(2); - return; - } - - try { - const result = await browserEval(baseUrl, { - js, - targetId: opts.targetId?.trim() || undefined, - awaitPromise: Boolean(opts.await), - }); - if (parent?.json) { - defaultRuntime.log(JSON.stringify(result, null, 2)); - return; - } - const v = result.result; - if (Object.hasOwn(v, "value")) { - const value = (v as { value?: unknown }).value; - defaultRuntime.log( - typeof value === "string" ? value : JSON.stringify(value, null, 2), - ); - return; - } - defaultRuntime.log(v.description ?? JSON.stringify(v, null, 2)); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("query") - .description("Query elements by CSS selector") - .argument("", "CSS selector") - .option("--target-id ", "CDP target id (or unique prefix)") - .option("--limit ", "Max matches (default: 20)", (v: string) => - Number(v), - ) - .option( - "--format ", - "Text output format (default: text)", - "text", - ) - .action(async (selector: string, opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - const result = await browserQuery(baseUrl, { - selector, - targetId: opts.targetId?.trim() || undefined, - limit: Number.isFinite(opts.limit) ? 
opts.limit : undefined, - }); - if (parent?.json || opts.format === "json") { - defaultRuntime.log(JSON.stringify(result, null, 2)); - return; - } - if (!result.matches.length) { - defaultRuntime.log("No matches."); - return; - } - defaultRuntime.log( - result.matches - .map((m) => { - const id = m.id ? `#${m.id}` : ""; - const cls = m.className - ? `.${m.className - .split(/\s+/) - .filter(Boolean) - .slice(0, 3) - .join(".")}` - : ""; - const head = `${m.index}. <${m.tag}${id}${cls}>`; - const text = m.text ? `\n ${m.text}` : ""; - return `${head}${text}`; - }) - .join("\n"), - ); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("dom") - .description("Dump DOM (html or text) with truncation") - .option("--format ", "Output format (default: html)", "html") - .option("--target-id ", "CDP target id (or unique prefix)") - .option("--selector ", "Optional CSS selector to scope the dump") - .option( - "--max-chars ", - "Max characters (default: 200000)", - (v: string) => Number(v), - ) - .option("--out ", "Write output to a file") - .action(async (opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - const format = opts.format === "text" ? "text" : "html"; - try { - const result = await browserDom(baseUrl, { - format, - targetId: opts.targetId?.trim() || undefined, - maxChars: Number.isFinite(opts.maxChars) ? 
opts.maxChars : undefined, - selector: opts.selector?.trim() || undefined, - }); - if (opts.out) { - const fs = await import("node:fs/promises"); - await fs.writeFile(opts.out, result.text, "utf8"); - if (parent?.json) { - defaultRuntime.log( - JSON.stringify({ ok: true, out: opts.out }, null, 2), - ); - } else { - defaultRuntime.log(opts.out); - } - return; - } - if (parent?.json) { - defaultRuntime.log(JSON.stringify(result, null, 2)); - return; - } - defaultRuntime.log(result.text); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("snapshot") - .description("Capture an AI-friendly snapshot (aria, domSnapshot, or ai)") - .option( - "--format ", - "Snapshot format (default: aria)", - "aria", - ) - .option("--target-id ", "CDP target id (or unique prefix)") - .option("--limit ", "Max nodes (default: 500/800)", (v: string) => - Number(v), - ) - .option("--out ", "Write snapshot to a file") - .action(async (opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - const format = - opts.format === "domSnapshot" - ? "domSnapshot" - : opts.format === "ai" - ? "ai" - : "aria"; - try { - const result = await browserSnapshot(baseUrl, { - format, - targetId: opts.targetId?.trim() || undefined, - limit: Number.isFinite(opts.limit) ? 
opts.limit : undefined, - }); - - if (opts.out) { - const fs = await import("node:fs/promises"); - if (result.format === "ai") { - await fs.writeFile(opts.out, result.snapshot, "utf8"); - } else { - const payload = JSON.stringify(result, null, 2); - await fs.writeFile(opts.out, payload, "utf8"); - } - if (parent?.json) { - defaultRuntime.log( - JSON.stringify({ ok: true, out: opts.out }, null, 2), - ); - } else { - defaultRuntime.log(opts.out); - } - return; - } - - if (parent?.json) { - defaultRuntime.log(JSON.stringify(result, null, 2)); - return; - } - - if (result.format === "ai") { - defaultRuntime.log(result.snapshot); - return; - } - - if (result.format === "domSnapshot") { - defaultRuntime.log(JSON.stringify(result, null, 2)); - return; - } - - // aria text rendering - const nodes = "nodes" in result ? result.nodes : []; - defaultRuntime.log( - nodes - .map((n) => { - const indent = " ".repeat(Math.min(20, n.depth)); - const name = n.name ? ` "${n.name}"` : ""; - const value = n.value ? ` = "${n.value}"` : ""; - return `${indent}- ${n.role}${name}${value}`; - }) - .join("\n"), - ); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); - - browser - .command("click") - .description("Click an element by ref from an ai snapshot (e.g. 76)") - .argument("", "Ref id from ai snapshot") - .option("--target-id ", "CDP target id (or unique prefix)") - .action(async (ref: string, opts, cmd) => { - const parent = parentOpts(cmd); - const baseUrl = resolveBrowserControlUrl(parent?.url); - try { - const result = await browserClickRef(baseUrl, { - ref, - targetId: opts.targetId?.trim() || undefined, - }); - if (parent?.json) { - defaultRuntime.log(JSON.stringify(result, null, 2)); - return; - } - defaultRuntime.log(`clicked ref ${ref} on ${result.url}`); - } catch (err) { - defaultRuntime.error(danger(String(err))); - defaultRuntime.exit(1); - } - }); + registerBrowserCli(program); return program; }