feat(browser): add ai snapshot refs + click
This commit is contained in:
@@ -103,6 +103,13 @@ export type SnapshotResult =
|
||||
type?: string;
|
||||
value?: string;
|
||||
}>;
|
||||
}
|
||||
| {
|
||||
ok: true;
|
||||
format: "ai";
|
||||
targetId: string;
|
||||
url: string;
|
||||
snapshot: string;
|
||||
};
|
||||
|
||||
function unwrapCause(err: unknown): unknown {
|
||||
@@ -310,7 +317,7 @@ export async function browserDom(
|
||||
export async function browserSnapshot(
|
||||
baseUrl: string,
|
||||
opts: {
|
||||
format: "aria" | "domSnapshot";
|
||||
format: "aria" | "domSnapshot" | "ai";
|
||||
targetId?: string;
|
||||
limit?: number;
|
||||
},
|
||||
@@ -326,3 +333,24 @@ export async function browserSnapshot(
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Ask the browser control server to click the element identified by a ref
 * taken from an "ai" snapshot.
 *
 * Sends a JSON `POST` to `${baseUrl}/click` carrying `ref` and the optional
 * `targetId`, and passes `timeoutMs: 20000` to `fetchJson`.
 *
 * @param baseUrl - Base URL of the browser control server.
 * @param opts - `ref` is the element ref; `targetId` optionally selects a tab.
 * @returns The parsed response body as produced by `fetchJson`.
 */
export async function browserClickRef(
  baseUrl: string,
  opts: {
    ref: string;
    targetId?: string;
  },
): Promise<{ ok: true; targetId: string; url: string }> {
  type ClickResponse = { ok: true; targetId: string; url: string };

  const { ref, targetId } = opts;
  const endpoint = `${baseUrl}/click`;
  const body = JSON.stringify({ ref, targetId });

  const response = await fetchJson<ClickResponse>(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body,
    timeoutMs: 20000,
  });
  return response;
}
|
||||
|
||||
143
src/browser/pw-ai.test.ts
Normal file
143
src/browser/pw-ai.test.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Replace playwright-core with a stub whose only member is a mockable
// `chromium.connectOverCDP`, so no real browser connection is attempted.
vi.mock("playwright-core", () => {
  const connectOverCDP = vi.fn();
  return { chromium: { connectOverCDP } };
});

/** Stand-in for a CDP session: just the two methods the fakes below provide. */
type FakeSession = Record<"send" | "detach", ReturnType<typeof vi.fn>>;
|
||||
|
||||
/**
 * Build a fake page plus handles to the mocks behind it.
 *
 * - `session.send` resolves to a target-info payload carrying `opts.targetId`.
 * - `locator(...)` returns an object whose `click` mock resolves to undefined.
 * - `_snapshotForAI` resolves to `{ full: opts.snapshotFull ?? "SNAP" }` and is
 *   left off the page entirely when `opts.hasSnapshotForAI === false`.
 */
function createPage(opts: {
  targetId: string;
  snapshotFull?: string;
  hasSnapshotForAI?: boolean;
}) {
  const send = vi.fn().mockResolvedValue({
    targetInfo: { targetId: opts.targetId },
  });
  const detach = vi.fn().mockResolvedValue(undefined);
  const session: FakeSession = { send, detach };

  const newCDPSession = vi.fn().mockResolvedValue(session);
  const context = { newCDPSession };

  const click = vi.fn().mockResolvedValue(undefined);
  const locator = vi.fn().mockReturnValue({ click });

  const page: Record<string, unknown> = {
    context: () => context,
    locator,
  };
  if (opts.hasSnapshotForAI !== false) {
    const full = opts.snapshotFull ?? "SNAP";
    page._snapshotForAI = vi.fn().mockResolvedValue({ full });
  }

  return { page, session, locator, click };
}
|
||||
|
||||
/**
 * Build a fake browser with a single context that exposes `pages`, a mock
 * `on`, and a `close` mock that resolves to undefined.
 */
function createBrowser(pages: unknown[]) {
  const onlyContext = { pages: () => pages };
  const on = vi.fn();
  const close = vi.fn().mockResolvedValue(undefined);
  return { contexts: () => [onlyContext], on, close };
}
|
||||
|
||||
/** Dynamically import the module under test. */
async function importModule() {
  const mod = await import("./pw-ai.js");
  return mod;
}
|
||||
|
||||
// After every test: close the module's browser connection, then clear
// recorded mock calls.
afterEach(async () => {
  const { closePlaywrightBrowserConnection } = await importModule();
  await closePlaywrightBrowserConnection();
  vi.clearAllMocks();
});
|
||||
|
||||
describe("pw-ai", () => {
  type ConnectMock = ReturnType<typeof vi.fn>;

  // Make the mocked `chromium.connectOverCDP` resolve to a fake browser that
  // exposes `pages`; returns the mock so call counts can be asserted.
  const stubConnect = async (pages: unknown[]): Promise<ConnectMock> => {
    const { chromium } = await import("playwright-core");
    const connect = chromium.connectOverCDP as unknown as ConnectMock;
    connect.mockResolvedValue(createBrowser(pages));
    return connect;
  };

  it("captures an ai snapshot via Playwright for a specific target", async () => {
    const first = createPage({ targetId: "T1", snapshotFull: "ONE" });
    const second = createPage({ targetId: "T2", snapshotFull: "TWO" });
    await stubConnect([first.page, second.page]);

    const { snapshotAiViaPlaywright } = await importModule();
    const res = await snapshotAiViaPlaywright({
      cdpPort: 18792,
      targetId: "T2",
    });

    expect(res.snapshot).toBe("TWO");
    // Each page's CDP session must have been detached exactly once.
    expect(first.session.detach).toHaveBeenCalledTimes(1);
    expect(second.session.detach).toHaveBeenCalledTimes(1);
  });

  it("clicks a ref using aria-ref locator", async () => {
    const only = createPage({ targetId: "T1" });
    await stubConnect([only.page]);

    const { clickRefViaPlaywright } = await importModule();
    await clickRefViaPlaywright({
      cdpPort: 18792,
      targetId: "T1",
      ref: "76",
    });

    expect(only.locator).toHaveBeenCalledWith("aria-ref=76");
    expect(only.click).toHaveBeenCalledTimes(1);
  });

  it("fails with a clear error when _snapshotForAI is missing", async () => {
    const only = createPage({ targetId: "T1", hasSnapshotForAI: false });
    await stubConnect([only.page]);

    const { snapshotAiViaPlaywright } = await importModule();
    const attempt = snapshotAiViaPlaywright({ cdpPort: 18792, targetId: "T1" });

    await expect(attempt).rejects.toThrow(/_snapshotForAI/i);
  });

  it("reuses the CDP connection for repeated calls", async () => {
    const only = createPage({ targetId: "T1", snapshotFull: "ONE" });
    const connect = await stubConnect([only.page]);

    const mod = await importModule();
    await mod.snapshotAiViaPlaywright({ cdpPort: 18792, targetId: "T1" });
    await mod.clickRefViaPlaywright({
      cdpPort: 18792,
      targetId: "T1",
      ref: "1",
    });

    expect(connect).toHaveBeenCalledTimes(1);
  });
});
|
||||
153
src/browser/pw-ai.ts
Normal file
153
src/browser/pw-ai.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
import type { Browser, Page } from "playwright-core";
|
||||
import { chromium } from "playwright-core";
|
||||
|
||||
/**
 * Value resolved by a page's `_snapshotForAI` hook, as consumed here.
 * Only `full` is read by this module.
 */
type SnapshotForAIResult = {
  full: string;
  incremental?: string;
};

/** Options object handed to `_snapshotForAI`. */
type SnapshotForAIOptions = {
  timeout?: number;
  track?: string;
};

/**
 * View of a page that may expose `_snapshotForAI`. The member is optional so
 * its absence can be detected at runtime before calling it.
 */
type WithSnapshotForAI = {
  _snapshotForAI?: (
    options?: SnapshotForAIOptions,
  ) => Promise<SnapshotForAIResult>;
};

/** The part of a `Target.getTargetInfo` reply that this module reads. */
type TargetInfoResponse = {
  targetInfo?: { targetId?: string };
};

/** A connected browser paired with the endpoint it was dialed on. */
type ConnectedBrowser = {
  browser: Browser;
  endpoint: string;
};

/** Last successful connection, reused while the endpoint matches; null when none. */
let cached: ConnectedBrowser | null = null;

/** Connection attempt currently in flight, or null when idle. */
let connecting: Promise<ConnectedBrowser> | null = null;
|
||||
|
||||
/** Build the loopback HTTP endpoint URL for the given CDP port. */
function endpointForCdpPort(cdpPort: number): string {
  return "http://127.0.0.1:" + String(cdpPort);
}
|
||||
|
||||
/** Endpoint the in-flight `connecting` attempt is dialing; null when idle. */
let connectingEndpoint: string | null = null;

/**
 * Return a connection to the browser at `endpoint`, reusing the cached one
 * when its endpoint matches and sharing an in-flight attempt when it is
 * dialing the same endpoint.
 *
 * Fix over the previous version: an in-flight attempt was returned to every
 * caller regardless of endpoint, so a caller could receive a browser
 * connected to a different endpoint than the one it asked for. A pending
 * attempt is now only shared when its endpoint matches; otherwise the caller
 * waits for it to settle and then dials its own endpoint.
 *
 * @param endpoint - CDP endpoint URL passed to `chromium.connectOverCDP`.
 * @returns The connected browser together with its endpoint.
 * @throws Whatever `chromium.connectOverCDP` rejects with (5000 ms timeout).
 */
async function connectBrowser(endpoint: string): Promise<ConnectedBrowser> {
  if (cached?.endpoint === endpoint) return cached;

  while (connecting) {
    // Same endpoint: share the attempt, including its rejection.
    if (connectingEndpoint === endpoint) return await connecting;
    // Different endpoint: its outcome is irrelevant to this caller, so just
    // wait for it to finish and re-check before dialing.
    await connecting.catch(() => {});
    if (cached?.endpoint === endpoint) return cached;
  }

  const attempt: Promise<ConnectedBrowser> = chromium
    .connectOverCDP(endpoint, { timeout: 5000 })
    .then((browser) => {
      const connected: ConnectedBrowser = { browser, endpoint };
      cached = connected;
      // Drop the cache entry once this particular browser goes away.
      browser.on("disconnected", () => {
        if (cached?.browser === browser) cached = null;
      });
      return connected;
    })
    .finally(() => {
      if (connecting === attempt) {
        connecting = null;
        connectingEndpoint = null;
      }
    });

  // Published synchronously (no await since the loop above) so concurrent
  // callers observe this attempt rather than starting their own.
  connecting = attempt;
  connectingEndpoint = endpoint;

  return await attempt;
}
|
||||
|
||||
/** Collect the pages of every context of `browser` into one flat list. */
async function getAllPages(browser: Browser): Promise<Page[]> {
  const collected: Page[] = [];
  for (const context of browser.contexts()) {
    collected.push(...context.pages());
  }
  return collected;
}
|
||||
|
||||
/**
 * Look up the CDP target id of `page` by opening a CDP session on it and
 * sending `Target.getTargetInfo`.
 *
 * @returns The trimmed target id, or null when the reply carries none.
 */
async function pageTargetId(page: Page): Promise<string | null> {
  const cdp = await page.context().newCDPSession(page);
  try {
    const reply = (await cdp.send(
      "Target.getTargetInfo",
    )) as TargetInfoResponse;
    const rawId = reply?.targetInfo?.targetId ?? "";
    const normalized = String(rawId).trim();
    return normalized === "" ? null : normalized;
  } finally {
    // Always release the session; a failed detach is ignored.
    await cdp.detach().catch(() => {});
  }
}
|
||||
|
||||
/**
 * Scan the browser's pages one at a time and return the first whose CDP
 * target id equals `targetId`; null when none matches. A page whose id
 * lookup fails is treated as a non-match.
 */
async function findPageByTargetId(
  browser: Browser,
  targetId: string,
): Promise<Page | null> {
  for (const candidate of await getAllPages(browser)) {
    let candidateId: string | null;
    try {
      candidateId = await pageTargetId(candidate);
    } catch {
      candidateId = null;
    }
    if (!candidateId) continue;
    if (candidateId === targetId) return candidate;
  }
  return null;
}
|
||||
|
||||
/**
 * Resolve the page to operate on for the browser at `opts.cdpPort`.
 *
 * Without a `targetId` the first page of the connected browser is returned;
 * with one, the page whose CDP target id matches.
 *
 * @throws Error("No pages available in the connected browser.") when the
 *   browser has no pages.
 * @throws Error("tab not found") when `targetId` matches no page.
 */
async function getPageForTargetId(opts: {
  cdpPort: number;
  targetId?: string;
}): Promise<Page> {
  const { browser } = await connectBrowser(endpointForCdpPort(opts.cdpPort));

  const pages = await getAllPages(browser);
  if (!pages.length) {
    throw new Error("No pages available in the connected browser.");
  }

  if (opts.targetId) {
    const match = await findPageByTargetId(browser, opts.targetId);
    if (match) return match;
    throw new Error("tab not found");
  }

  return pages[0];
}
|
||||
|
||||
/**
 * Capture an "ai" snapshot of a page through the page's `_snapshotForAI`
 * hook.
 *
 * @param opts.cdpPort - CDP port of the browser to connect to.
 * @param opts.targetId - Optional CDP target id selecting the page.
 * @param opts.timeoutMs - Snapshot timeout in ms; floored and clamped to
 *   [500, 60000]. Defaults to 5000 when omitted, null or NaN.
 * @returns `{ snapshot }` holding the hook's `full` text ("" when absent).
 * @throws Error when the page does not expose `_snapshotForAI`, plus any
 *   error from page resolution or from the hook itself.
 */
export async function snapshotAiViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  timeoutMs?: number;
}): Promise<{ snapshot: string }> {
  const page = await getPageForTargetId({
    cdpPort: opts.cdpPort,
    targetId: opts.targetId,
  });

  const maybe = page as unknown as WithSnapshotForAI;
  if (typeof maybe._snapshotForAI !== "function") {
    throw new Error(
      "Playwright _snapshotForAI is not available. Upgrade playwright-core.",
    );
  }

  // Math.min/Math.max propagate NaN, so a NaN timeout would slip through the
  // clamp untouched; fall back to the default in that case.
  const requested = opts.timeoutMs ?? 5000;
  const timeout = Number.isNaN(requested)
    ? 5000
    : Math.max(500, Math.min(60_000, Math.floor(requested)));

  const result = await maybe._snapshotForAI({
    timeout,
    // NOTE(review): "response" is forwarded as-is; its meaning is defined by
    // Playwright's private API — confirm when upgrading.
    track: "response",
  });
  return { snapshot: String(result?.full ?? "") };
}
|
||||
|
||||
/**
 * Click the element identified by an ai-snapshot ref, located with the
 * `aria-ref=<ref>` selector.
 *
 * @param opts.cdpPort - CDP port of the browser to connect to.
 * @param opts.targetId - Optional CDP target id selecting the page.
 * @param opts.ref - Element ref; trimmed, and must be non-empty.
 * @param opts.timeoutMs - Click timeout in ms; floored and clamped to
 *   [500, 60000]. Defaults to 8000 when omitted, null or NaN.
 * @throws Error("ref is required") when `ref` is empty after trimming, plus
 *   any error from page resolution or from the click itself.
 */
export async function clickRefViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  ref: string;
  timeoutMs?: number;
}): Promise<void> {
  const ref = String(opts.ref ?? "").trim();
  if (!ref) throw new Error("ref is required");

  const page = await getPageForTargetId({
    cdpPort: opts.cdpPort,
    targetId: opts.targetId,
  });

  // Math.min/Math.max propagate NaN, so a NaN timeout would slip through the
  // clamp untouched; fall back to the default in that case.
  const requested = opts.timeoutMs ?? 8000;
  const timeout = Number.isNaN(requested)
    ? 8000
    : Math.max(500, Math.min(60_000, Math.floor(requested)));

  await page.locator(`aria-ref=${ref}`).click({ timeout });
}
|
||||
|
||||
/**
 * Forget the cached browser connection and close it. Does nothing when no
 * connection is cached; an error from `close()` is ignored.
 */
export async function closePlaywrightBrowserConnection(): Promise<void> {
  if (!cached) return;
  const { browser } = cached;
  // Clear the cache before awaiting so no caller picks up a closing browser.
  cached = null;
  await browser.close().catch(() => {});
}
|
||||
@@ -26,6 +26,11 @@ import {
|
||||
resolveBrowserConfig,
|
||||
shouldStartLocalBrowserServer,
|
||||
} from "./config.js";
|
||||
import {
|
||||
clickRefViaPlaywright,
|
||||
closePlaywrightBrowserConnection,
|
||||
snapshotAiViaPlaywright,
|
||||
} from "./pw-ai.js";
|
||||
import {
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
|
||||
@@ -522,13 +527,32 @@ export async function startBrowserControlServerFromConfig(
|
||||
if (!state) return jsonError(res, 503, "browser server not started");
|
||||
const targetId =
|
||||
typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
|
||||
const format = req.query.format === "domSnapshot" ? "domSnapshot" : "aria";
|
||||
const format =
|
||||
req.query.format === "domSnapshot"
|
||||
? "domSnapshot"
|
||||
: req.query.format === "ai"
|
||||
? "ai"
|
||||
: "aria";
|
||||
const limit =
|
||||
typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
|
||||
|
||||
try {
|
||||
const tab = await ensureTabAvailable(runtime, targetId || undefined);
|
||||
|
||||
if (format === "ai") {
|
||||
const snap = await snapshotAiViaPlaywright({
|
||||
cdpPort: state.cdpPort,
|
||||
targetId: tab.targetId,
|
||||
});
|
||||
return res.json({
|
||||
ok: true,
|
||||
format,
|
||||
targetId: tab.targetId,
|
||||
url: tab.url,
|
||||
...snap,
|
||||
});
|
||||
}
|
||||
|
||||
if (format === "aria") {
|
||||
const snap = await snapshotAria({
|
||||
wsUrl: tab.wsUrl ?? "",
|
||||
@@ -561,6 +585,30 @@ export async function startBrowserControlServerFromConfig(
|
||||
}
|
||||
});
|
||||
|
||||
// POST /click — click the element identified by an ai-snapshot `ref`.
// Responds 503 when the server state is unset and 400 when `ref` is empty.
// Failures go through `mapTabError` and fall back to a 500 with the
// stringified error.
app.post("/click", async (req, res) => {
  if (!state) return jsonError(res, 503, "browser server not started");

  const payload = (req.body ?? {}) as { ref?: unknown; targetId?: unknown };
  const ref = String(payload.ref ?? "").trim();
  const targetId = String(payload.targetId ?? "").trim();

  if (!ref) return jsonError(res, 400, "ref is required");

  try {
    const tab = await ensureTabAvailable(runtime, targetId || undefined);
    const resolvedTargetId = tab.targetId;
    await clickRefViaPlaywright({
      cdpPort: state.cdpPort,
      targetId: resolvedTargetId,
      ref,
    });
    res.json({ ok: true, targetId: resolvedTargetId, url: tab.url });
  } catch (err) {
    const mapped = mapTabError(err);
    if (mapped) return jsonError(res, mapped.status, mapped.message);
    jsonError(res, 500, String(err));
  }
});
|
||||
|
||||
const port = resolved.controlPort;
|
||||
const server = await new Promise<Server>((resolve, reject) => {
|
||||
const s = app.listen(port, "127.0.0.1", () => resolve(s));
|
||||
@@ -596,6 +644,7 @@ export async function stopBrowserControlServer(
|
||||
const current = state;
|
||||
state = null;
|
||||
try {
|
||||
await closePlaywrightBrowserConnection();
|
||||
if (current.running) {
|
||||
await stopClawdChrome(current.running).catch((err) =>
|
||||
logWarn(`clawd browser stop failed: ${String(err)}`, runtime),
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import chalk from "chalk";
|
||||
import { Command } from "commander";
|
||||
import {
|
||||
browserClickRef,
|
||||
browserCloseTab,
|
||||
browserDom,
|
||||
browserEval,
|
||||
@@ -441,6 +442,8 @@ Examples:
|
||||
clawdis browser query "a" --limit 5
|
||||
clawdis browser dom --format text --max-chars 5000
|
||||
clawdis browser snapshot --format aria --limit 200
|
||||
clawdis browser snapshot --format ai
|
||||
clawdis browser click 76
|
||||
`,
|
||||
)
|
||||
.action(() => {
|
||||
@@ -803,9 +806,9 @@ Examples:
|
||||
|
||||
browser
|
||||
.command("snapshot")
|
||||
.description("Capture an AI-friendly snapshot (aria or domSnapshot)")
|
||||
.description("Capture an AI-friendly snapshot (aria, domSnapshot, or ai)")
|
||||
.option(
|
||||
"--format <aria|domSnapshot>",
|
||||
"--format <aria|domSnapshot|ai>",
|
||||
"Snapshot format (default: aria)",
|
||||
"aria",
|
||||
)
|
||||
@@ -813,11 +816,16 @@ Examples:
|
||||
.option("--limit <n>", "Max nodes (default: 500/800)", (v: string) =>
|
||||
Number(v),
|
||||
)
|
||||
.option("--out <path>", "Write JSON snapshot to a file")
|
||||
.option("--out <path>", "Write snapshot to a file")
|
||||
.action(async (opts, cmd) => {
|
||||
const parent = parentOpts(cmd);
|
||||
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
||||
const format = opts.format === "domSnapshot" ? "domSnapshot" : "aria";
|
||||
const format =
|
||||
opts.format === "domSnapshot"
|
||||
? "domSnapshot"
|
||||
: opts.format === "ai"
|
||||
? "ai"
|
||||
: "aria";
|
||||
try {
|
||||
const result = await browserSnapshot(baseUrl, {
|
||||
format,
|
||||
@@ -825,10 +833,14 @@ Examples:
|
||||
limit: Number.isFinite(opts.limit) ? opts.limit : undefined,
|
||||
});
|
||||
|
||||
const payload = JSON.stringify(result, null, 2);
|
||||
if (opts.out) {
|
||||
const fs = await import("node:fs/promises");
|
||||
await fs.writeFile(opts.out, payload, "utf8");
|
||||
if (result.format === "ai") {
|
||||
await fs.writeFile(opts.out, result.snapshot, "utf8");
|
||||
} else {
|
||||
const payload = JSON.stringify(result, null, 2);
|
||||
await fs.writeFile(opts.out, payload, "utf8");
|
||||
}
|
||||
if (parent?.json) {
|
||||
defaultRuntime.log(
|
||||
JSON.stringify({ ok: true, out: opts.out }, null, 2),
|
||||
@@ -839,8 +851,18 @@ Examples:
|
||||
return;
|
||||
}
|
||||
|
||||
if (parent?.json || format === "domSnapshot") {
|
||||
defaultRuntime.log(payload);
|
||||
if (parent?.json) {
|
||||
defaultRuntime.log(JSON.stringify(result, null, 2));
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.format === "ai") {
|
||||
defaultRuntime.log(result.snapshot);
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.format === "domSnapshot") {
|
||||
defaultRuntime.log(JSON.stringify(result, null, 2));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -862,5 +884,29 @@ Examples:
|
||||
}
|
||||
});
|
||||
|
||||
// `browser click <ref>`: click an element by its ai-snapshot ref through the
// control server. Prints the raw JSON result when the parent `json` option is
// set, otherwise a one-line confirmation.
browser
  .command("click")
  .description("Click an element by ref from an ai snapshot (e.g. 76)")
  .argument("<ref>", "Ref id from ai snapshot")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .action(async (ref: string, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const result = await browserClickRef(baseUrl, {
        ref,
        targetId: opts.targetId?.trim() || undefined,
      });
      const message = parent?.json
        ? JSON.stringify(result, null, 2)
        : `clicked ref ${ref} on ${result.url}`;
      defaultRuntime.log(message);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user