feat: add selector-based browser actions

This commit is contained in:
Peter Steinberger
2025-12-24 19:52:28 +00:00
parent 523d9ec3c2
commit 27025b71db
11 changed files with 173 additions and 36 deletions

View File

@@ -14,7 +14,8 @@ export type BrowserFormField = {
export type BrowserActRequest =
| {
kind: "click";
ref: string;
ref?: string;
selector?: string;
targetId?: string;
doubleClick?: boolean;
button?: string;
@@ -22,7 +23,8 @@ export type BrowserActRequest =
}
| {
kind: "type";
ref: string;
ref?: string;
selector?: string;
text: string;
targetId?: string;
submit?: boolean;

View File

@@ -29,7 +29,8 @@ function createPage(opts: {
const click = vi.fn().mockResolvedValue(undefined);
const dblclick = vi.fn().mockResolvedValue(undefined);
const locator = vi.fn().mockReturnValue({ click, dblclick });
const fill = vi.fn().mockResolvedValue(undefined);
const locator = vi.fn().mockReturnValue({ click, dblclick, fill });
const page = {
context: () => context,
@@ -44,7 +45,7 @@ function createPage(opts: {
}),
};
return { page, session, locator, click };
return { page, session, locator, click, fill };
}
function createBrowser(pages: unknown[]) {
@@ -110,6 +111,45 @@ describe("pw-ai", () => {
expect(p1.click).toHaveBeenCalledTimes(1);
});
// Checks the selector path of clickViaPlaywright: when called with a
// `selector` and no `ref`, the page's `locator` mock must receive that
// selector string and the resulting locator's `click` must run exactly once.
it("clicks a css selector when provided", async () => {
const { chromium } = await import("playwright-core");
const p1 = createPage({ targetId: "T1" });
const browser = createBrowser([p1.page]);
// connectOverCDP is cast to a vi.fn mock here so it can be told to resolve
// to the fake browser built above.
(
chromium.connectOverCDP as unknown as ReturnType<typeof vi.fn>
).mockResolvedValue(browser);
const mod = await importModule();
await mod.clickViaPlaywright({
cdpPort: 18792,
targetId: "T1",
selector: "button.save",
});
expect(p1.locator).toHaveBeenCalledWith("button.save");
expect(p1.click).toHaveBeenCalledTimes(1);
});
// Checks the selector path of typeViaPlaywright: when called with a
// `selector` and no `ref`, the page's `locator` mock must receive that
// selector string and the resulting locator's `fill` must run exactly once.
it("types via css selector when provided", async () => {
const { chromium } = await import("playwright-core");
const p1 = createPage({ targetId: "T1" });
const browser = createBrowser([p1.page]);
// connectOverCDP is cast to a vi.fn mock here so it can be told to resolve
// to the fake browser built above.
(
chromium.connectOverCDP as unknown as ReturnType<typeof vi.fn>
).mockResolvedValue(browser);
const mod = await importModule();
await mod.typeViaPlaywright({
cdpPort: 18792,
targetId: "T1",
selector: "input[name=q]",
text: "hello",
});
expect(p1.locator).toHaveBeenCalledWith("input[name=q]");
expect(p1.fill).toHaveBeenCalledTimes(1);
});
it("fails with a clear error when _snapshotForAI is missing", async () => {
const { chromium } = await import("playwright-core");
const p1 = createPage({ targetId: "T1", hasSnapshotForAI: false });

View File

@@ -10,6 +10,19 @@ import {
let nextUploadArmId = 0;
let nextDialogArmId = 0;
// Page type accepted by `refLocator`, derived from its first parameter so the
// alias follows that function's signature instead of restating it.
type LocatorPage = Parameters<typeof refLocator>[0];
/**
 * Picks the locator for an action that may target an element either by a
 * selector string or by a ref.
 *
 * Both inputs are trimmed; a non-string value counts as blank. A non-blank
 * `selector` wins and is handed to `page.locator`; otherwise a non-blank `ref`
 * is resolved through `refLocator`.
 *
 * @param page - Page on which the locator is created.
 * @param opts - Candidate `ref` and/or `selector`.
 * @returns The locator produced by `page.locator` or by `refLocator`.
 * @throws Error when neither a usable `ref` nor a usable `selector` is given.
 */
function resolveLocator(
  page: LocatorPage,
  opts: { ref?: string; selector?: string },
) {
  // Non-string input collapses to "" so it is treated as "not provided".
  const normalize = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const cssSelector = normalize(opts.selector);
  if (cssSelector !== "") {
    return page.locator(cssSelector);
  }

  const elementRef = normalize(opts.ref);
  if (elementRef !== "") {
    return refLocator(page, elementRef);
  }

  throw new Error("ref or selector is required");
}
export async function snapshotAiViaPlaywright(opts: {
cdpPort: number;
targetId?: string;
@@ -41,21 +54,22 @@ export async function snapshotAiViaPlaywright(opts: {
export async function clickViaPlaywright(opts: {
cdpPort: number;
targetId?: string;
ref: string;
ref?: string;
selector?: string;
doubleClick?: boolean;
button?: "left" | "right" | "middle";
modifiers?: Array<"Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift">;
timeoutMs?: number;
}): Promise<void> {
const ref = String(opts.ref ?? "").trim();
if (!ref) throw new Error("ref is required");
const page = await getPageForTargetId({
cdpPort: opts.cdpPort,
targetId: opts.targetId,
});
ensurePageState(page);
const locator = refLocator(page, ref);
const locator = resolveLocator(page, {
ref: opts.ref,
selector: opts.selector,
});
const timeout = Math.max(
500,
Math.min(60_000, Math.floor(opts.timeoutMs ?? 8000)),
@@ -142,18 +156,20 @@ export async function pressKeyViaPlaywright(opts: {
export async function typeViaPlaywright(opts: {
cdpPort: number;
targetId?: string;
ref: string;
ref?: string;
selector?: string;
text: string;
submit?: boolean;
slowly?: boolean;
timeoutMs?: number;
}): Promise<void> {
const ref = String(opts.ref ?? "").trim();
if (!ref) throw new Error("ref is required");
const text = String(opts.text ?? "");
const page = await getPageForTargetId(opts);
ensurePageState(page);
const locator = refLocator(page, ref);
const locator = resolveLocator(page, {
ref: opts.ref,
selector: opts.selector,
});
const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 8000));
if (opts.slowly) {
await locator.click({ timeout });

View File

@@ -139,7 +139,9 @@ export function registerBrowserAgentRoutes(
switch (kind) {
case "click": {
const ref = toStringOrEmpty(body.ref);
if (!ref) return jsonError(res, 400, "ref is required");
const selector = toStringOrEmpty(body.selector);
if (!ref && !selector)
return jsonError(res, 400, "ref or selector is required");
const doubleClick = toBoolean(body.doubleClick) ?? false;
const buttonRaw = toStringOrEmpty(body.button) || "";
const button = buttonRaw ? parseClickButton(buttonRaw) : undefined;
@@ -166,32 +168,38 @@ export function registerBrowserAgentRoutes(
const modifiers = modifiersRaw.length
? (modifiersRaw as ClickModifier[])
: undefined;
await pw.clickViaPlaywright({
const clickRequest: Parameters<typeof pw.clickViaPlaywright>[0] = {
cdpPort,
targetId: tab.targetId,
ref,
doubleClick,
button,
modifiers,
});
};
if (ref) clickRequest.ref = ref;
if (selector) clickRequest.selector = selector;
if (button) clickRequest.button = button;
if (modifiers) clickRequest.modifiers = modifiers;
await pw.clickViaPlaywright(clickRequest);
return res.json({ ok: true, targetId: tab.targetId, url: tab.url });
}
case "type": {
const ref = toStringOrEmpty(body.ref);
if (!ref) return jsonError(res, 400, "ref is required");
const selector = toStringOrEmpty(body.selector);
if (!ref && !selector)
return jsonError(res, 400, "ref or selector is required");
if (typeof body.text !== "string")
return jsonError(res, 400, "text is required");
const text = body.text;
const submit = toBoolean(body.submit) ?? false;
const slowly = toBoolean(body.slowly) ?? false;
await pw.typeViaPlaywright({
const typeRequest: Parameters<typeof pw.typeViaPlaywright>[0] = {
cdpPort,
targetId: tab.targetId,
ref,
text,
submit,
slowly,
});
};
if (ref) typeRequest.ref = ref;
if (selector) typeRequest.selector = selector;
await pw.typeViaPlaywright(typeRequest);
return res.json({ ok: true, targetId: tab.targetId });
}
case "press": {

View File

@@ -318,7 +318,7 @@ describe("browser control server", () => {
}),
}).then((r) => r.json())) as { ok: boolean };
expect(click.ok).toBe(true);
expect(pwMocks.clickViaPlaywright).toHaveBeenCalledWith({
expect(pwMocks.clickViaPlaywright).toHaveBeenNthCalledWith(1, {
cdpPort: testPort + 1,
targetId: "abcd1234",
ref: "1",
@@ -327,13 +327,29 @@ describe("browser control server", () => {
modifiers: ["Shift"],
});
const clickSelector = (await realFetch(`${base}/act`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
kind: "click",
selector: "button.save",
}),
}).then((r) => r.json())) as { ok: boolean };
expect(clickSelector.ok).toBe(true);
expect(pwMocks.clickViaPlaywright).toHaveBeenNthCalledWith(2, {
cdpPort: testPort + 1,
targetId: "abcd1234",
selector: "button.save",
doubleClick: false,
});
const type = (await realFetch(`${base}/act`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ kind: "type", ref: "1", text: "" }),
}).then((r) => r.json())) as { ok: boolean };
expect(type.ok).toBe(true);
expect(pwMocks.typeViaPlaywright).toHaveBeenCalledWith({
expect(pwMocks.typeViaPlaywright).toHaveBeenNthCalledWith(1, {
cdpPort: testPort + 1,
targetId: "abcd1234",
ref: "1",
@@ -342,6 +358,26 @@ describe("browser control server", () => {
slowly: false,
});
const typeSelector = (await realFetch(`${base}/act`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
kind: "type",
selector: "input[name=q]",
text: "hello",
submit: true,
}),
}).then((r) => r.json())) as { ok: boolean };
expect(typeSelector.ok).toBe(true);
expect(pwMocks.typeViaPlaywright).toHaveBeenNthCalledWith(2, {
cdpPort: testPort + 1,
targetId: "abcd1234",
selector: "input[name=q]",
text: "hello",
submit: true,
slowly: false,
});
const press = (await realFetch(`${base}/act`, {
method: "POST",
headers: { "Content-Type": "application/json" },