fix(browser): allow control server without playwright

This commit is contained in:
Peter Steinberger
2025-12-20 19:16:56 +00:00
parent 1eb6d617f5
commit ab4457e2a3
2 changed files with 97 additions and 50 deletions

View File

@@ -3,27 +3,7 @@ import path from "node:path";
import type express from "express"; import type express from "express";
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js"; import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
import { snapshotAria } from "../cdp.js"; import { captureScreenshot, snapshotAria } from "../cdp.js";
import {
armDialogViaPlaywright,
armFileUploadViaPlaywright,
clickViaPlaywright,
closePageViaPlaywright,
dragViaPlaywright,
evaluateViaPlaywright,
fillFormViaPlaywright,
getConsoleMessagesViaPlaywright,
hoverViaPlaywright,
navigateViaPlaywright,
pdfViaPlaywright,
pressKeyViaPlaywright,
resizeViewportViaPlaywright,
selectOptionViaPlaywright,
snapshotAiViaPlaywright,
takeScreenshotViaPlaywright,
typeViaPlaywright,
waitForViaPlaywright,
} from "../pw-ai.js";
import { import {
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
@@ -75,6 +55,35 @@ function parseClickButton(raw: string): ClickButton | undefined {
return undefined; return undefined;
} }
type PwAiModule = typeof import("../pw-ai.js");
let pwAiModule: Promise<PwAiModule | null> | null = null;
async function getPwAiModule(): Promise<PwAiModule | null> {
if (pwAiModule) return pwAiModule;
pwAiModule = (async () => {
try {
return await import("../pw-ai.js");
} catch {
return null;
}
})();
return pwAiModule;
}
async function requirePwAi(
res: express.Response,
feature: string,
): Promise<PwAiModule | null> {
const mod = await getPwAiModule();
if (mod) return mod;
jsonError(
res,
501,
`Playwright is not available in this gateway build; '${feature}' is unsupported.`,
);
return null;
}
export function registerBrowserAgentRoutes( export function registerBrowserAgentRoutes(
app: express.Express, app: express.Express,
ctx: BrowserRouteContext, ctx: BrowserRouteContext,
@@ -86,7 +95,9 @@ export function registerBrowserAgentRoutes(
if (!url) return jsonError(res, 400, "url is required"); if (!url) return jsonError(res, 400, "url is required");
try { try {
const tab = await ctx.ensureTabAvailable(targetId); const tab = await ctx.ensureTabAvailable(targetId);
const result = await navigateViaPlaywright({ const pw = await requirePwAi(res, "navigate");
if (!pw) return;
const result = await pw.navigateViaPlaywright({
cdpPort: ctx.state().cdpPort, cdpPort: ctx.state().cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
url, url,
@@ -121,6 +132,8 @@ export function registerBrowserAgentRoutes(
try { try {
const tab = await ctx.ensureTabAvailable(targetId); const tab = await ctx.ensureTabAvailable(targetId);
const cdpPort = ctx.state().cdpPort; const cdpPort = ctx.state().cdpPort;
const pw = await requirePwAi(res, `act:${kind}`);
if (!pw) return;
switch (kind) { switch (kind) {
case "click": { case "click": {
@@ -152,7 +165,7 @@ export function registerBrowserAgentRoutes(
const modifiers = modifiersRaw.length const modifiers = modifiersRaw.length
? (modifiersRaw as ClickModifier[]) ? (modifiersRaw as ClickModifier[])
: undefined; : undefined;
await clickViaPlaywright({ await pw.clickViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
ref, ref,
@@ -170,7 +183,7 @@ export function registerBrowserAgentRoutes(
const text = body.text; const text = body.text;
const submit = toBoolean(body.submit) ?? false; const submit = toBoolean(body.submit) ?? false;
const slowly = toBoolean(body.slowly) ?? false; const slowly = toBoolean(body.slowly) ?? false;
await typeViaPlaywright({ await pw.typeViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
ref, ref,
@@ -183,13 +196,13 @@ export function registerBrowserAgentRoutes(
case "press": { case "press": {
const key = toStringOrEmpty(body.key); const key = toStringOrEmpty(body.key);
if (!key) return jsonError(res, 400, "key is required"); if (!key) return jsonError(res, 400, "key is required");
await pressKeyViaPlaywright({ cdpPort, targetId: tab.targetId, key }); await pw.pressKeyViaPlaywright({ cdpPort, targetId: tab.targetId, key });
return res.json({ ok: true, targetId: tab.targetId }); return res.json({ ok: true, targetId: tab.targetId });
} }
case "hover": { case "hover": {
const ref = toStringOrEmpty(body.ref); const ref = toStringOrEmpty(body.ref);
if (!ref) return jsonError(res, 400, "ref is required"); if (!ref) return jsonError(res, 400, "ref is required");
await hoverViaPlaywright({ cdpPort, targetId: tab.targetId, ref }); await pw.hoverViaPlaywright({ cdpPort, targetId: tab.targetId, ref });
return res.json({ ok: true, targetId: tab.targetId }); return res.json({ ok: true, targetId: tab.targetId });
} }
case "drag": { case "drag": {
@@ -197,7 +210,7 @@ export function registerBrowserAgentRoutes(
const endRef = toStringOrEmpty(body.endRef); const endRef = toStringOrEmpty(body.endRef);
if (!startRef || !endRef) if (!startRef || !endRef)
return jsonError(res, 400, "startRef and endRef are required"); return jsonError(res, 400, "startRef and endRef are required");
await dragViaPlaywright({ await pw.dragViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
startRef, startRef,
@@ -210,7 +223,7 @@ export function registerBrowserAgentRoutes(
const values = toStringArray(body.values); const values = toStringArray(body.values);
if (!ref || !values?.length) if (!ref || !values?.length)
return jsonError(res, 400, "ref and values are required"); return jsonError(res, 400, "ref and values are required");
await selectOptionViaPlaywright({ await pw.selectOptionViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
ref, ref,
@@ -224,7 +237,7 @@ export function registerBrowserAgentRoutes(
: null; : null;
if (!fields?.length) if (!fields?.length)
return jsonError(res, 400, "fields are required"); return jsonError(res, 400, "fields are required");
await fillFormViaPlaywright({ await pw.fillFormViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
fields, fields,
@@ -236,7 +249,7 @@ export function registerBrowserAgentRoutes(
const height = toNumber(body.height); const height = toNumber(body.height);
if (!width || !height) if (!width || !height)
return jsonError(res, 400, "width and height are required"); return jsonError(res, 400, "width and height are required");
await resizeViewportViaPlaywright({ await pw.resizeViewportViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
width, width,
@@ -248,7 +261,7 @@ export function registerBrowserAgentRoutes(
const timeMs = toNumber(body.timeMs); const timeMs = toNumber(body.timeMs);
const text = toStringOrEmpty(body.text) || undefined; const text = toStringOrEmpty(body.text) || undefined;
const textGone = toStringOrEmpty(body.textGone) || undefined; const textGone = toStringOrEmpty(body.textGone) || undefined;
await waitForViaPlaywright({ await pw.waitForViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
timeMs, timeMs,
@@ -261,7 +274,7 @@ export function registerBrowserAgentRoutes(
const fn = toStringOrEmpty(body.fn); const fn = toStringOrEmpty(body.fn);
if (!fn) return jsonError(res, 400, "fn is required"); if (!fn) return jsonError(res, 400, "fn is required");
const ref = toStringOrEmpty(body.ref) || undefined; const ref = toStringOrEmpty(body.ref) || undefined;
const result = await evaluateViaPlaywright({ const result = await pw.evaluateViaPlaywright({
cdpPort, cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
fn, fn,
@@ -275,7 +288,7 @@ export function registerBrowserAgentRoutes(
}); });
} }
case "close": { case "close": {
await closePageViaPlaywright({ cdpPort, targetId: tab.targetId }); await pw.closePageViaPlaywright({ cdpPort, targetId: tab.targetId });
return res.json({ ok: true, targetId: tab.targetId }); return res.json({ ok: true, targetId: tab.targetId });
} }
default: { default: {
@@ -295,7 +308,9 @@ export function registerBrowserAgentRoutes(
if (!paths.length) return jsonError(res, 400, "paths are required"); if (!paths.length) return jsonError(res, 400, "paths are required");
try { try {
const tab = await ctx.ensureTabAvailable(targetId); const tab = await ctx.ensureTabAvailable(targetId);
await armFileUploadViaPlaywright({ const pw = await requirePwAi(res, "file chooser hook");
if (!pw) return;
await pw.armFileUploadViaPlaywright({
cdpPort: ctx.state().cdpPort, cdpPort: ctx.state().cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
paths, paths,
@@ -316,7 +331,9 @@ export function registerBrowserAgentRoutes(
if (accept === undefined) return jsonError(res, 400, "accept is required"); if (accept === undefined) return jsonError(res, 400, "accept is required");
try { try {
const tab = await ctx.ensureTabAvailable(targetId); const tab = await ctx.ensureTabAvailable(targetId);
await armDialogViaPlaywright({ const pw = await requirePwAi(res, "dialog hook");
if (!pw) return;
await pw.armDialogViaPlaywright({
cdpPort: ctx.state().cdpPort, cdpPort: ctx.state().cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
accept, accept,
@@ -336,7 +353,9 @@ export function registerBrowserAgentRoutes(
try { try {
const tab = await ctx.ensureTabAvailable(targetId || undefined); const tab = await ctx.ensureTabAvailable(targetId || undefined);
const messages = await getConsoleMessagesViaPlaywright({ const pw = await requirePwAi(res, "console messages");
if (!pw) return;
const messages = await pw.getConsoleMessagesViaPlaywright({
cdpPort: ctx.state().cdpPort, cdpPort: ctx.state().cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
level: level.trim() || undefined, level: level.trim() || undefined,
@@ -352,7 +371,9 @@ export function registerBrowserAgentRoutes(
const targetId = toStringOrEmpty(body.targetId) || undefined; const targetId = toStringOrEmpty(body.targetId) || undefined;
try { try {
const tab = await ctx.ensureTabAvailable(targetId); const tab = await ctx.ensureTabAvailable(targetId);
const pdf = await pdfViaPlaywright({ const pw = await requirePwAi(res, "pdf");
if (!pw) return;
const pdf = await pw.pdfViaPlaywright({
cdpPort: ctx.state().cdpPort, cdpPort: ctx.state().cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
}); });
@@ -392,16 +413,29 @@ export function registerBrowserAgentRoutes(
try { try {
const tab = await ctx.ensureTabAvailable(targetId); const tab = await ctx.ensureTabAvailable(targetId);
const snap = await takeScreenshotViaPlaywright({ let buffer: Buffer;
cdpPort: ctx.state().cdpPort, if (ref || element) {
targetId: tab.targetId, const pw = await requirePwAi(res, "element/ref screenshot");
ref, if (!pw) return;
element, const snap = await pw.takeScreenshotViaPlaywright({
fullPage, cdpPort: ctx.state().cdpPort,
type, targetId: tab.targetId,
}); ref,
element,
fullPage,
type,
});
buffer = snap.buffer;
} else {
buffer = await captureScreenshot({
wsUrl: tab.wsUrl ?? "",
fullPage,
format: type,
quality: type === "jpeg" ? 85 : undefined,
});
}
const normalized = await normalizeBrowserScreenshot(snap.buffer, { const normalized = await normalizeBrowserScreenshot(buffer, {
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE, maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES, maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
}); });
@@ -426,14 +460,21 @@ export function registerBrowserAgentRoutes(
app.get("/snapshot", async (req, res) => { app.get("/snapshot", async (req, res) => {
const targetId = const targetId =
typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const format = req.query.format === "aria" ? "aria" : "ai"; const format =
req.query.format === "aria"
? "aria"
: req.query.format === "ai"
? "ai"
: ((await getPwAiModule()) ? "ai" : "aria");
const limit = const limit =
typeof req.query.limit === "string" ? Number(req.query.limit) : undefined; typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
try { try {
const tab = await ctx.ensureTabAvailable(targetId || undefined); const tab = await ctx.ensureTabAvailable(targetId || undefined);
if (format === "ai") { if (format === "ai") {
const snap = await snapshotAiViaPlaywright({ const pw = await requirePwAi(res, "ai snapshot");
if (!pw) return;
const snap = await pw.snapshotAiViaPlaywright({
cdpPort: ctx.state().cdpPort, cdpPort: ctx.state().cdpPort,
targetId: tab.targetId, targetId: tab.targetId,
}); });

View File

@@ -8,7 +8,6 @@ import {
resolveBrowserConfig, resolveBrowserConfig,
shouldStartLocalBrowserServer, shouldStartLocalBrowserServer,
} from "./config.js"; } from "./config.js";
import { closePlaywrightBrowserConnection } from "./pw-ai.js";
import { registerBrowserRoutes } from "./routes/index.js"; import { registerBrowserRoutes } from "./routes/index.js";
import { import {
type BrowserServerState, type BrowserServerState,
@@ -99,5 +98,12 @@ export async function stopBrowserControlServer(
current.server.close(() => resolve()); current.server.close(() => resolve());
}); });
state = null; state = null;
await closePlaywrightBrowserConnection();
// Optional: Playwright is not always available (e.g. embedded gateway builds).
try {
const mod = await import("./pw-ai.js");
await mod.closePlaywrightBrowserConnection();
} catch {
// ignore
}
} }