Add a "scrollIntoView" browser action.

Threads the new action kind through the client request type, the Playwright
tool layer (scrollIntoViewViaPlaywright), the agent HTTP route and the CLI
("scrollintoview" command).

Review notes:
* Line breaks and indentation were rebuilt from a whitespace-collapsed copy of
  this patch. If context lines do not match the tree byte-for-byte, apply with
  `git apply --ignore-whitespace`.
* Tokens that had been stripped from that copy were restored: the
  `Promise<void>` return type and the commander placeholders `<ref>`, `<id>`
  and `<ms>` (placeholder names are inferred from the help text; confirm).
* The `index` hashes are those of the original patch and do not account for
  the comments added in pw-tools-core.ts and routes/agent.ts.

diff --git a/src/browser/client-actions-core.ts b/src/browser/client-actions-core.ts
index 58b260eda..5d4b76218 100644
--- a/src/browser/client-actions-core.ts
+++ b/src/browser/client-actions-core.ts
@@ -36,6 +36,12 @@ export type BrowserActRequest =
     }
   | { kind: "press"; key: string; targetId?: string; delayMs?: number }
   | { kind: "hover"; ref: string; targetId?: string; timeoutMs?: number }
+  | {
+      kind: "scrollIntoView";
+      ref: string;
+      targetId?: string;
+      timeoutMs?: number;
+    }
   | {
       kind: "drag";
       startRef: string;
diff --git a/src/browser/pw-ai.ts b/src/browser/pw-ai.ts
index a0367d4cc..68c7a28f5 100644
--- a/src/browser/pw-ai.ts
+++ b/src/browser/pw-ai.ts
@@ -30,6 +30,7 @@ export {
   pressKeyViaPlaywright,
   resizeViewportViaPlaywright,
   responseBodyViaPlaywright,
+  scrollIntoViewViaPlaywright,
   selectOptionViaPlaywright,
   setDeviceViaPlaywright,
   setExtraHTTPHeadersViaPlaywright,
diff --git a/src/browser/pw-tools-core.ts b/src/browser/pw-tools-core.ts
index 8f66fb252..a699de1cb 100644
--- a/src/browser/pw-tools-core.ts
+++ b/src/browser/pw-tools-core.ts
@@ -1160,6 +1160,34 @@ export async function responseBodyViaPlaywright(opts: {
   };
 }
 
+/**
+ * Scrolls the element identified by a snapshot ref into view.
+ *
+ * The page is looked up with `getPageForTargetId(opts)` and the scroll is
+ * performed with Playwright's `locator.scrollIntoViewIfNeeded`. The timeout
+ * is `normalizeTimeoutMs(opts.timeoutMs, 20_000)`.
+ *
+ * @throws The result of `toAIFriendlyError(err, ref)` when the scroll fails.
+ */
+export async function scrollIntoViewViaPlaywright(opts: {
+  cdpUrl: string;
+  targetId?: string;
+  ref: string;
+  timeoutMs?: number;
+}): Promise<void> {
+  const page = await getPageForTargetId(opts);
+  ensurePageState(page);
+  const timeout = normalizeTimeoutMs(opts.timeoutMs, 20_000);
+
+  const ref = requireRef(opts.ref);
+  const locator = refLocator(page, ref);
+  try {
+    await locator.scrollIntoViewIfNeeded({ timeout });
+  } catch (err) {
+    throw toAIFriendlyError(err, ref);
+  }
+}
+
 export async function navigateViaPlaywright(opts: {
   cdpUrl: string;
   targetId?: string;
diff --git a/src/browser/routes/agent.ts b/src/browser/routes/agent.ts
index 2de5e21eb..91b4d5c42 100644
--- a/src/browser/routes/agent.ts
+++ b/src/browser/routes/agent.ts
@@ -29,6 +29,7 @@ type ActKind =
   | "evaluate"
   | "fill"
   | "hover"
+  | "scrollIntoView"
   | "press"
   | "resize"
   | "select"
@@ -154,6 +155,7 @@ export function registerBrowserAgentRoutes(
       kind !== "evaluate" &&
       kind !== "fill" &&
       kind !== "hover" &&
+      kind !== "scrollIntoView" &&
       kind !== "press" &&
       kind !== "resize" &&
       kind !== "select" &&
@@ -257,6 +259,22 @@ export function registerBrowserAgentRoutes(
           });
           return res.json({ ok: true, targetId: tab.targetId });
         }
+        case "scrollIntoView": {
+          const ref = toStringOrEmpty(body.ref);
+          if (!ref) return jsonError(res, 400, "ref is required");
+          const timeoutMs = toNumber(body.timeoutMs);
+          const scrollRequest: Parameters<
+            typeof pw.scrollIntoViewViaPlaywright
+          >[0] = {
+            cdpUrl,
+            targetId: tab.targetId,
+            ref,
+          };
+          // Forward timeoutMs only when it is truthy; otherwise leave it unset.
+          if (timeoutMs) scrollRequest.timeoutMs = timeoutMs;
+          await pw.scrollIntoViewViaPlaywright(scrollRequest);
+          return res.json({ ok: true, targetId: tab.targetId });
+        }
         case "drag": {
           const startRef = toStringOrEmpty(body.startRef);
           const endRef = toStringOrEmpty(body.endRef);
diff --git a/src/cli/browser-cli-actions-input.ts b/src/cli/browser-cli-actions-input.ts
index 7fd05d49e..0466d213c 100644
--- a/src/cli/browser-cli-actions-input.ts
+++ b/src/cli/browser-cli-actions-input.ts
@@ -272,6 +272,50 @@ export function registerBrowserActionInputCommands(
       }
     });
 
+  browser
+    .command("scrollintoview")
+    .description("Scroll an element into view by ref from snapshot")
+    .argument("<ref>", "Ref id from snapshot")
+    .option("--target-id <id>", "CDP target id (or unique prefix)")
+    .option(
+      "--timeout-ms <ms>",
+      "How long to wait for scroll (default: 20000)",
+      (v: string) => Number(v),
+    )
+    .action(async (ref: string | undefined, opts, cmd) => {
+      const parent = parentOpts(cmd);
+      const baseUrl = resolveBrowserControlUrl(parent?.url);
+      const profile = parent?.browserProfile;
+      const refValue = typeof ref === "string" ? ref.trim() : "";
+      if (!refValue) {
+        defaultRuntime.error(danger("ref is required"));
+        defaultRuntime.exit(1);
+        return;
+      }
+      try {
+        const result = await browserAct(
+          baseUrl,
+          {
+            kind: "scrollIntoView",
+            ref: refValue,
+            targetId: opts.targetId?.trim() || undefined,
+            timeoutMs: Number.isFinite(opts.timeoutMs)
+              ? opts.timeoutMs
+              : undefined,
+          },
+          { profile },
+        );
+        if (parent?.json) {
+          defaultRuntime.log(JSON.stringify(result, null, 2));
+          return;
+        }
+        defaultRuntime.log(`scrolled into view: ${refValue}`);
+      } catch (err) {
+        defaultRuntime.error(danger(String(err)));
+        defaultRuntime.exit(1);
+      }
+    });
+
   browser
     .command("drag")
     .description("Drag from one ref to another")