feat(browser): add scrollintoview action

This commit is contained in:
Peter Steinberger
2026-01-12 21:56:08 +00:00
parent 2faf7cea93
commit 6857f16609
5 changed files with 87 additions and 0 deletions

View File

@@ -36,6 +36,12 @@ export type BrowserActRequest =
}
| { kind: "press"; key: string; targetId?: string; delayMs?: number }
| { kind: "hover"; ref: string; targetId?: string; timeoutMs?: number }
| {
kind: "scrollIntoView";
ref: string;
targetId?: string;
timeoutMs?: number;
}
| {
kind: "drag";
startRef: string;

View File

@@ -30,6 +30,7 @@ export {
pressKeyViaPlaywright,
resizeViewportViaPlaywright,
responseBodyViaPlaywright,
scrollIntoViewViaPlaywright,
selectOptionViaPlaywright,
setDeviceViaPlaywright,
setExtraHTTPHeadersViaPlaywright,

View File

@@ -1160,6 +1160,25 @@ export async function responseBodyViaPlaywright(opts: {
};
}
/**
 * Scrolls the element addressed by a snapshot ref into view on the target page.
 *
 * The page is obtained from `getPageForTargetId(opts)` and handed to
 * `ensurePageState` before the ref is validated with `requireRef` and turned
 * into a locator with `refLocator`. The scroll itself is Playwright's
 * `locator.scrollIntoViewIfNeeded`, bounded by the normalized timeout.
 *
 * @param opts.cdpUrl - Forwarded (as part of `opts`) to `getPageForTargetId`.
 * @param opts.targetId - Optional target id, also forwarded to `getPageForTargetId`.
 * @param opts.ref - Ref of the element to scroll to; passed through `requireRef`.
 * @param opts.timeoutMs - Optional timeout; `normalizeTimeoutMs` is given
 *   20_000 as its second argument (presumably the fallback — confirm in helper).
 * @throws The value produced by `toAIFriendlyError` when the scroll call rejects.
 */
export async function scrollIntoViewViaPlaywright(opts: {
  cdpUrl: string;
  targetId?: string;
  ref: string;
  timeoutMs?: number;
}): Promise<void> {
  const targetPage = await getPageForTargetId(opts);
  ensurePageState(targetPage);

  const scrollTimeout = normalizeTimeoutMs(opts.timeoutMs, 20_000);
  const elementRef = requireRef(opts.ref);
  const element = refLocator(targetPage, elementRef);

  // Only the scroll call is guarded: failures from the lookups above
  // propagate unchanged, exactly as before.
  try {
    await element.scrollIntoViewIfNeeded({ timeout: scrollTimeout });
  } catch (scrollError) {
    throw toAIFriendlyError(scrollError, elementRef);
  }
}
export async function navigateViaPlaywright(opts: {
cdpUrl: string;
targetId?: string;

View File

@@ -29,6 +29,7 @@ type ActKind =
| "evaluate"
| "fill"
| "hover"
| "scrollIntoView"
| "press"
| "resize"
| "select"
@@ -154,6 +155,7 @@ export function registerBrowserAgentRoutes(
kind !== "evaluate" &&
kind !== "fill" &&
kind !== "hover" &&
kind !== "scrollIntoView" &&
kind !== "press" &&
kind !== "resize" &&
kind !== "select" &&
@@ -257,6 +259,21 @@ export function registerBrowserAgentRoutes(
});
return res.json({ ok: true, targetId: tab.targetId });
}
case "scrollIntoView": {
  // A non-empty ref is mandatory; answer 400 before calling into Playwright.
  const ref = toStringOrEmpty(body.ref);
  if (!ref) return jsonError(res, 400, "ref is required");

  const timeoutMs = toNumber(body.timeoutMs);
  type ScrollRequest = Parameters<typeof pw.scrollIntoViewViaPlaywright>[0];

  // timeoutMs is only added when truthy, so the key stays absent (rather
  // than present-but-undefined) when the caller did not supply a usable value.
  const scrollRequest: ScrollRequest = {
    cdpUrl,
    targetId: tab.targetId,
    ref,
    ...(timeoutMs ? { timeoutMs } : {}),
  };

  await pw.scrollIntoViewViaPlaywright(scrollRequest);
  return res.json({ ok: true, targetId: tab.targetId });
}
case "drag": {
const startRef = toStringOrEmpty(body.startRef);
const endRef = toStringOrEmpty(body.endRef);

View File

@@ -272,6 +272,50 @@ export function registerBrowserActionInputCommands(
}
});
// CLI: `scrollintoview <ref>` — sends a "scrollIntoView" act request to the
// browser control server and reports the outcome (raw JSON when the parent
// command's `json` option is set, otherwise a one-line confirmation).
browser
  .command("scrollintoview")
  .description("Scroll an element into view by ref from snapshot")
  .argument("<ref>", "Ref id from snapshot")
  .option("--target-id <id>", "CDP target id (or unique prefix)")
  .option(
    "--timeout-ms <ms>",
    "How long to wait for scroll (default: 20000)",
    (raw: string) => Number(raw),
  )
  .action(async (ref: string | undefined, opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    const profile = parent?.browserProfile;

    // Reject a missing or whitespace-only ref before issuing any request.
    const trimmedRef = typeof ref === "string" ? ref.trim() : "";
    if (trimmedRef === "") {
      defaultRuntime.error(danger("ref is required"));
      defaultRuntime.exit(1);
      return;
    }

    try {
      // Blank target ids and non-finite timeouts (e.g. NaN from the option
      // parser) are sent as undefined.
      const targetId = opts.targetId?.trim() || undefined;
      const timeoutMs = Number.isFinite(opts.timeoutMs)
        ? opts.timeoutMs
        : undefined;

      const result = await browserAct(
        baseUrl,
        { kind: "scrollIntoView", ref: trimmedRef, targetId, timeoutMs },
        { profile },
      );

      defaultRuntime.log(
        parent?.json
          ? JSON.stringify(result, null, 2)
          : `scrolled into view: ${trimmedRef}`,
      );
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
browser
.command("drag")
.description("Drag from one ref to another")