refactor: make browser actions ref-only
This commit is contained in:
@@ -3,7 +3,7 @@
|
|||||||
## 2.0.0-beta3 — Unreleased (2025-12-26)
|
## 2.0.0-beta3 — Unreleased (2025-12-26)
|
||||||
|
|
||||||
### Highlights
|
### Highlights
|
||||||
- First-class Clawdis tools (browser, canvas, nodes, cron) replace the old `clawdis-*` skills; tool schemas are now injected directly into the agent runtime (including selector-based browser actions).
|
- First-class Clawdis tools (browser, canvas, nodes, cron) replace the old `clawdis-*` skills; tool schemas are now injected directly into the agent runtime.
|
||||||
- Per-session model selection + custom model providers: `models.providers` merges into `~/.clawdis/agent/models.json` (merge/replace modes) for LiteLLM, local OpenAI-compatible servers, Anthropic proxies, etc.
|
- Per-session model selection + custom model providers: `models.providers` merges into `~/.clawdis/agent/models.json` (merge/replace modes) for LiteLLM, local OpenAI-compatible servers, Anthropic proxies, etc.
|
||||||
- Group chat activation modes: per-group `/activation mention|always` command with status visibility.
|
- Group chat activation modes: per-group `/activation mention|always` command with status visibility.
|
||||||
- Discord bot transport for DMs and guild text channels, with allowlists + mention gating.
|
- Discord bot transport for DMs and guild text channels, with allowlists + mention gating.
|
||||||
@@ -17,6 +17,7 @@
|
|||||||
- Config refactor: `inbound.*` removed; use top-level `routing` (allowlists + group rules + transcription), `messages` (prefixes/timestamps), and `session` (scoping/store/mainKey). No legacy keys read.
|
- Config refactor: `inbound.*` removed; use top-level `routing` (allowlists + group rules + transcription), `messages` (prefixes/timestamps), and `session` (scoping/store/mainKey). No legacy keys read.
|
||||||
- Heartbeat config moved to `agent.heartbeat`: set `every: "30m"` (duration string) and optional `model`. `agent.heartbeatMinutes` is removed, and heartbeats are disabled unless `agent.heartbeat.every` is set.
|
- Heartbeat config moved to `agent.heartbeat`: set `every: "30m"` (duration string) and optional `model`. `agent.heartbeatMinutes` is removed, and heartbeats are disabled unless `agent.heartbeat.every` is set.
|
||||||
- Heartbeats now run via the gateway runner (main session) and deliver to the last used channel by default. WhatsApp reply-heartbeat behavior is removed; use `agent.heartbeat.target`/`to` (or `target: "none"`) to control delivery.
|
- Heartbeats now run via the gateway runner (main session) and deliver to the last used channel by default. WhatsApp reply-heartbeat behavior is removed; use `agent.heartbeat.target`/`to` (or `target: "none"`) to control delivery.
|
||||||
|
- Browser `act` no longer accepts CSS `selector`; use `snapshot` refs (default `ai`) or `evaluate` as an escape hatch.
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
- Heartbeat replies now strip repeated `HEARTBEAT_OK` tails to avoid accidental “OK OK” spam.
|
- Heartbeat replies now strip repeated `HEARTBEAT_OK` tails to avoid accidental “OK OK” spam.
|
||||||
@@ -56,6 +57,7 @@
|
|||||||
|
|
||||||
### macOS app
|
### macOS app
|
||||||
- Update-ready state surfaced in the menu; menu sections regrouped with session submenus.
|
- Update-ready state surfaced in the menu; menu sections regrouped with session submenus.
|
||||||
|
- Menu bar now shows a dedicated Nodes section under Context with inline rows, overflow submenu, and iconized actions.
|
||||||
- Session list polish: sleeping/disconnected/error states, usage bar restored, padding + bar sizing tuned, syncing menu removed, header hidden when disconnected.
|
- Session list polish: sleeping/disconnected/error states, usage bar restored, padding + bar sizing tuned, syncing menu removed, header hidden when disconnected.
|
||||||
- Chat UI polish: tool call cards + merged tool results, glass background, tighter composer spacing, visual effect host tweaks.
|
- Chat UI polish: tool call cards + merged tool results, glass background, tighter composer spacing, visual effect host tweaks.
|
||||||
- OAuth storage moved; legacy session syncing metadata removed.
|
- OAuth storage moved; legacy session syncing metadata removed.
|
||||||
|
|||||||
@@ -86,7 +86,6 @@ struct MenuContent: View {
|
|||||||
} label: {
|
} label: {
|
||||||
Label("Open Dashboard", systemImage: "gauge")
|
Label("Open Dashboard", systemImage: "gauge")
|
||||||
}
|
}
|
||||||
Divider()
|
|
||||||
Toggle(
|
Toggle(
|
||||||
isOn: Binding(
|
isOn: Binding(
|
||||||
get: { self.browserControlEnabled },
|
get: { self.browserControlEnabled },
|
||||||
|
|||||||
@@ -187,6 +187,11 @@ final class MenuSessionsInjector: NSObject, NSMenuDelegate {
|
|||||||
var cursor = insertIndex
|
var cursor = insertIndex
|
||||||
|
|
||||||
let entries = self.sortedNodeEntries()
|
let entries = self.sortedNodeEntries()
|
||||||
|
let topSeparator = NSMenuItem.separator()
|
||||||
|
topSeparator.tag = self.nodesTag
|
||||||
|
menu.insertItem(topSeparator, at: cursor)
|
||||||
|
cursor += 1
|
||||||
|
|
||||||
let header = self.makeNodesHeaderItem(width: width, count: entries.count)
|
let header = self.makeNodesHeaderItem(width: width, count: entries.count)
|
||||||
menu.insertItem(header, at: cursor)
|
menu.insertItem(header, at: cursor)
|
||||||
cursor += 1
|
cursor += 1
|
||||||
|
|||||||
@@ -112,4 +112,4 @@ git commit -m "Add Clawd workspace"
|
|||||||
- Canvas UI runs full-screen with native overlays. Avoid placing critical controls in the top-left/top-right/bottom edges; add explicit gutters in the layout and don’t rely on safe-area insets.
|
- Canvas UI runs full-screen with native overlays. Avoid placing critical controls in the top-left/top-right/bottom edges; add explicit gutters in the layout and don’t rely on safe-area insets.
|
||||||
- For browser-driven verification, use `clawdis browser` (tabs/status/screenshot) with the clawd-managed Chrome profile.
|
- For browser-driven verification, use `clawdis browser` (tabs/status/screenshot) with the clawd-managed Chrome profile.
|
||||||
- For DOM inspection, use `clawdis browser eval|query|dom|snapshot` (and `--json`/`--out` when you need machine output).
|
- For DOM inspection, use `clawdis browser eval|query|dom|snapshot` (and `--json`/`--out` when you need machine output).
|
||||||
- For interactions, use `clawdis browser click|type|hover|drag|select|upload|press|wait|navigate|back|evaluate|run` (click/type accept `--selector`).
|
- For interactions, use `clawdis browser click|type|hover|drag|select|upload|press|wait|navigate|back|evaluate|run` (click/type require snapshot refs; use `evaluate` for CSS selectors).
|
||||||
|
|||||||
@@ -167,16 +167,14 @@ Inspection:
|
|||||||
- `clawdis browser screenshot`
|
- `clawdis browser screenshot`
|
||||||
- `clawdis browser screenshot --full-page`
|
- `clawdis browser screenshot --full-page`
|
||||||
- `clawdis browser screenshot --ref 12`
|
- `clawdis browser screenshot --ref 12`
|
||||||
|
- `clawdis browser snapshot`
|
||||||
- `clawdis browser snapshot --format aria --limit 200`
|
- `clawdis browser snapshot --format aria --limit 200`
|
||||||
- `clawdis browser snapshot --format ai`
|
|
||||||
|
|
||||||
Actions:
|
Actions:
|
||||||
- `clawdis browser navigate https://example.com`
|
- `clawdis browser navigate https://example.com`
|
||||||
- `clawdis browser resize 1280 720`
|
- `clawdis browser resize 1280 720`
|
||||||
- `clawdis browser click 12 --double`
|
- `clawdis browser click 12 --double`
|
||||||
- `clawdis browser click --selector 'button.save'`
|
|
||||||
- `clawdis browser type 23 "hello" --submit`
|
- `clawdis browser type 23 "hello" --submit`
|
||||||
- `clawdis browser type --selector "input[name=q]" "hello"`
|
|
||||||
- `clawdis browser press Enter`
|
- `clawdis browser press Enter`
|
||||||
- `clawdis browser hover 44`
|
- `clawdis browser hover 44`
|
||||||
- `clawdis browser drag 10 11`
|
- `clawdis browser drag 10 11`
|
||||||
@@ -186,14 +184,15 @@ Actions:
|
|||||||
- `clawdis browser dialog --accept`
|
- `clawdis browser dialog --accept`
|
||||||
- `clawdis browser wait --text "Done"`
|
- `clawdis browser wait --text "Done"`
|
||||||
- `clawdis browser evaluate --fn '(el) => el.textContent' --ref 7`
|
- `clawdis browser evaluate --fn '(el) => el.textContent' --ref 7`
|
||||||
|
- `clawdis browser evaluate --fn "document.querySelector('.my-class').click()"`
|
||||||
- `clawdis browser console --level error`
|
- `clawdis browser console --level error`
|
||||||
- `clawdis browser pdf`
|
- `clawdis browser pdf`
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
- `upload` and `dialog` are **arming** calls; run them before the click/press that triggers the chooser/dialog.
|
- `upload` and `dialog` are **arming** calls; run them before the click/press that triggers the chooser/dialog.
|
||||||
- The default arm timeout is **2 minutes** (clamped to a maximum of 2 minutes); pass `timeoutMs` if you need a shorter one.
|
- The default arm timeout is **2 minutes** (clamped to a maximum of 2 minutes); pass `timeoutMs` if you need a shorter one.
|
||||||
- `snapshot --format ai` returns AI snapshot markup used for ref-based actions.
|
- `snapshot` defaults to `ai`; `aria` returns an accessibility tree for debugging.
|
||||||
- `click`/`type` accept `--selector` to target CSS selectors instead of AI refs.
|
- `click`/`type` require `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector one-offs.
|
||||||
|
|
||||||
## Security & privacy notes
|
## Security & privacy notes
|
||||||
|
|
||||||
|
|||||||
@@ -49,7 +49,8 @@ Core actions:
|
|||||||
Notes:
|
Notes:
|
||||||
- Requires `browser.enabled=true` in `~/.clawdis/clawdis.json`.
|
- Requires `browser.enabled=true` in `~/.clawdis/clawdis.json`.
|
||||||
- Uses `browser.controlUrl` unless `controlUrl` is passed explicitly.
|
- Uses `browser.controlUrl` unless `controlUrl` is passed explicitly.
|
||||||
- `act` supports CSS selectors for `click`/`type` via `selector` (use `ref` for AI snapshot targets).
|
- `snapshot` defaults to `ai`; use `aria` for the accessibility tree.
|
||||||
|
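- `act` targets elements by `ref` from `snapshot --format ai` (`click`/`type`/`hover`/`select`, plus `startRef`/`endRef` for `drag`); a CSS `selector` is rejected, so use `evaluate` for rare CSS selector needs.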
|
||||||
|
|
||||||
### `clawdis_canvas`
|
### `clawdis_canvas`
|
||||||
Drive the node Canvas (present, eval, snapshot, A2UI).
|
Drive the node Canvas (present, eval, snapshot, A2UI).
|
||||||
|
|||||||
@@ -341,40 +341,74 @@ async function resolveNodeId(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const BrowserActSchema = Type.Object({
|
const BrowserActSchema = Type.Union([
|
||||||
kind: Type.Union([
|
Type.Object({
|
||||||
Type.Literal("click"),
|
kind: Type.Literal("click"),
|
||||||
Type.Literal("type"),
|
ref: Type.String(),
|
||||||
Type.Literal("press"),
|
targetId: Type.Optional(Type.String()),
|
||||||
Type.Literal("hover"),
|
doubleClick: Type.Optional(Type.Boolean()),
|
||||||
Type.Literal("drag"),
|
button: Type.Optional(Type.String()),
|
||||||
Type.Literal("select"),
|
modifiers: Type.Optional(Type.Array(Type.String())),
|
||||||
Type.Literal("fill"),
|
}),
|
||||||
Type.Literal("resize"),
|
Type.Object({
|
||||||
Type.Literal("wait"),
|
kind: Type.Literal("type"),
|
||||||
Type.Literal("evaluate"),
|
ref: Type.String(),
|
||||||
Type.Literal("close"),
|
text: Type.String(),
|
||||||
]),
|
targetId: Type.Optional(Type.String()),
|
||||||
ref: Type.Optional(Type.String()),
|
submit: Type.Optional(Type.Boolean()),
|
||||||
selector: Type.Optional(Type.String()),
|
slowly: Type.Optional(Type.Boolean()),
|
||||||
targetId: Type.Optional(Type.String()),
|
}),
|
||||||
doubleClick: Type.Optional(Type.Boolean()),
|
Type.Object({
|
||||||
button: Type.Optional(Type.String()),
|
kind: Type.Literal("press"),
|
||||||
modifiers: Type.Optional(Type.Array(Type.String())),
|
key: Type.String(),
|
||||||
text: Type.Optional(Type.String()),
|
targetId: Type.Optional(Type.String()),
|
||||||
submit: Type.Optional(Type.Boolean()),
|
}),
|
||||||
slowly: Type.Optional(Type.Boolean()),
|
Type.Object({
|
||||||
key: Type.Optional(Type.String()),
|
kind: Type.Literal("hover"),
|
||||||
startRef: Type.Optional(Type.String()),
|
ref: Type.String(),
|
||||||
endRef: Type.Optional(Type.String()),
|
targetId: Type.Optional(Type.String()),
|
||||||
values: Type.Optional(Type.Array(Type.String())),
|
}),
|
||||||
fields: Type.Optional(Type.Array(Type.Record(Type.String(), Type.Unknown()))),
|
Type.Object({
|
||||||
width: Type.Optional(Type.Number()),
|
kind: Type.Literal("drag"),
|
||||||
height: Type.Optional(Type.Number()),
|
startRef: Type.String(),
|
||||||
timeMs: Type.Optional(Type.Number()),
|
endRef: Type.String(),
|
||||||
textGone: Type.Optional(Type.String()),
|
targetId: Type.Optional(Type.String()),
|
||||||
fn: Type.Optional(Type.String()),
|
}),
|
||||||
});
|
Type.Object({
|
||||||
|
kind: Type.Literal("select"),
|
||||||
|
ref: Type.String(),
|
||||||
|
values: Type.Array(Type.String()),
|
||||||
|
targetId: Type.Optional(Type.String()),
|
||||||
|
}),
|
||||||
|
Type.Object({
|
||||||
|
kind: Type.Literal("fill"),
|
||||||
|
fields: Type.Array(Type.Record(Type.String(), Type.Unknown())),
|
||||||
|
targetId: Type.Optional(Type.String()),
|
||||||
|
}),
|
||||||
|
Type.Object({
|
||||||
|
kind: Type.Literal("resize"),
|
||||||
|
width: Type.Number(),
|
||||||
|
height: Type.Number(),
|
||||||
|
targetId: Type.Optional(Type.String()),
|
||||||
|
}),
|
||||||
|
Type.Object({
|
||||||
|
kind: Type.Literal("wait"),
|
||||||
|
timeMs: Type.Optional(Type.Number()),
|
||||||
|
text: Type.Optional(Type.String()),
|
||||||
|
textGone: Type.Optional(Type.String()),
|
||||||
|
targetId: Type.Optional(Type.String()),
|
||||||
|
}),
|
||||||
|
Type.Object({
|
||||||
|
kind: Type.Literal("evaluate"),
|
||||||
|
fn: Type.String(),
|
||||||
|
ref: Type.Optional(Type.String()),
|
||||||
|
targetId: Type.Optional(Type.String()),
|
||||||
|
}),
|
||||||
|
Type.Object({
|
||||||
|
kind: Type.Literal("close"),
|
||||||
|
targetId: Type.Optional(Type.String()),
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
|
||||||
const BrowserToolSchema = Type.Union([
|
const BrowserToolSchema = Type.Union([
|
||||||
Type.Object({
|
Type.Object({
|
||||||
@@ -514,7 +548,7 @@ function createBrowserTool(): AnyAgentTool {
|
|||||||
const format =
|
const format =
|
||||||
params.format === "ai" || params.format === "aria"
|
params.format === "ai" || params.format === "aria"
|
||||||
? (params.format as "ai" | "aria")
|
? (params.format as "ai" | "aria")
|
||||||
: "aria";
|
: "ai";
|
||||||
const targetId =
|
const targetId =
|
||||||
typeof params.targetId === "string"
|
typeof params.targetId === "string"
|
||||||
? params.targetId.trim()
|
? params.targetId.trim()
|
||||||
|
|||||||
@@ -14,8 +14,7 @@ export type BrowserFormField = {
|
|||||||
export type BrowserActRequest =
|
export type BrowserActRequest =
|
||||||
| {
|
| {
|
||||||
kind: "click";
|
kind: "click";
|
||||||
ref?: string;
|
ref: string;
|
||||||
selector?: string;
|
|
||||||
targetId?: string;
|
targetId?: string;
|
||||||
doubleClick?: boolean;
|
doubleClick?: boolean;
|
||||||
button?: string;
|
button?: string;
|
||||||
@@ -23,8 +22,7 @@ export type BrowserActRequest =
|
|||||||
}
|
}
|
||||||
| {
|
| {
|
||||||
kind: "type";
|
kind: "type";
|
||||||
ref?: string;
|
ref: string;
|
||||||
selector?: string;
|
|
||||||
text: string;
|
text: string;
|
||||||
targetId?: string;
|
targetId?: string;
|
||||||
submit?: boolean;
|
submit?: boolean;
|
||||||
|
|||||||
@@ -111,45 +111,6 @@ describe("pw-ai", () => {
|
|||||||
expect(p1.click).toHaveBeenCalledTimes(1);
|
expect(p1.click).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("clicks a css selector when provided", async () => {
|
|
||||||
const { chromium } = await import("playwright-core");
|
|
||||||
const p1 = createPage({ targetId: "T1" });
|
|
||||||
const browser = createBrowser([p1.page]);
|
|
||||||
(
|
|
||||||
chromium.connectOverCDP as unknown as ReturnType<typeof vi.fn>
|
|
||||||
).mockResolvedValue(browser);
|
|
||||||
|
|
||||||
const mod = await importModule();
|
|
||||||
await mod.clickViaPlaywright({
|
|
||||||
cdpPort: 18792,
|
|
||||||
targetId: "T1",
|
|
||||||
selector: "button.save",
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(p1.locator).toHaveBeenCalledWith("button.save");
|
|
||||||
expect(p1.click).toHaveBeenCalledTimes(1);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("types via css selector when provided", async () => {
|
|
||||||
const { chromium } = await import("playwright-core");
|
|
||||||
const p1 = createPage({ targetId: "T1" });
|
|
||||||
const browser = createBrowser([p1.page]);
|
|
||||||
(
|
|
||||||
chromium.connectOverCDP as unknown as ReturnType<typeof vi.fn>
|
|
||||||
).mockResolvedValue(browser);
|
|
||||||
|
|
||||||
const mod = await importModule();
|
|
||||||
await mod.typeViaPlaywright({
|
|
||||||
cdpPort: 18792,
|
|
||||||
targetId: "T1",
|
|
||||||
selector: "input[name=q]",
|
|
||||||
text: "hello",
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(p1.locator).toHaveBeenCalledWith("input[name=q]");
|
|
||||||
expect(p1.fill).toHaveBeenCalledTimes(1);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("fails with a clear error when _snapshotForAI is missing", async () => {
|
it("fails with a clear error when _snapshotForAI is missing", async () => {
|
||||||
const { chromium } = await import("playwright-core");
|
const { chromium } = await import("playwright-core");
|
||||||
const p1 = createPage({ targetId: "T1", hasSnapshotForAI: false });
|
const p1 = createPage({ targetId: "T1", hasSnapshotForAI: false });
|
||||||
|
|||||||
@@ -10,18 +10,10 @@ import {
|
|||||||
let nextUploadArmId = 0;
|
let nextUploadArmId = 0;
|
||||||
let nextDialogArmId = 0;
|
let nextDialogArmId = 0;
|
||||||
|
|
||||||
type LocatorPage = Parameters<typeof refLocator>[0];
|
function requireRef(value: unknown): string {
|
||||||
|
const ref = typeof value === "string" ? value.trim() : "";
|
||||||
function resolveLocator(
|
if (!ref) throw new Error("ref is required");
|
||||||
page: LocatorPage,
|
return ref;
|
||||||
opts: { ref?: string; selector?: string },
|
|
||||||
) {
|
|
||||||
const selector =
|
|
||||||
typeof opts.selector === "string" ? opts.selector.trim() : "";
|
|
||||||
if (selector) return page.locator(selector);
|
|
||||||
const ref = typeof opts.ref === "string" ? opts.ref.trim() : "";
|
|
||||||
if (ref) return refLocator(page, ref);
|
|
||||||
throw new Error("ref or selector is required");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function snapshotAiViaPlaywright(opts: {
|
export async function snapshotAiViaPlaywright(opts: {
|
||||||
@@ -55,8 +47,7 @@ export async function snapshotAiViaPlaywright(opts: {
|
|||||||
export async function clickViaPlaywright(opts: {
|
export async function clickViaPlaywright(opts: {
|
||||||
cdpPort: number;
|
cdpPort: number;
|
||||||
targetId?: string;
|
targetId?: string;
|
||||||
ref?: string;
|
ref: string;
|
||||||
selector?: string;
|
|
||||||
doubleClick?: boolean;
|
doubleClick?: boolean;
|
||||||
button?: "left" | "right" | "middle";
|
button?: "left" | "right" | "middle";
|
||||||
modifiers?: Array<"Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift">;
|
modifiers?: Array<"Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift">;
|
||||||
@@ -67,10 +58,7 @@ export async function clickViaPlaywright(opts: {
|
|||||||
targetId: opts.targetId,
|
targetId: opts.targetId,
|
||||||
});
|
});
|
||||||
ensurePageState(page);
|
ensurePageState(page);
|
||||||
const locator = resolveLocator(page, {
|
const locator = refLocator(page, requireRef(opts.ref));
|
||||||
ref: opts.ref,
|
|
||||||
selector: opts.selector,
|
|
||||||
});
|
|
||||||
const timeout = Math.max(
|
const timeout = Math.max(
|
||||||
500,
|
500,
|
||||||
Math.min(60_000, Math.floor(opts.timeoutMs ?? 8000)),
|
Math.min(60_000, Math.floor(opts.timeoutMs ?? 8000)),
|
||||||
@@ -157,8 +145,7 @@ export async function pressKeyViaPlaywright(opts: {
|
|||||||
export async function typeViaPlaywright(opts: {
|
export async function typeViaPlaywright(opts: {
|
||||||
cdpPort: number;
|
cdpPort: number;
|
||||||
targetId?: string;
|
targetId?: string;
|
||||||
ref?: string;
|
ref: string;
|
||||||
selector?: string;
|
|
||||||
text: string;
|
text: string;
|
||||||
submit?: boolean;
|
submit?: boolean;
|
||||||
slowly?: boolean;
|
slowly?: boolean;
|
||||||
@@ -167,10 +154,7 @@ export async function typeViaPlaywright(opts: {
|
|||||||
const text = String(opts.text ?? "");
|
const text = String(opts.text ?? "");
|
||||||
const page = await getPageForTargetId(opts);
|
const page = await getPageForTargetId(opts);
|
||||||
ensurePageState(page);
|
ensurePageState(page);
|
||||||
const locator = resolveLocator(page, {
|
const locator = refLocator(page, requireRef(opts.ref));
|
||||||
ref: opts.ref,
|
|
||||||
selector: opts.selector,
|
|
||||||
});
|
|
||||||
const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 8000));
|
const timeout = Math.max(500, Math.min(60_000, opts.timeoutMs ?? 8000));
|
||||||
if (opts.slowly) {
|
if (opts.slowly) {
|
||||||
await locator.click({ timeout });
|
await locator.click({ timeout });
|
||||||
|
|||||||
@@ -35,6 +35,16 @@ type ActKind =
|
|||||||
type ClickButton = "left" | "right" | "middle";
|
type ClickButton = "left" | "right" | "middle";
|
||||||
type ClickModifier = "Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift";
|
type ClickModifier = "Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift";
|
||||||
|
|
||||||
|
const SELECTOR_UNSUPPORTED_MESSAGE = [
|
||||||
|
"Error: 'selector' is not supported. Use 'ref' from snapshot instead.",
|
||||||
|
"",
|
||||||
|
"Example workflow:",
|
||||||
|
"1. snapshot action to get page state with refs",
|
||||||
|
'2. act with ref: "e123" to interact with element',
|
||||||
|
"",
|
||||||
|
"This is more reliable for modern SPAs.",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
function readBody(req: express.Request): Record<string, unknown> {
|
function readBody(req: express.Request): Record<string, unknown> {
|
||||||
const body = req.body as Record<string, unknown> | undefined;
|
const body = req.body as Record<string, unknown> | undefined;
|
||||||
if (!body || typeof body !== "object" || Array.isArray(body)) return {};
|
if (!body || typeof body !== "object" || Array.isArray(body)) return {};
|
||||||
@@ -113,6 +123,9 @@ export function registerBrowserAgentRoutes(
|
|||||||
const body = readBody(req);
|
const body = readBody(req);
|
||||||
const kind = toStringOrEmpty(body.kind) as ActKind;
|
const kind = toStringOrEmpty(body.kind) as ActKind;
|
||||||
const targetId = toStringOrEmpty(body.targetId) || undefined;
|
const targetId = toStringOrEmpty(body.targetId) || undefined;
|
||||||
|
if (Object.prototype.hasOwnProperty.call(body, "selector")) {
|
||||||
|
return jsonError(res, 400, SELECTOR_UNSUPPORTED_MESSAGE);
|
||||||
|
}
|
||||||
|
|
||||||
if (
|
if (
|
||||||
kind !== "click" &&
|
kind !== "click" &&
|
||||||
@@ -139,9 +152,7 @@ export function registerBrowserAgentRoutes(
|
|||||||
switch (kind) {
|
switch (kind) {
|
||||||
case "click": {
|
case "click": {
|
||||||
const ref = toStringOrEmpty(body.ref);
|
const ref = toStringOrEmpty(body.ref);
|
||||||
const selector = toStringOrEmpty(body.selector);
|
if (!ref) return jsonError(res, 400, "ref is required");
|
||||||
if (!ref && !selector)
|
|
||||||
return jsonError(res, 400, "ref or selector is required");
|
|
||||||
const doubleClick = toBoolean(body.doubleClick) ?? false;
|
const doubleClick = toBoolean(body.doubleClick) ?? false;
|
||||||
const buttonRaw = toStringOrEmpty(body.button) || "";
|
const buttonRaw = toStringOrEmpty(body.button) || "";
|
||||||
const button = buttonRaw ? parseClickButton(buttonRaw) : undefined;
|
const button = buttonRaw ? parseClickButton(buttonRaw) : undefined;
|
||||||
@@ -171,10 +182,9 @@ export function registerBrowserAgentRoutes(
|
|||||||
const clickRequest: Parameters<typeof pw.clickViaPlaywright>[0] = {
|
const clickRequest: Parameters<typeof pw.clickViaPlaywright>[0] = {
|
||||||
cdpPort,
|
cdpPort,
|
||||||
targetId: tab.targetId,
|
targetId: tab.targetId,
|
||||||
|
ref,
|
||||||
doubleClick,
|
doubleClick,
|
||||||
};
|
};
|
||||||
if (ref) clickRequest.ref = ref;
|
|
||||||
if (selector) clickRequest.selector = selector;
|
|
||||||
if (button) clickRequest.button = button;
|
if (button) clickRequest.button = button;
|
||||||
if (modifiers) clickRequest.modifiers = modifiers;
|
if (modifiers) clickRequest.modifiers = modifiers;
|
||||||
await pw.clickViaPlaywright(clickRequest);
|
await pw.clickViaPlaywright(clickRequest);
|
||||||
@@ -182,9 +192,7 @@ export function registerBrowserAgentRoutes(
|
|||||||
}
|
}
|
||||||
case "type": {
|
case "type": {
|
||||||
const ref = toStringOrEmpty(body.ref);
|
const ref = toStringOrEmpty(body.ref);
|
||||||
const selector = toStringOrEmpty(body.selector);
|
if (!ref) return jsonError(res, 400, "ref is required");
|
||||||
if (!ref && !selector)
|
|
||||||
return jsonError(res, 400, "ref or selector is required");
|
|
||||||
if (typeof body.text !== "string")
|
if (typeof body.text !== "string")
|
||||||
return jsonError(res, 400, "text is required");
|
return jsonError(res, 400, "text is required");
|
||||||
const text = body.text;
|
const text = body.text;
|
||||||
@@ -193,12 +201,11 @@ export function registerBrowserAgentRoutes(
|
|||||||
const typeRequest: Parameters<typeof pw.typeViaPlaywright>[0] = {
|
const typeRequest: Parameters<typeof pw.typeViaPlaywright>[0] = {
|
||||||
cdpPort,
|
cdpPort,
|
||||||
targetId: tab.targetId,
|
targetId: tab.targetId,
|
||||||
|
ref,
|
||||||
text,
|
text,
|
||||||
submit,
|
submit,
|
||||||
slowly,
|
slowly,
|
||||||
};
|
};
|
||||||
if (ref) typeRequest.ref = ref;
|
|
||||||
if (selector) typeRequest.selector = selector;
|
|
||||||
await pw.typeViaPlaywright(typeRequest);
|
await pw.typeViaPlaywright(typeRequest);
|
||||||
return res.json({ ok: true, targetId: tab.targetId });
|
return res.json({ ok: true, targetId: tab.targetId });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -327,21 +327,17 @@ describe("browser control server", () => {
|
|||||||
modifiers: ["Shift"],
|
modifiers: ["Shift"],
|
||||||
});
|
});
|
||||||
|
|
||||||
const clickSelector = (await realFetch(`${base}/act`, {
|
const clickSelector = await realFetch(`${base}/act`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
kind: "click",
|
kind: "click",
|
||||||
selector: "button.save",
|
selector: "button.save",
|
||||||
}),
|
}),
|
||||||
}).then((r) => r.json())) as { ok: boolean };
|
|
||||||
expect(clickSelector.ok).toBe(true);
|
|
||||||
expect(pwMocks.clickViaPlaywright).toHaveBeenNthCalledWith(2, {
|
|
||||||
cdpPort: testPort + 1,
|
|
||||||
targetId: "abcd1234",
|
|
||||||
selector: "button.save",
|
|
||||||
doubleClick: false,
|
|
||||||
});
|
});
|
||||||
|
expect(clickSelector.status).toBe(400);
|
||||||
|
const clickSelectorBody = (await clickSelector.json()) as { error?: string };
|
||||||
|
// The rejection message quotes the field name ("'selector' is not supported"),
// so the pattern tolerates optional quotes; the unquoted form alone never matches it.
// NOTE(review): assumes jsonError passes the message through as `error` unchanged; confirm.
expect(clickSelectorBody.error).toMatch(/'?selector'? is not supported/i);
|
||||||
|
|
||||||
const type = (await realFetch(`${base}/act`, {
|
const type = (await realFetch(`${base}/act`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
@@ -358,26 +354,6 @@ describe("browser control server", () => {
|
|||||||
slowly: false,
|
slowly: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
const typeSelector = (await realFetch(`${base}/act`, {
|
|
||||||
method: "POST",
|
|
||||||
headers: { "Content-Type": "application/json" },
|
|
||||||
body: JSON.stringify({
|
|
||||||
kind: "type",
|
|
||||||
selector: "input[name=q]",
|
|
||||||
text: "hello",
|
|
||||||
submit: true,
|
|
||||||
}),
|
|
||||||
}).then((r) => r.json())) as { ok: boolean };
|
|
||||||
expect(typeSelector.ok).toBe(true);
|
|
||||||
expect(pwMocks.typeViaPlaywright).toHaveBeenNthCalledWith(2, {
|
|
||||||
cdpPort: testPort + 1,
|
|
||||||
targetId: "abcd1234",
|
|
||||||
selector: "input[name=q]",
|
|
||||||
text: "hello",
|
|
||||||
submit: true,
|
|
||||||
slowly: false,
|
|
||||||
});
|
|
||||||
|
|
||||||
const press = (await realFetch(`${base}/act`, {
|
const press = (await realFetch(`${base}/act`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
|
|||||||
@@ -114,9 +114,8 @@ export function registerBrowserActionInputCommands(
|
|||||||
|
|
||||||
browser
|
browser
|
||||||
.command("click")
|
.command("click")
|
||||||
.description("Click an element by ai ref or CSS selector")
|
.description("Click an element by ref from snapshot")
|
||||||
.argument("[ref]", "Ref id from ai snapshot")
|
.argument("<ref>", "Ref id from ai snapshot")
|
||||||
.option("--selector <css>", "CSS selector (instead of ref)")
|
|
||||||
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
||||||
.option("--double", "Double click", false)
|
.option("--double", "Double click", false)
|
||||||
.option("--button <left|right|middle>", "Mouse button to use")
|
.option("--button <left|right|middle>", "Mouse button to use")
|
||||||
@@ -124,11 +123,9 @@ export function registerBrowserActionInputCommands(
|
|||||||
.action(async (ref: string | undefined, opts, cmd) => {
|
.action(async (ref: string | undefined, opts, cmd) => {
|
||||||
const parent = parentOpts(cmd);
|
const parent = parentOpts(cmd);
|
||||||
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
||||||
const selector =
|
|
||||||
typeof opts.selector === "string" ? opts.selector.trim() : "";
|
|
||||||
const refValue = typeof ref === "string" ? ref.trim() : "";
|
const refValue = typeof ref === "string" ? ref.trim() : "";
|
||||||
if (!selector && !refValue) {
|
if (!refValue) {
|
||||||
defaultRuntime.error(danger("ref or --selector is required"));
|
defaultRuntime.error(danger("ref is required"));
|
||||||
defaultRuntime.exit(1);
|
defaultRuntime.exit(1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -141,8 +138,7 @@ export function registerBrowserActionInputCommands(
|
|||||||
try {
|
try {
|
||||||
const result = await browserAct(baseUrl, {
|
const result = await browserAct(baseUrl, {
|
||||||
kind: "click",
|
kind: "click",
|
||||||
ref: refValue || undefined,
|
ref: refValue,
|
||||||
selector: selector || undefined,
|
|
||||||
targetId: opts.targetId?.trim() || undefined,
|
targetId: opts.targetId?.trim() || undefined,
|
||||||
doubleClick: Boolean(opts.double),
|
doubleClick: Boolean(opts.double),
|
||||||
button: opts.button?.trim() || undefined,
|
button: opts.button?.trim() || undefined,
|
||||||
@@ -153,11 +149,7 @@ export function registerBrowserActionInputCommands(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const suffix = result.url ? ` on ${result.url}` : "";
|
const suffix = result.url ? ` on ${result.url}` : "";
|
||||||
if (selector) {
|
defaultRuntime.log(`clicked ref ${refValue}${suffix}`);
|
||||||
defaultRuntime.log(`clicked ${selector}${suffix}`);
|
|
||||||
} else {
|
|
||||||
defaultRuntime.log(`clicked ref ${refValue}${suffix}`);
|
|
||||||
}
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
defaultRuntime.error(danger(String(err)));
|
defaultRuntime.error(danger(String(err)));
|
||||||
defaultRuntime.exit(1);
|
defaultRuntime.exit(1);
|
||||||
@@ -166,29 +158,25 @@ export function registerBrowserActionInputCommands(
|
|||||||
|
|
||||||
browser
|
browser
|
||||||
.command("type")
|
.command("type")
|
||||||
.description("Type into an element by ai ref or CSS selector")
|
.description("Type into an element by ref from snapshot")
|
||||||
.argument("[ref]", "Ref id from ai snapshot")
|
.argument("<ref>", "Ref id from ai snapshot")
|
||||||
.argument("<text>", "Text to type")
|
.argument("<text>", "Text to type")
|
||||||
.option("--selector <css>", "CSS selector (instead of ref)")
|
|
||||||
.option("--submit", "Press Enter after typing", false)
|
.option("--submit", "Press Enter after typing", false)
|
||||||
.option("--slowly", "Type slowly (human-like)", false)
|
.option("--slowly", "Type slowly (human-like)", false)
|
||||||
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
||||||
.action(async (ref: string | undefined, text: string, opts, cmd) => {
|
.action(async (ref: string | undefined, text: string, opts, cmd) => {
|
||||||
const parent = parentOpts(cmd);
|
const parent = parentOpts(cmd);
|
||||||
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
||||||
const selector =
|
|
||||||
typeof opts.selector === "string" ? opts.selector.trim() : "";
|
|
||||||
const refValue = typeof ref === "string" ? ref.trim() : "";
|
const refValue = typeof ref === "string" ? ref.trim() : "";
|
||||||
if (!selector && !refValue) {
|
if (!refValue) {
|
||||||
defaultRuntime.error(danger("ref or --selector is required"));
|
defaultRuntime.error(danger("ref is required"));
|
||||||
defaultRuntime.exit(1);
|
defaultRuntime.exit(1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const result = await browserAct(baseUrl, {
|
const result = await browserAct(baseUrl, {
|
||||||
kind: "type",
|
kind: "type",
|
||||||
ref: refValue || undefined,
|
ref: refValue,
|
||||||
selector: selector || undefined,
|
|
||||||
text,
|
text,
|
||||||
submit: Boolean(opts.submit),
|
submit: Boolean(opts.submit),
|
||||||
slowly: Boolean(opts.slowly),
|
slowly: Boolean(opts.slowly),
|
||||||
@@ -198,11 +186,7 @@ export function registerBrowserActionInputCommands(
|
|||||||
defaultRuntime.log(JSON.stringify(result, null, 2));
|
defaultRuntime.log(JSON.stringify(result, null, 2));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (selector) {
|
defaultRuntime.log(`typed into ref ${refValue}`);
|
||||||
defaultRuntime.log(`typed into ${selector}`);
|
|
||||||
} else {
|
|
||||||
defaultRuntime.log(`typed into ref ${refValue}`);
|
|
||||||
}
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
defaultRuntime.error(danger(String(err)));
|
defaultRuntime.error(danger(String(err)));
|
||||||
defaultRuntime.exit(1);
|
defaultRuntime.exit(1);
|
||||||
|
|||||||
@@ -9,17 +9,15 @@ export const browserCoreExamples = [
|
|||||||
"clawdis browser screenshot",
|
"clawdis browser screenshot",
|
||||||
"clawdis browser screenshot --full-page",
|
"clawdis browser screenshot --full-page",
|
||||||
"clawdis browser screenshot --ref 12",
|
"clawdis browser screenshot --ref 12",
|
||||||
|
"clawdis browser snapshot",
|
||||||
"clawdis browser snapshot --format aria --limit 200",
|
"clawdis browser snapshot --format aria --limit 200",
|
||||||
"clawdis browser snapshot --format ai",
|
|
||||||
];
|
];
|
||||||
|
|
||||||
export const browserActionExamples = [
|
export const browserActionExamples = [
|
||||||
"clawdis browser navigate https://example.com",
|
"clawdis browser navigate https://example.com",
|
||||||
"clawdis browser resize 1280 720",
|
"clawdis browser resize 1280 720",
|
||||||
"clawdis browser click 12 --double",
|
"clawdis browser click 12 --double",
|
||||||
"clawdis browser click --selector 'button.save'",
|
|
||||||
'clawdis browser type 23 "hello" --submit',
|
'clawdis browser type 23 "hello" --submit',
|
||||||
'clawdis browser type --selector "input[name=q]" "hello"',
|
|
||||||
"clawdis browser press Enter",
|
"clawdis browser press Enter",
|
||||||
"clawdis browser hover 44",
|
"clawdis browser hover 44",
|
||||||
"clawdis browser drag 10 11",
|
"clawdis browser drag 10 11",
|
||||||
|
|||||||
@@ -45,8 +45,8 @@ export function registerBrowserInspectCommands(
|
|||||||
|
|
||||||
browser
|
browser
|
||||||
.command("snapshot")
|
.command("snapshot")
|
||||||
.description("Capture an AI-friendly snapshot (aria or ai)")
|
.description("Capture a snapshot (default: ai; aria is the accessibility tree)")
|
||||||
.option("--format <aria|ai>", "Snapshot format (default: aria)", "aria")
|
.option("--format <aria|ai>", "Snapshot format (default: ai)", "ai")
|
||||||
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
||||||
.option("--limit <n>", "Max nodes (default: 500/800)", (v: string) =>
|
.option("--limit <n>", "Max nodes (default: 500/800)", (v: string) =>
|
||||||
Number(v),
|
Number(v),
|
||||||
@@ -55,7 +55,7 @@ export function registerBrowserInspectCommands(
|
|||||||
.action(async (opts, cmd) => {
|
.action(async (opts, cmd) => {
|
||||||
const parent = parentOpts(cmd);
|
const parent = parentOpts(cmd);
|
||||||
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
||||||
const format = opts.format === "ai" ? "ai" : "aria";
|
const format = opts.format === "aria" ? "aria" : "ai";
|
||||||
try {
|
try {
|
||||||
const result = await browserSnapshot(baseUrl, {
|
const result = await browserSnapshot(baseUrl, {
|
||||||
format,
|
format,
|
||||||
|
|||||||
Reference in New Issue
Block a user