diff --git a/CHANGELOG.md b/CHANGELOG.md
index 753a7a42d..09a8b8093 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,7 @@
### Fixes
- Docs/agent tools: clarify that browser `wait` should be avoided by default and used only in exceptional cases.
-- Browser tools: `upload` can auto-click a ref after arming and now emits input/change events after `setFiles` so sites like X pick up attachments.
+- Browser tools: `upload` supports auto-click refs, direct `inputRef`/`element` file inputs, and emits input/change after `setFiles` so JS-heavy sites pick up attachments.
- macOS: Voice Wake now fully tears down the Speech pipeline when disabled (cancel pending restarts, drop stale callbacks) to avoid high CPU in the background.
- macOS menu: add a Talk Mode action alongside the Open Dashboard/Chat/Canvas entries.
- macOS Debug: hide “Restart Gateway” when the app won’t start a local gateway (remote mode / attach-only).
diff --git a/docs/browser.md b/docs/browser.md
index 662aa2c01..df49984c3 100644
--- a/docs/browser.md
+++ b/docs/browser.md
@@ -191,6 +191,7 @@ Actions:
Notes:
- `upload` and `dialog` are **arming** calls; run them before the click/press that triggers the chooser/dialog.
- `upload` can take a `ref` to auto-click after arming (useful for single-step file uploads).
+- `upload` can also take `inputRef` (aria ref) or `element` (CSS selector) to set `<input type="file">` directly without waiting for a file chooser.
- The arm default timeout is **2 minutes** (clamped to max 2 minutes); pass `timeoutMs` if you need shorter.
- `snapshot` defaults to `ai`; `aria` returns an accessibility tree for debugging.
- `click`/`type` require `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector one-offs.
diff --git a/docs/tools.md b/docs/tools.md
index 32d41bc01..bd5a6a489 100644
--- a/docs/tools.md
+++ b/docs/tools.md
@@ -53,6 +53,7 @@ Notes:
- `act` requires `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector needs.
- Avoid `act` → `wait` by default; use it only in exceptional cases (no reliable UI state to wait on).
- `upload` can optionally pass a `ref` to auto-click after arming.
+- `upload` also supports `inputRef` (aria ref) or `element` (CSS selector) to set `<input type="file">` directly.
### `clawdis_canvas`
Drive the node Canvas (present, eval, snapshot, A2UI).
diff --git a/src/agents/clawdis-tools.ts b/src/agents/clawdis-tools.ts
index 6007fadb0..0a273008e 100644
--- a/src/agents/clawdis-tools.ts
+++ b/src/agents/clawdis-tools.ts
@@ -484,6 +484,8 @@ const BrowserToolSchema = Type.Union([
controlUrl: Type.Optional(Type.String()),
paths: Type.Array(Type.String()),
ref: Type.Optional(Type.String()),
+ inputRef: Type.Optional(Type.String()),
+ element: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
}),
@@ -627,6 +629,8 @@ function createBrowserTool(): AnyAgentTool {
: [];
if (paths.length === 0) throw new Error("paths required");
const ref = readStringParam(params, "ref");
+ const inputRef = readStringParam(params, "inputRef");
+ const element = readStringParam(params, "element");
const targetId =
typeof params.targetId === "string"
? params.targetId.trim()
@@ -640,6 +644,8 @@ function createBrowserTool(): AnyAgentTool {
await browserArmFileChooser(baseUrl, {
paths,
ref,
+ inputRef,
+ element,
targetId,
timeoutMs,
}),
diff --git a/src/browser/client-actions-core.ts b/src/browser/client-actions-core.ts
index 92a3c1940..5c3fd5a66 100644
--- a/src/browser/client-actions-core.ts
+++ b/src/browser/client-actions-core.ts
@@ -94,6 +94,8 @@ export async function browserArmFileChooser(
opts: {
paths: string[];
ref?: string;
+ inputRef?: string;
+ element?: string;
targetId?: string;
timeoutMs?: number;
},
@@ -106,6 +108,8 @@ export async function browserArmFileChooser(
body: JSON.stringify({
paths: opts.paths,
ref: opts.ref,
+ inputRef: opts.inputRef,
+ element: opts.element,
targetId: opts.targetId,
timeoutMs: opts.timeoutMs,
}),
diff --git a/src/browser/pw-ai.ts b/src/browser/pw-ai.ts
index b984b9e5e..fe14e2ce9 100644
--- a/src/browser/pw-ai.ts
+++ b/src/browser/pw-ai.ts
@@ -22,6 +22,7 @@ export {
pressKeyViaPlaywright,
resizeViewportViaPlaywright,
selectOptionViaPlaywright,
+ setInputFilesViaPlaywright,
snapshotAiViaPlaywright,
takeScreenshotViaPlaywright,
typeViaPlaywright,
diff --git a/src/browser/pw-tools-core.ts b/src/browser/pw-tools-core.ts
index 4f1d0bc27..99f1507eb 100644
--- a/src/browser/pw-tools-core.ts
+++ b/src/browser/pw-tools-core.ts
@@ -303,6 +303,44 @@ export async function armFileUploadViaPlaywright(opts: {
});
}
+/**
+ * Sets files directly on an input element instead of waiting for a file
+ * chooser to open.
+ *
+ * The target is located either with `refLocator(page, inputRef)` or as the
+ * first match of the `element` selector; exactly one of the two must be
+ * provided (values are trimmed before the check). After `setInputFiles`,
+ * bubbling `input` and `change` events are dispatched on the element as a
+ * best-effort step; any failure in that step is ignored.
+ *
+ * @param opts.cdpPort - Passed through to `getPageForTargetId` as part of `opts`.
+ * @param opts.targetId - Optional target id, passed through to `getPageForTargetId`.
+ * @param opts.inputRef - Ref resolved via `refLocator`; mutually exclusive with `element`.
+ * @param opts.element - Selector handed to `page.locator`; mutually exclusive with `inputRef`.
+ * @param opts.paths - File paths handed to `setInputFiles`; must be non-empty.
+ * @throws Error when `paths` is empty, when both `inputRef` and `element` are
+ *   given, or when neither is given. Note these checks run after the page has
+ *   been resolved.
+ */
+export async function setInputFilesViaPlaywright(opts: {
+  cdpPort: number;
+  targetId?: string;
+  inputRef?: string;
+  element?: string;
+  paths: string[];
+}): Promise<void> {
+  const page = await getPageForTargetId(opts);
+  ensurePageState(page);
+  if (!opts.paths.length) throw new Error("paths are required");
+  // Normalise both selectors to trimmed strings so blank values count as absent.
+  const inputRef =
+    typeof opts.inputRef === "string" ? opts.inputRef.trim() : "";
+  const element = typeof opts.element === "string" ? opts.element.trim() : "";
+  if (inputRef && element) {
+    throw new Error("inputRef and element are mutually exclusive");
+  }
+  if (!inputRef && !element) {
+    throw new Error("inputRef or element is required");
+  }
+
+  const locator = inputRef
+    ? refLocator(page, inputRef)
+    : page.locator(element).first();
+
+  await locator.setInputFiles(opts.paths);
+  try {
+    const handle = await locator.elementHandle();
+    if (handle) {
+      // Re-emit bubbling input/change so page scripts listening higher up
+      // in the DOM also observe the new files.
+      await handle.evaluate((el) => {
+        el.dispatchEvent(new Event("input", { bubbles: true }));
+        el.dispatchEvent(new Event("change", { bubbles: true }));
+      });
+    }
+  } catch {
+    // Best-effort for sites that don't react to setInputFiles alone.
+  }
+}
+
export async function armDialogViaPlaywright(opts: {
cdpPort: number;
targetId?: string;
diff --git a/src/browser/routes/agent.ts b/src/browser/routes/agent.ts
index b3c4c8c93..41d32ddc1 100644
--- a/src/browser/routes/agent.ts
+++ b/src/browser/routes/agent.ts
@@ -339,6 +339,8 @@ export function registerBrowserAgentRoutes(
const body = readBody(req);
const targetId = toStringOrEmpty(body.targetId) || undefined;
const ref = toStringOrEmpty(body.ref) || undefined;
+ const inputRef = toStringOrEmpty(body.inputRef) || undefined;
+ const element = toStringOrEmpty(body.element) || undefined;
const paths = toStringArray(body.paths) ?? [];
const timeoutMs = toNumber(body.timeoutMs);
if (!paths.length) return jsonError(res, 400, "paths are required");
@@ -346,18 +348,35 @@ export function registerBrowserAgentRoutes(
const tab = await ctx.ensureTabAvailable(targetId);
const pw = await requirePwAi(res, "file chooser hook");
if (!pw) return;
- await pw.armFileUploadViaPlaywright({
- cdpPort: ctx.state().cdpPort,
- targetId: tab.targetId,
- paths,
- timeoutMs: timeoutMs ?? undefined,
- });
- if (ref) {
- await pw.clickViaPlaywright({
+ if (inputRef || element) {
+ if (ref) {
+ return jsonError(
+ res,
+ 400,
+ "ref cannot be combined with inputRef/element",
+ );
+ }
+ await pw.setInputFilesViaPlaywright({
cdpPort: ctx.state().cdpPort,
targetId: tab.targetId,
- ref,
+ inputRef,
+ element,
+ paths,
});
+ } else {
+ await pw.armFileUploadViaPlaywright({
+ cdpPort: ctx.state().cdpPort,
+ targetId: tab.targetId,
+ paths,
+ timeoutMs: timeoutMs ?? undefined,
+ });
+ if (ref) {
+ await pw.clickViaPlaywright({
+ cdpPort: ctx.state().cdpPort,
+ targetId: tab.targetId,
+ ref,
+ });
+ }
}
res.json({ ok: true });
} catch (err) {
diff --git a/src/browser/server.test.ts b/src/browser/server.test.ts
index 51f83fe9b..0e4a98392 100644
--- a/src/browser/server.test.ts
+++ b/src/browser/server.test.ts
@@ -33,6 +33,7 @@ const pwMocks = vi.hoisted(() => ({
pressKeyViaPlaywright: vi.fn(async () => {}),
resizeViewportViaPlaywright: vi.fn(async () => {}),
selectOptionViaPlaywright: vi.fn(async () => {}),
+ setInputFilesViaPlaywright: vi.fn(async () => {}),
snapshotAiViaPlaywright: vi.fn(async () => ({ snapshot: "ok" })),
takeScreenshotViaPlaywright: vi.fn(async () => ({
buffer: Buffer.from("png"),
@@ -493,6 +494,37 @@ describe("browser control server", () => {
ref: "e12",
});
+ const uploadWithInputRef = await realFetch(`${base}/hooks/file-chooser`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({ paths: ["/tmp/c.txt"], inputRef: "e99" }),
+ }).then((r) => r.json());
+ expect(uploadWithInputRef).toMatchObject({ ok: true });
+ expect(pwMocks.setInputFilesViaPlaywright).toHaveBeenCalledWith({
+ cdpPort: testPort + 1,
+ targetId: "abcd1234",
+ inputRef: "e99",
+ element: undefined,
+ paths: ["/tmp/c.txt"],
+ });
+
+ const uploadWithElement = await realFetch(`${base}/hooks/file-chooser`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ paths: ["/tmp/d.txt"],
+ element: "input[type=file]",
+ }),
+ }).then((r) => r.json());
+ expect(uploadWithElement).toMatchObject({ ok: true });
+ expect(pwMocks.setInputFilesViaPlaywright).toHaveBeenCalledWith({
+ cdpPort: testPort + 1,
+ targetId: "abcd1234",
+ inputRef: undefined,
+ element: "input[type=file]",
+ paths: ["/tmp/d.txt"],
+ });
+
const dialog = await realFetch(`${base}/hooks/dialog`, {
method: "POST",
headers: { "Content-Type": "application/json" },
diff --git a/src/cli/browser-cli-actions-input.ts b/src/cli/browser-cli-actions-input.ts
index 5f803ae57..54d8b9590 100644
--- a/src/cli/browser-cli-actions-input.ts
+++ b/src/cli/browser-cli-actions-input.ts
@@ -302,6 +302,8 @@ export function registerBrowserActionInputCommands(
.description("Arm file upload for the next file chooser")
.argument("", "File paths to upload")
.option("--ref [", "Ref id from ai snapshot to click after arming")
+ .option("--input-ref ][", "Ref id for to set directly")
+ .option("--element ", "CSS selector for ")
.option("--target-id ", "CDP target id (or unique prefix)")
.option(
"--timeout-ms ",
@@ -315,6 +317,8 @@ export function registerBrowserActionInputCommands(
const result = await browserArmFileChooser(baseUrl, {
paths,
ref: opts.ref?.trim() || undefined,
+ inputRef: opts.inputRef?.trim() || undefined,
+ element: opts.element?.trim() || undefined,
targetId: opts.targetId?.trim() || undefined,
timeoutMs: Number.isFinite(opts.timeoutMs)
? opts.timeoutMs
]