From bf0bee58b3f29a03381e173beb078d5db77c9ea4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 1 Jan 2026 09:35:20 +0000 Subject: [PATCH] fix: improve browser upload triggering --- CHANGELOG.md | 1 + docs/browser.md | 1 + docs/tools.md | 1 + src/agents/clawdis-tools.ts | 3 +++ src/browser/client-actions-core.ts | 2 ++ src/browser/pw-tools-core.ts | 14 ++++++++++++++ src/browser/routes/agent.ts | 8 ++++++++ src/browser/server.test.ts | 18 ++++++++++++++++++ src/cli/browser-cli-actions-input.ts | 2 ++ 9 files changed, 50 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e49ecf125..753a7a42d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ ### Fixes - Docs/agent tools: clarify that browser `wait` should be avoided by default and used only in exceptional cases. +- Browser tools: `upload` can auto-click a ref after arming and now emits input/change events after `setFiles` so sites like X pick up attachments. - macOS: Voice Wake now fully tears down the Speech pipeline when disabled (cancel pending restarts, drop stale callbacks) to avoid high CPU in the background. - macOS menu: add a Talk Mode action alongside the Open Dashboard/Chat/Canvas entries. - macOS Debug: hide “Restart Gateway” when the app won’t start a local gateway (remote mode / attach-only). diff --git a/docs/browser.md b/docs/browser.md index dd7d17239..662aa2c01 100644 --- a/docs/browser.md +++ b/docs/browser.md @@ -190,6 +190,7 @@ Actions: Notes: - `upload` and `dialog` are **arming** calls; run them before the click/press that triggers the chooser/dialog. +- `upload` can take a `ref` to auto-click after arming (useful for single-step file uploads). - The arm default timeout is **2 minutes** (clamped to max 2 minutes); pass `timeoutMs` if you need shorter. - `snapshot` defaults to `ai`; `aria` returns an accessibility tree for debugging. - `click`/`type` require `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector one-offs. diff --git a/docs/tools.md b/docs/tools.md index 083827c8f..32d41bc01 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -52,6 +52,7 @@ Notes: - `snapshot` defaults to `ai`; use `aria` for the accessibility tree. - `act` requires `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector needs. - Avoid `act` → `wait` by default; use it only in exceptional cases (no reliable UI state to wait on). +- `upload` can optionally pass a `ref` to auto-click after arming. ### `clawdis_canvas` Drive the node Canvas (present, eval, snapshot, A2UI). diff --git a/src/agents/clawdis-tools.ts b/src/agents/clawdis-tools.ts index 6192226fe..6007fadb0 100644 --- a/src/agents/clawdis-tools.ts +++ b/src/agents/clawdis-tools.ts @@ -483,6 +483,7 @@ const BrowserToolSchema = Type.Union([ action: Type.Literal("upload"), controlUrl: Type.Optional(Type.String()), paths: Type.Array(Type.String()), + ref: Type.Optional(Type.String()), targetId: Type.Optional(Type.String()), timeoutMs: Type.Optional(Type.Number()), }), @@ -625,6 +626,7 @@ function createBrowserTool(): AnyAgentTool { ? params.paths.map((p) => String(p)) : []; if (paths.length === 0) throw new Error("paths required"); + const ref = readStringParam(params, "ref"); const targetId = typeof params.targetId === "string" ? params.targetId.trim() @@ -637,6 +639,7 @@ function createBrowserTool(): AnyAgentTool { return jsonResult( await browserArmFileChooser(baseUrl, { paths, + ref, targetId, timeoutMs, }), diff --git a/src/browser/client-actions-core.ts b/src/browser/client-actions-core.ts index 117167349..92a3c1940 100644 --- a/src/browser/client-actions-core.ts +++ b/src/browser/client-actions-core.ts @@ -93,6 +93,7 @@ export async function browserArmFileChooser( baseUrl: string, opts: { paths: string[]; + ref?: string; targetId?: string; timeoutMs?: number; }, @@ -104,6 +105,7 @@ export async function browserArmFileChooser( headers: { "Content-Type": "application/json" }, body: JSON.stringify({ paths: opts.paths, + ref: opts.ref, targetId: opts.targetId, timeoutMs: opts.timeoutMs, }), diff --git a/src/browser/pw-tools-core.ts b/src/browser/pw-tools-core.ts index f5be4a331..4f1d0bc27 100644 --- a/src/browser/pw-tools-core.ts +++ b/src/browser/pw-tools-core.ts @@ -283,6 +283,20 @@ export async function armFileUploadViaPlaywright(opts: { return; } await fileChooser.setFiles(opts.paths); + try { + const input = + typeof fileChooser.element === "function" + ? await fileChooser.element() + : null; + if (input) { + await input.evaluate((el) => { + el.dispatchEvent(new Event("input", { bubbles: true })); + el.dispatchEvent(new Event("change", { bubbles: true })); + }); + } + } catch { + // Best-effort for sites that don't react to setFiles alone. + } }) .catch(() => { // Ignore timeouts; the chooser may never appear. diff --git a/src/browser/routes/agent.ts b/src/browser/routes/agent.ts index 8c0759735..b3c4c8c93 100644 --- a/src/browser/routes/agent.ts +++ b/src/browser/routes/agent.ts @@ -338,6 +338,7 @@ export function registerBrowserAgentRoutes( app.post("/hooks/file-chooser", async (req, res) => { const body = readBody(req); const targetId = toStringOrEmpty(body.targetId) || undefined; + const ref = toStringOrEmpty(body.ref) || undefined; const paths = toStringArray(body.paths) ?? []; const timeoutMs = toNumber(body.timeoutMs); if (!paths.length) return jsonError(res, 400, "paths are required"); @@ -351,6 +352,13 @@ export function registerBrowserAgentRoutes( paths, timeoutMs: timeoutMs ?? undefined, }); + if (ref) { + await pw.clickViaPlaywright({ + cdpPort: ctx.state().cdpPort, + targetId: tab.targetId, + ref, + }); + } res.json({ ok: true }); } catch (err) { handleRouteError(ctx, res, err); diff --git a/src/browser/server.test.ts b/src/browser/server.test.ts index 6440d3928..51f83fe9b 100644 --- a/src/browser/server.test.ts +++ b/src/browser/server.test.ts @@ -475,6 +475,24 @@ describe("browser control server", () => { timeoutMs: 1234, }); + const uploadWithRef = await realFetch(`${base}/hooks/file-chooser`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ paths: ["/tmp/b.txt"], ref: "e12" }), + }).then((r) => r.json()); + expect(uploadWithRef).toMatchObject({ ok: true }); + expect(pwMocks.armFileUploadViaPlaywright).toHaveBeenCalledWith({ + cdpPort: testPort + 1, + targetId: "abcd1234", + paths: ["/tmp/b.txt"], + timeoutMs: undefined, + }); + expect(pwMocks.clickViaPlaywright).toHaveBeenCalledWith({ + cdpPort: testPort + 1, + targetId: "abcd1234", + ref: "e12", + }); + const dialog = await realFetch(`${base}/hooks/dialog`, { method: "POST", headers: { "Content-Type": "application/json" }, diff --git a/src/cli/browser-cli-actions-input.ts b/src/cli/browser-cli-actions-input.ts index d6f14ac28..5f803ae57 100644 --- a/src/cli/browser-cli-actions-input.ts +++ b/src/cli/browser-cli-actions-input.ts @@ -301,6 +301,7 @@ export function registerBrowserActionInputCommands( .command("upload") .description("Arm file upload for the next file chooser") .argument("", "File paths to upload") + .option("--ref ", "Ref id from ai snapshot to click after arming") .option("--target-id ", "CDP target id (or unique prefix)") .option( "--timeout-ms ", @@ -313,6 +314,7 @@ export function registerBrowserActionInputCommands( try { const result = await browserArmFileChooser(baseUrl, { paths, + ref: opts.ref?.trim() || undefined, targetId: opts.targetId?.trim() || undefined, timeoutMs: Number.isFinite(opts.timeoutMs) ? opts.timeoutMs