fix: allow direct file input uploads
This commit is contained in:
@@ -19,7 +19,7 @@
|
||||
|
||||
### Fixes
|
||||
- Docs/agent tools: clarify that browser `wait` should be avoided by default and used only in exceptional cases.
|
||||
- Browser tools: `upload` can auto-click a ref after arming and now emits input/change events after `setFiles` so sites like X pick up attachments.
|
||||
- Browser tools: `upload` supports auto-click refs, direct `inputRef`/`element` file inputs, and emits input/change after `setFiles` so JS-heavy sites pick up attachments.
|
||||
- macOS: Voice Wake now fully tears down the Speech pipeline when disabled (cancel pending restarts, drop stale callbacks) to avoid high CPU in the background.
|
||||
- macOS menu: add a Talk Mode action alongside the Open Dashboard/Chat/Canvas entries.
|
||||
- macOS Debug: hide “Restart Gateway” when the app won’t start a local gateway (remote mode / attach-only).
|
||||
|
||||
@@ -191,6 +191,7 @@ Actions:
|
||||
Notes:
|
||||
- `upload` and `dialog` are **arming** calls; run them before the click/press that triggers the chooser/dialog.
|
||||
- `upload` can take a `ref` to auto-click after arming (useful for single-step file uploads).
|
||||
- `upload` can also take `inputRef` (aria ref) or `element` (CSS selector) to set `<input type="file">` directly without waiting for a file chooser.
|
||||
- The default arm timeout is **2 minutes** (values are clamped to a maximum of 2 minutes); pass `timeoutMs` if you need a shorter timeout.
|
||||
- `snapshot` defaults to `ai`; `aria` returns an accessibility tree for debugging.
|
||||
- `click`/`type` require `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector one-offs.
|
||||
|
||||
@@ -53,6 +53,7 @@ Notes:
|
||||
- `act` requires `ref` from `snapshot --format ai`; use `evaluate` for rare CSS selector needs.
|
||||
- Avoid `act` → `wait` by default; use it only in exceptional cases (no reliable UI state to wait on).
|
||||
- `upload` can optionally pass a `ref` to auto-click after arming.
|
||||
- `upload` also supports `inputRef` (aria ref) or `element` (CSS selector) to set `<input type="file">` directly.
|
||||
|
||||
### `clawdis_canvas`
|
||||
Drive the node Canvas (present, eval, snapshot, A2UI).
|
||||
|
||||
@@ -484,6 +484,8 @@ const BrowserToolSchema = Type.Union([
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
paths: Type.Array(Type.String()),
|
||||
ref: Type.Optional(Type.String()),
|
||||
inputRef: Type.Optional(Type.String()),
|
||||
element: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
timeoutMs: Type.Optional(Type.Number()),
|
||||
}),
|
||||
@@ -627,6 +629,8 @@ function createBrowserTool(): AnyAgentTool {
|
||||
: [];
|
||||
if (paths.length === 0) throw new Error("paths required");
|
||||
const ref = readStringParam(params, "ref");
|
||||
const inputRef = readStringParam(params, "inputRef");
|
||||
const element = readStringParam(params, "element");
|
||||
const targetId =
|
||||
typeof params.targetId === "string"
|
||||
? params.targetId.trim()
|
||||
@@ -640,6 +644,8 @@ function createBrowserTool(): AnyAgentTool {
|
||||
await browserArmFileChooser(baseUrl, {
|
||||
paths,
|
||||
ref,
|
||||
inputRef,
|
||||
element,
|
||||
targetId,
|
||||
timeoutMs,
|
||||
}),
|
||||
|
||||
@@ -94,6 +94,8 @@ export async function browserArmFileChooser(
|
||||
opts: {
|
||||
paths: string[];
|
||||
ref?: string;
|
||||
inputRef?: string;
|
||||
element?: string;
|
||||
targetId?: string;
|
||||
timeoutMs?: number;
|
||||
},
|
||||
@@ -106,6 +108,8 @@ export async function browserArmFileChooser(
|
||||
body: JSON.stringify({
|
||||
paths: opts.paths,
|
||||
ref: opts.ref,
|
||||
inputRef: opts.inputRef,
|
||||
element: opts.element,
|
||||
targetId: opts.targetId,
|
||||
timeoutMs: opts.timeoutMs,
|
||||
}),
|
||||
|
||||
@@ -22,6 +22,7 @@ export {
|
||||
pressKeyViaPlaywright,
|
||||
resizeViewportViaPlaywright,
|
||||
selectOptionViaPlaywright,
|
||||
setInputFilesViaPlaywright,
|
||||
snapshotAiViaPlaywright,
|
||||
takeScreenshotViaPlaywright,
|
||||
typeViaPlaywright,
|
||||
|
||||
@@ -303,6 +303,44 @@ export async function armFileUploadViaPlaywright(opts: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Sets files directly on a file input, without arming a file chooser.
 *
 * The target input is identified by exactly one of:
 *  - `inputRef` — a ref resolved through `refLocator`, or
 *  - `element`  — a selector passed to `page.locator`, using the first match.
 *
 * After `setInputFiles` completes, `input` and `change` events are dispatched
 * on the element as a best-effort nudge; failures in that step are ignored.
 *
 * @param opts.cdpPort  Forwarded to `getPageForTargetId` to locate the page.
 * @param opts.targetId Optional target id, forwarded to `getPageForTargetId`.
 * @param opts.inputRef Ref of the file input (mutually exclusive with `element`).
 * @param opts.element  Selector of the file input (mutually exclusive with `inputRef`).
 * @param opts.paths    Non-empty list of file paths to set on the input.
 * @throws Error if `paths` is empty, if both `inputRef` and `element` are
 *         given, or if neither is given (after trimming whitespace).
 */
export async function setInputFilesViaPlaywright(opts: {
  cdpPort: number;
  targetId?: string;
  inputRef?: string;
  element?: string;
  paths: string[];
}): Promise<void> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);

  if (opts.paths.length === 0) throw new Error("paths are required");

  // Normalise both selectors to trimmed strings; "" means "not provided".
  const refId = typeof opts.inputRef === "string" ? opts.inputRef.trim() : "";
  const selector = typeof opts.element === "string" ? opts.element.trim() : "";

  const hasRef = refId !== "";
  const hasSelector = selector !== "";
  if (hasRef && hasSelector) {
    throw new Error("inputRef and element are mutually exclusive");
  }
  if (!hasRef && !hasSelector) {
    throw new Error("inputRef or element is required");
  }

  const target = hasRef
    ? refLocator(page, refId)
    : page.locator(selector).first();

  await target.setInputFiles(opts.paths);

  try {
    const handle = await target.elementHandle();
    if (!handle) return;
    await handle.evaluate((node) => {
      // Order matters: "input" first, then "change".
      node.dispatchEvent(new Event("input", { bubbles: true }));
      node.dispatchEvent(new Event("change", { bubbles: true }));
    });
  } catch {
    // Best-effort for sites that don't react to setInputFiles alone.
  }
}
|
||||
|
||||
export async function armDialogViaPlaywright(opts: {
|
||||
cdpPort: number;
|
||||
targetId?: string;
|
||||
|
||||
@@ -339,6 +339,8 @@ export function registerBrowserAgentRoutes(
|
||||
const body = readBody(req);
|
||||
const targetId = toStringOrEmpty(body.targetId) || undefined;
|
||||
const ref = toStringOrEmpty(body.ref) || undefined;
|
||||
const inputRef = toStringOrEmpty(body.inputRef) || undefined;
|
||||
const element = toStringOrEmpty(body.element) || undefined;
|
||||
const paths = toStringArray(body.paths) ?? [];
|
||||
const timeoutMs = toNumber(body.timeoutMs);
|
||||
if (!paths.length) return jsonError(res, 400, "paths are required");
|
||||
@@ -346,18 +348,35 @@ export function registerBrowserAgentRoutes(
|
||||
const tab = await ctx.ensureTabAvailable(targetId);
|
||||
const pw = await requirePwAi(res, "file chooser hook");
|
||||
if (!pw) return;
|
||||
await pw.armFileUploadViaPlaywright({
|
||||
cdpPort: ctx.state().cdpPort,
|
||||
targetId: tab.targetId,
|
||||
paths,
|
||||
timeoutMs: timeoutMs ?? undefined,
|
||||
});
|
||||
if (ref) {
|
||||
await pw.clickViaPlaywright({
|
||||
if (inputRef || element) {
|
||||
if (ref) {
|
||||
return jsonError(
|
||||
res,
|
||||
400,
|
||||
"ref cannot be combined with inputRef/element",
|
||||
);
|
||||
}
|
||||
await pw.setInputFilesViaPlaywright({
|
||||
cdpPort: ctx.state().cdpPort,
|
||||
targetId: tab.targetId,
|
||||
ref,
|
||||
inputRef,
|
||||
element,
|
||||
paths,
|
||||
});
|
||||
} else {
|
||||
await pw.armFileUploadViaPlaywright({
|
||||
cdpPort: ctx.state().cdpPort,
|
||||
targetId: tab.targetId,
|
||||
paths,
|
||||
timeoutMs: timeoutMs ?? undefined,
|
||||
});
|
||||
if (ref) {
|
||||
await pw.clickViaPlaywright({
|
||||
cdpPort: ctx.state().cdpPort,
|
||||
targetId: tab.targetId,
|
||||
ref,
|
||||
});
|
||||
}
|
||||
}
|
||||
res.json({ ok: true });
|
||||
} catch (err) {
|
||||
|
||||
@@ -33,6 +33,7 @@ const pwMocks = vi.hoisted(() => ({
|
||||
pressKeyViaPlaywright: vi.fn(async () => {}),
|
||||
resizeViewportViaPlaywright: vi.fn(async () => {}),
|
||||
selectOptionViaPlaywright: vi.fn(async () => {}),
|
||||
setInputFilesViaPlaywright: vi.fn(async () => {}),
|
||||
snapshotAiViaPlaywright: vi.fn(async () => ({ snapshot: "ok" })),
|
||||
takeScreenshotViaPlaywright: vi.fn(async () => ({
|
||||
buffer: Buffer.from("png"),
|
||||
@@ -493,6 +494,37 @@ describe("browser control server", () => {
|
||||
ref: "e12",
|
||||
});
|
||||
|
||||
const uploadWithInputRef = await realFetch(`${base}/hooks/file-chooser`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ paths: ["/tmp/c.txt"], inputRef: "e99" }),
|
||||
}).then((r) => r.json());
|
||||
expect(uploadWithInputRef).toMatchObject({ ok: true });
|
||||
expect(pwMocks.setInputFilesViaPlaywright).toHaveBeenCalledWith({
|
||||
cdpPort: testPort + 1,
|
||||
targetId: "abcd1234",
|
||||
inputRef: "e99",
|
||||
element: undefined,
|
||||
paths: ["/tmp/c.txt"],
|
||||
});
|
||||
|
||||
const uploadWithElement = await realFetch(`${base}/hooks/file-chooser`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
paths: ["/tmp/d.txt"],
|
||||
element: "input[type=file]",
|
||||
}),
|
||||
}).then((r) => r.json());
|
||||
expect(uploadWithElement).toMatchObject({ ok: true });
|
||||
expect(pwMocks.setInputFilesViaPlaywright).toHaveBeenCalledWith({
|
||||
cdpPort: testPort + 1,
|
||||
targetId: "abcd1234",
|
||||
inputRef: undefined,
|
||||
element: "input[type=file]",
|
||||
paths: ["/tmp/d.txt"],
|
||||
});
|
||||
|
||||
const dialog = await realFetch(`${base}/hooks/dialog`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
|
||||
@@ -302,6 +302,8 @@ export function registerBrowserActionInputCommands(
|
||||
.description("Arm file upload for the next file chooser")
|
||||
.argument("<paths...>", "File paths to upload")
|
||||
.option("--ref <ref>", "Ref id from ai snapshot to click after arming")
|
||||
.option("--input-ref <ref>", "Ref id for <input type=file> to set directly")
|
||||
.option("--element <selector>", "CSS selector for <input type=file>")
|
||||
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
||||
.option(
|
||||
"--timeout-ms <ms>",
|
||||
@@ -315,6 +317,8 @@ export function registerBrowserActionInputCommands(
|
||||
const result = await browserArmFileChooser(baseUrl, {
|
||||
paths,
|
||||
ref: opts.ref?.trim() || undefined,
|
||||
inputRef: opts.inputRef?.trim() || undefined,
|
||||
element: opts.element?.trim() || undefined,
|
||||
targetId: opts.targetId?.trim() || undefined,
|
||||
timeoutMs: Number.isFinite(opts.timeoutMs)
|
||||
? opts.timeoutMs
|
||||
|
||||
Reference in New Issue
Block a user