From c0c20ebf3e9c7bb4e1aba84b21bfc567980908da Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 22 Dec 2025 23:40:57 +0000 Subject: [PATCH] feat: replace clawdis skills with tools --- docs/tools.md | 101 +++ skills/clawdis-browser/SKILL.md | 33 - skills/clawdis-canvas/SKILL.md | 49 -- skills/clawdis-cron/SKILL.md | 82 -- skills/clawdis-nodes/SKILL.md | 91 --- skills/clawdis-notify/SKILL.md | 60 -- src/agents/clawdis-tools.ts | 1272 +++++++++++++++++++++++++++++ src/agents/pi-embedded-helpers.ts | 2 +- src/agents/pi-tools.ts | 140 +--- src/agents/system-prompt.ts | 4 + src/agents/tool-images.ts | 143 ++++ 11 files changed, 1524 insertions(+), 453 deletions(-) create mode 100644 docs/tools.md delete mode 100644 skills/clawdis-browser/SKILL.md delete mode 100644 skills/clawdis-canvas/SKILL.md delete mode 100644 skills/clawdis-cron/SKILL.md delete mode 100644 skills/clawdis-nodes/SKILL.md delete mode 100644 skills/clawdis-notify/SKILL.md create mode 100644 src/agents/clawdis-tools.ts create mode 100644 src/agents/tool-images.ts diff --git a/docs/tools.md b/docs/tools.md new file mode 100644 index 000000000..283e0dfa5 --- /dev/null +++ b/docs/tools.md @@ -0,0 +1,101 @@ +--- +summary: "Agent tool surface for Clawdis (browser, canvas, nodes, cron) replacing clawdis-* skills" +read_when: + - Adding or modifying agent tools + - Retiring or changing clawdis-* skills +--- + +# Tools (Clawdis) + +Clawdis exposes **first-class agent tools** for browser, canvas, nodes, and cron. +These replace the old `clawdis-*` skills: the tools are typed, no shelling, +and the agent should rely on them directly. + +## Tool inventory + +### `clawdis_browser` +Control the dedicated clawd browser. 
+ +Core actions: +- `status`, `start`, `stop`, `tabs`, `open`, `focus`, `close` +- `snapshot` (aria/ai) +- `screenshot` (returns image block + `MEDIA:`) +- `act` (UI actions: click/type/press/hover/drag/select/fill/resize/wait/evaluate) +- `navigate`, `console`, `pdf`, `upload`, `dialog` + +Notes: +- Requires `browser.enabled=true` in `~/.clawdis/clawdis.json`. +- Uses `browser.controlUrl` unless `controlUrl` is passed explicitly. + +### `clawdis_canvas` +Drive the node Canvas (present, eval, snapshot, A2UI). + +Core actions: +- `present`, `hide`, `navigate`, `eval` +- `snapshot` (returns image block + `MEDIA:`) +- `a2ui_push`, `a2ui_reset` + +Notes: +- Uses gateway `node.invoke` under the hood. +- If no `node` is provided, the tool picks a default (single connected node or local mac node). +- A2UI is v0.8 only (no `createSurface`). + +### `clawdis_nodes` +Discover and target paired nodes; send notifications; capture camera/screen. + +Core actions: +- `status`, `describe` +- `pending`, `approve`, `reject` (pairing) +- `notify` (macOS `system.notify`) +- `camera_snap`, `camera_clip`, `screen_record` + +Notes: +- Camera/screen commands require the node app to be foregrounded. +- Images return image blocks + `MEDIA:`. +- Videos return `FILE:` (mp4). + +### `clawdis_cron` +Manage Gateway cron jobs and wakeups. + +Core actions: +- `status`, `list` +- `add`, `update`, `remove`, `run`, `runs` +- `wake` (enqueue system event + optional immediate heartbeat) + +Notes: +- `add` expects a full cron job object (same schema as `cron.add` RPC). +- `update` uses `{ jobId, patch }`. 
+ +## Parameters (common) + +Gateway-backed tools (`clawdis_canvas`, `clawdis_nodes`, `clawdis_cron`): +- `gatewayUrl` (default `ws://127.0.0.1:18789`) +- `gatewayToken` (if auth enabled) +- `timeoutMs` + +Browser tool: +- `controlUrl` (defaults from config) + +## Recommended agent flows + +Browser automation: +1) `clawdis_browser` → `status` / `start` +2) `snapshot` (ai or aria) +3) `act` (click/type/press) +4) `screenshot` if you need visual confirmation + +Canvas render: +1) `clawdis_canvas` → `present` +2) `a2ui_push` (optional) +3) `snapshot` + +Node targeting: +1) `clawdis_nodes` → `status` +2) `describe` on the chosen node +3) `notify` / `camera_snap` / `screen_record` + +## Safety + +- Avoid `system.run` (not exposed as a tool). +- Respect user consent for camera/screen capture. +- Use `status/describe` to ensure permissions before invoking media commands. diff --git a/skills/clawdis-browser/SKILL.md b/skills/clawdis-browser/SKILL.md deleted file mode 100644 index 912e758b0..000000000 --- a/skills/clawdis-browser/SKILL.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -name: clawdis-browser -description: Control clawd's dedicated browser (tabs, snapshots, actions) via the clawdis CLI. -homepage: https://clawdis.ai -metadata: {"clawdis":{"emoji":"🧭","requires":{"config":["browser.enabled"]}}} ---- - -# Clawdis Browser - -Use the clawd-managed Chrome/Chromium instance through `clawdis browser`. -Only available when `browser.enabled` is true. - -Core flow -- `clawdis browser status` -- `clawdis browser start` (if stopped) -- `clawdis browser tabs` -- `clawdis browser open <url>` - -Inspection -- `clawdis browser snapshot --format ai|aria [--limit N]` -- `clawdis browser screenshot [--full-page]` - -Actions -- `clawdis browser click <ref>` -- `clawdis browser type <ref> "text" --submit` -- `clawdis browser press Enter` -- `clawdis browser navigate <url>` -- `clawdis browser wait --text "Done"` - -Notes -- This is a dedicated profile; do not use the user's personal browser. 
-- If disabled, ask the user to enable `browser.enabled` in `~/.clawdis/clawdis.json`. -- Canvas UI is full-screen with native overlays. Keep critical controls out of the top-left/top-right/bottom edges (leave explicit gutters ~28px top, ~16px sides, ~20px bottom). Do not rely on safe-area insets. diff --git a/skills/clawdis-canvas/SKILL.md b/skills/clawdis-canvas/SKILL.md deleted file mode 100644 index e7db18bfd..000000000 --- a/skills/clawdis-canvas/SKILL.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -name: clawdis-canvas -description: Drive the Clawdis Canvas panel (present, eval, snapshot, A2UI) via the clawdis CLI, including gateway-hosted A2UI surfaces and action bridging. -homepage: https://clawdis.ai -metadata: {"clawdis":{"emoji":"🎨","always":true}} ---- - -# Clawdis Canvas - -Use Canvas to render HTML/JS or A2UI surfaces and capture snapshots. - -Core commands -- Present: `clawdis canvas present [--node <id>] [--target <urlOrPath>]` -- Hide: `clawdis canvas hide` -- Eval JS: `clawdis canvas eval --js "..."` -- Snapshot: `clawdis canvas snapshot` - -A2UI -- Push JSONL: `clawdis canvas a2ui push --jsonl /path/to/file.jsonl` -- Reset: `clawdis canvas a2ui reset` - -Notes -- Keep HTML under `~/clawd/canvas` when targeting remote nodes. -- Use snapshot after renders to verify UI state. -- Treat A2UI as gateway-hosted at `http(s)://<gateway-host>:18789/__clawdis__/a2ui/`. -- Rely on `canvas a2ui push/reset` to auto-navigate the Canvas to the gateway-hosted A2UI page. -- Expect A2UI to fail if the Gateway does not advertise `canvasHostUrl` or is unreachable: - - `A2UI_HOST_NOT_CONFIGURED` - - `A2UI_HOST_UNAVAILABLE` - -A2UI quick flow -1. Ensure the Gateway is running and reachable from the node. -2. Build JSONL with **v0.8** server→client messages (`beginRendering`, `surfaceUpdate`, `dataModelUpdate`, `deleteSurface`). - - Do not use v0.9 `createSurface` (unsupported). -3. Push JSONL and (optionally) snapshot the result. 
- -Example JSONL (v0.8) -```bash -cat > /tmp/a2ui-v0.8.jsonl <<'EOF' -{"surfaceUpdate":{"surfaceId":"main","components":[{"id":"root","component":{"Column":{"children":{"explicitList":["title","content"]}}}},{"id":"title","component":{"Text":{"text":{"literalString":"A2UI (v0.8)"},"usageHint":"h1"}}},{"id":"content","component":{"Text":{"text":{"literalString":"If you can read this, A2UI is live."},"usageHint":"body"}}}]}} -{"beginRendering":{"surfaceId":"main","root":"root"}} -EOF - -clawdis canvas a2ui push --jsonl /tmp/a2ui-v0.8.jsonl --node <id> -``` - -Action callbacks (A2UI → agent) -- A2UI user actions (buttons, etc.) are bridged from the WebView back to the node via `clawdisCanvasA2UIAction`. -- Handle them on the agent side as `CANVAS_A2UI` messages (node → gateway → agent). diff --git a/skills/clawdis-cron/SKILL.md b/skills/clawdis-cron/SKILL.md deleted file mode 100644 index 026be1fc7..000000000 --- a/skills/clawdis-cron/SKILL.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -name: clawdis-cron -description: Schedule jobs and wakeups via Clawdis Gateway cron.* RPC. -homepage: https://clawdis.ai -metadata: {"clawdis":{"emoji":"⏰","always":true}} ---- - -# Clawdis Cron - -Cron runs inside the Gateway. Jobs live in `~/.clawdis/cron/jobs.json` and run logs in `~/.clawdis/cron/runs/<jobId>.jsonl`. - -Enable/disable -- Enabled by default. -- Disable with config `cron.enabled=false` or env `CLAWDIS_SKIP_CRON=1`. -- Config: `cron.store`, `cron.maxConcurrentRuns`. - -Job fields -- `name` is required (non-empty). -- `description` is optional. - -RPC methods (Gateway WS) -- `cron.list`, `cron.status`, `cron.add`, `cron.update`, `cron.remove`, `cron.run`, `cron.runs` -- `wake` (enqueue system event + optionally trigger immediate heartbeat) - -Payload rules -- `sessionTarget: "main"` requires `payload.kind: "systemEvent"`. -- `sessionTarget: "isolated"` requires `payload.kind: "agentTurn"`. 
- -Examples - -One-shot reminder (main session, immediate wake): -```json -{ - "method": "cron.add", - "params": { - "name": "remind-me", - "enabled": true, - "schedule": { "kind": "at", "atMs": 1734715200000 }, - "sessionTarget": "main", - "wakeMode": "now", - "payload": { "kind": "systemEvent", "text": "Remind me in 20 minutes." } - } -} -``` - -Recurring hourly check (isolated job, no external delivery): -```json -{ - "method": "cron.add", - "params": { - "name": "hourly-check", - "enabled": true, - "schedule": { "kind": "every", "everyMs": 3600000 }, - "sessionTarget": "isolated", - "wakeMode": "now", - "payload": { "kind": "agentTurn", "message": "Check battery; report only if < 20%.", "deliver": false }, - "isolation": { "postToMainPrefix": "Cron" } - } -} -``` - -Cron expression (weekday 07:30): -```json -{ - "method": "cron.add", - "params": { - "name": "weekday-wakeup", - "enabled": true, - "schedule": { "kind": "cron", "expr": "30 7 * * 1-5", "tz": "America/Los_Angeles" }, - "sessionTarget": "isolated", - "wakeMode": "now", - "payload": { "kind": "agentTurn", "message": "Wake me up and start music.", "deliver": true, "channel": "whatsapp" } - } -} -``` - -Run history -- `cron.runs` returns recent JSONL entries for a job. - -Notes -- `wakeMode: "now"` triggers an immediate heartbeat for main jobs. -- Isolated jobs run in `cron:<jobId>` sessions and post a summary back to main. diff --git a/skills/clawdis-nodes/SKILL.md b/skills/clawdis-nodes/SKILL.md deleted file mode 100644 index e02b12fce..000000000 --- a/skills/clawdis-nodes/SKILL.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -name: clawdis-nodes -description: Discover, interpret, and target Clawdis nodes (paired devices) via the Gateway/CLI. Use when an agent must find available nodes, choose the best target machine, or reason about presence vs node availability (Tailnet/Tailscale optional). 
-homepage: https://clawdis.ai -metadata: {"clawdis":{"emoji":"🛰️"}} ---- - -# Clawdis Nodes - -Use the node system to target specific devices (macOS node mode, iOS, Android) for canvas/camera/screen/system actions. Use presence to infer which **user machine** is active, then pick the matching node. - -## Quick start - -List known nodes and whether they are paired/connected: -```bash -clawdis nodes status -``` - -Inspect a specific node (commands, caps, permissions): -```bash -clawdis nodes describe --node <idOrNameOrIp> -``` - -## Node discovery workflow (agent) - -1) **List nodes** with `clawdis nodes status`. -2) **Choose a target**: - - Prefer `connected` nodes with the capabilities you need. - - Use `perms` (permissions map) to avoid asking for actions that will fail. -3) **Confirm commands** with `clawdis nodes describe --node …`. -4) **Invoke actions** via `clawdis nodes …` (camera, canvas, screen, system). - -If no nodes are connected: -- Check pairing: `clawdis nodes pending` / `clawdis nodes list` -- Ask the user to open/foreground the node app if the action requires it (canvas/camera/screen on iOS/Android). - -## Presence vs nodes (don’t confuse them) - -**Presence** shows Gateway + connected clients (mac app, WebChat, CLI). -**Nodes** are paired devices that expose commands. - -Use presence to infer **where the user is active**, then map that to a node: - -```bash -clawdis gateway call system-presence -``` - -Heuristics: -- Pick the presence entry with the smallest `lastInputSeconds` (most active). -- Match presence `host` / `deviceFamily` to a node `displayName` / `deviceFamily`. -- If multiple matches, ask for clarification or use `nodes describe` to choose. - -Note: CLI connections (`client.mode=cli`) do **not** show up in presence. - -## Tailnet / Tailscale (optional context) - -Node discovery is Gateway‑owned; Tailnet details only matter for reaching the Gateway: -- On LAN, the Gateway advertises a Bridge via Bonjour. 
-- Cross‑network, prefer Tailnet MagicDNS or Tailnet IP to reach the Gateway. -- Once connected, always target nodes by id/name/IP via the Gateway (not direct). - -## Pairing & approvals - -List pairing requests: -```bash -clawdis nodes pending -``` - -Approve/reject: -```bash -clawdis nodes approve <requestId> -clawdis nodes reject <requestId> -``` - -## Typical agent usages - -Send a notification to a specific Mac node: -```bash -clawdis nodes notify --node <idOrNameOrIp> --title "Ping" --body "Gateway ready" -``` - -Capture a node canvas snapshot: -```bash -clawdis nodes canvas snapshot --node <idOrNameOrIp> --format png -``` - -## Troubleshooting - -- `NODE_BACKGROUND_UNAVAILABLE`: the node app must be foregrounded (iOS/Android). -- Missing permissions in `nodes status`: ask the user to grant permissions in the node app. -- No connected nodes: ensure the Gateway is reachable; check tailnet/SSH config if remote. diff --git a/skills/clawdis-notify/SKILL.md b/skills/clawdis-notify/SKILL.md deleted file mode 100644 index bcbf2ac42..000000000 --- a/skills/clawdis-notify/SKILL.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -name: clawdis-notify -description: Send system notifications to specific Clawdis nodes (macOS computers) via the Gateway and CLI. Use when you need to alert a person or confirm a remote action on a particular machine, or when an agent must push a notification to another computer. -homepage: https://clawdis.ai -metadata: {"clawdis":{"emoji":"🔔"}} ---- - -# Clawdis Notify - -## Overview - -Send local notifications to a specific Clawdis node (currently macOS only) via the Gateway CLI. - -## Quick start - -1) Find a target node. -```bash -clawdis nodes status -clawdis nodes describe --node <idOrNameOrIp> -``` - -2) Send the notification. -```bash -clawdis nodes notify --node <idOrNameOrIp> --title "Ping" --body "Gateway ready" -``` - -## Core command - -`clawdis nodes notify --node <idOrNameOrIp> [--title <text>] [--body <text>] [--sound <name>] [--priority <level>] [--delivery <mode>]` - -Notes: -- Provide at least one of `--title` or `--body`. -- `--delivery` defaults to `system`. 
-- Only macOS nodes expose `system.notify` right now. -- Notification permission must be granted in the macOS app or the command fails. - -## Multi‑computer usage - -Pick a specific node by id/name/IP, or iterate across nodes: - -```bash -for node in $(clawdis nodes status --json | jq -r '.nodes[].id'); do - clawdis nodes notify --node "$node" --title "Heads up" --body "Maintenance in 5 minutes" -done -``` - -## Troubleshooting - -- `nodes notify failed: ...` usually means the node is offline, not paired, or missing permission. -- If the Gateway is down or unreachable, notifications cannot be delivered. - -## Low‑level fallback (rare) - -If needed, use raw invoke: -```bash -clawdis nodes invoke \ - --node \ - --command system.notify \ - --params '{"title":"Ping","body":"Hello","sound":"Glass","priority":"active","delivery":"system"}' -``` diff --git a/src/agents/clawdis-tools.ts b/src/agents/clawdis-tools.ts new file mode 100644 index 000000000..e5ab22ed5 --- /dev/null +++ b/src/agents/clawdis-tools.ts @@ -0,0 +1,1272 @@ +import crypto from "node:crypto"; +import fs from "node:fs/promises"; + +import type { AgentTool, AgentToolResult } from "@mariozechner/pi-ai"; +import { type TSchema, Type } from "@sinclair/typebox"; + +import { + browserAct, + browserArmDialog, + browserArmFileChooser, + browserConsoleMessages, + browserNavigate, + browserPdfSave, + browserScreenshotAction, +} from "../browser/client-actions.js"; +import { + browserCloseTab, + browserFocusTab, + browserOpenTab, + browserSnapshot, + browserStart, + browserStatus, + browserStop, + browserTabs, +} from "../browser/client.js"; +import { resolveBrowserConfig } from "../browser/config.js"; +import { + cameraTempPath, + parseCameraClipPayload, + parseCameraSnapPayload, + writeBase64ToFile, + type CameraFacing, +} from "../cli/nodes-camera.js"; +import { + canvasSnapshotTempPath, + parseCanvasSnapshotPayload, +} from "../cli/nodes-canvas.js"; +import { + parseScreenRecordPayload, + screenRecordTempPath, 
+ writeScreenRecordToFile, +} from "../cli/nodes-screen.js"; +import { parseDurationMs } from "../cli/parse-duration.js"; +import { loadConfig } from "../config/config.js"; +import { callGateway } from "../gateway/call.js"; +import { detectMime } from "../media/mime.js"; +import { sanitizeToolResultImages } from "./tool-images.js"; + +type AnyAgentTool = AgentTool; + +const DEFAULT_GATEWAY_URL = "ws://127.0.0.1:18789"; + +type GatewayCallOptions = { + gatewayUrl?: string; + gatewayToken?: string; + timeoutMs?: number; +}; + +function resolveGatewayOptions(opts?: GatewayCallOptions) { + const url = + typeof opts?.gatewayUrl === "string" && opts.gatewayUrl.trim() + ? opts.gatewayUrl.trim() + : DEFAULT_GATEWAY_URL; + const token = + typeof opts?.gatewayToken === "string" && opts.gatewayToken.trim() + ? opts.gatewayToken.trim() + : undefined; + const timeoutMs = + typeof opts?.timeoutMs === "number" && Number.isFinite(opts.timeoutMs) + ? Math.max(1, Math.floor(opts.timeoutMs)) + : 10_000; + return { url, token, timeoutMs }; +} + +async function callGatewayTool( + method: string, + opts: GatewayCallOptions, + params?: unknown, + extra?: { expectFinal?: boolean }, +) { + const gateway = resolveGatewayOptions(opts); + return await callGateway({ + url: gateway.url, + token: gateway.token, + method, + params, + timeoutMs: gateway.timeoutMs, + expectFinal: extra?.expectFinal, + clientName: "agent", + mode: "agent", + }); +} + +function jsonResult(payload: unknown): AgentToolResult { + return { + content: [ + { + type: "text", + text: JSON.stringify(payload, null, 2), + }, + ], + details: payload, + }; +} + +async function imageResult(params: { + label: string; + path: string; + base64: string; + mimeType: string; + extraText?: string; + details?: Record; +}): Promise> { + const content: AgentToolResult["content"] = [ + { + type: "text", + text: params.extraText ?? 
`MEDIA:${params.path}`, + }, + { + type: "image", + data: params.base64, + mimeType: params.mimeType, + }, + ]; + const result: AgentToolResult = { + content, + details: { path: params.path, ...params.details }, + }; + return await sanitizeToolResultImages(result, params.label); +} + +async function imageResultFromFile(params: { + label: string; + path: string; + extraText?: string; + details?: Record; +}): Promise> { + const buf = await fs.readFile(params.path); + const mimeType = + (await detectMime({ buffer: buf.slice(0, 256) })) ?? "image/png"; + return await imageResult({ + label: params.label, + path: params.path, + base64: buf.toString("base64"), + mimeType, + extraText: params.extraText, + details: params.details, + }); +} + +function resolveBrowserBaseUrl(controlUrl?: string) { + const cfg = loadConfig(); + const resolved = resolveBrowserConfig(cfg.browser); + if (!resolved.enabled) { + throw new Error( + "Browser control is disabled. Set browser.enabled=true in ~/.clawdis/clawdis.json.", + ); + } + const url = controlUrl?.trim() ? 
controlUrl.trim() : resolved.controlUrl; + return url.replace(/\/$/, ""); +} + +type NodeListNode = { + nodeId: string; + displayName?: string; + platform?: string; + remoteIp?: string; + deviceFamily?: string; + modelIdentifier?: string; + caps?: string[]; + commands?: string[]; + permissions?: Record; + paired?: boolean; + connected?: boolean; +}; + +type PendingRequest = { + requestId: string; + nodeId: string; + displayName?: string; + platform?: string; + version?: string; + remoteIp?: string; + isRepair?: boolean; + ts: number; +}; + +type PairedNode = { + nodeId: string; + token?: string; + displayName?: string; + platform?: string; + version?: string; + remoteIp?: string; + permissions?: Record; + createdAtMs?: number; + approvedAtMs?: number; +}; + +type PairingList = { + pending: PendingRequest[]; + paired: PairedNode[]; +}; + +function parseNodeList(value: unknown): NodeListNode[] { + const obj = + typeof value === "object" && value !== null + ? (value as Record) + : {}; + return Array.isArray(obj.nodes) ? (obj.nodes as NodeListNode[]) : []; +} + +function parsePairingList(value: unknown): PairingList { + const obj = + typeof value === "object" && value !== null + ? (value as Record) + : {}; + const pending = Array.isArray(obj.pending) + ? (obj.pending as PendingRequest[]) + : []; + const paired = Array.isArray(obj.paired) ? 
(obj.paired as PairedNode[]) : []; + return { pending, paired }; +} + +function normalizeNodeKey(value: string) { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+/, "") + .replace(/-+$/, ""); +} + +async function loadNodes(opts: GatewayCallOptions): Promise { + try { + const res = (await callGatewayTool("node.list", opts, {})) as unknown; + return parseNodeList(res); + } catch { + const res = (await callGatewayTool("node.pair.list", opts, {})) as unknown; + const { paired } = parsePairingList(res); + return paired.map((n) => ({ + nodeId: n.nodeId, + displayName: n.displayName, + platform: n.platform, + remoteIp: n.remoteIp, + })); + } +} + +function pickDefaultNode(nodes: NodeListNode[]): NodeListNode | null { + const withCanvas = nodes.filter((n) => + Array.isArray(n.caps) ? n.caps.includes("canvas") : true, + ); + if (withCanvas.length === 0) return null; + + const connected = withCanvas.filter((n) => n.connected); + const candidates = connected.length > 0 ? connected : withCanvas; + if (candidates.length === 1) return candidates[0]; + + const local = candidates.filter( + (n) => + n.platform?.toLowerCase().startsWith("mac") && + typeof n.nodeId === "string" && + n.nodeId.startsWith("mac-"), + ); + if (local.length === 1) return local[0]; + + return null; +} + +async function resolveNodeId( + opts: GatewayCallOptions, + query?: string, + allowDefault = false, +) { + const nodes = await loadNodes(opts); + const q = String(query ?? "").trim(); + if (!q) { + if (allowDefault) { + const picked = pickDefaultNode(nodes); + if (picked) return picked.nodeId; + } + throw new Error("node required"); + } + + const qNorm = normalizeNodeKey(q); + const matches = nodes.filter((n) => { + if (n.nodeId === q) return true; + if (typeof n.remoteIp === "string" && n.remoteIp === q) return true; + const name = typeof n.displayName === "string" ? 
n.displayName : ""; + if (name && normalizeNodeKey(name) === qNorm) return true; + if (q.length >= 6 && n.nodeId.startsWith(q)) return true; + return false; + }); + + if (matches.length === 1) return matches[0].nodeId; + if (matches.length === 0) { + const known = nodes + .map((n) => n.displayName || n.remoteIp || n.nodeId) + .filter(Boolean) + .join(", "); + throw new Error(`unknown node: ${q}${known ? ` (known: ${known})` : ""}`); + } + throw new Error( + `ambiguous node: ${q} (matches: ${matches + .map((n) => n.displayName || n.remoteIp || n.nodeId) + .join(", ")})`, + ); +} + +const BrowserActSchema = Type.Object({ + kind: Type.Union([ + Type.Literal("click"), + Type.Literal("type"), + Type.Literal("press"), + Type.Literal("hover"), + Type.Literal("drag"), + Type.Literal("select"), + Type.Literal("fill"), + Type.Literal("resize"), + Type.Literal("wait"), + Type.Literal("evaluate"), + Type.Literal("close"), + ]), + ref: Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + doubleClick: Type.Optional(Type.Boolean()), + button: Type.Optional(Type.String()), + modifiers: Type.Optional(Type.Array(Type.String())), + text: Type.Optional(Type.String()), + submit: Type.Optional(Type.Boolean()), + slowly: Type.Optional(Type.Boolean()), + key: Type.Optional(Type.String()), + startRef: Type.Optional(Type.String()), + endRef: Type.Optional(Type.String()), + values: Type.Optional(Type.Array(Type.String())), + fields: Type.Optional(Type.Array(Type.Record(Type.String(), Type.Unknown()))), + width: Type.Optional(Type.Number()), + height: Type.Optional(Type.Number()), + timeMs: Type.Optional(Type.Number()), + textGone: Type.Optional(Type.String()), + fn: Type.Optional(Type.String()), +}); + +const BrowserToolSchema = Type.Union([ + Type.Object({ action: Type.Literal("status"), controlUrl: Type.Optional(Type.String()) }), + Type.Object({ action: Type.Literal("start"), controlUrl: Type.Optional(Type.String()) }), + Type.Object({ action: Type.Literal("stop"), 
controlUrl: Type.Optional(Type.String()) }), + Type.Object({ action: Type.Literal("tabs"), controlUrl: Type.Optional(Type.String()) }), + Type.Object({ + action: Type.Literal("open"), + controlUrl: Type.Optional(Type.String()), + targetUrl: Type.String(), + }), + Type.Object({ + action: Type.Literal("focus"), + controlUrl: Type.Optional(Type.String()), + targetId: Type.String(), + }), + Type.Object({ + action: Type.Literal("close"), + controlUrl: Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + action: Type.Literal("snapshot"), + controlUrl: Type.Optional(Type.String()), + format: Type.Optional(Type.Union([Type.Literal("aria"), Type.Literal("ai")])), + targetId: Type.Optional(Type.String()), + limit: Type.Optional(Type.Number()), + }), + Type.Object({ + action: Type.Literal("screenshot"), + controlUrl: Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + fullPage: Type.Optional(Type.Boolean()), + ref: Type.Optional(Type.String()), + element: Type.Optional(Type.String()), + type: Type.Optional(Type.Union([Type.Literal("png"), Type.Literal("jpeg")])), + }), + Type.Object({ + action: Type.Literal("navigate"), + controlUrl: Type.Optional(Type.String()), + targetUrl: Type.String(), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + action: Type.Literal("console"), + controlUrl: Type.Optional(Type.String()), + level: Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + action: Type.Literal("pdf"), + controlUrl: Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + }), + Type.Object({ + action: Type.Literal("upload"), + controlUrl: Type.Optional(Type.String()), + paths: Type.Array(Type.String()), + targetId: Type.Optional(Type.String()), + timeoutMs: Type.Optional(Type.Number()), + }), + Type.Object({ + action: Type.Literal("dialog"), + controlUrl: Type.Optional(Type.String()), + accept: Type.Boolean(), + promptText: 
Type.Optional(Type.String()), + targetId: Type.Optional(Type.String()), + timeoutMs: Type.Optional(Type.Number()), + }), + Type.Object({ + action: Type.Literal("act"), + controlUrl: Type.Optional(Type.String()), + request: BrowserActSchema, + }), +]); + +function createBrowserTool(): AnyAgentTool { + return { + label: "Clawdis Browser", + name: "clawdis_browser", + description: + "Control clawd's dedicated browser (status/start/stop/tabs/open/snapshot/screenshot/actions). Use snapshot+act for UI automation.", + parameters: BrowserToolSchema, + execute: async (_toolCallId, args) => { + const params = args as Record; + const action = String(params.action ?? ""); + const controlUrl = + typeof params.controlUrl === "string" ? params.controlUrl : undefined; + const baseUrl = resolveBrowserBaseUrl(controlUrl); + + switch (action) { + case "status": + return jsonResult(await browserStatus(baseUrl)); + case "start": + await browserStart(baseUrl); + return jsonResult(await browserStatus(baseUrl)); + case "stop": + await browserStop(baseUrl); + return jsonResult(await browserStatus(baseUrl)); + case "tabs": + return jsonResult({ tabs: await browserTabs(baseUrl) }); + case "open": { + const targetUrl = String(params.targetUrl ?? "").trim(); + if (!targetUrl) throw new Error("targetUrl required"); + return jsonResult(await browserOpenTab(baseUrl, targetUrl)); + } + case "focus": { + const targetId = String(params.targetId ?? "").trim(); + if (!targetId) throw new Error("targetId required"); + await browserFocusTab(baseUrl, targetId); + return jsonResult({ ok: true }); + } + case "close": { + const targetId = + typeof params.targetId === "string" ? params.targetId.trim() : ""; + if (targetId) await browserCloseTab(baseUrl, targetId); + else await browserAct(baseUrl, { kind: "close" }); + return jsonResult({ ok: true }); + } + case "snapshot": { + const format = + params.format === "ai" || params.format === "aria" + ? 
(params.format as "ai" | "aria") + : "aria"; + const targetId = + typeof params.targetId === "string" ? params.targetId.trim() : undefined; + const limit = + typeof params.limit === "number" && Number.isFinite(params.limit) + ? params.limit + : undefined; + const snapshot = await browserSnapshot(baseUrl, { + format, + targetId, + limit, + }); + if (snapshot.format === "ai") { + return { + content: [{ type: "text", text: snapshot.snapshot }], + details: snapshot, + }; + } + return jsonResult(snapshot); + } + case "screenshot": { + const targetId = + typeof params.targetId === "string" ? params.targetId.trim() : undefined; + const fullPage = Boolean(params.fullPage); + const ref = + typeof params.ref === "string" ? params.ref.trim() : undefined; + const element = + typeof params.element === "string" ? params.element.trim() : undefined; + const type = params.type === "jpeg" ? "jpeg" : "png"; + const result = await browserScreenshotAction(baseUrl, { + targetId, + fullPage, + ref, + element, + type, + }); + return await imageResultFromFile({ + label: "browser:screenshot", + path: result.path, + details: result, + }); + } + case "navigate": { + const targetUrl = String(params.targetUrl ?? "").trim(); + if (!targetUrl) throw new Error("targetUrl required"); + const targetId = + typeof params.targetId === "string" ? params.targetId.trim() : undefined; + return jsonResult( + await browserNavigate(baseUrl, { url: targetUrl, targetId }), + ); + } + case "console": { + const level = + typeof params.level === "string" ? params.level.trim() : undefined; + const targetId = + typeof params.targetId === "string" ? params.targetId.trim() : undefined; + return jsonResult( + await browserConsoleMessages(baseUrl, { level, targetId }), + ); + } + case "pdf": { + const targetId = + typeof params.targetId === "string" ? 
params.targetId.trim() : undefined; + const result = await browserPdfSave(baseUrl, { targetId }); + return { + content: [{ type: "text", text: `FILE:${result.path}` }], + details: result, + }; + } + case "upload": { + const paths = Array.isArray(params.paths) + ? params.paths.map((p) => String(p)) + : []; + if (paths.length === 0) throw new Error("paths required"); + const targetId = + typeof params.targetId === "string" ? params.targetId.trim() : undefined; + const timeoutMs = + typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs) + ? params.timeoutMs + : undefined; + return jsonResult( + await browserArmFileChooser(baseUrl, { paths, targetId, timeoutMs }), + ); + } + case "dialog": { + const accept = Boolean(params.accept); + const promptText = + typeof params.promptText === "string" ? params.promptText : undefined; + const targetId = + typeof params.targetId === "string" ? params.targetId.trim() : undefined; + const timeoutMs = + typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs) + ? 
/** Connection and targeting fields accepted by every `clawdis_canvas` action. */
const canvasCommonFields = () => ({
  gatewayUrl: Type.Optional(Type.String()),
  gatewayToken: Type.Optional(Type.String()),
  timeoutMs: Type.Optional(Type.Number()),
  node: Type.Optional(Type.String()),
});

/** Parameter schema for `clawdis_canvas`: a union with one object variant per `action`. */
const CanvasToolSchema = Type.Union([
  Type.Object({
    action: Type.Literal("present"),
    ...canvasCommonFields(),
    target: Type.Optional(Type.String()),
    x: Type.Optional(Type.Number()),
    y: Type.Optional(Type.Number()),
    width: Type.Optional(Type.Number()),
    height: Type.Optional(Type.Number()),
  }),
  Type.Object({
    action: Type.Literal("hide"),
    ...canvasCommonFields(),
  }),
  Type.Object({
    action: Type.Literal("navigate"),
    ...canvasCommonFields(),
    url: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("eval"),
    ...canvasCommonFields(),
    javaScript: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("snapshot"),
    ...canvasCommonFields(),
    format: Type.Optional(
      Type.Union([
        Type.Literal("png"),
        Type.Literal("jpg"),
        Type.Literal("jpeg"),
      ]),
    ),
    maxWidth: Type.Optional(Type.Number()),
    quality: Type.Optional(Type.Number()),
  }),
  Type.Object({
    action: Type.Literal("a2ui_push"),
    ...canvasCommonFields(),
    jsonl: Type.Optional(Type.String()),
    jsonlPath: Type.Optional(Type.String()),
  }),
  Type.Object({
    action: Type.Literal("a2ui_reset"),
    ...canvasCommonFields(),
  }),
]);

/**
 * Builds the `clawdis_canvas` agent tool.
 *
 * Each call resolves the target node through `resolveNodeId` and then forwards a
 * `canvas.*` command via the gateway `node.invoke` method. Actions without a
 * meaningful payload answer with `{ ok: true }`; `eval` returns the evaluation
 * result as text when it is non-empty, and `snapshot` writes the image to a temp
 * file and returns it through `imageResult`.
 *
 * @returns The tool definition (label, name, description, schema, `execute`).
 */
function createCanvasTool(): AnyAgentTool {
  const stringOrUndefined = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;
  const numberOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;
  const finiteOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" && Number.isFinite(value) ? value : undefined;

  return {
    label: "Clawdis Canvas",
    name: "clawdis_canvas",
    description:
      "Control node canvases (present/hide/navigate/eval/snapshot/A2UI). Use snapshot to capture the rendered UI.",
    parameters: CanvasToolSchema,
    execute: async (_toolCallId, args) => {
      const params = args as Record<string, unknown>;
      const action = String(params.action ?? "");
      const gatewayOpts: GatewayCallOptions = {
        gatewayUrl: stringOrUndefined(params.gatewayUrl),
        gatewayToken: stringOrUndefined(params.gatewayToken),
        // Note: any number is forwarded here (no finiteness check), unlike the
        // per-action numeric parameters below.
        timeoutMs: numberOrUndefined(params.timeoutMs),
      };

      // The node lookup happens before dispatch, so it also runs for an unknown action.
      // NOTE(review): the trailing `true` presumably allows falling back to a default
      // node when none is named — confirm against resolveNodeId.
      const nodeId = await resolveNodeId(
        gatewayOpts,
        stringOrUndefined(params.node),
        true,
      );

      // Every canvas command goes through node.invoke with a fresh idempotency key.
      const invoke = async (
        command: string,
        invokeParams?: Record<string, unknown>,
      ) =>
        callGatewayTool("node.invoke", gatewayOpts, {
          nodeId,
          command,
          params: invokeParams,
          idempotencyKey: crypto.randomUUID(),
        });

      if (action === "present") {
        const invokeParams: Record<string, unknown> = {};
        const target = stringOrUndefined(params.target)?.trim();
        if (target) invokeParams.url = target;

        const placement = {
          x: numberOrUndefined(params.x),
          y: numberOrUndefined(params.y),
          width: numberOrUndefined(params.width),
          height: numberOrUndefined(params.height),
        };
        // Placement is only sent when at least one coordinate is a finite number.
        const hasPlacement = Object.values(placement).some((value) =>
          Number.isFinite(value),
        );
        if (hasPlacement) invokeParams.placement = placement;

        await invoke("canvas.present", invokeParams);
        return jsonResult({ ok: true });
      }

      if (action === "hide") {
        await invoke("canvas.hide", undefined);
        return jsonResult({ ok: true });
      }

      if (action === "navigate") {
        const url = String(params.url ?? "").trim();
        if (!url) throw new Error("url required");
        await invoke("canvas.navigate", { url });
        return jsonResult({ ok: true });
      }

      if (action === "eval") {
        const javaScript = String(params.javaScript ?? "").trim();
        if (!javaScript) throw new Error("javaScript required");
        const raw = (await invoke("canvas.eval", { javaScript })) as {
          payload?: { result?: string };
        };
        const result = raw?.payload?.result;
        // An empty or missing result is reported as a plain success.
        if (result) return { content: [{ type: "text", text: result }] };
        return jsonResult({ ok: true });
      }

      if (action === "snapshot") {
        const requested = stringOrUndefined(params.format)?.toLowerCase() ?? "png";
        // "jpg" and "jpeg" both map to "jpeg"; anything else becomes "png".
        const format =
          requested === "jpg" || requested === "jpeg" ? "jpeg" : "png";
        const raw = (await invoke("canvas.snapshot", {
          format,
          maxWidth: finiteOrUndefined(params.maxWidth),
          quality: finiteOrUndefined(params.quality),
        })) as { payload?: unknown };
        const payload = parseCanvasSnapshotPayload(raw?.payload);
        const isJpeg = payload.format === "jpeg";
        const filePath = canvasSnapshotTempPath({
          ext: isJpeg ? "jpg" : payload.format,
        });
        await writeBase64ToFile(filePath, payload.base64);
        return await imageResult({
          label: "canvas:snapshot",
          path: filePath,
          base64: payload.base64,
          mimeType: isJpeg ? "image/jpeg" : "image/png",
          details: { format: payload.format },
        });
      }

      if (action === "a2ui_push") {
        // Inline JSONL wins over a file path; the path is read only when inline is blank.
        let jsonl = "";
        if (typeof params.jsonl === "string" && params.jsonl.trim()) {
          jsonl = params.jsonl;
        } else if (
          typeof params.jsonlPath === "string" &&
          params.jsonlPath.trim()
        ) {
          jsonl = await fs.readFile(params.jsonlPath.trim(), "utf8");
        }
        if (!jsonl.trim()) throw new Error("jsonl or jsonlPath required");
        await invoke("canvas.a2ui.pushJSONL", { jsonl });
        return jsonResult({ ok: true });
      }

      if (action === "a2ui_reset") {
        await invoke("canvas.a2ui.reset", undefined);
        return jsonResult({ ok: true });
      }

      throw new Error(`Unknown action: ${action}`);
    },
  };
}
/** Gateway connection overrides accepted by every `clawdis_nodes` action. */
const nodesGatewayFields = () => ({
  gatewayUrl: Type.Optional(Type.String()),
  gatewayToken: Type.Optional(Type.String()),
  timeoutMs: Type.Optional(Type.Number()),
});

/** Parameter schema for `clawdis_nodes`: a union with one object variant per `action`. */
const NodesToolSchema = Type.Union([
  Type.Object({
    action: Type.Literal("status"),
    ...nodesGatewayFields(),
  }),
  Type.Object({
    action: Type.Literal("describe"),
    ...nodesGatewayFields(),
    node: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("pending"),
    ...nodesGatewayFields(),
  }),
  Type.Object({
    action: Type.Literal("approve"),
    ...nodesGatewayFields(),
    requestId: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("reject"),
    ...nodesGatewayFields(),
    requestId: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("notify"),
    ...nodesGatewayFields(),
    node: Type.String(),
    title: Type.Optional(Type.String()),
    body: Type.Optional(Type.String()),
    sound: Type.Optional(Type.String()),
    priority: Type.Optional(
      Type.Union([
        Type.Literal("passive"),
        Type.Literal("active"),
        Type.Literal("timeSensitive"),
      ]),
    ),
    delivery: Type.Optional(
      Type.Union([
        Type.Literal("system"),
        Type.Literal("overlay"),
        Type.Literal("auto"),
      ]),
    ),
  }),
  Type.Object({
    action: Type.Literal("camera_snap"),
    ...nodesGatewayFields(),
    node: Type.String(),
    facing: Type.Optional(
      Type.Union([
        Type.Literal("front"),
        Type.Literal("back"),
        Type.Literal("both"),
      ]),
    ),
    maxWidth: Type.Optional(Type.Number()),
    quality: Type.Optional(Type.Number()),
  }),
  Type.Object({
    action: Type.Literal("camera_clip"),
    ...nodesGatewayFields(),
    node: Type.String(),
    facing: Type.Optional(
      Type.Union([Type.Literal("front"), Type.Literal("back")]),
    ),
    duration: Type.Optional(Type.String()),
    durationMs: Type.Optional(Type.Number()),
    includeAudio: Type.Optional(Type.Boolean()),
  }),
  Type.Object({
    action: Type.Literal("screen_record"),
    ...nodesGatewayFields(),
    node: Type.String(),
    duration: Type.Optional(Type.String()),
    durationMs: Type.Optional(Type.Number()),
    fps: Type.Optional(Type.Number()),
    screenIndex: Type.Optional(Type.Number()),
    includeAudio: Type.Optional(Type.Boolean()),
    outPath: Type.Optional(Type.String()),
  }),
]);

/**
 * Builds the `clawdis_nodes` agent tool.
 *
 * `status`, `describe` and the pairing actions are thin wrappers around gateway
 * `node.*` methods. `notify`, `camera_snap`, `camera_clip` and `screen_record`
 * go through `node.invoke`; captured media is decoded to a file and reported as
 * `MEDIA:<path>` (images, together with an image block) or `FILE:<path>` (video).
 *
 * @returns The tool definition (label, name, description, schema, `execute`).
 */
function createNodesTool(): AnyAgentTool {
  return {
    label: "Clawdis Nodes",
    name: "clawdis_nodes",
    description:
      "Discover and control paired nodes (status/describe/pairing/notify/camera/screen).",
    parameters: NodesToolSchema,
    execute: async (_toolCallId, args) => {
      const params = args as Record<string, unknown>;
      const action = String(params.action ?? "");

      // --- small parameter readers -------------------------------------------------
      const text = (key: string): string | undefined => {
        const value = params[key];
        return typeof value === "string" ? value : undefined;
      };
      const finite = (key: string): number | undefined => {
        const value = params[key];
        return typeof value === "number" && Number.isFinite(value)
          ? value
          : undefined;
      };
      // Stringifies, trims, and rejects blank values with "<key> required".
      const requireTrimmed = (key: string): string => {
        const value = String(params[key] ?? "").trim();
        if (!value) throw new Error(`${key} required`);
        return value;
      };
      // Explicit finite `durationMs` wins, then a `duration` string, then the fallback.
      const durationMsOr = (fallback: number) => {
        const explicit = finite("durationMs");
        if (explicit !== undefined) return explicit;
        return typeof params.duration === "string"
          ? parseDurationMs(params.duration)
          : fallback;
      };
      // Audio is recorded unless the caller passes a boolean `includeAudio`.
      const includeAudioFlag = () =>
        typeof params.includeAudio === "boolean" ? params.includeAudio : true;

      const gatewayOpts: GatewayCallOptions = {
        gatewayUrl: text("gatewayUrl"),
        gatewayToken: text("gatewayToken"),
        // Any number is forwarded here; no finiteness check is applied.
        timeoutMs:
          typeof params.timeoutMs === "number" ? params.timeoutMs : undefined,
      };

      // Sends one command to a node, with a fresh idempotency key per call.
      const invokeOnNode = async (
        nodeId: Awaited<ReturnType<typeof resolveNodeId>>,
        command: string,
        commandParams: Record<string, unknown>,
      ) =>
        callGatewayTool("node.invoke", gatewayOpts, {
          nodeId,
          command,
          params: commandParams,
          idempotencyKey: crypto.randomUUID(),
        });

      switch (action) {
        case "status": {
          const nodes = await callGatewayTool("node.list", gatewayOpts, {});
          return jsonResult(nodes);
        }

        case "describe": {
          const nodeId = await resolveNodeId(gatewayOpts, requireTrimmed("node"));
          const description = await callGatewayTool(
            "node.describe",
            gatewayOpts,
            { nodeId },
          );
          return jsonResult(description);
        }

        case "pending": {
          const requests = await callGatewayTool(
            "node.pair.list",
            gatewayOpts,
            {},
          );
          return jsonResult(requests);
        }

        case "approve": {
          const requestId = requireTrimmed("requestId");
          const outcome = await callGatewayTool(
            "node.pair.approve",
            gatewayOpts,
            { requestId },
          );
          return jsonResult(outcome);
        }

        case "reject": {
          const requestId = requireTrimmed("requestId");
          const outcome = await callGatewayTool(
            "node.pair.reject",
            gatewayOpts,
            { requestId },
          );
          return jsonResult(outcome);
        }

        case "notify": {
          const node = requireTrimmed("node");
          const title = text("title") ?? "";
          const body = text("body") ?? "";
          // The message is validated before the node is resolved.
          if (!title.trim() && !body.trim()) {
            throw new Error("title or body required");
          }
          const nodeId = await resolveNodeId(gatewayOpts, node);
          await invokeOnNode(nodeId, "system.notify", {
            title: title.trim() || undefined,
            body: body.trim() || undefined,
            sound: text("sound"),
            priority: text("priority"),
            delivery: text("delivery"),
          });
          return jsonResult({ ok: true });
        }

        case "camera_snap": {
          const nodeId = await resolveNodeId(gatewayOpts, requireTrimmed("node"));

          // Default is one shot per camera; "both" expands to front then back.
          const requested = text("facing")?.toLowerCase() ?? "both";
          let facings: CameraFacing[];
          if (requested === "both") {
            facings = ["front", "back"];
          } else if (requested === "front" || requested === "back") {
            facings = [requested];
          } else {
            throw new Error("invalid facing (front|back|both)");
          }

          const maxWidth = finite("maxWidth");
          const quality = finite("quality");
          const content: AgentToolResult<unknown>["content"] = [];
          const details: Array<Record<string, unknown>> = [];

          // Shots are taken one after another, not in parallel.
          for (const facing of facings) {
            const raw = (await invokeOnNode(nodeId, "camera.snap", {
              facing,
              maxWidth,
              quality,
              format: "jpg",
            })) as { payload?: unknown };
            const payload = parseCameraSnapPayload(raw?.payload);
            const isJpeg = payload.format === "jpeg";
            const filePath = cameraTempPath({
              kind: "snap",
              facing,
              ext: isJpeg ? "jpg" : payload.format,
            });
            await writeBase64ToFile(filePath, payload.base64);
            content.push(
              { type: "text", text: `MEDIA:${filePath}` },
              {
                type: "image",
                data: payload.base64,
                mimeType: isJpeg ? "image/jpeg" : "image/png",
              },
            );
            details.push({
              facing,
              path: filePath,
              width: payload.width,
              height: payload.height,
            });
          }

          const result: AgentToolResult<unknown> = { content, details };
          return await sanitizeToolResultImages(result, "nodes:camera_snap");
        }

        case "camera_clip": {
          const nodeId = await resolveNodeId(gatewayOpts, requireTrimmed("node"));
          const facing = text("facing")?.toLowerCase() ?? "front";
          if (facing !== "front" && facing !== "back") {
            throw new Error("invalid facing (front|back)");
          }
          const raw = (await invokeOnNode(nodeId, "camera.clip", {
            facing,
            durationMs: durationMsOr(3000),
            includeAudio: includeAudioFlag(),
            format: "mp4",
          })) as { payload?: unknown };
          const payload = parseCameraClipPayload(raw?.payload);
          const filePath = cameraTempPath({
            kind: "clip",
            facing,
            ext: payload.format,
          });
          await writeBase64ToFile(filePath, payload.base64);
          return {
            content: [{ type: "text", text: `FILE:${filePath}` }],
            details: {
              facing,
              path: filePath,
              durationMs: payload.durationMs,
              hasAudio: payload.hasAudio,
            },
          };
        }

        case "screen_record": {
          const nodeId = await resolveNodeId(gatewayOpts, requireTrimmed("node"));
          const durationMs = durationMsOr(10_000);
          const fps = finite("fps") ?? 10;
          const screenIndex = finite("screenIndex") ?? 0;
          const includeAudio = includeAudioFlag();
          const raw = (await invokeOnNode(nodeId, "screen.record", {
            durationMs,
            screenIndex,
            fps,
            format: "mp4",
            includeAudio,
          })) as { payload?: unknown };
          const payload = parseScreenRecordPayload(raw?.payload);

          // A non-blank `outPath` overrides the generated temp path.
          const outPath = text("outPath")?.trim();
          const filePath = outPath
            ? outPath
            : screenRecordTempPath({ ext: payload.format || "mp4" });
          const written = await writeScreenRecordToFile(
            filePath,
            payload.base64,
          );
          return {
            content: [{ type: "text", text: `FILE:${written.path}` }],
            details: {
              path: written.path,
              durationMs: payload.durationMs,
              fps: payload.fps,
              screenIndex: payload.screenIndex,
              hasAudio: payload.hasAudio,
            },
          };
        }

        default:
          throw new Error(`Unknown action: ${action}`);
      }
    },
  };
}
/** Gateway connection overrides accepted by every `clawdis_cron` action. */
const cronGatewayFields = () => ({
  gatewayUrl: Type.Optional(Type.String()),
  gatewayToken: Type.Optional(Type.String()),
  timeoutMs: Type.Optional(Type.Number()),
});

/** Parameter schema for `clawdis_cron`: a union with one object variant per `action`. */
const CronToolSchema = Type.Union([
  Type.Object({
    action: Type.Literal("status"),
    ...cronGatewayFields(),
  }),
  Type.Object({
    action: Type.Literal("list"),
    ...cronGatewayFields(),
    includeDisabled: Type.Optional(Type.Boolean()),
  }),
  Type.Object({
    action: Type.Literal("add"),
    ...cronGatewayFields(),
    job: Type.Object({}, { additionalProperties: true }),
  }),
  Type.Object({
    action: Type.Literal("update"),
    ...cronGatewayFields(),
    jobId: Type.String(),
    patch: Type.Object({}, { additionalProperties: true }),
  }),
  Type.Object({
    action: Type.Literal("remove"),
    ...cronGatewayFields(),
    jobId: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("run"),
    ...cronGatewayFields(),
    jobId: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("runs"),
    ...cronGatewayFields(),
    jobId: Type.String(),
  }),
  Type.Object({
    action: Type.Literal("wake"),
    ...cronGatewayFields(),
    text: Type.String(),
    mode: Type.Optional(
      Type.Union([Type.Literal("now"), Type.Literal("next-heartbeat")]),
    ),
  }),
]);

/**
 * Builds the `clawdis_cron` agent tool.
 *
 * Each action maps onto one gateway method (`cron.status`, `cron.list`,
 * `cron.add`, `cron.update`, `cron.remove`, `cron.run`, `cron.runs`, `wake`) and
 * the gateway response is returned through `jsonResult`.
 *
 * @returns The tool definition (label, name, description, schema, `execute`).
 */
function createCronTool(): AnyAgentTool {
  return {
    label: "Clawdis Cron",
    name: "clawdis_cron",
    description:
      "Manage Gateway cron jobs (status/list/add/update/remove/run/runs) and send wake events.",
    parameters: CronToolSchema,
    execute: async (_toolCallId, args) => {
      const params = args as Record<string, unknown>;
      const action = String(params.action ?? "");
      const optionalString = (value: unknown): string | undefined =>
        typeof value === "string" ? value : undefined;
      const gatewayOpts: GatewayCallOptions = {
        gatewayUrl: optionalString(params.gatewayUrl),
        gatewayToken: optionalString(params.gatewayToken),
        // Any number is forwarded here; no finiteness check is applied.
        timeoutMs:
          typeof params.timeoutMs === "number" ? params.timeoutMs : undefined,
      };

      // Calls a gateway method and wraps its response as a JSON tool result.
      const forward = async (
        method: string,
        payload: Parameters<typeof callGatewayTool>[2],
      ) => jsonResult(await callGatewayTool(method, gatewayOpts, payload));

      // Shared by update/remove/run/runs: a non-blank job id is mandatory.
      const requireJobId = (): string => {
        const jobId = String(params.jobId ?? "").trim();
        if (!jobId) throw new Error("jobId required");
        return jobId;
      };

      if (action === "status") {
        return forward("cron.status", {});
      }

      if (action === "list") {
        return forward("cron.list", {
          includeDisabled: Boolean(params.includeDisabled),
        });
      }

      if (action === "add") {
        if (!params.job || typeof params.job !== "object") {
          throw new Error("job required");
        }
        // The job object is passed through untouched as the RPC parameters.
        return forward("cron.add", params.job);
      }

      if (action === "update") {
        const jobId = requireJobId();
        if (!params.patch || typeof params.patch !== "object") {
          throw new Error("patch required");
        }
        return forward("cron.update", { jobId, patch: params.patch });
      }

      if (action === "remove") {
        return forward("cron.remove", { jobId: requireJobId() });
      }

      if (action === "run") {
        return forward("cron.run", { jobId: requireJobId() });
      }

      if (action === "runs") {
        return forward("cron.runs", { jobId: requireJobId() });
      }

      if (action === "wake") {
        const text = String(params.text ?? "").trim();
        if (!text) throw new Error("text required");
        // Anything other than an explicit "now" is delivered on the next heartbeat.
        const mode = params.mode === "now" ? "now" : "next-heartbeat";
        const response = await callGatewayTool(
          "wake",
          gatewayOpts,
          { mode, text },
          { expectFinal: false },
        );
        return jsonResult(response);
      }

      throw new Error(`Unknown action: ${action}`);
    },
  };
}

/**
 * Creates one instance of every first-class Clawdis tool, in the order
 * browser, canvas, nodes, cron.
 */
export function createClawdisTools(): AnyAgentTool[] {
  const factories = [
    createBrowserTool,
    createCanvasTool,
    createNodesTool,
    createCronTool,
  ];
  return factories.map((create) => create());
}
AgentToolResult } from "@mariozechner/pi-ai"; import { bashTool, codingTools, readTool } from "@mariozechner/pi-coding-agent"; import { type TSchema, Type } from "@sinclair/typebox"; -import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js"; import { detectMime } from "../media/mime.js"; import { startWebLoginWithQr, waitForWebLogin } from "../web/login-qr.js"; +import { createClawdisTools } from "./clawdis-tools.js"; +import { sanitizeToolResultImages } from "./tool-images.js"; // TODO(steipete): Remove this wrapper once pi-mono ships file-magic MIME detection // for `read` image payloads in `@mariozechner/pi-coding-agent` (then switch back to `codingTools` directly). @@ -12,14 +13,6 @@ type ToolContentBlock = AgentToolResult["content"][number]; type ImageContentBlock = Extract; type TextContentBlock = Extract; -// Anthropic Messages API limitation (observed in Clawdis sessions): -// When sending many images in a single request (e.g. via session history + tool results), -// Anthropic rejects any image where *either* dimension exceeds 2000px. -// -// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale -// all base64 image blocks above this limit while preserving aspect ratio. 
-const MAX_IMAGE_DIMENSION_PX = 2000; - async function sniffMimeFromBase64( base64: string, ): Promise { @@ -170,133 +163,6 @@ function createWhatsAppLoginTool(): AnyAgentTool { }; } -function isImageBlock(block: unknown): block is ImageContentBlock { - if (!block || typeof block !== "object") return false; - const rec = block as Record; - return ( - rec.type === "image" && - typeof rec.data === "string" && - typeof rec.mimeType === "string" - ); -} - -function isTextBlock(block: unknown): block is TextContentBlock { - if (!block || typeof block !== "object") return false; - const rec = block as Record; - return rec.type === "text" && typeof rec.text === "string"; -} - -async function resizeImageBase64IfNeeded(params: { - base64: string; - mimeType: string; - maxDimensionPx: number; -}): Promise<{ base64: string; mimeType: string; resized: boolean }> { - const buf = Buffer.from(params.base64, "base64"); - const meta = await getImageMetadata(buf); - const width = meta?.width; - const height = meta?.height; - if ( - typeof width !== "number" || - typeof height !== "number" || - (width <= params.maxDimensionPx && height <= params.maxDimensionPx) - ) { - return { base64: params.base64, mimeType: params.mimeType, resized: false }; - } - - const mime = params.mimeType.toLowerCase(); - let out: Buffer; - try { - const mod = (await import("sharp")) as unknown as { - default?: typeof import("sharp"); - }; - const sharp = mod.default ?? 
(mod as unknown as typeof import("sharp")); - const img = sharp(buf, { failOnError: false }).resize({ - width: params.maxDimensionPx, - height: params.maxDimensionPx, - fit: "inside", - withoutEnlargement: true, - }); - if (mime === "image/jpeg" || mime === "image/jpg") { - out = await img.jpeg({ quality: 85 }).toBuffer(); - } else if (mime === "image/webp") { - out = await img.webp({ quality: 85 }).toBuffer(); - } else if (mime === "image/png") { - out = await img.png().toBuffer(); - } else { - out = await img.png().toBuffer(); - } - } catch { - // Bun can't load sharp native addons. Fall back to a JPEG conversion. - out = await resizeToJpeg({ - buffer: buf, - maxSide: params.maxDimensionPx, - quality: 85, - withoutEnlargement: true, - }); - } - - const sniffed = await detectMime({ buffer: out.slice(0, 256) }); - const nextMime = sniffed?.startsWith("image/") ? sniffed : params.mimeType; - - return { base64: out.toString("base64"), mimeType: nextMime, resized: true }; -} - -export async function sanitizeContentBlocksImages( - blocks: ToolContentBlock[], - label: string, - opts: { maxDimensionPx?: number } = {}, -): Promise { - const maxDimensionPx = Math.max( - opts.maxDimensionPx ?? 
MAX_IMAGE_DIMENSION_PX, - 1, - ); - const out: ToolContentBlock[] = []; - - for (const block of blocks) { - if (!isImageBlock(block)) { - out.push(block); - continue; - } - - const data = block.data.trim(); - if (!data) { - out.push({ - type: "text", - text: `[${label}] omitted empty image payload`, - } satisfies TextContentBlock); - continue; - } - - try { - const resized = await resizeImageBase64IfNeeded({ - base64: data, - mimeType: block.mimeType, - maxDimensionPx, - }); - out.push({ ...block, data: resized.base64, mimeType: resized.mimeType }); - } catch (err) { - out.push({ - type: "text", - text: `[${label}] omitted image payload: ${String(err)}`, - } satisfies TextContentBlock); - } - } - - return out; -} - -export async function sanitizeToolResultImages( - result: AgentToolResult, - label: string, - opts: { maxDimensionPx?: number } = {}, -): Promise> { - const content = Array.isArray(result.content) ? result.content : []; - if (!content.some((b) => isImageBlock(b) || isTextBlock(b))) return result; - - const next = await sanitizeContentBlocksImages(content, label, opts); - return { ...result, content: next }; -} - function createClawdisReadTool(base: AnyAgentTool): AnyAgentTool { return { ...base, @@ -340,5 +206,5 @@ export function createClawdisCodingTools(): AnyAgentTool[] { ? 
createClawdisBashTool(tool) : (tool as AnyAgentTool), ); - return [...base, createWhatsAppLoginTool()]; + return [...base, createWhatsAppLoginTool(), ...createClawdisTools()]; } diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index 7e4ef6698..8cd22d26c 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -18,6 +18,10 @@ export function buildAgentSystemPromptAppend(params: { "- find: find files by glob pattern", "- ls: list directory contents", "- whatsapp_login: generate a WhatsApp QR code and wait for linking", + "- clawdis_browser: control clawd's dedicated browser", + "- clawdis_canvas: present/eval/snapshot the Canvas", + "- clawdis_nodes: list/describe/notify/camera/screen on paired nodes", + "- clawdis_cron: manage cron jobs and wake events", "TOOLS.md does not control tool availability; it is user guidance for how to use external tools.", "", "## Workspace", diff --git a/src/agents/tool-images.ts b/src/agents/tool-images.ts new file mode 100644 index 000000000..a4357b504 --- /dev/null +++ b/src/agents/tool-images.ts @@ -0,0 +1,143 @@ +import type { AgentToolResult } from "@mariozechner/pi-ai"; + +import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js"; +import { detectMime } from "../media/mime.js"; + +type ToolContentBlock = AgentToolResult["content"][number]; +type ImageContentBlock = Extract; +type TextContentBlock = Extract; + +// Anthropic Messages API limitation (observed in Clawdis sessions): +// When sending many images in a single request (e.g. via session history + tool results), +// Anthropic rejects any image where *either* dimension exceeds 2000px. +// +// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale +// all base64 image blocks above this limit while preserving aspect ratio. 
/** Default per-side pixel cap applied to base64 image blocks. */
const MAX_IMAGE_DIMENSION_PX = 2000;

/**
 * Type guard for image content blocks: an object with `type === "image"` and
 * string `data` / `mimeType` fields.
 */
function isImageBlock(block: unknown): block is ImageContentBlock {
  if (!block || typeof block !== "object") return false;
  const rec = block as Record<string, unknown>;
  return (
    rec.type === "image" &&
    typeof rec.data === "string" &&
    typeof rec.mimeType === "string"
  );
}

/**
 * Downscales a base64 image so that neither side exceeds `maxDimensionPx`.
 *
 * The image is returned untouched (`resized: false`) when its dimensions cannot
 * be read or already fit. Otherwise it is resized with `sharp`, keeping JPEG and
 * WebP inputs in their format and encoding everything else as PNG. If the sharp
 * path fails for any reason, `resizeToJpeg` is used instead.
 *
 * @param params.base64 Base64-encoded image bytes.
 * @param params.mimeType MIME type declared for the input.
 * @param params.maxDimensionPx Largest allowed width/height in pixels.
 * @returns The (possibly re-encoded) payload, its MIME type, and whether it was resized.
 */
async function resizeImageBase64IfNeeded(params: {
  base64: string;
  mimeType: string;
  maxDimensionPx: number;
}): Promise<{ base64: string; mimeType: string; resized: boolean }> {
  const buf = Buffer.from(params.base64, "base64");
  const meta = await getImageMetadata(buf);
  const width = meta?.width;
  const height = meta?.height;
  if (
    typeof width !== "number" ||
    typeof height !== "number" ||
    (width <= params.maxDimensionPx && height <= params.maxDimensionPx)
  ) {
    return { base64: params.base64, mimeType: params.mimeType, resized: false };
  }

  const mime = params.mimeType.toLowerCase();
  let out: Buffer;
  try {
    const mod = (await import("sharp")) as unknown as {
      default?: typeof import("sharp");
    };
    const sharp = mod.default ?? (mod as unknown as typeof import("sharp"));
    const img = sharp(buf, { failOnError: false }).resize({
      width: params.maxDimensionPx,
      height: params.maxDimensionPx,
      fit: "inside",
      withoutEnlargement: true,
    });
    if (mime === "image/jpeg" || mime === "image/jpg") {
      out = await img.jpeg({ quality: 85 }).toBuffer();
    } else if (mime === "image/webp") {
      out = await img.webp({ quality: 85 }).toBuffer();
    } else {
      // PNG input and every unrecognised type are re-encoded as PNG.
      out = await img.png().toBuffer();
    }
  } catch {
    // Bun can't load sharp native addons. Fall back to a JPEG conversion.
    out = await resizeToJpeg({
      buffer: buf,
      maxSide: params.maxDimensionPx,
      quality: 85,
      withoutEnlargement: true,
    });
  }

  // The output format may differ from the input (e.g. the JPEG fallback), so the
  // MIME type is re-detected from the leading bytes. `subarray` is a zero-copy
  // view and replaces the deprecated `Buffer#slice`.
  const sniffed = await detectMime({ buffer: out.subarray(0, 256) });
  const nextMime = sniffed?.startsWith("image/") ? sniffed : params.mimeType;

  return { base64: out.toString("base64"), mimeType: nextMime, resized: true };
}

/**
 * Returns a copy of `blocks` in which every image block fits the size cap.
 *
 * Non-image blocks are passed through unchanged. Image blocks with an empty
 * payload, or whose processing throws, are replaced by a text block that names
 * `label` and the reason, so one bad image never fails the whole result.
 *
 * @param blocks Content blocks to sanitize.
 * @param label Prefix used in the replacement text for omitted images.
 * @param opts.maxDimensionPx Per-side pixel cap; defaults to
 *   `MAX_IMAGE_DIMENSION_PX`. `NaN` falls back to the default, fractional values
 *   are floored, and the cap is never lower than 1.
 */
export async function sanitizeContentBlocksImages(
  blocks: ToolContentBlock[],
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<ToolContentBlock[]> {
  const requested = opts.maxDimensionPx;
  // NaN would make every size comparison false and push all images into the
  // resize/omit path; fractional caps are floored to whole pixels.
  const maxDimensionPx =
    typeof requested === "number" && !Number.isNaN(requested)
      ? Math.max(Math.floor(requested), 1)
      : MAX_IMAGE_DIMENSION_PX;
  const out: ToolContentBlock[] = [];

  for (const block of blocks) {
    if (!isImageBlock(block)) {
      out.push(block);
      continue;
    }

    const data = block.data.trim();
    if (!data) {
      out.push({
        type: "text",
        text: `[${label}] omitted empty image payload`,
      } satisfies TextContentBlock);
      continue;
    }

    try {
      const resized = await resizeImageBase64IfNeeded({
        base64: data,
        mimeType: block.mimeType,
        maxDimensionPx,
      });
      out.push({ ...block, data: resized.base64, mimeType: resized.mimeType });
    } catch (err) {
      out.push({
        type: "text",
        text: `[${label}] omitted image payload: ${String(err)}`,
      } satisfies TextContentBlock);
    }
  }

  return out;
}

/**
 * Applies `sanitizeContentBlocksImages` to a tool result's content.
 *
 * Results without any image block are returned as-is (same object); only results
 * that actually carry images are copied with sanitized content.
 *
 * @param result Tool result to sanitize.
 * @param label Prefix used in the replacement text for omitted images.
 * @param opts.maxDimensionPx Optional per-side pixel cap.
 */
export async function sanitizeToolResultImages(
  result: AgentToolResult<unknown>,
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<AgentToolResult<unknown>> {
  const content = Array.isArray(result.content) ? result.content : [];
  // Only image blocks are ever rewritten, so there is nothing to do without one.
  if (!content.some((block) => isImageBlock(block))) return result;

  const next = await sanitizeContentBlocksImages(content, label, opts);
  return { ...result, content: next };
}