From 57ee34839d407e1342bdfa2fa4df74ef29a45176 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 18 Dec 2025 02:05:26 +0000 Subject: [PATCH] CLI/docs: expose node metadata and commands --- docs/gateway.md | 4 ++ docs/ios/spec.md | 33 +++++++++-------- src/cli/nodes-cli.ts | 82 ++++++++++++++++++++++++++++++++++++----- src/cli/program.test.ts | 55 ++++++++++++++++++++++++++- 4 files changed, 147 insertions(+), 27 deletions(-) diff --git a/docs/gateway.md b/docs/gateway.md index 6c05ee620..ce9e01f80 100644 --- a/docs/gateway.md +++ b/docs/gateway.md @@ -51,6 +51,10 @@ pnpm clawdis gateway --force - `system-event` — post a presence/system note (structured). - `send` — send a message via the active provider(s). - `agent` — run an agent turn (streams events back on same connection). +- `node.list` — list paired + currently-connected bridge nodes (includes `caps`, `deviceFamily`, `modelIdentifier`, `paired`, `connected`, and advertised `commands`). +- `node.describe` — describe a node (capabilities + supported `node.invoke` commands; works for paired nodes and for currently-connected unpaired nodes). +- `node.invoke` — invoke a command on a node (e.g. `canvas.*`, `camera.*`). +- `node.pair.*` — pairing lifecycle (`request`, `list`, `approve`, `reject`, `verify`). See also: `docs/presence.md` for how presence is produced/deduped and why `instanceId` matters. 
diff --git a/docs/ios/spec.md b/docs/ios/spec.md index c8a1ae741..9deb2c465 100644 --- a/docs/ios/spec.md +++ b/docs/ios/spec.md @@ -1,11 +1,11 @@ --- -summary: "Plan for an iOS voice + screen (Canvas) node that connects via a secure Bonjour-discovered macOS bridge" +summary: "Plan for an iOS voice + canvas node that connects via a secure Bonjour-discovered macOS bridge" read_when: - Designing iOS node + gateway integration - - Extending the Gateway protocol for node/screen commands + - Extending the Gateway protocol for node/canvas commands - Implementing Bonjour pairing or transport security --- -# iOS Node (internal) — Voice Trigger + Screen/Canvas +# iOS Node (internal) — Voice Trigger + Canvas Status: prototype implemented (internal) · Date: 2025-12-13 @@ -14,13 +14,13 @@ Runbook (how to connect/pair + drive Canvas): `docs/ios/connect.md` ## Goals - Build an **iOS app** that acts as a **remote node** for Clawdis: - **Voice trigger** (wake-word / always-listening intent) that forwards transcripts to the Gateway `agent` method. - - **Screen/Canvas** surface that the agent can control: navigate, draw/render, evaluate JS, snapshot. + - **Canvas** surface that the agent can control: navigate, draw/render, evaluate JS, snapshot. - **Dead-simple setup**: - Auto-discover the host on the local network via **Bonjour**. - One-tap pairing with an approval prompt on the Mac. - iOS is **never** a local gateway; it is always a remote node. - Operational clarity: - - When iOS is backgrounded, voice may still run; **screen/canvas commands must fail fast** with a structured error. + - When iOS is backgrounded, voice may still run; **canvas commands must fail fast** with a structured error. - Provide **settings**: node display name, enable/disable voice wake, pairing status. Non-goals (v1): @@ -41,7 +41,7 @@ Keep the Node gateway loopback-only; expose a dedicated **gateway-owned bridge** Why: - Preserves current threat model: Gateway remains local-only. 
- Centralizes auth, rate limiting, and allowlisting in the bridge. -- Lets us unify “screen node” semantics across mac + iOS without exposing raw gateway methods. +- Lets us unify “canvas node” semantics across mac + iOS without exposing raw gateway methods. ## Security plan (internal, but still robust) ### Transport @@ -84,7 +84,7 @@ The bridge must not be a raw proxy to every gateway method. - Allow by default: - `agent` (with guardrails; idempotency required) - minimal `system-event` beacons (presence updates for the node) - - node/screen methods defined below (new protocol surface) + - node/canvas methods defined below (new protocol surface) - Deny by default: - anything that widens control without explicit intent (future “shell”, “files”, etc.) - Rate limit: @@ -92,7 +92,7 @@ The bridge must not be a raw proxy to every gateway method. - voice forwards per minute - snapshot frequency / payload size -## Protocol unification: add “node/screen” to Gateway protocol +## Protocol unification: add “node/canvas” to Gateway protocol ### Principle Unify mac Canvas + iOS Canvas under a single conceptual surface: - The agent talks to the Gateway using a stable method set (typed protocol). @@ -108,14 +108,15 @@ Add to `src/gateway/protocol/schema.ts` (and regenerate Swift models): **Methods** - `node.list` → list paired/connected nodes + capabilities +- `node.describe` → describe a node (capabilities + supported `node.invoke` commands) - `node.invoke` → send a command to a specific node - Params: `{ nodeId, command, params?, timeoutMs? }` **Events** - `node.event` → async node status/errors - - e.g. background/foreground transitions, voice availability, screen availability + - e.g. 
background/foreground transitions, voice availability, canvas availability -### Node command set (screen-focused) +### Node command set (canvas) These are values for `node.invoke.command`: - `canvas.show` / `canvas.hide` - `canvas.navigate` with `{ url }` (Canvas URL or https URL) @@ -153,8 +154,8 @@ When iOS is backgrounded: - `BridgeConnection`: TCP session + pairing handshake + reconnect (TLS planned) - `NodeRuntime`: - Voice pipeline (wake-word + capture + forward) - - Screen pipeline (WKWebView controller + snapshot + eval) - - Background state tracking; enforces “screen unavailable in background” + - Canvas pipeline (WKWebView controller + snapshot + eval) + - Background state tracking; enforces “canvas unavailable in background” ### Voice in background (internal) - Enable background audio mode (and required session configuration) so the mic pipeline can keep running when the user switches apps. @@ -164,7 +165,7 @@ When iOS is backgrounded: Create/expand SwiftPM targets so both apps share: - `ClawdisProtocol` (generated models; platform-neutral) - `ClawdisGatewayClient` (shared WS framing + connect/req/res + seq-gap handling) -- `ClawdisKit` (node/screen command types + deep links + shared utilities) +- `ClawdisKit` (node/canvas command types + deep links + shared utilities) macOS continues to own: - local Canvas implementation details (custom scheme handler serving on-disk HTML, window/panel presentation) @@ -217,8 +218,8 @@ open Clawdis.xcodeproj 3) **Protocol additions for nodes** - Add `node.list` / `node.invoke` / `node.event` to Gateway - Implement bridge routing + ACLs -4) **iOS screen/canvas** - - WKWebView screen surface +4) **iOS canvas** + - WKWebView canvas surface - `canvas.navigate/eval/snapshot` - Background fast-fail for `canvas.*` 5) **Unify mac Canvas under the same node.invoke** @@ -226,5 +227,5 @@ open Clawdis.xcodeproj ## Open questions - Should `connect.params.client.mode` be `"node"` with `platform="ios ..."` or a distinct mode 
`"ios-node"`? (Presence filtering currently excludes `"cli"` only.) -- Do we want a “permissions” model per node (voice only vs voice+screen) at pairing time? +- Do we want a “permissions” model per node (voice only vs voice+canvas) at pairing time? - Should “website mode” allow arbitrary https, or enforce an allowlist to reduce risk? diff --git a/src/cli/nodes-cli.ts b/src/cli/nodes-cli.ts index 8306cb465..dd233cd21 100644 --- a/src/cli/nodes-cli.ts +++ b/src/cli/nodes-cli.ts @@ -35,6 +35,8 @@ type NodeListNode = { deviceFamily?: string; modelIdentifier?: string; caps?: string[]; + commands?: string[]; + paired?: boolean; connected?: boolean; }; @@ -183,18 +185,25 @@ export function registerNodesCli(program: Command) { nodesCallOpts( nodes .command("status") - .description("List paired nodes with connection status and capabilities") + .description("List known nodes with connection status and capabilities") .action(async (opts: NodesRpcOpts) => { try { - const result = (await callGatewayCli("node.list", opts, {})) as unknown; + const result = (await callGatewayCli( + "node.list", + opts, + {}, + )) as unknown; if (opts.json) { defaultRuntime.log(JSON.stringify(result, null, 2)); return; } const nodes = parseNodeList(result); - const connectedCount = nodes.filter((n) => Boolean(n.connected)).length; + const pairedCount = nodes.filter((n) => Boolean(n.paired)).length; + const connectedCount = nodes.filter((n) => + Boolean(n.connected), + ).length; defaultRuntime.log( - `Paired: ${nodes.length} · Connected: ${connectedCount}`, + `Known: ${nodes.length} · Paired: ${pairedCount} · Connected: ${connectedCount}`, ); for (const n of nodes) { const name = n.displayName || n.nodeId; @@ -207,8 +216,9 @@ export function registerNodesCli(program: Command) { : Array.isArray(n.caps) ? "[]" : "?"; + const pairing = n.paired ? "paired" : "unpaired"; defaultRuntime.log( - `- ${name} · ${n.nodeId}${ip}${device}${hw} · ${n.connected ? 
"connected" : "disconnected"} · caps: ${caps}`, + `- ${name} · ${n.nodeId}${ip}${device}${hw} · ${pairing} · ${n.connected ? "connected" : "disconnected"} · caps: ${caps}`, ); } } catch (err) { @@ -218,6 +228,63 @@ export function registerNodesCli(program: Command) { }), ); + nodesCallOpts( + nodes + .command("describe") + .description("Describe a node (capabilities + supported invoke commands)") + .requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP") + .action(async (opts: NodesRpcOpts) => { + try { + const nodeId = await resolveNodeId(opts, String(opts.node ?? "")); + const result = (await callGatewayCli("node.describe", opts, { + nodeId, + })) as unknown; + if (opts.json) { + defaultRuntime.log(JSON.stringify(result, null, 2)); + return; + } + + const obj = + typeof result === "object" && result !== null + ? (result as Record<string, unknown>) + : {}; + const displayName = + typeof obj.displayName === "string" ? obj.displayName : nodeId; + const connected = Boolean(obj.connected); + const caps = Array.isArray(obj.caps) + ? obj.caps.map(String).filter(Boolean).sort() + : null; + const commands = Array.isArray(obj.commands) + ? obj.commands.map(String).filter(Boolean).sort() + : []; + const family = + typeof obj.deviceFamily === "string" ? obj.deviceFamily : null; + const model = + typeof obj.modelIdentifier === "string" + ? obj.modelIdentifier + : null; + const ip = typeof obj.remoteIp === "string" ? obj.remoteIp : null; + + const parts: string[] = ["Node:", displayName, nodeId]; + if (ip) parts.push(ip); + if (family) parts.push(`device: ${family}`); + if (model) parts.push(`hw: ${model}`); + parts.push(connected ? "connected" : "disconnected"); + parts.push(`caps: ${caps ? 
`[${caps.join(",")}]` : "?"}`); + defaultRuntime.log(parts.join(" · ")); + defaultRuntime.log("Commands:"); + if (commands.length === 0) { + defaultRuntime.log("- (none reported)"); + return; + } + for (const c of commands) defaultRuntime.log(`- ${c}`); + } catch (err) { + defaultRuntime.error(`nodes describe failed: ${String(err)}`); + defaultRuntime.exit(1); + } + }), + ); + nodesCallOpts( nodes .command("list") @@ -345,10 +412,7 @@ export function registerNodesCli(program: Command) { .command("invoke") .description("Invoke a command on a paired node") .requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP") - .requiredOption( - "--command <command>", - "Command (e.g. canvas.eval)", - ) + .requiredOption("--command <command>", "Command (e.g. canvas.eval)") .option("--params <json>", "JSON object string for params", "{}") .option( "--invoke-timeout <ms>", diff --git a/src/cli/program.test.ts b/src/cli/program.test.ts index aae37e730..2a55c90d6 100644 --- a/src/cli/program.test.ts +++ b/src/cli/program.test.ts @@ -73,6 +73,7 @@ describe("cli program", () => { deviceFamily: "iPad", modelIdentifier: "iPad16,6", caps: ["canvas", "camera"], + paired: true, connected: true, }, ], @@ -85,14 +86,64 @@ describe("cli program", () => { expect.objectContaining({ method: "node.list", params: {} }), ); - const output = runtime.log.mock.calls.map((c) => String(c[0] ?? "")).join("\n"); - expect(output).toContain("Paired: 1 · Connected: 1"); + const output = runtime.log.mock.calls + .map((c) => String(c[0] ?? 
"")) + .join("\n"); + expect(output).toContain("Known: 1 · Paired: 1 · Connected: 1"); expect(output).toContain("iOS Node"); expect(output).toContain("device: iPad"); expect(output).toContain("hw: iPad16,6"); + expect(output).toContain("paired"); expect(output).toContain("caps: [camera,canvas]"); }); + it("runs nodes describe and calls node.describe", async () => { + callGateway + .mockResolvedValueOnce({ + ts: Date.now(), + nodes: [ + { + nodeId: "ios-node", + displayName: "iOS Node", + remoteIp: "192.168.0.88", + connected: true, + }, + ], + }) + .mockResolvedValueOnce({ + ts: Date.now(), + nodeId: "ios-node", + displayName: "iOS Node", + caps: ["canvas", "camera"], + commands: ["canvas.eval", "canvas.snapshot", "camera.snap"], + connected: true, + }); + + const program = buildProgram(); + runtime.log.mockClear(); + await program.parseAsync(["nodes", "describe", "--node", "ios-node"], { + from: "user", + }); + + expect(callGateway).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ method: "node.list", params: {} }), + ); + expect(callGateway).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + method: "node.describe", + params: { nodeId: "ios-node" }, + }), + ); + + const out = runtime.log.mock.calls + .map((c) => String(c[0] ?? "")) + .join("\n"); + expect(out).toContain("Commands:"); + expect(out).toContain("canvas.eval"); + }); + it("runs nodes approve and calls node.pair.approve", async () => { callGateway.mockResolvedValue({ requestId: "r1",