From 95ea67de289210854088c1f7bea90ab5283ea766 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 19 Dec 2025 02:33:43 +0100 Subject: [PATCH] feat: add mac node screen recording and ssh tunnel --- .../NodeMode/MacNodeModeCoordinator.swift | 41 ++++++- .../Clawdis/NodeMode/MacNodeRuntime.swift | 26 +++++ .../NodeMode/MacNodeScreenCommands.swift | 12 +++ .../Sources/ClawdisKit/ScreenCommands.swift | 24 +++++ docs/nodes.md | 13 +++ src/cli/nodes-cli.ts | 100 ++++++++++++++++++ src/cli/nodes-screen.test.ts | 38 +++++++ src/cli/nodes-screen.ts | 58 ++++++++++ 8 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 apps/macos/Sources/Clawdis/NodeMode/MacNodeScreenCommands.swift create mode 100644 apps/shared/ClawdisKit/Sources/ClawdisKit/ScreenCommands.swift create mode 100644 src/cli/nodes-screen.test.ts create mode 100644 src/cli/nodes-screen.ts diff --git a/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift b/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift index 8627b6c3c..41b93b5c1 100644 --- a/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift +++ b/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift @@ -11,6 +11,7 @@ final class MacNodeModeCoordinator { private var task: Task? private let runtime = MacNodeRuntime() private let session = MacNodeBridgeSession() + private var tunnel: RemotePortTunnel? func start() { guard self.task == nil else { return } @@ -23,17 +24,30 @@ final class MacNodeModeCoordinator { self.task?.cancel() self.task = nil Task { await self.session.disconnect() } + self.tunnel?.terminate() + self.tunnel = nil } private func run() async { var retryDelay: UInt64 = 1_000_000_000 + var lastCameraEnabled: Bool? = nil + let defaults = UserDefaults.standard while !Task.isCancelled { if await MainActor.run(body: { AppStateStore.shared.isPaused }) { try? await Task.sleep(nanoseconds: 1_000_000_000) continue } - guard let endpoint = await Self.discoverBridgeEndpoint(timeoutSeconds: 5) else { + let cameraEnabled = defaults.object(forKey: cameraEnabledKey) as? Bool ?? false + if lastCameraEnabled == nil { + lastCameraEnabled = cameraEnabled + } else if lastCameraEnabled != cameraEnabled { + lastCameraEnabled = cameraEnabled + await self.session.disconnect() + try? await Task.sleep(nanoseconds: 200_000_000) + } + + guard let endpoint = await self.resolveBridgeEndpoint(timeoutSeconds: 5) else { try? await Task.sleep(nanoseconds: min(retryDelay, 5_000_000_000)) retryDelay = min(retryDelay * 2, 10_000_000_000) continue @@ -101,6 +115,7 @@ final class MacNodeModeCoordinator { ClawdisCanvasA2UICommand.push.rawValue, ClawdisCanvasA2UICommand.pushJSONL.rawValue, ClawdisCanvasA2UICommand.reset.rawValue, + MacNodeScreenCommand.record.rawValue, ] let capsSet = Set(caps) @@ -138,6 +153,30 @@ final class MacNodeModeCoordinator { "mac-\(InstanceIdentity.instanceId)" } + private func resolveBridgeEndpoint(timeoutSeconds: Double) async -> NWEndpoint? { + let mode = await MainActor.run(body: { AppStateStore.shared.connectionMode }) + if mode == .remote { + do { + if self.tunnel == nil || self.tunnel?.process.isRunning == false { + self.tunnel = try await RemotePortTunnel.create(remotePort: 18790) + } + if let localPort = self.tunnel?.localPort, + let port = NWEndpoint.Port(rawValue: localPort) + { + return .hostPort(host: "127.0.0.1", port: port) + } + } catch { + self.logger.error("mac node bridge tunnel failed: \(error.localizedDescription, privacy: .public)") + self.tunnel?.terminate() + self.tunnel = nil + } + } else if let tunnel = self.tunnel { + tunnel.terminate() + self.tunnel = nil + } + return await Self.discoverBridgeEndpoint(timeoutSeconds: timeoutSeconds) + } + private static func discoverBridgeEndpoint(timeoutSeconds: Double) async -> NWEndpoint? { final class DiscoveryState: @unchecked Sendable { let lock = NSLock() diff --git a/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift b/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift index 72e690c7a..157e0a62f 100644 --- a/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift +++ b/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift @@ -5,6 +5,7 @@ import Foundation actor MacNodeRuntime { private let cameraCapture = CameraCaptureService() + @MainActor private let screenRecorder = ScreenRecordService() func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse { let command = req.command @@ -115,6 +116,31 @@ actor MacNodeRuntime { hasAudio: res.hasAudio)) return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case MacNodeScreenCommand.record.rawValue: + let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ?? + MacNodeScreenRecordParams() + let path = try await self.screenRecorder.record( + screenIndex: params.screenIndex, + durationMs: params.durationMs, + fps: params.fps, + outPath: nil) + defer { try? FileManager.default.removeItem(atPath: path) } + let data = try Data(contentsOf: URL(fileURLWithPath: path)) + struct ScreenPayload: Encodable { + var format: String + var base64: String + var durationMs: Int? + var fps: Double? + var screenIndex: Int? + } + let payload = try Self.encodePayload(ScreenPayload( + format: params.format ?? "mp4", + base64: data.base64EncodedString(), + durationMs: params.durationMs, + fps: params.fps, + screenIndex: params.screenIndex)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + default: return Self.errorResponse(req, code: .invalidRequest, message: "INVALID_REQUEST: unknown command") } diff --git a/apps/macos/Sources/Clawdis/NodeMode/MacNodeScreenCommands.swift b/apps/macos/Sources/Clawdis/NodeMode/MacNodeScreenCommands.swift new file mode 100644 index 000000000..31a210edf --- /dev/null +++ b/apps/macos/Sources/Clawdis/NodeMode/MacNodeScreenCommands.swift @@ -0,0 +1,12 @@ +import Foundation + +enum MacNodeScreenCommand: String, Codable, Sendable { + case record = "screen.record" +} + +struct MacNodeScreenRecordParams: Codable, Sendable, Equatable { + var screenIndex: Int? + var durationMs: Int? + var fps: Double? + var format: String? +} diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/ScreenCommands.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/ScreenCommands.swift new file mode 100644 index 000000000..5e84446de --- /dev/null +++ b/apps/shared/ClawdisKit/Sources/ClawdisKit/ScreenCommands.swift @@ -0,0 +1,24 @@ +import Foundation + +public enum ClawdisScreenCommand: String, Codable, Sendable { + case record = "screen.record" +} + +public struct ClawdisScreenRecordParams: Codable, Sendable, Equatable { + public var screenIndex: Int? + public var durationMs: Int? + public var fps: Double? + public var format: String? + + public init( + screenIndex: Int? = nil, + durationMs: Int? = nil, + fps: Double? = nil, + format: String? = nil) + { + self.screenIndex = screenIndex + self.durationMs = durationMs + self.fps = fps + self.format = format + } +} diff --git a/docs/nodes.md b/docs/nodes.md index 0b9c6ea2b..52fa095f3 100644 --- a/docs/nodes.md +++ b/docs/nodes.md @@ -68,6 +68,19 @@ Notes: - Clip duration is clamped (currently `<= 60s`) to avoid oversized base64 payloads. - Android will prompt for `CAMERA`/`RECORD_AUDIO` permissions when possible; denied permissions fail with `*_PERMISSION_REQUIRED`. +## Screen recordings (mac node) + +Mac node mode exposes `screen.record` (mp4). Example: + +```bash +clawdis nodes screen record --node --duration 10s --fps 10 +``` + +## Mac node mode + +- The macOS menubar app connects to the Gateway bridge as a node (so `clawdis nodes …` works against this Mac). +- In remote mode, the app opens an SSH tunnel for the bridge port and connects to `localhost`. + ## Where to look in code - CLI wiring: `src/cli/nodes-cli.ts` diff --git a/src/cli/nodes-cli.ts b/src/cli/nodes-cli.ts index b77eda7ad..8a140bc20 100644 --- a/src/cli/nodes-cli.ts +++ b/src/cli/nodes-cli.ts @@ -12,6 +12,11 @@ import { canvasSnapshotTempPath, parseCanvasSnapshotPayload, } from "./nodes-canvas.js"; +import { + parseScreenRecordPayload, + screenRecordTempPath, + writeScreenRecordToFile, +} from "./nodes-screen.js"; import { parseDurationMs } from "./parse-duration.js"; type NodesRpcOpts = { @@ -29,6 +34,8 @@ type NodesRpcOpts = { maxWidth?: string; quality?: string; duration?: string; + screen?: string; + fps?: string; audio?: boolean; }; @@ -760,4 +767,97 @@ export function registerNodesCli(program: Command) { }), { timeoutMs: 90_000 }, ); + + const screen = nodes + .command("screen") + .description("Capture screen recordings from a paired node"); + + nodesCallOpts( + screen + .command("record") + .description( + "Capture a short screen recording from a node (prints MEDIA:)", + ) + .requiredOption("--node ", "Node id, name, or IP") + .option("--screen ", "Screen index (0 = primary)", "0") + .option("--duration ", "Clip duration (ms or 10s)", "10000") + .option("--fps ", "Frames per second", "10") + .option("--out ", "Output path") + .option( + "--invoke-timeout ", + "Node invoke timeout in ms (default 120000)", + "120000", + ) + .action(async (opts: NodesRpcOpts & { out?: string }) => { + try { + const nodeId = await resolveNodeId(opts, String(opts.node ?? "")); + const durationMs = parseDurationMs(opts.duration ?? ""); + const screenIndex = Number.parseInt(String(opts.screen ?? "0"), 10); + const fps = Number.parseFloat(String(opts.fps ?? "10")); + const timeoutMs = opts.invokeTimeout + ? Number.parseInt(String(opts.invokeTimeout), 10) + : undefined; + + const invokeParams: Record = { + nodeId, + command: "screen.record", + params: { + durationMs: Number.isFinite(durationMs) ? durationMs : undefined, + screenIndex: Number.isFinite(screenIndex) + ? screenIndex + : undefined, + fps: Number.isFinite(fps) ? fps : undefined, + format: "mp4", + }, + idempotencyKey: randomIdempotencyKey(), + }; + if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) { + invokeParams.timeoutMs = timeoutMs; + } + + const raw = (await callGatewayCli( + "node.invoke", + opts, + invokeParams, + )) as unknown; + const res = + typeof raw === "object" && raw !== null + ? (raw as { payload?: unknown }) + : {}; + const parsed = parseScreenRecordPayload(res.payload); + const filePath = + opts.out ?? + screenRecordTempPath({ + ext: parsed.format || "mp4", + }); + const written = await writeScreenRecordToFile( + filePath, + parsed.base64, + ); + + if (opts.json) { + defaultRuntime.log( + JSON.stringify( + { + file: { + path: written.path, + durationMs: parsed.durationMs, + fps: parsed.fps, + screenIndex: parsed.screenIndex, + }, + }, + null, + 2, + ), + ); + return; + } + defaultRuntime.log(`MEDIA:${written.path}`); + } catch (err) { + defaultRuntime.error(`nodes screen record failed: ${String(err)}`); + defaultRuntime.exit(1); + } + }), + { timeoutMs: 180_000 }, + ); } diff --git a/src/cli/nodes-screen.test.ts b/src/cli/nodes-screen.test.ts new file mode 100644 index 000000000..7e13b29d6 --- /dev/null +++ b/src/cli/nodes-screen.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from "vitest"; + +import { + parseScreenRecordPayload, + screenRecordTempPath, +} from "./nodes-screen.js"; + +describe("nodes screen helpers", () => { + it("parses screen.record payload", () => { + const payload = parseScreenRecordPayload({ + format: "mp4", + base64: "Zm9v", + durationMs: 1000, + fps: 12, + screenIndex: 0, + }); + expect(payload.format).toBe("mp4"); + expect(payload.base64).toBe("Zm9v"); + expect(payload.durationMs).toBe(1000); + expect(payload.fps).toBe(12); + expect(payload.screenIndex).toBe(0); + }); + + it("rejects invalid screen.record payload", () => { + expect(() => parseScreenRecordPayload({ format: "mp4" })).toThrow( + /invalid screen\.record payload/i, + ); + }); + + it("builds screen record temp path", () => { + const p = screenRecordTempPath({ + ext: "mp4", + tmpDir: "/tmp", + id: "id1", + }); + expect(p).toBe("/tmp/clawdis-screen-record-id1.mp4"); + }); +}); diff --git a/src/cli/nodes-screen.ts b/src/cli/nodes-screen.ts new file mode 100644 index 000000000..84ac85991 --- /dev/null +++ b/src/cli/nodes-screen.ts @@ -0,0 +1,58 @@ +import { randomUUID } from "node:crypto"; +import * as os from "node:os"; +import * as path from "node:path"; + +import { writeBase64ToFile } from "./nodes-camera.js"; + +export type ScreenRecordPayload = { + format: string; + base64: string; + durationMs?: number; + fps?: number; + screenIndex?: number; +}; + +function asRecord(value: unknown): Record { + return typeof value === "object" && value !== null + ? (value as Record) + : {}; +} + +function asString(value: unknown): string | undefined { + return typeof value === "string" ? value : undefined; +} + +export function parseScreenRecordPayload(value: unknown): ScreenRecordPayload { + const obj = asRecord(value); + const format = asString(obj.format); + const base64 = asString(obj.base64); + if (!format || !base64) { + throw new Error("invalid screen.record payload"); + } + return { + format, + base64, + durationMs: typeof obj.durationMs === "number" ? obj.durationMs : undefined, + fps: typeof obj.fps === "number" ? obj.fps : undefined, + screenIndex: + typeof obj.screenIndex === "number" ? obj.screenIndex : undefined, + }; +} + +export function screenRecordTempPath(opts: { + ext: string; + tmpDir?: string; + id?: string; +}) { + const tmpDir = opts.tmpDir ?? os.tmpdir(); + const id = opts.id ?? randomUUID(); + const ext = opts.ext.startsWith(".") ? opts.ext : `.${opts.ext}`; + return path.join(tmpDir, `clawdis-screen-record-${id}${ext}`); +} + +export async function writeScreenRecordToFile( + filePath: string, + base64: string, +) { + return writeBase64ToFile(filePath, base64); +}