feat: add mac node screen recording and ssh tunnel

This commit is contained in:
Peter Steinberger
2025-12-19 02:33:43 +01:00
parent 1fbd84da39
commit 95ea67de28
8 changed files with 311 additions and 1 deletions

View File

@@ -11,6 +11,7 @@ final class MacNodeModeCoordinator {
private var task: Task<Void, Never>?
private let runtime = MacNodeRuntime()
private let session = MacNodeBridgeSession()
private var tunnel: RemotePortTunnel?
func start() {
guard self.task == nil else { return }
@@ -23,17 +24,30 @@ final class MacNodeModeCoordinator {
self.task?.cancel()
self.task = nil
Task { await self.session.disconnect() }
self.tunnel?.terminate()
self.tunnel = nil
}
private func run() async {
var retryDelay: UInt64 = 1_000_000_000
var lastCameraEnabled: Bool? = nil
let defaults = UserDefaults.standard
while !Task.isCancelled {
if await MainActor.run(body: { AppStateStore.shared.isPaused }) {
try? await Task.sleep(nanoseconds: 1_000_000_000)
continue
}
guard let endpoint = await Self.discoverBridgeEndpoint(timeoutSeconds: 5) else {
let cameraEnabled = defaults.object(forKey: cameraEnabledKey) as? Bool ?? false
if lastCameraEnabled == nil {
lastCameraEnabled = cameraEnabled
} else if lastCameraEnabled != cameraEnabled {
lastCameraEnabled = cameraEnabled
await self.session.disconnect()
try? await Task.sleep(nanoseconds: 200_000_000)
}
guard let endpoint = await self.resolveBridgeEndpoint(timeoutSeconds: 5) else {
try? await Task.sleep(nanoseconds: min(retryDelay, 5_000_000_000))
retryDelay = min(retryDelay * 2, 10_000_000_000)
continue
@@ -101,6 +115,7 @@ final class MacNodeModeCoordinator {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
MacNodeScreenCommand.record.rawValue,
]
let capsSet = Set(caps)
@@ -138,6 +153,30 @@ final class MacNodeModeCoordinator {
"mac-\(InstanceIdentity.instanceId)"
}
/// Chooses the endpoint the node session should connect to.
///
/// In remote connection mode this reuses the existing tunnel, or creates one via
/// `RemotePortTunnel.create(remotePort:)` when there is none or its process has stopped,
/// and returns the tunnel's local port on `127.0.0.1`. In every other case — non-remote
/// mode, tunnel creation failure, or no usable local port — it falls back to
/// `discoverBridgeEndpoint(timeoutSeconds:)`.
///
/// - Parameter timeoutSeconds: Forwarded unchanged to `discoverBridgeEndpoint`.
/// - Returns: The tunnel endpoint, or whatever discovery returns (possibly `nil`).
private func resolveBridgeEndpoint(timeoutSeconds: Double) async -> NWEndpoint? {
    let connectionMode = await MainActor.run(body: { AppStateStore.shared.connectionMode })

    guard connectionMode == .remote else {
        // Not remote: a tunnel left over from an earlier remote session is no longer needed.
        if let staleTunnel = self.tunnel {
            staleTunnel.terminate()
            self.tunnel = nil
        }
        return await Self.discoverBridgeEndpoint(timeoutSeconds: timeoutSeconds)
    }

    do {
        // Create a tunnel when none exists yet or the existing one's process is no longer running.
        let needsNewTunnel = self.tunnel == nil || self.tunnel?.process.isRunning == false
        if needsNewTunnel {
            self.tunnel = try await RemotePortTunnel.create(remotePort: 18790)
        }
        if let tunnelPort = self.tunnel?.localPort,
           let endpointPort = NWEndpoint.Port(rawValue: tunnelPort)
        {
            return .hostPort(host: "127.0.0.1", port: endpointPort)
        }
    } catch {
        self.logger.error("mac node bridge tunnel failed: \(error.localizedDescription, privacy: .public)")
        // Drop whatever tunnel state is left so the next attempt starts from scratch.
        self.tunnel?.terminate()
        self.tunnel = nil
    }

    // Remote mode without a usable tunnel endpoint: fall back to discovery.
    return await Self.discoverBridgeEndpoint(timeoutSeconds: timeoutSeconds)
}
private static func discoverBridgeEndpoint(timeoutSeconds: Double) async -> NWEndpoint? {
final class DiscoveryState: @unchecked Sendable {
let lock = NSLock()

View File

@@ -5,6 +5,7 @@ import Foundation
actor MacNodeRuntime {
private let cameraCapture = CameraCaptureService()
@MainActor private let screenRecorder = ScreenRecordService()
func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse {
let command = req.command
@@ -115,6 +116,31 @@ actor MacNodeRuntime {
hasAudio: res.hasAudio))
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
// `screen.record`: record the screen, then return the recording inline as base64.
case MacNodeScreenCommand.record.rawValue:
// Parameters that fail to decode are replaced by a default (all-nil) parameter set
// rather than rejecting the request.
let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ??
MacNodeScreenRecordParams()
// No explicit output path is passed; the recorder returns the path it wrote to.
let path = try await self.screenRecorder.record(
screenIndex: params.screenIndex,
durationMs: params.durationMs,
fps: params.fps,
outPath: nil)
// Best-effort removal of the recording file when this scope exits (success or throw).
defer { try? FileManager.default.removeItem(atPath: path) }
// The whole file is read into memory so it can be base64-encoded into the response.
let data = try Data(contentsOf: URL(fileURLWithPath: path))
// Response shape; the optional fields echo the request parameters as received.
struct ScreenPayload: Encodable {
var format: String
var base64: String
var durationMs: Int?
var fps: Double?
var screenIndex: Int?
}
// NOTE(review): `format` is only echoed back (default "mp4"); it is not passed to the
// recorder here, so the reported format may not reflect the actual container — confirm.
let payload = try Self.encodePayload(ScreenPayload(
format: params.format ?? "mp4",
base64: data.base64EncodedString(),
durationMs: params.durationMs,
fps: params.fps,
screenIndex: params.screenIndex))
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
default:
return Self.errorResponse(req, code: .invalidRequest, message: "INVALID_REQUEST: unknown command")
}

View File

@@ -0,0 +1,12 @@
import Foundation
/// Screen-related commands handled by the mac node.
///
/// The raw value is the command string that is matched against an incoming
/// invoke request's `command`.
enum MacNodeScreenCommand: String, Codable, Sendable {
/// Record the screen; raw value `"screen.record"`.
case record = "screen.record"
}
/// Optional parameters for the `screen.record` command.
///
/// Every field is optional, so an empty parameter set is valid.
/// NOTE(review): what the recorder does for a `nil` field is decided by the
/// recording service, not here — confirm its defaults.
struct MacNodeScreenRecordParams: Codable, Sendable, Equatable {
/// Index of the screen to record.
var screenIndex: Int?
/// Requested recording duration, in milliseconds.
var durationMs: Int?
/// Requested frames per second.
var fps: Double?
/// Requested output format name (for example `"mp4"`).
var format: String?
}

View File

@@ -0,0 +1,24 @@
import Foundation
/// Screen-related command identifiers.
///
/// The raw value is the command string itself.
public enum ClawdisScreenCommand: String, Codable, Sendable {
/// Record the screen; raw value `"screen.record"`.
case record = "screen.record"
}
/// Optional parameters for the `screen.record` command.
///
/// All fields are optional and default to `nil` in the initializer.
public struct ClawdisScreenRecordParams: Codable, Sendable, Equatable {
/// Index of the screen to record.
public var screenIndex: Int?
/// Requested recording duration, in milliseconds.
public var durationMs: Int?
/// Requested frames per second.
public var fps: Double?
/// Requested output format name (for example `"mp4"`).
public var format: String?
/// Creates a parameter set; any argument left out stays `nil`.
///
/// - Parameters:
///   - screenIndex: Index of the screen to record.
///   - durationMs: Recording duration in milliseconds.
///   - fps: Frames per second.
///   - format: Output format name.
public init(
screenIndex: Int? = nil,
durationMs: Int? = nil,
fps: Double? = nil,
format: String? = nil)
{
self.screenIndex = screenIndex
self.durationMs = durationMs
self.fps = fps
self.format = format
}
}

View File

@@ -68,6 +68,19 @@ Notes:
- Clip duration is clamped (currently `<= 60s`) to avoid oversized base64 payloads.
- Android will prompt for `CAMERA`/`RECORD_AUDIO` permissions when possible; denied permissions fail with `*_PERMISSION_REQUIRED`.
## Screen recordings (mac node)
Mac node mode exposes `screen.record` (mp4). Example:
```bash
clawdis nodes screen record --node <idOrNameOrIp> --duration 10s --fps 10
```
## Mac node mode
- The macOS menubar app connects to the Gateway bridge as a node (so `clawdis nodes …` works against this Mac).
- In remote mode, the app opens an SSH tunnel for the bridge port and connects to `localhost`.
## Where to look in code
- CLI wiring: `src/cli/nodes-cli.ts`

View File

@@ -12,6 +12,11 @@ import {
canvasSnapshotTempPath,
parseCanvasSnapshotPayload,
} from "./nodes-canvas.js";
import {
parseScreenRecordPayload,
screenRecordTempPath,
writeScreenRecordToFile,
} from "./nodes-screen.js";
import { parseDurationMs } from "./parse-duration.js";
type NodesRpcOpts = {
@@ -29,6 +34,8 @@ type NodesRpcOpts = {
maxWidth?: string;
quality?: string;
duration?: string;
screen?: string;
fps?: string;
audio?: boolean;
};
@@ -760,4 +767,97 @@ export function registerNodesCli(program: Command) {
}),
{ timeoutMs: 90_000 },
);
// `nodes screen` command group; `record` is its only subcommand here.
const screen = nodes
.command("screen")
.description("Capture screen recordings from a paired node");
// `nodes screen record`: invokes `screen.record` on the selected node through the
// gateway, writes the returned base64 recording to a file, and prints the file path.
nodesCallOpts(
screen
.command("record")
.description(
"Capture a short screen recording from a node (prints MEDIA:<path>)",
)
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
.option("--screen <index>", "Screen index (0 = primary)", "0")
.option("--duration <ms|10s>", "Clip duration (ms or 10s)", "10000")
.option("--fps <fps>", "Frames per second", "10")
.option("--out <path>", "Output path")
.option(
"--invoke-timeout <ms>",
"Node invoke timeout in ms (default 120000)",
"120000",
)
.action(async (opts: NodesRpcOpts & { out?: string }) => {
try {
const nodeId = await resolveNodeId(opts, String(opts.node ?? ""));
// Option values arrive as strings; convert them to numbers here.
const durationMs = parseDurationMs(opts.duration ?? "");
const screenIndex = Number.parseInt(String(opts.screen ?? "0"), 10);
const fps = Number.parseFloat(String(opts.fps ?? "10"));
const timeoutMs = opts.invokeTimeout
? Number.parseInt(String(opts.invokeTimeout), 10)
: undefined;
// Values that did not parse to a finite number are sent as `undefined`
// instead of NaN.
const invokeParams: Record<string, unknown> = {
nodeId,
command: "screen.record",
params: {
durationMs: Number.isFinite(durationMs) ? durationMs : undefined,
screenIndex: Number.isFinite(screenIndex)
? screenIndex
: undefined,
fps: Number.isFinite(fps) ? fps : undefined,
format: "mp4",
},
idempotencyKey: randomIdempotencyKey(),
};
// Only forward an invoke timeout when it parsed to a finite number.
if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
invokeParams.timeoutMs = timeoutMs;
}
const raw = (await callGatewayCli(
"node.invoke",
opts,
invokeParams,
)) as unknown;
// A non-object response is treated as having no payload; the parser
// below then throws on the missing fields.
const res =
typeof raw === "object" && raw !== null
? (raw as { payload?: unknown })
: {};
const parsed = parseScreenRecordPayload(res.payload);
// An explicit --out wins; otherwise a temp path is built using the
// format reported by the node as the file extension.
const filePath =
opts.out ??
screenRecordTempPath({
ext: parsed.format || "mp4",
});
const written = await writeScreenRecordToFile(
filePath,
parsed.base64,
);
// JSON mode prints the path plus the metadata echoed by the node.
if (opts.json) {
defaultRuntime.log(
JSON.stringify(
{
file: {
path: written.path,
durationMs: parsed.durationMs,
fps: parsed.fps,
screenIndex: parsed.screenIndex,
},
},
null,
2,
),
);
return;
}
// Default output: a single `MEDIA:<path>` line.
defaultRuntime.log(`MEDIA:${written.path}`);
} catch (err) {
// Any failure above is reported and exits with status 1.
defaultRuntime.error(`nodes screen record failed: ${String(err)}`);
defaultRuntime.exit(1);
}
}),
// NOTE(review): presumably the overall CLI call timeout, set above the 120000 ms
// default invoke timeout — confirm against nodesCallOpts.
{ timeoutMs: 180_000 },
);
}

View File

@@ -0,0 +1,38 @@
import { describe, expect, it } from "vitest";
import {
parseScreenRecordPayload,
screenRecordTempPath,
} from "./nodes-screen.js";
describe("nodes screen helpers", () => {
  // A payload with every field present is parsed field by field.
  it("parses screen.record payload", () => {
    const rawPayload = {
      format: "mp4",
      base64: "Zm9v",
      durationMs: 1000,
      fps: 12,
      screenIndex: 0,
    };
    const { format, base64, durationMs, fps, screenIndex } =
      parseScreenRecordPayload(rawPayload);
    expect(format).toBe("mp4");
    expect(base64).toBe("Zm9v");
    expect(durationMs).toBe(1000);
    expect(fps).toBe(12);
    expect(screenIndex).toBe(0);
  });

  // A payload without `base64` must be rejected.
  it("rejects invalid screen.record payload", () => {
    const parseWithoutBase64 = () =>
      parseScreenRecordPayload({ format: "mp4" });
    expect(parseWithoutBase64).toThrow(/invalid screen\.record payload/i);
  });

  // With tmpDir and id pinned, the generated path is fully deterministic.
  it("builds screen record temp path", () => {
    const builtPath = screenRecordTempPath({
      ext: "mp4",
      tmpDir: "/tmp",
      id: "id1",
    });
    expect(builtPath).toBe("/tmp/clawdis-screen-record-id1.mp4");
  });
});

58
src/cli/nodes-screen.ts Normal file
View File

@@ -0,0 +1,58 @@
import { randomUUID } from "node:crypto";
import * as os from "node:os";
import * as path from "node:path";
import { writeBase64ToFile } from "./nodes-camera.js";
/** Validated shape of a `screen.record` response payload. */
export type ScreenRecordPayload = {
// Format name reported by the node; required and non-empty after parsing.
format: string;
// Base64 text of the recording; required and non-empty after parsing.
base64: string;
// Optional metadata; kept only when the incoming value is a number.
durationMs?: number;
fps?: number;
screenIndex?: number;
};
/**
 * Views an unknown value as a string-keyed record.
 *
 * Returns the value itself when it is a non-null object (arrays included);
 * anything else yields a fresh empty object.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return {};
  }
  return value as Record<string, unknown>;
}
/** Returns `value` when it is a string (including the empty string), otherwise `undefined`. */
function asString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  return value;
}
/**
 * Validates an untyped `screen.record` payload.
 *
 * `format` and `base64` must both be non-empty strings. The numeric metadata
 * fields are copied only when they are numbers and are `undefined` otherwise.
 *
 * @param value - Raw payload of unknown shape.
 * @returns The typed payload.
 * @throws Error("invalid screen.record payload") when `format` or `base64`
 *   is missing, not a string, or empty.
 */
export function parseScreenRecordPayload(value: unknown): ScreenRecordPayload {
  const fields = asRecord(value);
  const numberOrUndefined = (candidate: unknown): number | undefined =>
    typeof candidate === "number" ? candidate : undefined;

  const format = asString(fields.format);
  const base64 = asString(fields.base64);
  if (
    format === undefined ||
    format === "" ||
    base64 === undefined ||
    base64 === ""
  ) {
    throw new Error("invalid screen.record payload");
  }

  return {
    format,
    base64,
    durationMs: numberOrUndefined(fields.durationMs),
    fps: numberOrUndefined(fields.fps),
    screenIndex: numberOrUndefined(fields.screenIndex),
  };
}
/**
 * Builds the file path for a downloaded screen recording:
 * `<tmpDir>/clawdis-screen-record-<id>.<ext>`.
 *
 * The extension may come from a remote node's payload, so it is sanitised
 * before being placed in the file name: it is split on ".", every character
 * other than ASCII letters and digits is removed, and empty segments are
 * dropped. This keeps path separators and ".." out of the result, so the file
 * cannot land outside `tmpDir`. An extension with nothing usable left falls
 * back to "mp4".
 *
 * @param opts.ext - Desired extension, with or without a leading dot.
 * @param opts.tmpDir - Target directory; defaults to `os.tmpdir()`.
 * @param opts.id - Unique part of the file name; defaults to a random UUID.
 * @returns The joined path.
 */
export function screenRecordTempPath(opts: {
  ext: string;
  tmpDir?: string;
  id?: string;
}) {
  const tmpDir = opts.tmpDir ?? os.tmpdir();
  const id = opts.id ?? randomUUID();
  // Splitting on "." also absorbs an optional leading dot (".mp4" -> "mp4").
  const safeExt =
    opts.ext
      .split(".")
      .map((segment) => segment.replace(/[^A-Za-z0-9]/g, ""))
      .filter((segment) => segment.length > 0)
      .join(".") || "mp4";
  return path.join(tmpDir, `clawdis-screen-record-${id}.${safeExt}`);
}
/**
 * Writes a base64-encoded screen recording to `filePath` by delegating to
 * `writeBase64ToFile`.
 *
 * @param filePath - Destination path.
 * @param base64 - Base64 text of the recording.
 * @returns Whatever `writeBase64ToFile` resolves to.
 */
export async function writeScreenRecordToFile(
  filePath: string,
  base64: string,
) {
  const writeResult = await writeBase64ToFile(filePath, base64);
  return writeResult;
}