From fd95ededaab568288038e47eb7db6e12af18034b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 4 Jan 2026 16:23:46 +0100 Subject: [PATCH] refactor: streamline node invoke handling --- .../Sources/Location/LocationService.swift | 8 +- apps/ios/Sources/Model/NodeAppModel.swift | 590 ++++++++++-------- .../Sources/Screen/ScreenRecordService.swift | 348 +++++++---- apps/ios/Sources/Voice/TalkModeManager.swift | 19 +- .../NodeMode/MacNodeBridgePairingClient.swift | 2 - .../NodeMode/MacNodeBridgeSession.swift | 2 - .../NodeMode/MacNodeLocationService.swift | 8 +- .../Clawdbot/NodeMode/MacNodeRuntime.swift | 499 ++++++++------- 8 files changed, 810 insertions(+), 666 deletions(-) diff --git a/apps/ios/Sources/Location/LocationService.swift b/apps/ios/Sources/Location/LocationService.swift index a095dc02d..320a5ab38 100644 --- a/apps/ios/Sources/Location/LocationService.swift +++ b/apps/ios/Sources/Location/LocationService.swift @@ -67,7 +67,7 @@ final class LocationService: NSObject, CLLocationManagerDelegate { } self.manager.desiredAccuracy = Self.accuracyValue(desiredAccuracy) - let timeout = max(0, timeoutMs ?? 10_000) + let timeout = max(0, timeoutMs ?? 10000) return try await self.withTimeout(timeoutMs: timeout) { try await self.requestLocation() } @@ -109,11 +109,11 @@ final class LocationService: NSObject, CLLocationManagerDelegate { private static func accuracyValue(_ accuracy: ClawdbotLocationAccuracy) -> CLLocationAccuracy { switch accuracy { case .coarse: - return kCLLocationAccuracyKilometer + kCLLocationAccuracyKilometer case .balanced: - return kCLLocationAccuracyHundredMeters + kCLLocationAccuracyHundredMeters case .precise: - return kCLLocationAccuracyBest + kCLLocationAccuracyBest } } diff --git a/apps/ios/Sources/Model/NodeAppModel.swift b/apps/ios/Sources/Model/NodeAppModel.swift index 5c9bc36d6..92d173816 100644 --- a/apps/ios/Sources/Model/NodeAppModel.swift +++ b/apps/ios/Sources/Model/NodeAppModel.swift @@ -250,7 +250,9 @@ final class NodeAppModel { return BridgeInvokeResponse( id: req.id, ok: false, - error: ClawdbotNodeError(code: .unavailable, message: "UNAVAILABLE: node not ready")) + error: ClawdbotNodeError( + code: .unavailable, + message: "UNAVAILABLE: node not ready")) } return await self.handleInvoke(req) }) @@ -454,13 +456,10 @@ final class NodeAppModel { return false } - // swiftlint:disable:next function_body_length cyclomatic_complexity private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse { let command = req.command - if command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen."), - self.isBackgrounded - { + if self.isBackgrounded, self.isBackgroundRestricted(command) { return BridgeInvokeResponse( id: req.id, ok: false, @@ -481,275 +480,23 @@ final class NodeAppModel { do { switch command { case ClawdbotLocationCommand.get.rawValue: - let mode = self.locationMode() - guard mode != .off else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "LOCATION_DISABLED: enable Location in Settings")) - } - if self.isBackgrounded, mode != .always { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .backgroundUnavailable, - message: "LOCATION_BACKGROUND_UNAVAILABLE: background location requires Always")) - } - let params = (try? Self.decodeParams(ClawdbotLocationGetParams.self, from: req.paramsJSON)) ?? - ClawdbotLocationGetParams() - let desired = params.desiredAccuracy ?? - (self.isLocationPreciseEnabled() ? .precise : .balanced) - let status = self.locationService.authorizationStatus() - if status != .authorizedAlways && status != .authorizedWhenInUse { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "LOCATION_PERMISSION_REQUIRED: grant Location permission")) - } - if self.isBackgrounded && status != .authorizedAlways { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "LOCATION_PERMISSION_REQUIRED: enable Always for background access")) - } - let location = try await self.locationService.currentLocation( - params: params, - desiredAccuracy: desired, - maxAgeMs: params.maxAgeMs, - timeoutMs: params.timeoutMs) - let isPrecise = self.locationService.accuracyAuthorization() == .fullAccuracy - let payload = ClawdbotLocationPayload( - lat: location.coordinate.latitude, - lon: location.coordinate.longitude, - accuracyMeters: location.horizontalAccuracy, - altitudeMeters: location.verticalAccuracy >= 0 ? location.altitude : nil, - speedMps: location.speed >= 0 ? location.speed : nil, - headingDeg: location.course >= 0 ? location.course : nil, - timestamp: ISO8601DateFormatter().string(from: location.timestamp), - isPrecise: isPrecise, - source: nil) - let json = try Self.encodePayload(payload) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) - - case ClawdbotCanvasCommand.present.rawValue: - let params = (try? Self.decodeParams(ClawdbotCanvasPresentParams.self, from: req.paramsJSON)) ?? - ClawdbotCanvasPresentParams() - let url = params.url?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - if url.isEmpty { - self.screen.showDefaultCanvas() - } else { - self.screen.navigate(to: url) - } - return BridgeInvokeResponse(id: req.id, ok: true) - - case ClawdbotCanvasCommand.hide.rawValue: - return BridgeInvokeResponse(id: req.id, ok: true) - - case ClawdbotCanvasCommand.navigate.rawValue: - let params = try Self.decodeParams(ClawdbotCanvasNavigateParams.self, from: req.paramsJSON) - self.screen.navigate(to: params.url) - return BridgeInvokeResponse(id: req.id, ok: true) - - case ClawdbotCanvasCommand.evalJS.rawValue: - let params = try Self.decodeParams(ClawdbotCanvasEvalParams.self, from: req.paramsJSON) - let result = try await self.screen.eval(javaScript: params.javaScript) - let payload = try Self.encodePayload(["result": result]) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCanvasCommand.snapshot.rawValue: - let params = try? Self.decodeParams(ClawdbotCanvasSnapshotParams.self, from: req.paramsJSON) - let format = params?.format ?? .jpeg - let maxWidth: CGFloat? = { - if let raw = params?.maxWidth, raw > 0 { return CGFloat(raw) } - // Keep default snapshots comfortably below the gateway client's maxPayload. - // For full-res, clients should explicitly request a larger maxWidth. - return switch format { - case .png: 900 - case .jpeg: 1600 - } - }() - let base64 = try await self.screen.snapshotBase64( - maxWidth: maxWidth, - format: format, - quality: params?.quality) - let payload = try Self.encodePayload([ - "format": format == .jpeg ? "jpeg" : "png", - "base64": base64, - ]) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCanvasA2UICommand.reset.rawValue: - guard let a2uiUrl = await self.resolveA2UIHostURL() else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "A2UI_HOST_NOT_CONFIGURED: gateway did not advertise canvas host")) - } - self.screen.navigate(to: a2uiUrl) - if await !self.screen.waitForA2UIReady(timeoutMs: 5000) { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "A2UI_HOST_UNAVAILABLE: A2UI host not reachable")) - } - - let json = try await self.screen.eval(javaScript: """ - (() => { - if (!globalThis.clawdbotA2UI) return JSON.stringify({ ok: false, error: "missing clawdbotA2UI" }); - return JSON.stringify(globalThis.clawdbotA2UI.reset()); - })() - """) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) - - case ClawdbotCanvasA2UICommand.push.rawValue, ClawdbotCanvasA2UICommand.pushJSONL.rawValue: - let messages: [AnyCodable] - if command == ClawdbotCanvasA2UICommand.pushJSONL.rawValue { - let params = try Self.decodeParams(ClawdbotCanvasA2UIPushJSONLParams.self, from: req.paramsJSON) - messages = try ClawdbotCanvasA2UIJSONL.decodeMessagesFromJSONL(params.jsonl) - } else { - do { - let params = try Self.decodeParams(ClawdbotCanvasA2UIPushParams.self, from: req.paramsJSON) - messages = params.messages - } catch { - // Be forgiving: some clients still send JSONL payloads to `canvas.a2ui.push`. - let params = try Self.decodeParams(ClawdbotCanvasA2UIPushJSONLParams.self, from: req.paramsJSON) - messages = try ClawdbotCanvasA2UIJSONL.decodeMessagesFromJSONL(params.jsonl) - } - } - - guard let a2uiUrl = await self.resolveA2UIHostURL() else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "A2UI_HOST_NOT_CONFIGURED: gateway did not advertise canvas host")) - } - self.screen.navigate(to: a2uiUrl) - if await !self.screen.waitForA2UIReady(timeoutMs: 5000) { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "A2UI_HOST_UNAVAILABLE: A2UI host not reachable")) - } - - let messagesJSON = try ClawdbotCanvasA2UIJSONL.encodeMessagesJSONArray(messages) - let js = """ - (() => { - try { - if (!globalThis.clawdbotA2UI) return JSON.stringify({ ok: false, error: "missing clawdbotA2UI" }); - const messages = \(messagesJSON); - return JSON.stringify(globalThis.clawdbotA2UI.applyMessages(messages)); - } catch (e) { - return JSON.stringify({ ok: false, error: String(e?.message ?? e) }); - } - })() - """ - let resultJSON = try await self.screen.eval(javaScript: js) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: resultJSON) - - case ClawdbotCameraCommand.list.rawValue: - let devices = await self.camera.listDevices() - struct Payload: Codable { - var devices: [CameraController.CameraDeviceInfo] - } - let payload = try Self.encodePayload(Payload(devices: devices)) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCameraCommand.snap.rawValue: - self.showCameraHUD(text: "Taking photo…", kind: .photo) - self.triggerCameraFlash() - let params = (try? Self.decodeParams(ClawdbotCameraSnapParams.self, from: req.paramsJSON)) ?? - ClawdbotCameraSnapParams() - let res = try await self.camera.snap(params: params) - - struct Payload: Codable { - var format: String - var base64: String - var width: Int - var height: Int - } - let payload = try Self.encodePayload(Payload( - format: res.format, - base64: res.base64, - width: res.width, - height: res.height)) - self.showCameraHUD(text: "Photo captured", kind: .success, autoHideSeconds: 1.6) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCameraCommand.clip.rawValue: - let params = (try? Self.decodeParams(ClawdbotCameraClipParams.self, from: req.paramsJSON)) ?? - ClawdbotCameraClipParams() - - let suspended = (params.includeAudio ?? true) ? self.voiceWake.suspendForExternalAudioCapture() : false - defer { self.voiceWake.resumeAfterExternalAudioCapture(wasSuspended: suspended) } - - self.showCameraHUD(text: "Recording…", kind: .recording) - let res = try await self.camera.clip(params: params) - - struct Payload: Codable { - var format: String - var base64: String - var durationMs: Int - var hasAudio: Bool - } - let payload = try Self.encodePayload(Payload( - format: res.format, - base64: res.base64, - durationMs: res.durationMs, - hasAudio: res.hasAudio)) - self.showCameraHUD(text: "Clip captured", kind: .success, autoHideSeconds: 1.8) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - + return try await self.handleLocationInvoke(req) + case ClawdbotCanvasCommand.present.rawValue, + ClawdbotCanvasCommand.hide.rawValue, + ClawdbotCanvasCommand.navigate.rawValue, + ClawdbotCanvasCommand.evalJS.rawValue, + ClawdbotCanvasCommand.snapshot.rawValue: + return try await self.handleCanvasInvoke(req) + case ClawdbotCanvasA2UICommand.reset.rawValue, + ClawdbotCanvasA2UICommand.push.rawValue, + ClawdbotCanvasA2UICommand.pushJSONL.rawValue: + return try await self.handleCanvasA2UIInvoke(req) + case ClawdbotCameraCommand.list.rawValue, + ClawdbotCameraCommand.snap.rawValue, + ClawdbotCameraCommand.clip.rawValue: + return try await self.handleCameraInvoke(req) case ClawdbotScreenCommand.record.rawValue: - let params = (try? Self.decodeParams(ClawdbotScreenRecordParams.self, from: req.paramsJSON)) ?? - ClawdbotScreenRecordParams() - if let format = params.format, format.lowercased() != "mp4" { - throw NSError(domain: "Screen", code: 30, userInfo: [ - NSLocalizedDescriptionKey: "INVALID_REQUEST: screen format must be mp4", - ]) - } - // Status pill mirrors screen recording state so it stays visible without overlay stacking. - self.screenRecordActive = true - defer { self.screenRecordActive = false } - let path = try await self.screenRecorder.record( - screenIndex: params.screenIndex, - durationMs: params.durationMs, - fps: params.fps, - includeAudio: params.includeAudio, - outPath: nil) - defer { try? FileManager.default.removeItem(atPath: path) } - let data = try Data(contentsOf: URL(fileURLWithPath: path)) - struct Payload: Codable { - var format: String - var base64: String - var durationMs: Int? - var fps: Double? - var screenIndex: Int? - var hasAudio: Bool - } - let payload = try Self.encodePayload(Payload( - format: "mp4", - base64: data.base64EncodedString(), - durationMs: params.durationMs, - fps: params.fps, - screenIndex: params.screenIndex, - hasAudio: params.includeAudio ?? true)) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - + return try await self.handleScreenRecordInvoke(req) default: return BridgeInvokeResponse( id: req.id, @@ -768,6 +515,303 @@ final class NodeAppModel { } } + private func isBackgroundRestricted(_ command: String) -> Bool { + command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.") + } + + private func handleLocationInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let mode = self.locationMode() + guard mode != .off else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "LOCATION_DISABLED: enable Location in Settings")) + } + if self.isBackgrounded, mode != .always { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .backgroundUnavailable, + message: "LOCATION_BACKGROUND_UNAVAILABLE: background location requires Always")) + } + let params = (try? Self.decodeParams(ClawdbotLocationGetParams.self, from: req.paramsJSON)) ?? + ClawdbotLocationGetParams() + let desired = params.desiredAccuracy ?? + (self.isLocationPreciseEnabled() ? .precise : .balanced) + let status = self.locationService.authorizationStatus() + if status != .authorizedAlways, status != .authorizedWhenInUse { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "LOCATION_PERMISSION_REQUIRED: grant Location permission")) + } + if self.isBackgrounded, status != .authorizedAlways { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "LOCATION_PERMISSION_REQUIRED: enable Always for background access")) + } + let location = try await self.locationService.currentLocation( + params: params, + desiredAccuracy: desired, + maxAgeMs: params.maxAgeMs, + timeoutMs: params.timeoutMs) + let isPrecise = self.locationService.accuracyAuthorization() == .fullAccuracy + let payload = ClawdbotLocationPayload( + lat: location.coordinate.latitude, + lon: location.coordinate.longitude, + accuracyMeters: location.horizontalAccuracy, + altitudeMeters: location.verticalAccuracy >= 0 ? location.altitude : nil, + speedMps: location.speed >= 0 ? location.speed : nil, + headingDeg: location.course >= 0 ? location.course : nil, + timestamp: ISO8601DateFormatter().string(from: location.timestamp), + isPrecise: isPrecise, + source: nil) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + } + + private func handleCanvasInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case ClawdbotCanvasCommand.present.rawValue: + let params = (try? Self.decodeParams(ClawdbotCanvasPresentParams.self, from: req.paramsJSON)) ?? + ClawdbotCanvasPresentParams() + let url = params.url?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if url.isEmpty { + self.screen.showDefaultCanvas() + } else { + self.screen.navigate(to: url) + } + return BridgeInvokeResponse(id: req.id, ok: true) + case ClawdbotCanvasCommand.hide.rawValue: + return BridgeInvokeResponse(id: req.id, ok: true) + case ClawdbotCanvasCommand.navigate.rawValue: + let params = try Self.decodeParams(ClawdbotCanvasNavigateParams.self, from: req.paramsJSON) + self.screen.navigate(to: params.url) + return BridgeInvokeResponse(id: req.id, ok: true) + case ClawdbotCanvasCommand.evalJS.rawValue: + let params = try Self.decodeParams(ClawdbotCanvasEvalParams.self, from: req.paramsJSON) + let result = try await self.screen.eval(javaScript: params.javaScript) + let payload = try Self.encodePayload(["result": result]) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdbotCanvasCommand.snapshot.rawValue: + let params = try? Self.decodeParams(ClawdbotCanvasSnapshotParams.self, from: req.paramsJSON) + let format = params?.format ?? .jpeg + let maxWidth: CGFloat? = { + if let raw = params?.maxWidth, raw > 0 { return CGFloat(raw) } + // Keep default snapshots comfortably below the gateway client's maxPayload. + // For full-res, clients should explicitly request a larger maxWidth. + return switch format { + case .png: 900 + case .jpeg: 1600 + } + }() + let base64 = try await self.screen.snapshotBase64( + maxWidth: maxWidth, + format: format, + quality: params?.quality) + let payload = try Self.encodePayload([ + "format": format == .jpeg ? "jpeg" : "png", + "base64": base64, + ]) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + default: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + } + } + + private func handleCanvasA2UIInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let command = req.command + switch command { + case ClawdbotCanvasA2UICommand.reset.rawValue: + guard let a2uiUrl = await self.resolveA2UIHostURL() else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "A2UI_HOST_NOT_CONFIGURED: gateway did not advertise canvas host")) + } + self.screen.navigate(to: a2uiUrl) + if await !self.screen.waitForA2UIReady(timeoutMs: 5000) { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "A2UI_HOST_UNAVAILABLE: A2UI host not reachable")) + } + + let json = try await self.screen.eval(javaScript: """ + (() => { + if (!globalThis.clawdbotA2UI) return JSON.stringify({ ok: false, error: "missing clawdbotA2UI" }); + return JSON.stringify(globalThis.clawdbotA2UI.reset()); + })() + """) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + case ClawdbotCanvasA2UICommand.push.rawValue, ClawdbotCanvasA2UICommand.pushJSONL.rawValue: + let messages: [AnyCodable] + if command == ClawdbotCanvasA2UICommand.pushJSONL.rawValue { + let params = try Self.decodeParams(ClawdbotCanvasA2UIPushJSONLParams.self, from: req.paramsJSON) + messages = try ClawdbotCanvasA2UIJSONL.decodeMessagesFromJSONL(params.jsonl) + } else { + do { + let params = try Self.decodeParams(ClawdbotCanvasA2UIPushParams.self, from: req.paramsJSON) + messages = params.messages + } catch { + // Be forgiving: some clients still send JSONL payloads to `canvas.a2ui.push`. + let params = try Self.decodeParams(ClawdbotCanvasA2UIPushJSONLParams.self, from: req.paramsJSON) + messages = try ClawdbotCanvasA2UIJSONL.decodeMessagesFromJSONL(params.jsonl) + } + } + + guard let a2uiUrl = await self.resolveA2UIHostURL() else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "A2UI_HOST_NOT_CONFIGURED: gateway did not advertise canvas host")) + } + self.screen.navigate(to: a2uiUrl) + if await !self.screen.waitForA2UIReady(timeoutMs: 5000) { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "A2UI_HOST_UNAVAILABLE: A2UI host not reachable")) + } + + let messagesJSON = try ClawdbotCanvasA2UIJSONL.encodeMessagesJSONArray(messages) + let js = """ + (() => { + try { + if (!globalThis.clawdbotA2UI) return JSON.stringify({ ok: false, error: "missing clawdbotA2UI" }); + const messages = \(messagesJSON); + return JSON.stringify(globalThis.clawdbotA2UI.applyMessages(messages)); + } catch (e) { + return JSON.stringify({ ok: false, error: String(e?.message ?? e) }); + } + })() + """ + let resultJSON = try await self.screen.eval(javaScript: js) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: resultJSON) + default: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + } + } + + private func handleCameraInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case ClawdbotCameraCommand.list.rawValue: + let devices = await self.camera.listDevices() + struct Payload: Codable { + var devices: [CameraController.CameraDeviceInfo] + } + let payload = try Self.encodePayload(Payload(devices: devices)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdbotCameraCommand.snap.rawValue: + self.showCameraHUD(text: "Taking photo…", kind: .photo) + self.triggerCameraFlash() + let params = (try? Self.decodeParams(ClawdbotCameraSnapParams.self, from: req.paramsJSON)) ?? + ClawdbotCameraSnapParams() + let res = try await self.camera.snap(params: params) + + struct Payload: Codable { + var format: String + var base64: String + var width: Int + var height: Int + } + let payload = try Self.encodePayload(Payload( + format: res.format, + base64: res.base64, + width: res.width, + height: res.height)) + self.showCameraHUD(text: "Photo captured", kind: .success, autoHideSeconds: 1.6) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdbotCameraCommand.clip.rawValue: + let params = (try? Self.decodeParams(ClawdbotCameraClipParams.self, from: req.paramsJSON)) ?? + ClawdbotCameraClipParams() + + let suspended = (params.includeAudio ?? true) ? self.voiceWake.suspendForExternalAudioCapture() : false + defer { self.voiceWake.resumeAfterExternalAudioCapture(wasSuspended: suspended) } + + self.showCameraHUD(text: "Recording…", kind: .recording) + let res = try await self.camera.clip(params: params) + + struct Payload: Codable { + var format: String + var base64: String + var durationMs: Int + var hasAudio: Bool + } + let payload = try Self.encodePayload(Payload( + format: res.format, + base64: res.base64, + durationMs: res.durationMs, + hasAudio: res.hasAudio)) + self.showCameraHUD(text: "Clip captured", kind: .success, autoHideSeconds: 1.8) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + default: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + } + } + + private func handleScreenRecordInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let params = (try? Self.decodeParams(ClawdbotScreenRecordParams.self, from: req.paramsJSON)) ?? + ClawdbotScreenRecordParams() + if let format = params.format, format.lowercased() != "mp4" { + throw NSError(domain: "Screen", code: 30, userInfo: [ + NSLocalizedDescriptionKey: "INVALID_REQUEST: screen format must be mp4", + ]) + } + // Status pill mirrors screen recording state so it stays visible without overlay stacking. + self.screenRecordActive = true + defer { self.screenRecordActive = false } + let path = try await self.screenRecorder.record( + screenIndex: params.screenIndex, + durationMs: params.durationMs, + fps: params.fps, + includeAudio: params.includeAudio, + outPath: nil) + defer { try? FileManager.default.removeItem(atPath: path) } + let data = try Data(contentsOf: URL(fileURLWithPath: path)) + struct Payload: Codable { + var format: String + var base64: String + var durationMs: Int? + var fps: Double? + var screenIndex: Int? + var hasAudio: Bool + } + let payload = try Self.encodePayload(Payload( + format: "mp4", + base64: data.base64EncodedString(), + durationMs: params.durationMs, + fps: params.fps, + screenIndex: params.screenIndex, + hasAudio: params.includeAudio ?? true)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + } + private func locationMode() -> ClawdbotLocationMode { let raw = UserDefaults.standard.string(forKey: "location.enabledMode") ?? "off" return ClawdbotLocationMode(rawValue: raw) ?? .off diff --git a/apps/ios/Sources/Screen/ScreenRecordService.swift b/apps/ios/Sources/Screen/ScreenRecordService.swift index 1b8bc38f7..d87835511 100644 --- a/apps/ios/Sources/Screen/ScreenRecordService.swift +++ b/apps/ios/Sources/Screen/ScreenRecordService.swift @@ -40,7 +40,6 @@ final class ScreenRecordService: @unchecked Sendable { } } - // swiftlint:disable:next cyclomatic_complexity func record( screenIndex: Int?, durationMs: Int?, @@ -48,165 +47,244 @@ final class ScreenRecordService: @unchecked Sendable { includeAudio: Bool?, outPath: String?) async throws -> String { + let config = try self.makeRecordConfig( + screenIndex: screenIndex, + durationMs: durationMs, + fps: fps, + includeAudio: includeAudio, + outPath: outPath) + + let state = CaptureState() + let recordQueue = DispatchQueue(label: "com.clawdis.screenrecord") + + try await self.startCapture(state: state, config: config, recordQueue: recordQueue) + try await Task.sleep(nanoseconds: UInt64(config.durationMs) * 1_000_000) + try await self.stopCapture() + try self.finalizeCapture(state: state) + try await self.finishWriting(state: state) + + return config.outURL.path + } + + private struct RecordConfig { + let durationMs: Int + let fpsValue: Double + let includeAudio: Bool + let outURL: URL + } + + private func makeRecordConfig( + screenIndex: Int?, + durationMs: Int?, + fps: Double?, + includeAudio: Bool?, + outPath: String?) throws -> RecordConfig + { + if let idx = screenIndex, idx != 0 { + throw ScreenRecordError.invalidScreenIndex(idx) + } + let durationMs = Self.clampDurationMs(durationMs) let fps = Self.clampFps(fps) let fpsInt = Int32(fps.rounded()) let fpsValue = Double(fpsInt) let includeAudio = includeAudio ?? true - if let idx = screenIndex, idx != 0 { - throw ScreenRecordError.invalidScreenIndex(idx) - } - - let outURL: URL = { - if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { - return URL(fileURLWithPath: outPath) - } - return FileManager.default.temporaryDirectory - .appendingPathComponent("clawdbot-screen-record-\(UUID().uuidString).mp4") - }() + let outURL = self.makeOutputURL(outPath: outPath) try? FileManager.default.removeItem(at: outURL) - let state = CaptureState() - let recordQueue = DispatchQueue(label: "com.clawdbot.screenrecord") + return RecordConfig( + durationMs: durationMs, + fpsValue: fpsValue, + includeAudio: includeAudio, + outURL: outURL) + } + private func makeOutputURL(outPath: String?) -> URL { + if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + return URL(fileURLWithPath: outPath) + } + return FileManager.default.temporaryDirectory + .appendingPathComponent("clawdbot-screen-record-\(UUID().uuidString).mp4") + } + + private func startCapture( + state: CaptureState, + config: RecordConfig, + recordQueue: DispatchQueue) async throws + { try await withCheckedThrowingContinuation { (cont: CheckedContinuation) in - let handler: @Sendable (CMSampleBuffer, RPSampleBufferType, Error?) -> Void = { sample, type, error in - // ReplayKit can call the capture handler on a background queue. - // Serialize writes to avoid queue asserts. - recordQueue.async { - if let error { - state.withLock { state in - if state.handlerError == nil { state.handlerError = error } - } - return - } - guard CMSampleBufferDataIsReady(sample) else { return } - - switch type { - case .video: - let pts = CMSampleBufferGetPresentationTimeStamp(sample) - let shouldSkip = state.withLock { state in - if let lastVideoTime = state.lastVideoTime { - let delta = CMTimeSubtract(pts, lastVideoTime) - return delta.seconds < (1.0 / fpsValue) - } - return false - } - if shouldSkip { return } - - if state.withLock({ $0.writer == nil }) { - guard let imageBuffer = CMSampleBufferGetImageBuffer(sample) else { - state.withLock { state in - if state.handlerError == nil { - state.handlerError = ScreenRecordError.captureFailed("Missing image buffer") - } - } - return - } - let width = CVPixelBufferGetWidth(imageBuffer) - let height = CVPixelBufferGetHeight(imageBuffer) - do { - let w = try AVAssetWriter(outputURL: outURL, fileType: .mp4) - let settings: [String: Any] = [ - AVVideoCodecKey: AVVideoCodecType.h264, - AVVideoWidthKey: width, - AVVideoHeightKey: height, - ] - let vInput = AVAssetWriterInput(mediaType: .video, outputSettings: settings) - vInput.expectsMediaDataInRealTime = true - guard w.canAdd(vInput) else { - throw ScreenRecordError.writeFailed("Cannot add video input") - } - w.add(vInput) - - if includeAudio { - let aInput = AVAssetWriterInput(mediaType: .audio, outputSettings: nil) - aInput.expectsMediaDataInRealTime = true - if w.canAdd(aInput) { - w.add(aInput) - state.withLock { state in - state.audioInput = aInput - } - } - } - - guard w.startWriting() else { - throw ScreenRecordError - .writeFailed(w.error?.localizedDescription ?? "Failed to start writer") - } - w.startSession(atSourceTime: pts) - state.withLock { state in - state.writer = w - state.videoInput = vInput - state.started = true - } - } catch { - state.withLock { state in - if state.handlerError == nil { state.handlerError = error } - } - return - } - } - - let vInput = state.withLock { $0.videoInput } - let isStarted = state.withLock { $0.started } - guard let vInput, isStarted else { return } - if vInput.isReadyForMoreMediaData { - if vInput.append(sample) { - state.withLock { state in - state.sawVideo = true - state.lastVideoTime = pts - } - } else { - let err = state.withLock { $0.writer?.error } - if let err { - state.withLock { state in - if state.handlerError == nil { - state.handlerError = ScreenRecordError.writeFailed(err.localizedDescription) - } - } - } - } - } - - case .audioApp, .audioMic: - let aInput = state.withLock { $0.audioInput } - let isStarted = state.withLock { $0.started } - guard includeAudio, let aInput, isStarted else { return } - if aInput.isReadyForMoreMediaData { - _ = aInput.append(sample) - } - - @unknown default: - break - } - } - } - + let handler = self.makeCaptureHandler( + state: state, + config: config, + recordQueue: recordQueue) let completion: @Sendable (Error?) -> Void = { error in if let error { cont.resume(throwing: error) } else { cont.resume() } } Task { @MainActor in startReplayKitCapture( - includeAudio: includeAudio, + includeAudio: config.includeAudio, handler: handler, completion: completion) } } + } - try await Task.sleep(nanoseconds: UInt64(durationMs) * 1_000_000) + private func makeCaptureHandler( + state: CaptureState, + config: RecordConfig, + recordQueue: DispatchQueue) -> @Sendable (CMSampleBuffer, RPSampleBufferType, Error?) -> Void + { + { sample, type, error in + // ReplayKit can call the capture handler on a background queue. + // Serialize writes to avoid queue asserts. + recordQueue.async { + if let error { + state.withLock { state in + if state.handlerError == nil { state.handlerError = error } + } + return + } + guard CMSampleBufferDataIsReady(sample) else { return } + switch type { + case .video: + self.handleVideoSample(sample, state: state, config: config) + case .audioApp, .audioMic: + self.handleAudioSample(sample, state: state, includeAudio: config.includeAudio) + @unknown default: + break + } + } + } + } + + private func handleVideoSample( + _ sample: CMSampleBuffer, + state: CaptureState, + config: RecordConfig) + { + let pts = CMSampleBufferGetPresentationTimeStamp(sample) + let shouldSkip = state.withLock { state in + if let lastVideoTime = state.lastVideoTime { + let delta = CMTimeSubtract(pts, lastVideoTime) + return delta.seconds < (1.0 / config.fpsValue) + } + return false + } + if shouldSkip { return } + + if state.withLock({ $0.writer == nil }) { + self.prepareWriter(sample: sample, state: state, config: config, pts: pts) + } + + let vInput = state.withLock { $0.videoInput } + let isStarted = state.withLock { $0.started } + guard let vInput, isStarted else { return } + if vInput.isReadyForMoreMediaData { + if vInput.append(sample) { + state.withLock { state in + state.sawVideo = true + state.lastVideoTime = pts + } + } else { + let err = state.withLock { $0.writer?.error } + if let err { + state.withLock { state in + if state.handlerError == nil { + state.handlerError = ScreenRecordError.writeFailed(err.localizedDescription) + } + } + } + } + } + } + + private func prepareWriter( + sample: CMSampleBuffer, + state: CaptureState, + config: RecordConfig, + pts: CMTime) + { + guard let imageBuffer = CMSampleBufferGetImageBuffer(sample) else { + state.withLock { state in + if state.handlerError == nil { + state.handlerError = ScreenRecordError.captureFailed("Missing image buffer") + } + } + return + } + let width = CVPixelBufferGetWidth(imageBuffer) + let height = CVPixelBufferGetHeight(imageBuffer) + do { + let writer = try AVAssetWriter(outputURL: config.outURL, fileType: .mp4) + let settings: [String: Any] = [ + AVVideoCodecKey: AVVideoCodecType.h264, + AVVideoWidthKey: width, + AVVideoHeightKey: height, + ] + let vInput = AVAssetWriterInput(mediaType: .video, outputSettings: settings) + vInput.expectsMediaDataInRealTime = true + guard writer.canAdd(vInput) else { + throw ScreenRecordError.writeFailed("Cannot add video input") + } + writer.add(vInput) + + if config.includeAudio { + let aInput = AVAssetWriterInput(mediaType: .audio, outputSettings: nil) + aInput.expectsMediaDataInRealTime = true + if writer.canAdd(aInput) { + writer.add(aInput) + state.withLock { state in + state.audioInput = aInput + } + } + } + + guard writer.startWriting() else { + throw ScreenRecordError.writeFailed( + writer.error?.localizedDescription ?? "Failed to start writer") + } + writer.startSession(atSourceTime: pts) + state.withLock { state in + state.writer = writer + state.videoInput = vInput + state.started = true + } + } catch { + state.withLock { state in + if state.handlerError == nil { state.handlerError = error } + } + } + } + + private func handleAudioSample( + _ sample: CMSampleBuffer, + state: CaptureState, + includeAudio: Bool) + { + let aInput = state.withLock { $0.audioInput } + let isStarted = state.withLock { $0.started } + guard includeAudio, let aInput, isStarted else { return } + if aInput.isReadyForMoreMediaData { + _ = aInput.append(sample) + } + } + + private func stopCapture() async throws { let stopError = await withCheckedContinuation { cont in Task { @MainActor in stopReplayKitCapture { error in cont.resume(returning: error) } } } if let stopError { throw stopError } + } - let handlerErrorSnapshot = state.withLock { $0.handlerError } - if let handlerErrorSnapshot { throw handlerErrorSnapshot } + private func finalizeCapture(state: CaptureState) throws { + if let handlerErrorSnapshot = state.withLock({ $0.handlerError }) { + throw handlerErrorSnapshot + } let writerSnapshot = state.withLock { $0.writer } let videoInputSnapshot = state.withLock { $0.videoInput } let audioInputSnapshot = state.withLock { $0.audioInput } @@ -217,7 +295,13 @@ final class ScreenRecordService: @unchecked Sendable { videoInputSnapshot.markAsFinished() audioInputSnapshot?.markAsFinished() + _ = writerSnapshot + } + private func finishWriting(state: CaptureState) async throws { + guard let writerSnapshot = state.withLock({ $0.writer }) else { + throw ScreenRecordError.captureFailed("Missing writer") + } let writerBox = UncheckedSendableBox(value: writerSnapshot) try await withCheckedThrowingContinuation { (cont: CheckedContinuation) in writerBox.value.finishWriting { @@ -231,8 +315,6 @@ final class ScreenRecordService: @unchecked Sendable { } } } - - return outURL.path } private nonisolated static func clampDurationMs(_ ms: Int?) -> Int { diff --git a/apps/ios/Sources/Voice/TalkModeManager.swift b/apps/ios/Sources/Voice/TalkModeManager.swift index f9f21f430..21ab74111 100644 --- a/apps/ios/Sources/Voice/TalkModeManager.swift +++ b/apps/ios/Sources/Voice/TalkModeManager.swift @@ -288,9 +288,9 @@ final class TalkModeManager: NSObject { self.chatSubscribedSessionKeys.insert(key) self.logger.info("chat.subscribe ok sessionKey=\(key, privacy: .public)") } catch { - self.logger - .warning( - "chat.subscribe failed sessionKey=\(key, privacy: .public) err=\(error.localizedDescription, privacy: .public)") + self.logger.warning( + "chat.subscribe failed sessionKey=\(key, privacy: .public) " + + "err=\(error.localizedDescription, privacy: .public)") } } @@ -340,7 +340,12 @@ final class TalkModeManager: NSObject { "idempotencyKey": UUID().uuidString, ] let data = try JSONSerialization.data(withJSONObject: payload) - let json = String(decoding: data, as: UTF8.self) + guard let json = String(bytes: data, encoding: .utf8) else { + throw NSError( + domain: "TalkModeManager", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "Failed to encode chat payload"]) + } let res = try await bridge.request(method: "chat.send", paramsJSON: json, timeoutSeconds: 30) let decoded = try JSONDecoder().decode(SendResponse.self, from: res) return decoded.runId @@ -523,9 +528,9 @@ final class TalkModeManager: NSObject { self.lastPlaybackWasPCM = false result = await self.mp3Player.play(stream: stream) } - self.logger - .info( - "elevenlabs stream finished=\(result.finished, privacy: .public) dur=\(Date().timeIntervalSince(started), privacy: .public)s") + self.logger.info( + "elevenlabs stream finished=\(result.finished, privacy: .public) " + + "dur=\(Date().timeIntervalSince(started), privacy: .public)s") if !result.finished, let interruptedAt = result.interruptedAt { self.lastInterruptedAtSeconds = interruptedAt } diff --git a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgePairingClient.swift b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgePairingClient.swift index 1fbaeb67e..2feae8482 100644 --- a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgePairingClient.swift +++ b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgePairingClient.swift @@ -192,6 +192,4 @@ actor MacNodeBridgePairingClient { } } } - - } diff --git a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgeSession.swift b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgeSession.swift index f2631faac..d9b7c5777 100644 --- a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgeSession.swift +++ b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeBridgeSession.swift @@ -325,6 +325,4 @@ actor MacNodeBridgeSession { ]) }) } - - } diff --git a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeLocationService.swift b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeLocationService.swift index 1ac734697..2124ba720 100644 --- a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeLocationService.swift +++ b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeLocationService.swift @@ -47,7 +47,7 @@ final class MacNodeLocationService: NSObject, CLLocationManagerDelegate { } self.manager.desiredAccuracy = Self.accuracyValue(desiredAccuracy) - let timeout = max(0, timeoutMs ?? 10_000) + let timeout = max(0, timeoutMs ?? 10000) return try await self.withTimeout(timeoutMs: timeout) { try await self.requestLocation() } @@ -83,11 +83,11 @@ final class MacNodeLocationService: NSObject, CLLocationManagerDelegate { private static func accuracyValue(_ accuracy: ClawdbotLocationAccuracy) -> CLLocationAccuracy { switch accuracy { case .coarse: - return kCLLocationAccuracyKilometer + kCLLocationAccuracyKilometer case .balanced: - return kCLLocationAccuracyHundredMeters + kCLLocationAccuracyHundredMeters case .precise: - return kCLLocationAccuracyBest + kCLLocationAccuracyBest } } diff --git a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift index e39aaf280..cf0e28372 100644 --- a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift +++ b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift @@ -8,10 +8,9 @@ actor MacNodeRuntime { @MainActor private let screenRecorder = ScreenRecordService() @MainActor private let locationService = MacNodeLocationService() - // swiftlint:disable:next function_body_length cyclomatic_complexity func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse { let command = req.command - if command.hasPrefix("canvas.") || command.hasPrefix("canvas.a2ui."), !Self.canvasEnabled() { + if self.isCanvasCommand(command), !Self.canvasEnabled() { return BridgeInvokeResponse( id: req.id, ok: false, @@ -21,251 +20,28 @@ actor MacNodeRuntime { } do { switch command { - case ClawdbotCanvasCommand.present.rawValue: - let params = (try? Self.decodeParams(ClawdbotCanvasPresentParams.self, from: req.paramsJSON)) ?? - ClawdbotCanvasPresentParams() - let urlTrimmed = params.url?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - let url = urlTrimmed.isEmpty ? nil : urlTrimmed - let placement = params.placement.map { - CanvasPlacement(x: $0.x, y: $0.y, width: $0.width, height: $0.height) - } - try await MainActor.run { - _ = try CanvasManager.shared.showDetailed( - sessionKey: "main", - target: url, - placement: placement) - } - return BridgeInvokeResponse(id: req.id, ok: true) - - case ClawdbotCanvasCommand.hide.rawValue: - await MainActor.run { - CanvasManager.shared.hide(sessionKey: "main") - } - return BridgeInvokeResponse(id: req.id, ok: true) - - case ClawdbotCanvasCommand.navigate.rawValue: - let params = try Self.decodeParams(ClawdbotCanvasNavigateParams.self, from: req.paramsJSON) - try await MainActor.run { - _ = try CanvasManager.shared.show(sessionKey: "main", path: params.url) - } - return BridgeInvokeResponse(id: req.id, ok: true) - - case ClawdbotCanvasCommand.evalJS.rawValue: - let params = try Self.decodeParams(ClawdbotCanvasEvalParams.self, from: req.paramsJSON) - let result = try await CanvasManager.shared.eval( - sessionKey: "main", - javaScript: params.javaScript) - let payload = try Self.encodePayload(["result": result] as [String: String]) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCanvasCommand.snapshot.rawValue: - let params = try? Self.decodeParams(ClawdbotCanvasSnapshotParams.self, from: req.paramsJSON) - let format = params?.format ?? .jpeg - let maxWidth: Int? = { - if let raw = params?.maxWidth, raw > 0 { return raw } - return switch format { - case .png: 900 - case .jpeg: 1600 - } - }() - let quality = params?.quality ?? 0.9 - - let path = try await CanvasManager.shared.snapshot(sessionKey: "main", outPath: nil) - defer { try? FileManager.default.removeItem(atPath: path) } - let data = try Data(contentsOf: URL(fileURLWithPath: path)) - guard let image = NSImage(data: data) else { - return Self.errorResponse(req, code: .unavailable, message: "canvas snapshot decode failed") - } - let encoded = try Self.encodeCanvasSnapshot( - image: image, - format: format, - maxWidth: maxWidth, - quality: quality) - let payload = try Self.encodePayload([ - "format": format == .jpeg ? "jpeg" : "png", - "base64": encoded.base64EncodedString(), - ]) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCanvasA2UICommand.reset.rawValue: - return try await self.handleA2UIReset(req) - - case ClawdbotCanvasA2UICommand.push.rawValue, ClawdbotCanvasA2UICommand.pushJSONL.rawValue: - return try await self.handleA2UIPush(req) - - case ClawdbotCameraCommand.snap.rawValue: - guard Self.cameraEnabled() else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "CAMERA_DISABLED: enable Camera in Settings")) - } - let params = (try? Self.decodeParams(ClawdbotCameraSnapParams.self, from: req.paramsJSON)) ?? - ClawdbotCameraSnapParams() - let delayMs = min(10_000, max(0, params.delayMs ?? 2000)) - let res = try await self.cameraCapture.snap( - facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front, - maxWidth: params.maxWidth, - quality: params.quality, - deviceId: params.deviceId, - delayMs: delayMs) - struct SnapPayload: Encodable { - var format: String - var base64: String - var width: Int - var height: Int - } - let payload = try Self.encodePayload(SnapPayload( - format: (params.format ?? .jpg).rawValue, - base64: res.data.base64EncodedString(), - width: Int(res.size.width), - height: Int(res.size.height))) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCameraCommand.clip.rawValue: - guard Self.cameraEnabled() else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "CAMERA_DISABLED: enable Camera in Settings")) - } - let params = (try? Self.decodeParams(ClawdbotCameraClipParams.self, from: req.paramsJSON)) ?? - ClawdbotCameraClipParams() - let res = try await self.cameraCapture.clip( - facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front, - durationMs: params.durationMs, - includeAudio: params.includeAudio ?? true, - deviceId: params.deviceId, - outPath: nil) - defer { try? FileManager.default.removeItem(atPath: res.path) } - let data = try Data(contentsOf: URL(fileURLWithPath: res.path)) - struct ClipPayload: Encodable { - var format: String - var base64: String - var durationMs: Int - var hasAudio: Bool - } - let payload = try Self.encodePayload(ClipPayload( - format: (params.format ?? .mp4).rawValue, - base64: data.base64EncodedString(), - durationMs: res.durationMs, - hasAudio: res.hasAudio)) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - - case ClawdbotCameraCommand.list.rawValue: - guard Self.cameraEnabled() else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "CAMERA_DISABLED: enable Camera in Settings")) - } - let devices = await self.cameraCapture.listDevices() - let payload = try Self.encodePayload(["devices": devices]) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - + case ClawdbotCanvasCommand.present.rawValue, + ClawdbotCanvasCommand.hide.rawValue, + ClawdbotCanvasCommand.navigate.rawValue, + ClawdbotCanvasCommand.evalJS.rawValue, + ClawdbotCanvasCommand.snapshot.rawValue: + return try await self.handleCanvasInvoke(req) + case ClawdbotCanvasA2UICommand.reset.rawValue, + ClawdbotCanvasA2UICommand.push.rawValue, + ClawdbotCanvasA2UICommand.pushJSONL.rawValue: + return try await self.handleA2UIInvoke(req) + case ClawdbotCameraCommand.snap.rawValue, + ClawdbotCameraCommand.clip.rawValue, + ClawdbotCameraCommand.list.rawValue: + return try await self.handleCameraInvoke(req) case ClawdbotLocationCommand.get.rawValue: - let mode = Self.locationMode() - guard mode != .off else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "LOCATION_DISABLED: enable Location in Settings")) - } - let params = (try? Self.decodeParams(ClawdbotLocationGetParams.self, from: req.paramsJSON)) ?? - ClawdbotLocationGetParams() - let desired = params.desiredAccuracy ?? - (Self.locationPreciseEnabled() ? .precise : .balanced) - let status = await self.locationService.authorizationStatus() - if status != .authorizedAlways && status != .authorizedWhenInUse { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "LOCATION_PERMISSION_REQUIRED: grant Location permission")) - } - do { - let location = try await self.locationService.currentLocation( - desiredAccuracy: desired, - maxAgeMs: params.maxAgeMs, - timeoutMs: params.timeoutMs) - let isPrecise = await self.locationService.accuracyAuthorization() == .fullAccuracy - let payload = ClawdbotLocationPayload( - lat: location.coordinate.latitude, - lon: location.coordinate.longitude, - accuracyMeters: location.horizontalAccuracy, - altitudeMeters: location.verticalAccuracy >= 0 ? location.altitude : nil, - speedMps: location.speed >= 0 ? location.speed : nil, - headingDeg: location.course >= 0 ? location.course : nil, - timestamp: ISO8601DateFormatter().string(from: location.timestamp), - isPrecise: isPrecise, - source: nil) - let json = try Self.encodePayload(payload) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) - } catch MacNodeLocationService.Error.timeout { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "LOCATION_TIMEOUT: no fix in time")) - } catch { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: ClawdbotNodeError( - code: .unavailable, - message: "LOCATION_UNAVAILABLE: \(error.localizedDescription)")) - } - + return try await self.handleLocationInvoke(req) case MacNodeScreenCommand.record.rawValue: - let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ?? - MacNodeScreenRecordParams() - if let format = params.format?.lowercased(), !format.isEmpty, format != "mp4" { - return Self.errorResponse( - req, - code: .invalidRequest, - message: "INVALID_REQUEST: screen format must be mp4") - } - let res = try await self.screenRecorder.record( - screenIndex: params.screenIndex, - durationMs: params.durationMs, - fps: params.fps, - includeAudio: params.includeAudio, - outPath: nil) - defer { try? FileManager.default.removeItem(atPath: res.path) } - let data = try Data(contentsOf: URL(fileURLWithPath: res.path)) - struct ScreenPayload: Encodable { - var format: String - var base64: String - var durationMs: Int? - var fps: Double? - var screenIndex: Int? - var hasAudio: Bool - } - let payload = try Self.encodePayload(ScreenPayload( - format: "mp4", - base64: data.base64EncodedString(), - durationMs: params.durationMs, - fps: params.fps, - screenIndex: params.screenIndex, - hasAudio: res.hasAudio)) - return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) - + return try await self.handleScreenRecordInvoke(req) case ClawdbotSystemCommand.run.rawValue: return try await self.handleSystemRun(req) - case ClawdbotSystemCommand.notify.rawValue: return try await self.handleSystemNotify(req) - default: return Self.errorResponse(req, code: .invalidRequest, message: "INVALID_REQUEST: unknown command") } @@ -274,6 +50,247 @@ actor MacNodeRuntime { } } + private func isCanvasCommand(_ command: String) -> Bool { + command.hasPrefix("canvas.") || command.hasPrefix("canvas.a2ui.") + } + + private func handleCanvasInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case ClawdbotCanvasCommand.present.rawValue: + let params = (try? Self.decodeParams(ClawdbotCanvasPresentParams.self, from: req.paramsJSON)) ?? + ClawdbotCanvasPresentParams() + let urlTrimmed = params.url?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + let url = urlTrimmed.isEmpty ? nil : urlTrimmed + let placement = params.placement.map { + CanvasPlacement(x: $0.x, y: $0.y, width: $0.width, height: $0.height) + } + try await MainActor.run { + _ = try CanvasManager.shared.showDetailed( + sessionKey: "main", + target: url, + placement: placement) + } + return BridgeInvokeResponse(id: req.id, ok: true) + case ClawdbotCanvasCommand.hide.rawValue: + await MainActor.run { + CanvasManager.shared.hide(sessionKey: "main") + } + return BridgeInvokeResponse(id: req.id, ok: true) + case ClawdbotCanvasCommand.navigate.rawValue: + let params = try Self.decodeParams(ClawdbotCanvasNavigateParams.self, from: req.paramsJSON) + try await MainActor.run { + _ = try CanvasManager.shared.show(sessionKey: "main", path: params.url) + } + return BridgeInvokeResponse(id: req.id, ok: true) + case ClawdbotCanvasCommand.evalJS.rawValue: + let params = try Self.decodeParams(ClawdbotCanvasEvalParams.self, from: req.paramsJSON) + let result = try await CanvasManager.shared.eval( + sessionKey: "main", + javaScript: params.javaScript) + let payload = try Self.encodePayload(["result": result] as [String: String]) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdbotCanvasCommand.snapshot.rawValue: + let params = try? Self.decodeParams(ClawdbotCanvasSnapshotParams.self, from: req.paramsJSON) + let format = params?.format ?? .jpeg + let maxWidth: Int? = { + if let raw = params?.maxWidth, raw > 0 { return raw } + return switch format { + case .png: 900 + case .jpeg: 1600 + } + }() + let quality = params?.quality ?? 0.9 + + let path = try await CanvasManager.shared.snapshot(sessionKey: "main", outPath: nil) + defer { try? FileManager.default.removeItem(atPath: path) } + let data = try Data(contentsOf: URL(fileURLWithPath: path)) + guard let image = NSImage(data: data) else { + return Self.errorResponse(req, code: .unavailable, message: "canvas snapshot decode failed") + } + let encoded = try Self.encodeCanvasSnapshot( + image: image, + format: format, + maxWidth: maxWidth, + quality: quality) + let payload = try Self.encodePayload([ + "format": format == .jpeg ? "jpeg" : "png", + "base64": encoded.base64EncodedString(), + ]) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + default: + return Self.errorResponse(req, code: .invalidRequest, message: "INVALID_REQUEST: unknown command") + } + } + + private func handleA2UIInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case ClawdbotCanvasA2UICommand.reset.rawValue: + try await self.handleA2UIReset(req) + case ClawdbotCanvasA2UICommand.push.rawValue, + ClawdbotCanvasA2UICommand.pushJSONL.rawValue: + try await self.handleA2UIPush(req) + default: + Self.errorResponse(req, code: .invalidRequest, message: "INVALID_REQUEST: unknown command") + } + } + + private func handleCameraInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + guard Self.cameraEnabled() else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "CAMERA_DISABLED: enable Camera in Settings")) + } + switch req.command { + case ClawdbotCameraCommand.snap.rawValue: + let params = (try? Self.decodeParams(ClawdbotCameraSnapParams.self, from: req.paramsJSON)) ?? + ClawdbotCameraSnapParams() + let delayMs = min(10000, max(0, params.delayMs ?? 2000)) + let res = try await self.cameraCapture.snap( + facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front, + maxWidth: params.maxWidth, + quality: params.quality, + deviceId: params.deviceId, + delayMs: delayMs) + struct SnapPayload: Encodable { + var format: String + var base64: String + var width: Int + var height: Int + } + let payload = try Self.encodePayload(SnapPayload( + format: (params.format ?? .jpg).rawValue, + base64: res.data.base64EncodedString(), + width: Int(res.size.width), + height: Int(res.size.height))) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdbotCameraCommand.clip.rawValue: + let params = (try? Self.decodeParams(ClawdbotCameraClipParams.self, from: req.paramsJSON)) ?? + ClawdbotCameraClipParams() + let res = try await self.cameraCapture.clip( + facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front, + durationMs: params.durationMs, + includeAudio: params.includeAudio ?? true, + deviceId: params.deviceId, + outPath: nil) + defer { try? FileManager.default.removeItem(atPath: res.path) } + let data = try Data(contentsOf: URL(fileURLWithPath: res.path)) + struct ClipPayload: Encodable { + var format: String + var base64: String + var durationMs: Int + var hasAudio: Bool + } + let payload = try Self.encodePayload(ClipPayload( + format: (params.format ?? .mp4).rawValue, + base64: data.base64EncodedString(), + durationMs: res.durationMs, + hasAudio: res.hasAudio)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdbotCameraCommand.list.rawValue: + let devices = await self.cameraCapture.listDevices() + let payload = try Self.encodePayload(["devices": devices]) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + default: + return Self.errorResponse(req, code: .invalidRequest, message: "INVALID_REQUEST: unknown command") + } + } + + private func handleLocationInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let mode = Self.locationMode() + guard mode != .off else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "LOCATION_DISABLED: enable Location in Settings")) + } + let params = (try? Self.decodeParams(ClawdbotLocationGetParams.self, from: req.paramsJSON)) ?? + ClawdbotLocationGetParams() + let desired = params.desiredAccuracy ?? + (Self.locationPreciseEnabled() ? .precise : .balanced) + let status = await self.locationService.authorizationStatus() + if status != .authorizedAlways { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "LOCATION_PERMISSION_REQUIRED: grant Location permission")) + } + do { + let location = try await self.locationService.currentLocation( + desiredAccuracy: desired, + maxAgeMs: params.maxAgeMs, + timeoutMs: params.timeoutMs) + let isPrecise = await self.locationService.accuracyAuthorization() == .fullAccuracy + let payload = ClawdbotLocationPayload( + lat: location.coordinate.latitude, + lon: location.coordinate.longitude, + accuracyMeters: location.horizontalAccuracy, + altitudeMeters: location.verticalAccuracy >= 0 ? location.altitude : nil, + speedMps: location.speed >= 0 ? location.speed : nil, + headingDeg: location.course >= 0 ? location.course : nil, + timestamp: ISO8601DateFormatter().string(from: location.timestamp), + isPrecise: isPrecise, + source: nil) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + } catch MacNodeLocationService.Error.timeout { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "LOCATION_TIMEOUT: no fix in time")) + } catch { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdbotNodeError( + code: .unavailable, + message: "LOCATION_UNAVAILABLE: \(error.localizedDescription)")) + } + } + + private func handleScreenRecordInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ?? + MacNodeScreenRecordParams() + if let format = params.format?.lowercased(), !format.isEmpty, format != "mp4" { + return Self.errorResponse( + req, + code: .invalidRequest, + message: "INVALID_REQUEST: screen format must be mp4") + } + let res = try await self.screenRecorder.record( + screenIndex: params.screenIndex, + durationMs: params.durationMs, + fps: params.fps, + includeAudio: params.includeAudio, + outPath: nil) + defer { try? FileManager.default.removeItem(atPath: res.path) } + let data = try Data(contentsOf: URL(fileURLWithPath: res.path)) + struct ScreenPayload: Encodable { + var format: String + var base64: String + var durationMs: Int? + var fps: Double? + var screenIndex: Int? + var hasAudio: Bool + } + let payload = try Self.encodePayload(ScreenPayload( + format: "mp4", + base64: data.base64EncodedString(), + durationMs: params.durationMs, + fps: params.fps, + screenIndex: params.screenIndex, + hasAudio: res.hasAudio)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + } + private func handleA2UIReset(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { try await self.ensureA2UIHost()