diff --git a/apps/ios/Sources/Bridge/BridgeConnectionController.swift b/apps/ios/Sources/Bridge/BridgeConnectionController.swift index 6361093de..390ae41fa 100644 --- a/apps/ios/Sources/Bridge/BridgeConnectionController.swift +++ b/apps/ios/Sources/Bridge/BridgeConnectionController.swift @@ -13,6 +13,7 @@ final class BridgeConnectionController: ObservableObject { private weak var appModel: NodeAppModel? private var cancellables = Set<AnyCancellable>() private var didAutoConnect = false + private var seenStableIDs = Set<String>() init(appModel: NodeAppModel) { self.appModel = appModel @@ -23,6 +24,7 @@ final class BridgeConnectionController: ObservableObject { .sink { [weak self] newValue in guard let self else { return } self.bridges = newValue + self.updateLastDiscoveredBridge(from: newValue) self.maybeAutoConnect() } .store(in: &self.cancellables) @@ -50,9 +52,9 @@ final class BridgeConnectionController: ObservableObject { guard appModel.bridgeServerName == nil else { return } let defaults = UserDefaults.standard - let preferredStableID = defaults.string(forKey: "bridge.preferredStableID")? - .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - guard !preferredStableID.isEmpty else { return } + let targetStableID = defaults.string(forKey: "bridge.lastDiscoveredStableID")? + .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + guard !targetStableID.isEmpty else { return } let instanceId = defaults.string(forKey: "node.instanceId")? .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" @@ -64,12 +66,20 @@ final class BridgeConnectionController: ObservableObject { .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" guard !token.isEmpty else { return } - guard let target = self.bridges.first(where: { $0.stableID == preferredStableID }) else { return } + guard let target = self.bridges.first(where: { $0.stableID == targetStableID }) else { return } self.didAutoConnect = true appModel.connectToBridge(endpoint: target.endpoint, hello: self.makeHello(token: token)) } + private func updateLastDiscoveredBridge(from bridges: [BridgeDiscoveryModel.DiscoveredBridge]) { + let newlyDiscovered = bridges.filter { self.seenStableIDs.insert($0.stableID).inserted } + guard let last = newlyDiscovered.last else { return } + + UserDefaults.standard.set(last.stableID, forKey: "bridge.lastDiscoveredStableID") + BridgeSettingsStore.saveLastDiscoveredBridgeStableID(last.stableID) + } + private func makeHello(token: String) -> BridgeHello { let defaults = UserDefaults.standard let nodeId = defaults.string(forKey: "node.instanceId") ?? "ios-node" diff --git a/apps/ios/Sources/Bridge/BridgeSettingsStore.swift b/apps/ios/Sources/Bridge/BridgeSettingsStore.swift index 653d56280..f73a02637 100644 --- a/apps/ios/Sources/Bridge/BridgeSettingsStore.swift +++ b/apps/ios/Sources/Bridge/BridgeSettingsStore.swift @@ -6,13 +6,16 @@ enum BridgeSettingsStore { private static let instanceIdDefaultsKey = "node.instanceId" private static let preferredBridgeStableIDDefaultsKey = "bridge.preferredStableID" + private static let lastDiscoveredBridgeStableIDDefaultsKey = "bridge.lastDiscoveredStableID" private static let instanceIdAccount = "instanceId" private static let preferredBridgeStableIDAccount = "preferredStableID" + private static let lastDiscoveredBridgeStableIDAccount = "lastDiscoveredStableID" static func bootstrapPersistence() { self.ensureStableInstanceID() self.ensurePreferredBridgeStableID() + self.ensureLastDiscoveredBridgeStableID() } static func loadStableInstanceID() -> String?
{ @@ -36,6 +39,18 @@ enum BridgeSettingsStore { account: self.preferredBridgeStableIDAccount) } + static func loadLastDiscoveredBridgeStableID() -> String? { + KeychainStore.loadString(service: self.bridgeService, account: self.lastDiscoveredBridgeStableIDAccount)? + .trimmingCharacters(in: .whitespacesAndNewlines) + } + + static func saveLastDiscoveredBridgeStableID(_ stableID: String) { + _ = KeychainStore.saveString( + stableID, + service: self.bridgeService, + account: self.lastDiscoveredBridgeStableIDAccount) + } + private static func ensureStableInstanceID() { let defaults = UserDefaults.standard @@ -76,4 +91,22 @@ enum BridgeSettingsStore { defaults.set(stored, forKey: self.preferredBridgeStableIDDefaultsKey) } } + + private static func ensureLastDiscoveredBridgeStableID() { + let defaults = UserDefaults.standard + + if let existing = defaults.string(forKey: self.lastDiscoveredBridgeStableIDDefaultsKey)? + .trimmingCharacters(in: .whitespacesAndNewlines), + !existing.isEmpty + { + if self.loadLastDiscoveredBridgeStableID() == nil { + self.saveLastDiscoveredBridgeStableID(existing) + } + return + } + + if let stored = self.loadLastDiscoveredBridgeStableID(), !stored.isEmpty { + defaults.set(stored, forKey: self.lastDiscoveredBridgeStableIDDefaultsKey) + } + } } diff --git a/apps/ios/Sources/Camera/CameraController.swift b/apps/ios/Sources/Camera/CameraController.swift new file mode 100644 index 000000000..5579307b9 --- /dev/null +++ b/apps/ios/Sources/Camera/CameraController.swift @@ -0,0 +1,319 @@ +import AVFoundation +import ClawdisKit +import Foundation +import UIKit + +actor CameraController { + enum CameraError: LocalizedError, Sendable { + case cameraUnavailable + case microphoneUnavailable + case permissionDenied(kind: String) + case invalidParams(String) + case captureFailed(String) + case exportFailed(String) + + var errorDescription: String? { + switch self { + case .cameraUnavailable: + "Camera unavailable" + case .microphoneUnavailable: + "Microphone unavailable" + case let .permissionDenied(kind): + "\(kind) permission denied" + case let .invalidParams(msg): + msg + case let .captureFailed(msg): + msg + case let .exportFailed(msg): + msg + } + } + } + + func snap(params: ClawdisCameraSnapParams) async throws -> ( + format: String, + base64: String, + width: Int, + height: Int) + { + let facing = params.facing ?? .front + let maxWidth = params.maxWidth.flatMap { $0 > 0 ? 
$0 : nil } + let quality = Self.clampQuality(params.quality) + + try await self.ensureAccess(for: .video) + + let session = AVCaptureSession() + session.sessionPreset = .photo + + guard let device = Self.pickCamera(facing: facing) else { + throw CameraError.cameraUnavailable + } + + let input = try AVCaptureDeviceInput(device: device) + guard session.canAddInput(input) else { + throw CameraError.captureFailed("Failed to add camera input") + } + session.addInput(input) + + let output = AVCapturePhotoOutput() + guard session.canAddOutput(output) else { + throw CameraError.captureFailed("Failed to add photo output") + } + session.addOutput(output) + output.maxPhotoQualityPrioritization = .quality + + session.startRunning() + defer { session.stopRunning() } + + let settings: AVCapturePhotoSettings = { + if output.availablePhotoCodecTypes.contains(.jpeg) { + return AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg]) + } + return AVCapturePhotoSettings() + }() + settings.photoQualityPrioritization = .quality + + let rawData: Data = try await withCheckedThrowingContinuation { cont in + output.capturePhoto(with: settings, delegate: PhotoCaptureDelegate(cont)) + } + + let (finalData, size) = try Self.reencodeJPEG( + imageData: rawData, + maxWidth: maxWidth, + quality: quality) + + return ( + format: "jpg", + base64: finalData.base64EncodedString(), + width: Int(size.width.rounded()), + height: Int(size.height.rounded())) + } + + func clip(params: ClawdisCameraClipParams) async throws -> ( + format: String, + base64: String, + durationMs: Int, + hasAudio: Bool) + { + let facing = params.facing ?? .front + let durationMs = Self.clampDurationMs(params.durationMs) + let includeAudio = params.includeAudio ?? true + + try await self.ensureAccess(for: .video) + if includeAudio { + try await self.ensureAccess(for: .audio) + } + + let session = AVCaptureSession() + session.sessionPreset = .high + + guard let camera = Self.pickCamera(facing: facing) else { + throw CameraError.cameraUnavailable + } + let cameraInput = try AVCaptureDeviceInput(device: camera) + guard session.canAddInput(cameraInput) else { + throw CameraError.captureFailed("Failed to add camera input") + } + session.addInput(cameraInput) + + if includeAudio { + guard let mic = AVCaptureDevice.default(for: .audio) else { + throw CameraError.microphoneUnavailable + } + let micInput = try AVCaptureDeviceInput(device: mic) + if session.canAddInput(micInput) { + session.addInput(micInput) + } else { + throw CameraError.captureFailed("Failed to add microphone input") + } + } + + let output = AVCaptureMovieFileOutput() + guard session.canAddOutput(output) else { + throw CameraError.captureFailed("Failed to add movie output") + } + session.addOutput(output) + output.maxRecordedDuration = CMTime(value: Int64(durationMs), timescale: 1000) + + session.startRunning() + defer { session.stopRunning() } + + let movURL = FileManager.default.temporaryDirectory + .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mov") + let mp4URL = FileManager.default.temporaryDirectory + .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mp4") + + defer { + try? FileManager.default.removeItem(at: movURL) + try? FileManager.default.removeItem(at: mp4URL) + } + + let recordedURL: URL = try await withCheckedThrowingContinuation { cont in + let delegate = MovieFileDelegate(cont) + output.startRecording(to: movURL, recordingDelegate: delegate) + } + + // Transcode .mov -> .mp4 for easier downstream handling. 
+ try await Self.exportToMP4(inputURL: recordedURL, outputURL: mp4URL) + + let data = try Data(contentsOf: mp4URL) + return (format: "mp4", base64: data.base64EncodedString(), durationMs: durationMs, hasAudio: includeAudio) + } + + private func ensureAccess(for mediaType: AVMediaType) async throws { + let status = AVCaptureDevice.authorizationStatus(for: mediaType) + switch status { + case .authorized: + return + case .notDetermined: + let ok = await withCheckedContinuation(isolation: nil) { cont in + AVCaptureDevice.requestAccess(for: mediaType) { granted in + cont.resume(returning: granted) + } + } + if !ok { + throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") + } + case .denied, .restricted: + throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") + @unknown default: + throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") + } + } + + private nonisolated static func pickCamera(facing: ClawdisCameraFacing) -> AVCaptureDevice? { + let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back + return AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) + } + + private nonisolated static func clampQuality(_ quality: Double?) -> Double { + let q = quality ?? 0.9 + return min(1.0, max(0.05, q)) + } + + private nonisolated static func clampDurationMs(_ ms: Int?) -> Int { + let v = ms ?? 3000 + // Keep clips short by default; avoid huge base64 payloads on the bridge. + return min(15000, max(250, v)) + } + + private nonisolated static func reencodeJPEG( + imageData: Data, + maxWidth: Int?, + quality: Double) throws -> (data: Data, size: CGSize) + { + guard let image = UIImage(data: imageData) else { + throw CameraError.captureFailed("Failed to decode captured image") + } + + let finalImage: UIImage = if let maxWidth, maxWidth > 0 { + Self.downscale(image: image, maxWidth: CGFloat(maxWidth)) + } else { + image + } + + guard let out = finalImage.jpegData(compressionQuality: quality) else { + throw CameraError.captureFailed("Failed to encode JPEG") + } + + return (out, finalImage.size) + } + + private nonisolated static func downscale(image: UIImage, maxWidth: CGFloat) -> UIImage { + let w = image.size.width + let h = image.size.height + guard w > 0, h > 0 else { return image } + guard w > maxWidth else { return image } + + let scale = maxWidth / w + let target = CGSize(width: maxWidth, height: max(1, h * scale)) + + let format = UIGraphicsImageRendererFormat.default() + format.opaque = false + let renderer = UIGraphicsImageRenderer(size: target, format: format) + return renderer.image { _ in + image.draw(in: CGRect(origin: .zero, size: target)) + } + } + + private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws { + let asset = AVAsset(url: inputURL) + guard let exporter = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetHighestQuality) else { + throw CameraError.exportFailed("Failed to create export session") + } + exporter.outputURL = outputURL + exporter.outputFileType = .mp4 + exporter.shouldOptimizeForNetworkUse = true + + try await withCheckedThrowingContinuation(isolation: nil) { cont in + exporter.exportAsynchronously { + switch exporter.status { + case .completed: + cont.resume(returning: ()) + case .failed: + cont.resume(throwing: exporter.error ?? 
CameraError.exportFailed("Export failed")) + case .cancelled: + cont.resume(throwing: CameraError.exportFailed("Export cancelled")) + default: + cont.resume(throwing: CameraError.exportFailed("Export did not complete")) + } + } + } + } +} + +private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate { + private let continuation: CheckedContinuation + private var didResume = false + + init(_ continuation: CheckedContinuation) { + self.continuation = continuation + } + + func photoOutput( + _ output: AVCapturePhotoOutput, + didFinishProcessingPhoto photo: AVCapturePhoto, + error: Error?) + { + guard !self.didResume else { return } + self.didResume = true + + if let error { + self.continuation.resume(throwing: error) + return + } + guard let data = photo.fileDataRepresentation() else { + self.continuation.resume( + throwing: NSError(domain: "Camera", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "photo data missing", + ])) + return + } + self.continuation.resume(returning: data) + } +} + +private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate { + private let continuation: CheckedContinuation + private var didResume = false + + init(_ continuation: CheckedContinuation) { + self.continuation = continuation + } + + func fileOutput( + _ output: AVCaptureFileOutput, + didFinishRecordingTo outputFileURL: URL, + from connections: [AVCaptureConnection], + error: Error?) + { + guard !self.didResume else { return } + self.didResume = true + + if let error { + self.continuation.resume(throwing: error) + return + } + self.continuation.resume(returning: outputFileURL) + } +} diff --git a/apps/ios/Sources/Info.plist b/apps/ios/Sources/Info.plist index 78f4b34aa..6ed6968b2 100644 --- a/apps/ios/Sources/Info.plist +++ b/apps/ios/Sources/Info.plist @@ -26,6 +26,8 @@ NSLocalNetworkUsageDescription Clawdis discovers and connects to your Clawdis bridge on the local network. + NSCameraUsageDescription + Clawdis can capture photos or short video clips when requested via the bridge. NSMicrophoneUsageDescription Clawdis needs microphone access for voice wake. NSSpeechRecognitionUsageDescription diff --git a/apps/ios/Sources/Model/NodeAppModel.swift b/apps/ios/Sources/Model/NodeAppModel.swift index de20a117b..c581c6b30 100644 --- a/apps/ios/Sources/Model/NodeAppModel.swift +++ b/apps/ios/Sources/Model/NodeAppModel.swift @@ -6,6 +6,7 @@ import SwiftUI final class NodeAppModel: ObservableObject { @Published var isBackgrounded: Bool = false let screen = ScreenController() + let camera = CameraController() @Published var bridgeStatusText: String = "Not connected" @Published var bridgeServerName: String? @Published var bridgeRemoteAddress: String? 
@@ -182,13 +183,22 @@ final class NodeAppModel: ObservableObject { } private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse { - if req.command.hasPrefix("screen."), self.isBackgrounded { + if req.command.hasPrefix("screen.") || req.command.hasPrefix("camera."), self.isBackgrounded { return BridgeInvokeResponse( id: req.id, ok: false, error: ClawdisNodeError( code: .backgroundUnavailable, - message: "NODE_BACKGROUND_UNAVAILABLE: screen commands require foreground")) + message: "NODE_BACKGROUND_UNAVAILABLE: screen/camera commands require foreground")) + } + + if req.command.hasPrefix("camera."), !self.isCameraEnabled() { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdisNodeError( + code: .unavailable, + message: "CAMERA_DISABLED: enable Camera in iOS Settings → Camera → Allow Camera")) } do { @@ -222,6 +232,46 @@ final class NodeAppModel: ObservableObject { let payload = try Self.encodePayload(["format": "png", "base64": base64]) return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdisCameraCommand.snap.rawValue: + let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ?? + ClawdisCameraSnapParams() + let res = try await self.camera.snap(params: params) + + struct Payload: Codable { + var format: String + var base64: String + var width: Int + var height: Int + } + let payload = try Self.encodePayload(Payload( + format: res.format, + base64: res.base64, + width: res.width, + height: res.height)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + + case ClawdisCameraCommand.clip.rawValue: + let params = (try? Self.decodeParams(ClawdisCameraClipParams.self, from: req.paramsJSON)) ?? + ClawdisCameraClipParams() + + let suspended = (params.includeAudio ?? true) ? self.voiceWake.suspendForExternalAudioCapture() : false + defer { self.voiceWake.resumeAfterExternalAudioCapture(wasSuspended: suspended) } + + let res = try await self.camera.clip(params: params) + + struct Payload: Codable { + var format: String + var base64: String + var durationMs: Int + var hasAudio: Bool + } + let payload = try Self.encodePayload(Payload( + format: res.format, + base64: res.base64, + durationMs: res.durationMs, + hasAudio: res.hasAudio)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + default: return BridgeInvokeResponse( id: req.id, @@ -254,4 +304,10 @@ final class NodeAppModel: ObservableObject { } return json } + + private func isCameraEnabled() -> Bool { + // Default-on: if the key doesn't exist yet, treat it as enabled. 
+ if UserDefaults.standard.object(forKey: "camera.enabled") == nil { return true } + return UserDefaults.standard.bool(forKey: "camera.enabled") + } } diff --git a/apps/ios/Sources/RootTabs.swift b/apps/ios/Sources/RootTabs.swift index c87392974..3b450647b 100644 --- a/apps/ios/Sources/RootTabs.swift +++ b/apps/ios/Sources/RootTabs.swift @@ -2,6 +2,7 @@ import SwiftUI struct RootTabs: View { @EnvironmentObject private var appModel: NodeAppModel + @State private var isConnectingPulse: Bool = false var body: some View { TabView { @@ -27,12 +28,18 @@ struct RootTabs: View { radius: self.settingsIndicatorGlowRadius, x: 0, y: 0) + .scaleEffect(self.settingsIndicatorScale) + .opacity(self.settingsIndicatorOpacity) .offset(x: 7, y: -2) } Text("Settings") } } } + .onAppear { self.updateConnectingPulse(for: self.bridgeIndicatorState) } + .onChange(of: self.bridgeIndicatorState) { _, newValue in + self.updateConnectingPulse(for: newValue) + } } private enum BridgeIndicatorState { @@ -74,9 +81,31 @@ struct RootTabs: View { case .connected: 6 case .connecting: - 4 + self.isConnectingPulse ? 6 : 3 case .disconnected: 0 } } + + private var settingsIndicatorScale: CGFloat { + guard self.bridgeIndicatorState == .connecting else { return 1 } + return self.isConnectingPulse ? 1.12 : 0.96 + } + + private var settingsIndicatorOpacity: Double { + guard self.bridgeIndicatorState == .connecting else { return 1 } + return self.isConnectingPulse ? 1.0 : 0.75 + } + + private func updateConnectingPulse(for state: BridgeIndicatorState) { + guard state == .connecting else { + withAnimation(.easeOut(duration: 0.2)) { self.isConnectingPulse = false } + return + } + + guard !self.isConnectingPulse else { return } + withAnimation(.easeInOut(duration: 0.9).repeatForever(autoreverses: true)) { + self.isConnectingPulse = true + } + } } diff --git a/apps/ios/Sources/Settings/SettingsTab.swift b/apps/ios/Sources/Settings/SettingsTab.swift index d5304d698..de01a2548 100644 --- a/apps/ios/Sources/Settings/SettingsTab.swift +++ b/apps/ios/Sources/Settings/SettingsTab.swift @@ -19,6 +19,7 @@ struct SettingsTab: View { @AppStorage("voiceWake.enabled") private var voiceWakeEnabled: Bool = false @AppStorage("camera.enabled") private var cameraEnabled: Bool = true @AppStorage("bridge.preferredStableID") private var preferredBridgeStableID: String = "" + @AppStorage("bridge.lastDiscoveredStableID") private var lastDiscoveredBridgeStableID: String = "" @StateObject private var connectStatus = ConnectStatusStore() @State private var connectingBridgeID: String? @State private var localIPAddress: String? @@ -207,6 +208,8 @@ struct SettingsTab: View { self.connectingBridgeID = bridge.id self.preferredBridgeStableID = bridge.stableID BridgeSettingsStore.savePreferredBridgeStableID(bridge.stableID) + self.lastDiscoveredBridgeStableID = bridge.stableID + BridgeSettingsStore.saveLastDiscoveredBridgeStableID(bridge.stableID) defer { self.connectingBridgeID = nil } do { diff --git a/apps/ios/Sources/Voice/VoiceWakeManager.swift b/apps/ios/Sources/Voice/VoiceWakeManager.swift index 348d0bd78..2b46c5490 100644 --- a/apps/ios/Sources/Voice/VoiceWakeManager.swift +++ b/apps/ios/Sources/Voice/VoiceWakeManager.swift @@ -205,6 +205,37 @@ final class VoiceWakeManager: NSObject, ObservableObject { try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation) } + /// Temporarily releases the microphone so other subsystems (e.g. camera video capture) can record audio. 
+ /// Returns `true` when listening was active and was suspended. + func suspendForExternalAudioCapture() -> Bool { + guard self.isEnabled, self.isListening else { return false } + + self.isListening = false + self.statusText = "Paused" + + self.tapDrainTask?.cancel() + self.tapDrainTask = nil + self.tapQueue?.clear() + self.tapQueue = nil + + self.recognitionTask?.cancel() + self.recognitionTask = nil + self.recognitionRequest = nil + + if self.audioEngine.isRunning { + self.audioEngine.stop() + self.audioEngine.inputNode.removeTap(onBus: 0) + } + + try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation) + return true + } + + func resumeAfterExternalAudioCapture(wasSuspended: Bool) { + guard wasSuspended else { return } + Task { await self.start() } + } + private func startRecognition() throws { self.recognitionTask?.cancel() self.recognitionTask = nil diff --git a/apps/ios/project.yml b/apps/ios/project.yml index bd8d1479e..8281aac17 100644 --- a/apps/ios/project.yml +++ b/apps/ios/project.yml @@ -54,5 +54,6 @@ targets: NSLocalNetworkUsageDescription: Clawdis discovers and connects to your Clawdis bridge on the local network. NSBonjourServices: - _clawdis-bridge._tcp + NSCameraUsageDescription: Clawdis can capture photos or short video clips when requested via the bridge. NSMicrophoneUsageDescription: Clawdis needs microphone access for voice wake. NSSpeechRecognitionUsageDescription: Clawdis uses on-device speech recognition for voice wake. diff --git a/apps/macos/Sources/Clawdis/CameraCaptureService.swift b/apps/macos/Sources/Clawdis/CameraCaptureService.swift new file mode 100644 index 000000000..52ce4d53d --- /dev/null +++ b/apps/macos/Sources/Clawdis/CameraCaptureService.swift @@ -0,0 +1,341 @@ +import AVFoundation +import ClawdisIPC +import CoreGraphics +import Foundation +import ImageIO +import OSLog +import UniformTypeIdentifiers + +actor CameraCaptureService { + enum CameraError: LocalizedError, Sendable { + case cameraUnavailable + case microphoneUnavailable + case permissionDenied(kind: String) + case captureFailed(String) + case exportFailed(String) + + var errorDescription: String? { + switch self { + case .cameraUnavailable: + "Camera unavailable" + case .microphoneUnavailable: + "Microphone unavailable" + case let .permissionDenied(kind): + "\(kind) permission denied" + case let .captureFailed(msg): + msg + case let .exportFailed(msg): + msg + } + } + } + + private let logger = Logger(subsystem: "com.steipete.clawdis", category: "camera") + + func snap(facing: CameraFacing?, maxWidth: Int?, quality: Double?) async throws -> (data: Data, size: CGSize) { + let facing = facing ?? .front + let maxWidth = maxWidth.flatMap { $0 > 0 ? 
$0 : nil } + let quality = Self.clampQuality(quality) + + try await self.ensureAccess(for: .video) + + let session = AVCaptureSession() + session.sessionPreset = .photo + + guard let device = Self.pickCamera(facing: facing) else { + throw CameraError.cameraUnavailable + } + + let input = try AVCaptureDeviceInput(device: device) + guard session.canAddInput(input) else { + throw CameraError.captureFailed("Failed to add camera input") + } + session.addInput(input) + + let output = AVCapturePhotoOutput() + guard session.canAddOutput(output) else { + throw CameraError.captureFailed("Failed to add photo output") + } + session.addOutput(output) + output.maxPhotoQualityPrioritization = .quality + + session.startRunning() + defer { session.stopRunning() } + + let settings: AVCapturePhotoSettings = { + if output.availablePhotoCodecTypes.contains(.jpeg) { + return AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg]) + } + return AVCapturePhotoSettings() + }() + settings.photoQualityPrioritization = .quality + + let rawData: Data = try await withCheckedThrowingContinuation(isolation: nil) { cont in + output.capturePhoto(with: settings, delegate: PhotoCaptureDelegate(cont)) + } + + return try Self.reencodeJPEG(imageData: rawData, maxWidth: maxWidth, quality: quality) + } + + func clip( + facing: CameraFacing?, + durationMs: Int?, + includeAudio: Bool, + outPath: String?) async throws -> (path: String, durationMs: Int, hasAudio: Bool) + { + let facing = facing ?? .front + let durationMs = Self.clampDurationMs(durationMs) + + try await self.ensureAccess(for: .video) + if includeAudio { + try await self.ensureAccess(for: .audio) + } + + let session = AVCaptureSession() + session.sessionPreset = .high + + guard let camera = Self.pickCamera(facing: facing) else { + throw CameraError.cameraUnavailable + } + let cameraInput = try AVCaptureDeviceInput(device: camera) + guard session.canAddInput(cameraInput) else { + throw CameraError.captureFailed("Failed to add camera input") + } + session.addInput(cameraInput) + + if includeAudio { + guard let mic = AVCaptureDevice.default(for: .audio) else { + throw CameraError.microphoneUnavailable + } + let micInput = try AVCaptureDeviceInput(device: mic) + guard session.canAddInput(micInput) else { + throw CameraError.captureFailed("Failed to add microphone input") + } + session.addInput(micInput) + } + + let output = AVCaptureMovieFileOutput() + guard session.canAddOutput(output) else { + throw CameraError.captureFailed("Failed to add movie output") + } + session.addOutput(output) + output.maxRecordedDuration = CMTime(value: Int64(durationMs), timescale: 1000) + + session.startRunning() + defer { session.stopRunning() } + + let tmpMovURL = FileManager.default.temporaryDirectory + .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mov") + defer { try? FileManager.default.removeItem(at: tmpMovURL) } + + let outputURL: URL = { + if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + return URL(fileURLWithPath: outPath) + } + return FileManager.default.temporaryDirectory + .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mp4") + }() + + // Ensure we don't fail exporting due to an existing file. + try? 
FileManager.default.removeItem(at: outputURL) + + let logger = self.logger + let recordedURL: URL = try await withCheckedThrowingContinuation(isolation: nil) { cont in + output.startRecording(to: tmpMovURL, recordingDelegate: MovieFileDelegate(cont, logger: logger)) + } + + try await Self.exportToMP4(inputURL: recordedURL, outputURL: outputURL) + return (path: outputURL.path, durationMs: durationMs, hasAudio: includeAudio) + } + + private func ensureAccess(for mediaType: AVMediaType) async throws { + let status = AVCaptureDevice.authorizationStatus(for: mediaType) + switch status { + case .authorized: + return + case .notDetermined: + let ok = await withCheckedContinuation(isolation: nil) { cont in + AVCaptureDevice.requestAccess(for: mediaType) { granted in + cont.resume(returning: granted) + } + } + if !ok { + throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") + } + case .denied, .restricted: + throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") + @unknown default: + throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") + } + } + + private nonisolated static func pickCamera(facing: CameraFacing) -> AVCaptureDevice? { + let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back + + if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) { + return device + } + + // Many macOS cameras report `unspecified` position; fall back to any default. + return AVCaptureDevice.default(for: .video) + } + + private nonisolated static func clampQuality(_ quality: Double?) -> Double { + let q = quality ?? 0.9 + return min(1.0, max(0.05, q)) + } + + private nonisolated static func clampDurationMs(_ ms: Int?) -> Int { + let v = ms ?? 3000 + return min(15_000, max(250, v)) + } + + private nonisolated static func reencodeJPEG( + imageData: Data, + maxWidth: Int?, + quality: Double) throws -> (data: Data, size: CGSize) + { + guard let src = CGImageSourceCreateWithData(imageData as CFData, nil), + let img = CGImageSourceCreateImageAtIndex(src, 0, nil) + else { + throw CameraError.captureFailed("Failed to decode captured image") + } + + let finalImage: CGImage + if let maxWidth, img.width > maxWidth { + guard let scaled = self.downscale(image: img, maxWidth: maxWidth) else { + throw CameraError.captureFailed("Failed to downscale image") + } + finalImage = scaled + } else { + finalImage = img + } + + let out = NSMutableData() + guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else { + throw CameraError.captureFailed("Failed to create JPEG destination") + } + + let props = [kCGImageDestinationLossyCompressionQuality: quality] as CFDictionary + CGImageDestinationAddImage(dest, finalImage, props) + guard CGImageDestinationFinalize(dest) else { + throw CameraError.captureFailed("Failed to encode JPEG") + } + + return (out as Data, CGSize(width: finalImage.width, height: finalImage.height)) + } + + private nonisolated static func downscale(image: CGImage, maxWidth: Int) -> CGImage? 
{ + guard image.width > 0, image.height > 0 else { return image } + guard image.width > maxWidth else { return image } + + let scale = Double(maxWidth) / Double(image.width) + let targetW = maxWidth + let targetH = max(1, Int((Double(image.height) * scale).rounded())) + + let cs = CGColorSpaceCreateDeviceRGB() + let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue + guard let ctx = CGContext( + data: nil, + width: targetW, + height: targetH, + bitsPerComponent: 8, + bytesPerRow: 0, + space: cs, + bitmapInfo: bitmapInfo) + else { return nil } + + ctx.interpolationQuality = .high + ctx.draw(image, in: CGRect(x: 0, y: 0, width: targetW, height: targetH)) + return ctx.makeImage() + } + + private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws { + let asset = AVAsset(url: inputURL) + guard let export = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetMediumQuality) else { + throw CameraError.exportFailed("Failed to create export session") + } + export.outputURL = outputURL + export.outputFileType = .mp4 + export.shouldOptimizeForNetworkUse = true + + await withCheckedContinuation { cont in + export.exportAsynchronously { + cont.resume() + } + } + + switch export.status { + case .completed: + return + case .failed: + throw CameraError.exportFailed(export.error?.localizedDescription ?? "export failed") + case .cancelled: + throw CameraError.exportFailed("export cancelled") + default: + throw CameraError.exportFailed("export did not complete (\(export.status.rawValue))") + } + } +} + +private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate { + private var cont: CheckedContinuation<Data, Error>? + + init(_ cont: CheckedContinuation<Data, Error>) { + self.cont = cont + } + + func photoOutput( + _ output: AVCapturePhotoOutput, + didFinishProcessingPhoto photo: AVCapturePhoto, + error: Error?) + { + guard let cont else { return } + self.cont = nil + if let error { + cont.resume(throwing: error) + return + } + guard let data = photo.fileDataRepresentation() else { + cont.resume(throwing: CameraCaptureService.CameraError.captureFailed("No photo data")) + return + } + cont.resume(returning: data) + } +} + +private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate { + private var cont: CheckedContinuation<URL, Error>? + private let logger: Logger + + init(_ cont: CheckedContinuation<URL, Error>, logger: Logger) { + self.cont = cont + self.logger = logger + } + + func fileOutput( + _ output: AVCaptureFileOutput, + didFinishRecordingTo outputFileURL: URL, + from connections: [AVCaptureConnection], + error: Error?)
+ { + guard let cont else { return } + self.cont = nil + + if let error { + let ns = error as NSError + if ns.domain == AVFoundationErrorDomain, + ns.code == AVError.maximumDurationReached.rawValue + { + cont.resume(returning: outputFileURL) + return + } + + self.logger.error("camera record failed: \(error.localizedDescription, privacy: .public)") + cont.resume(throwing: error) + return + } + + cont.resume(returning: outputFileURL) + } +} diff --git a/apps/macos/Sources/Clawdis/Constants.swift b/apps/macos/Sources/Clawdis/Constants.swift index c4538365c..dc0965425 100644 --- a/apps/macos/Sources/Clawdis/Constants.swift +++ b/apps/macos/Sources/Clawdis/Constants.swift @@ -24,6 +24,7 @@ let webChatEnabledKey = "clawdis.webChatEnabled" let webChatSwiftUIEnabledKey = "clawdis.webChatSwiftUIEnabled" let webChatPortKey = "clawdis.webChatPort" let canvasEnabledKey = "clawdis.canvasEnabled" +let cameraEnabledKey = "clawdis.cameraEnabled" let peekabooBridgeEnabledKey = "clawdis.peekabooBridgeEnabled" let deepLinkAgentEnabledKey = "clawdis.deepLinkAgentEnabled" let deepLinkKeyKey = "clawdis.deepLinkKey" diff --git a/apps/macos/Sources/Clawdis/ControlRequestHandler.swift b/apps/macos/Sources/Clawdis/ControlRequestHandler.swift index a847ce62c..e17dddafa 100644 --- a/apps/macos/Sources/Clawdis/ControlRequestHandler.swift +++ b/apps/macos/Sources/Clawdis/ControlRequestHandler.swift @@ -3,6 +3,8 @@ import Foundation import OSLog enum ControlRequestHandler { + private static let cameraCapture = CameraCaptureService() + static func process( request: Request, notifier: NotificationManager = NotificationManager(), @@ -77,6 +79,16 @@ enum ControlRequestHandler { command: command, paramsJSON: paramsJSON, logger: logger) + + case let .cameraSnap(facing, maxWidth, quality, outPath): + return await self.handleCameraSnap(facing: facing, maxWidth: maxWidth, quality: quality, outPath: outPath) + + case let .cameraClip(facing, durationMs, includeAudio, outPath): + return await self.handleCameraClip( + facing: facing, + durationMs: durationMs, + includeAudio: includeAudio, + outPath: outPath) } } @@ -173,6 +185,10 @@ enum ControlRequestHandler { UserDefaults.standard.object(forKey: canvasEnabledKey) as? Bool ?? true } + private static func cameraEnabled() -> Bool { + UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false + } + private static func handleCanvasShow( session: String, path: String?, @@ -254,4 +270,46 @@ enum ControlRequestHandler { return Response(ok: false, message: error.localizedDescription) } } + + private static func handleCameraSnap( + facing: CameraFacing?, + maxWidth: Int?, + quality: Double?, + outPath: String?) async -> Response + { + guard self.cameraEnabled() else { return Response(ok: false, message: "Camera disabled by user") } + do { + let res = try await self.cameraCapture.snap(facing: facing, maxWidth: maxWidth, quality: quality) + let url: URL = if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + URL(fileURLWithPath: outPath) + } else { + FileManager.default.temporaryDirectory + .appendingPathComponent("clawdis-camera-snap-\(UUID().uuidString).jpg") + } + + try res.data.write(to: url, options: [.atomic]) + return Response(ok: true, message: url.path) + } catch { + return Response(ok: false, message: error.localizedDescription) + } + } + + private static func handleCameraClip( + facing: CameraFacing?, + durationMs: Int?, + includeAudio: Bool, + outPath: String?) 
async -> Response + { + guard self.cameraEnabled() else { return Response(ok: false, message: "Camera disabled by user") } + do { + let res = try await self.cameraCapture.clip( + facing: facing, + durationMs: durationMs, + includeAudio: includeAudio, + outPath: outPath) + return Response(ok: true, message: res.path) + } catch { + return Response(ok: false, message: error.localizedDescription) + } + } } diff --git a/apps/macos/Sources/Clawdis/DebugSettings.swift b/apps/macos/Sources/Clawdis/DebugSettings.swift index 6b79e24b7..abd5d0e44 100644 --- a/apps/macos/Sources/Clawdis/DebugSettings.swift +++ b/apps/macos/Sources/Clawdis/DebugSettings.swift @@ -9,6 +9,7 @@ struct DebugSettings: View { @AppStorage(modelCatalogReloadKey) private var modelCatalogReloadBump: Int = 0 @AppStorage(iconOverrideKey) private var iconOverrideRaw: String = IconOverrideSelection.system.rawValue @AppStorage(canvasEnabledKey) private var canvasEnabled: Bool = true + @AppStorage(cameraEnabledKey) private var cameraEnabled: Bool = false @AppStorage(deepLinkAgentEnabledKey) private var deepLinkAgentEnabled: Bool = false @State private var modelsCount: Int? @State private var modelsLoading = false @@ -48,6 +49,7 @@ struct DebugSettings: View { self.pathsSection self.quickActionsSection self.canvasSection + self.cameraSection self.experimentsSection Spacer(minLength: 0) @@ -571,6 +573,20 @@ struct DebugSettings: View { } } + private var cameraSection: some View { + GroupBox("Camera") { + VStack(alignment: .leading, spacing: 10) { + Toggle("Allow Camera (agent)", isOn: self.$cameraEnabled) + .toggleStyle(.checkbox) + .help("When off, camera requests return “Camera disabled by user”.") + + Text("Allows Clawdis to capture a photo or short video via the built-in camera.") + .font(.caption) + .foregroundStyle(.secondary) + } + } + } + private var experimentsSection: some View { GroupBox("Experiments") { Grid(alignment: .leadingFirstTextBaseline, horizontalSpacing: 14, verticalSpacing: 10) { diff --git a/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift b/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift index fa4100422..b1c203ef1 100644 --- a/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift +++ b/apps/macos/Sources/ClawdisCLI/ClawdisCLI.swift @@ -52,6 +52,7 @@ struct ClawdisCLI { enum Kind { case generic + case mediaPath } } @@ -91,6 +92,9 @@ struct ClawdisCLI { case "canvas": return try self.parseCanvas(args: &args) + case "camera": + return try self.parseCamera(args: &args) + default: throw CLIError.help } @@ -292,6 +296,62 @@ struct ClawdisCLI { } } + private static func parseCamera(args: inout [String]) throws -> ParsedCLIRequest { + guard let sub = args.popFirst() else { throw CLIError.help } + switch sub { + case "snap": + var facing: CameraFacing? + var maxWidth: Int? + var quality: Double? + var outPath: String? + while !args.isEmpty { + let arg = args.removeFirst() + switch arg { + case "--facing": + if let val = args.popFirst(), let f = CameraFacing(rawValue: val) { facing = f } + case "--max-width": + maxWidth = args.popFirst().flatMap(Int.init) + case "--quality": + quality = args.popFirst().flatMap(Double.init) + case "--out": + outPath = args.popFirst() + default: + break + } + } + return ParsedCLIRequest( + request: .cameraSnap(facing: facing, maxWidth: maxWidth, quality: quality, outPath: outPath), + kind: .mediaPath) + + case "clip": + var facing: CameraFacing? + var durationMs: Int? + var includeAudio = true + var outPath: String? 
+ while !args.isEmpty { + let arg = args.removeFirst() + switch arg { + case "--facing": + if let val = args.popFirst(), let f = CameraFacing(rawValue: val) { facing = f } + case "--duration-ms": + durationMs = args.popFirst().flatMap(Int.init) + case "--no-audio": + includeAudio = false + case "--out": + outPath = args.popFirst() + default: + break + } + } + return ParsedCLIRequest( + request: .cameraClip(facing: facing, durationMs: durationMs, includeAudio: includeAudio, outPath: outPath), + kind: .mediaPath) + + default: + throw CLIError.help + } + } + private static func parseCanvasPlacement( args: inout [String], session: inout String, @@ -334,6 +394,10 @@ struct ClawdisCLI { if let message = response.message, !message.isEmpty { FileHandle.standardOutput.write(Data((message + "\n").utf8)) } + case .mediaPath: + if let message = response.message, !message.isEmpty { + print("MEDIA:\(message)") + } } } @@ -352,6 +416,8 @@ struct ClawdisCLI { output["payload"] = text } } + case .mediaPath: + break } let json = try JSONSerialization.data(withJSONObject: output, options: [.prettyPrinted]) @@ -406,6 +472,10 @@ struct ClawdisCLI { clawdis-mac canvas eval --js <js> [--session <id>] clawdis-mac canvas snapshot [--out <path>] [--session <id>] + Camera: + clawdis-mac camera snap [--facing <front|back>] [--max-width <px>] [--quality <0-1>] [--out <path>] + clawdis-mac camera clip [--facing <front|back>] [--duration-ms <ms>] [--no-audio] [--out <path>] + Browser (clawd): clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot|eval|query|dom|snapshot @@ -433,6 +503,7 @@ struct ClawdisCLI { Output: Default output is text. Use --json for machine-readable output. In text mode, `browser screenshot` prints MEDIA:<path>. + In text mode, `camera snap` and `camera clip` print MEDIA:<path>. """ print(usage) } diff --git a/apps/macos/Sources/ClawdisIPC/IPC.swift b/apps/macos/Sources/ClawdisIPC/IPC.swift index feb062bdc..6aadc0185 100644 --- a/apps/macos/Sources/ClawdisIPC/IPC.swift +++ b/apps/macos/Sources/ClawdisIPC/IPC.swift @@ -13,6 +13,11 @@ public enum Capability: String, Codable, CaseIterable, Sendable { case speechRecognition } +public enum CameraFacing: String, Codable, Sendable { + case front + case back +} + // MARK: - Requests /// Notification interruption level (maps to UNNotificationInterruptionLevel) @@ -74,6 +79,8 @@ public enum Request: Sendable { case canvasSnapshot(session: String, outPath: String?) case nodeList case nodeInvoke(nodeId: String, command: String, paramsJSON: String?) + case cameraSnap(facing: CameraFacing?, maxWidth: Int?, quality: Double?, outPath: String?) + case cameraClip(facing: CameraFacing?, durationMs: Int?, includeAudio: Bool, outPath: String?)
} // MARK: - Responses @@ -104,6 +111,11 @@ extension Request: Codable { case path case javaScript case outPath + case facing + case maxWidth + case quality + case durationMs + case includeAudio case placement case nodeId case nodeCommand @@ -124,6 +136,8 @@ extension Request: Codable { case canvasSnapshot case nodeList case nodeInvoke + case cameraSnap + case cameraClip } public func encode(to encoder: Encoder) throws { @@ -198,6 +212,20 @@ extension Request: Codable { try container.encode(nodeId, forKey: .nodeId) try container.encode(command, forKey: .nodeCommand) try container.encodeIfPresent(paramsJSON, forKey: .paramsJSON) + + case let .cameraSnap(facing, maxWidth, quality, outPath): + try container.encode(Kind.cameraSnap, forKey: .type) + try container.encodeIfPresent(facing, forKey: .facing) + try container.encodeIfPresent(maxWidth, forKey: .maxWidth) + try container.encodeIfPresent(quality, forKey: .quality) + try container.encodeIfPresent(outPath, forKey: .outPath) + + case let .cameraClip(facing, durationMs, includeAudio, outPath): + try container.encode(Kind.cameraClip, forKey: .type) + try container.encodeIfPresent(facing, forKey: .facing) + try container.encodeIfPresent(durationMs, forKey: .durationMs) + try container.encode(includeAudio, forKey: .includeAudio) + try container.encodeIfPresent(outPath, forKey: .outPath) } } @@ -274,6 +302,20 @@ extension Request: Codable { let command = try container.decode(String.self, forKey: .nodeCommand) let paramsJSON = try container.decodeIfPresent(String.self, forKey: .paramsJSON) self = .nodeInvoke(nodeId: nodeId, command: command, paramsJSON: paramsJSON) + + case .cameraSnap: + let facing = try container.decodeIfPresent(CameraFacing.self, forKey: .facing) + let maxWidth = try container.decodeIfPresent(Int.self, forKey: .maxWidth) + let quality = try container.decodeIfPresent(Double.self, forKey: .quality) + let outPath = try container.decodeIfPresent(String.self, forKey: .outPath) + self = .cameraSnap(facing: facing, maxWidth: maxWidth, quality: quality, outPath: outPath) + + case .cameraClip: + let facing = try container.decodeIfPresent(CameraFacing.self, forKey: .facing) + let durationMs = try container.decodeIfPresent(Int.self, forKey: .durationMs) + let includeAudio = (try? container.decode(Bool.self, forKey: .includeAudio)) ?? 
true + let outPath = try container.decodeIfPresent(String.self, forKey: .outPath) + self = .cameraClip(facing: facing, durationMs: durationMs, includeAudio: includeAudio, outPath: outPath) } } } diff --git a/apps/macos/Tests/ClawdisIPCTests/CameraIPCTests.swift b/apps/macos/Tests/ClawdisIPCTests/CameraIPCTests.swift new file mode 100644 index 000000000..a5121ab56 --- /dev/null +++ b/apps/macos/Tests/ClawdisIPCTests/CameraIPCTests.swift @@ -0,0 +1,62 @@ +import ClawdisIPC +import Foundation +import Testing + +@Suite struct CameraIPCTests { + @Test func cameraSnapCodableRoundtrip() throws { + let req: Request = .cameraSnap( + facing: .front, + maxWidth: 640, + quality: 0.85, + outPath: "/tmp/test.jpg") + + let data = try JSONEncoder().encode(req) + let decoded = try JSONDecoder().decode(Request.self, from: data) + + switch decoded { + case let .cameraSnap(facing, maxWidth, quality, outPath): + #expect(facing == .front) + #expect(maxWidth == 640) + #expect(quality == 0.85) + #expect(outPath == "/tmp/test.jpg") + default: + Issue.record("expected cameraSnap, got \(decoded)") + } + } + + @Test func cameraClipCodableRoundtrip() throws { + let req: Request = .cameraClip( + facing: .back, + durationMs: 3000, + includeAudio: false, + outPath: "/tmp/test.mp4") + + let data = try JSONEncoder().encode(req) + let decoded = try JSONDecoder().decode(Request.self, from: data) + + switch decoded { + case let .cameraClip(facing, durationMs, includeAudio, outPath): + #expect(facing == .back) + #expect(durationMs == 3000) + #expect(includeAudio == false) + #expect(outPath == "/tmp/test.mp4") + default: + Issue.record("expected cameraClip, got \(decoded)") + } + } + + @Test func cameraClipDefaultsIncludeAudioToTrueWhenMissing() throws { + let json = """ + {"type":"cameraClip","durationMs":1234} + """ + let decoded = try JSONDecoder().decode(Request.self, from: Data(json.utf8)) + switch decoded { + case let .cameraClip(_, durationMs, includeAudio, _): + #expect(durationMs == 1234) + #expect(includeAudio == true) + default: + Issue.record("expected cameraClip, got \(decoded)") + } + } +} + diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/CameraCommands.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/CameraCommands.swift new file mode 100644 index 000000000..dd2c2015d --- /dev/null +++ b/apps/shared/ClawdisKit/Sources/ClawdisKit/CameraCommands.swift @@ -0,0 +1,58 @@ +import Foundation + +public enum ClawdisCameraCommand: String, Codable, Sendable { + case snap = "camera.snap" + case clip = "camera.clip" +} + +public enum ClawdisCameraFacing: String, Codable, Sendable { + case back + case front +} + +public enum ClawdisCameraImageFormat: String, Codable, Sendable { + case jpg + case jpeg +} + +public enum ClawdisCameraVideoFormat: String, Codable, Sendable { + case mp4 +} + +public struct ClawdisCameraSnapParams: Codable, Sendable, Equatable { + public var facing: ClawdisCameraFacing? + public var maxWidth: Int? + public var quality: Double? + public var format: ClawdisCameraImageFormat? + + public init( + facing: ClawdisCameraFacing? = nil, + maxWidth: Int? = nil, + quality: Double? = nil, + format: ClawdisCameraImageFormat? = nil) + { + self.facing = facing + self.maxWidth = maxWidth + self.quality = quality + self.format = format + } +} + +public struct ClawdisCameraClipParams: Codable, Sendable, Equatable { + public var facing: ClawdisCameraFacing? + public var durationMs: Int? + public var includeAudio: Bool? + public var format: ClawdisCameraVideoFormat? 
+ + public init( + facing: ClawdisCameraFacing? = nil, + durationMs: Int? = nil, + includeAudio: Bool? = nil, + format: ClawdisCameraVideoFormat? = nil) + { + self.facing = facing + self.durationMs = durationMs + self.includeAudio = includeAudio + self.format = format + } +} diff --git a/docs/AGENTS.default.md b/docs/AGENTS.default.md index 9a1d711a4..9b5ce1a5c 100644 --- a/docs/AGENTS.default.md +++ b/docs/AGENTS.default.md @@ -9,7 +9,7 @@ read_when: ## What Clawdis Does - Runs WhatsApp gateway + Pi coding agent so the assistant can read/write chats, fetch context, and run tools via the host Mac. - macOS app manages permissions (screen recording, notifications, microphone) and exposes a CLI helper `clawdis-mac` for scripts. -- Sessions are per-sender; heartbeats keep background tasks alive. +- Direct chats collapse into the shared `main` session by default; groups stay isolated as `group:<id>`; heartbeats keep background tasks alive. ## Core Tools (enable in Settings → Tools) - **mcporter** — MCP runtime/CLI to list, call, and sync Model Context Protocol servers. diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html index 63eedc126..a56aafc07 100644 --- a/docs/_layouts/default.html +++ b/docs/_layouts/default.html @@ -122,8 +122,8 @@ · source · - npm - + releases +
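Usage notes (illustrative sketch, not part of the diff): based on the flags `parseCamera` accepts and the `MEDIA:` convention in `run`, text-mode invocations would look like the examples below. The concrete paths and parameter values are assumptions; the flag names, defaults (quality 0.9, duration 3000 ms, clamped to 250 ms–15 s), and the `MEDIA:` prefix come from the diff itself.

    # Front-camera photo, downscaled to 640 px wide, JPEG quality 0.85
    clawdis-mac camera snap --facing front --max-width 640 --quality 0.85 --out /tmp/snap.jpg
    # prints: MEDIA:/tmp/snap.jpg

    # Three-second clip with the microphone muted
    clawdis-mac camera clip --duration-ms 3000 --no-audio --out /tmp/clip.mp4
    # prints: MEDIA:/tmp/clip.mp4

Per `CameraIPCTests`, the IPC wire form of a clip request that omits `includeAudio` decodes with audio defaulted to true:

    {"type":"cameraClip","durationMs":1234}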