Merge origin/main
This commit is contained in:
@@ -13,6 +13,7 @@ final class BridgeConnectionController: ObservableObject {
|
||||
private weak var appModel: NodeAppModel?
|
||||
private var cancellables = Set<AnyCancellable>()
|
||||
private var didAutoConnect = false
|
||||
private var seenStableIDs = Set<String>()
|
||||
|
||||
init(appModel: NodeAppModel) {
|
||||
self.appModel = appModel
|
||||
@@ -23,6 +24,7 @@ final class BridgeConnectionController: ObservableObject {
|
||||
.sink { [weak self] newValue in
|
||||
guard let self else { return }
|
||||
self.bridges = newValue
|
||||
self.updateLastDiscoveredBridge(from: newValue)
|
||||
self.maybeAutoConnect()
|
||||
}
|
||||
.store(in: &self.cancellables)
|
||||
@@ -50,9 +52,9 @@ final class BridgeConnectionController: ObservableObject {
|
||||
guard appModel.bridgeServerName == nil else { return }
|
||||
|
||||
let defaults = UserDefaults.standard
|
||||
let preferredStableID = defaults.string(forKey: "bridge.preferredStableID")?
|
||||
let targetStableID = defaults.string(forKey: "bridge.lastDiscoveredStableID")?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
||||
guard !preferredStableID.isEmpty else { return }
|
||||
guard !targetStableID.isEmpty else { return }
|
||||
|
||||
let instanceId = defaults.string(forKey: "node.instanceId")?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
||||
@@ -64,12 +66,20 @@ final class BridgeConnectionController: ObservableObject {
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
||||
guard !token.isEmpty else { return }
|
||||
|
||||
guard let target = self.bridges.first(where: { $0.stableID == preferredStableID }) else { return }
|
||||
guard let target = self.bridges.first(where: { $0.stableID == targetStableID }) else { return }
|
||||
|
||||
self.didAutoConnect = true
|
||||
appModel.connectToBridge(endpoint: target.endpoint, hello: self.makeHello(token: token))
|
||||
}
|
||||
|
||||
/// Persists the stable ID of the most recent bridge this controller has not seen before.
///
/// Every stable ID in `bridges` is added to `seenStableIDs`. If at least one of them was new,
/// the last new one (in list order) is written to `UserDefaults` and saved through
/// `BridgeSettingsStore`.
///
/// - Parameter bridges: The current list of discovered bridges.
private func updateLastDiscoveredBridge(from bridges: [BridgeDiscoveryModel.DiscoveredBridge]) {
    var newestStableID: String?
    for bridge in bridges where self.seenStableIDs.insert(bridge.stableID).inserted {
        newestStableID = bridge.stableID
    }

    guard let newestStableID else { return }

    UserDefaults.standard.set(newestStableID, forKey: "bridge.lastDiscoveredStableID")
    BridgeSettingsStore.saveLastDiscoveredBridgeStableID(newestStableID)
}
|
||||
|
||||
private func makeHello(token: String) -> BridgeHello {
|
||||
let defaults = UserDefaults.standard
|
||||
let nodeId = defaults.string(forKey: "node.instanceId") ?? "ios-node"
|
||||
|
||||
@@ -6,13 +6,16 @@ enum BridgeSettingsStore {
|
||||
|
||||
private static let instanceIdDefaultsKey = "node.instanceId"
|
||||
private static let preferredBridgeStableIDDefaultsKey = "bridge.preferredStableID"
|
||||
private static let lastDiscoveredBridgeStableIDDefaultsKey = "bridge.lastDiscoveredStableID"
|
||||
|
||||
private static let instanceIdAccount = "instanceId"
|
||||
private static let preferredBridgeStableIDAccount = "preferredStableID"
|
||||
private static let lastDiscoveredBridgeStableIDAccount = "lastDiscoveredStableID"
|
||||
|
||||
static func bootstrapPersistence() {
|
||||
self.ensureStableInstanceID()
|
||||
self.ensurePreferredBridgeStableID()
|
||||
self.ensureLastDiscoveredBridgeStableID()
|
||||
}
|
||||
|
||||
static func loadStableInstanceID() -> String? {
|
||||
@@ -36,6 +39,18 @@ enum BridgeSettingsStore {
|
||||
account: self.preferredBridgeStableIDAccount)
|
||||
}
|
||||
|
||||
/// Loads the last-discovered bridge stable ID from `KeychainStore`.
///
/// - Returns: The stored value with surrounding whitespace and newlines removed,
///   or `nil` when `KeychainStore.loadString` returns `nil`.
static func loadLastDiscoveredBridgeStableID() -> String? {
    let stored = KeychainStore.loadString(
        service: self.bridgeService,
        account: self.lastDiscoveredBridgeStableIDAccount)
    return stored?.trimmingCharacters(in: .whitespacesAndNewlines)
}
|
||||
|
||||
/// Saves the last-discovered bridge stable ID through `KeychainStore`.
///
/// The result of `KeychainStore.saveString` is intentionally discarded.
///
/// - Parameter stableID: The stable ID to store.
static func saveLastDiscoveredBridgeStableID(_ stableID: String) {
    let service = self.bridgeService
    let account = self.lastDiscoveredBridgeStableIDAccount
    _ = KeychainStore.saveString(stableID, service: service, account: account)
}
|
||||
|
||||
private static func ensureStableInstanceID() {
|
||||
let defaults = UserDefaults.standard
|
||||
|
||||
@@ -76,4 +91,22 @@ enum BridgeSettingsStore {
|
||||
defaults.set(stored, forKey: self.preferredBridgeStableIDDefaultsKey)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reconciles the last-discovered bridge stable ID between `UserDefaults` and `KeychainStore`.
///
/// - If `UserDefaults` holds a non-empty (trimmed) value, that value is copied to the keychain
///   only when the keychain currently has no value; nothing else is changed.
/// - Otherwise, a non-empty keychain value is copied into `UserDefaults`.
private static func ensureLastDiscoveredBridgeStableID() {
    let defaults = UserDefaults.standard
    let defaultsKey = self.lastDiscoveredBridgeStableIDDefaultsKey

    let fromDefaults = defaults.string(forKey: defaultsKey)?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""

    guard fromDefaults.isEmpty else {
        // UserDefaults already has a value: only backfill the keychain when it is empty.
        if self.loadLastDiscoveredBridgeStableID() == nil {
            self.saveLastDiscoveredBridgeStableID(fromDefaults)
        }
        return
    }

    // UserDefaults has nothing usable: restore from the keychain when possible.
    guard let fromKeychain = self.loadLastDiscoveredBridgeStableID(), !fromKeychain.isEmpty else {
        return
    }
    defaults.set(fromKeychain, forKey: defaultsKey)
}
|
||||
}
|
||||
|
||||
319
apps/ios/Sources/Camera/CameraController.swift
Normal file
319
apps/ios/Sources/Camera/CameraController.swift
Normal file
@@ -0,0 +1,319 @@
|
||||
import AVFoundation
|
||||
import ClawdisKit
|
||||
import Foundation
|
||||
import UIKit
|
||||
|
||||
actor CameraController {
|
||||
/// Errors thrown by the camera capture and export paths.
enum CameraError: LocalizedError, Sendable {
    case cameraUnavailable
    case microphoneUnavailable
    case permissionDenied(kind: String)
    case invalidParams(String)
    case captureFailed(String)
    case exportFailed(String)

    /// Message exposed through `LocalizedError`; cases carrying a message return it verbatim.
    var errorDescription: String? {
        switch self {
        case .cameraUnavailable:
            return "Camera unavailable"
        case .microphoneUnavailable:
            return "Microphone unavailable"
        case let .permissionDenied(kind):
            return "\(kind) permission denied"
        case let .invalidParams(message), let .captureFailed(message), let .exportFailed(message):
            return message
        }
    }
}
|
||||
|
||||
/// Captures a single still photo and returns it as a base64-encoded JPEG.
///
/// - Parameter params: Requested camera facing (front when omitted), optional maximum width
///   (non-positive values are ignored) and JPEG quality (clamped by `clampQuality`).
/// - Returns: The format tag `"jpg"`, the base64 payload, and the rounded size reported by
///   the re-encoded image.
/// - Throws: `CameraError` when camera access is denied, no camera is available, or the session
///   cannot be configured or the image re-encoded; any error reported by the capture itself.
func snap(params: ClawdisCameraSnapParams) async throws -> (
    format: String,
    base64: String,
    width: Int,
    height: Int)
{
    let facing = params.facing ?? .front
    let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil }
    let quality = Self.clampQuality(params.quality)

    try await self.ensureAccess(for: .video)

    let session = AVCaptureSession()
    session.sessionPreset = .photo

    guard let device = Self.pickCamera(facing: facing) else {
        throw CameraError.cameraUnavailable
    }

    let input = try AVCaptureDeviceInput(device: device)
    guard session.canAddInput(input) else {
        throw CameraError.captureFailed("Failed to add camera input")
    }
    session.addInput(input)

    let output = AVCapturePhotoOutput()
    guard session.canAddOutput(output) else {
        throw CameraError.captureFailed("Failed to add photo output")
    }
    session.addOutput(output)
    output.maxPhotoQualityPrioritization = .quality

    session.startRunning()
    defer { session.stopRunning() }

    // Prefer JPEG when the output offers it; otherwise use the default settings.
    let settings: AVCapturePhotoSettings = {
        if output.availablePhotoCodecTypes.contains(.jpeg) {
            return AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg])
        }
        return AVCapturePhotoSettings()
    }()
    settings.photoQualityPrioritization = .quality

    // The photo output does not keep its capture delegate alive (Apple's AVCam sample holds
    // its own strong reference for this reason). Keep one here until the continuation has
    // resumed; otherwise the delegate can be deallocated before the callback fires and the
    // continuation would never resume.
    var delegate: PhotoCaptureDelegate?
    let rawData: Data = try await withCheckedThrowingContinuation { cont in
        let captureDelegate = PhotoCaptureDelegate(cont)
        delegate = captureDelegate
        output.capturePhoto(with: settings, delegate: captureDelegate)
    }
    withExtendedLifetime(delegate) {}

    let (finalData, size) = try Self.reencodeJPEG(
        imageData: rawData,
        maxWidth: maxWidth,
        quality: quality)

    return (
        format: "jpg",
        base64: finalData.base64EncodedString(),
        width: Int(size.width.rounded()),
        height: Int(size.height.rounded()))
}
|
||||
|
||||
/// Records a short video clip and returns it as a base64-encoded MP4.
///
/// - Parameter params: Requested camera facing (front when omitted), duration in milliseconds
///   (clamped by `clampDurationMs`) and whether to record audio (on when omitted).
/// - Returns: The format tag `"mp4"`, the base64 payload, the clamped requested duration and
///   whether audio was requested.
/// - Throws: `CameraError` when access is denied, a device is missing, or the session or export
///   cannot be set up; any error reported by recording, export or reading the file.
func clip(params: ClawdisCameraClipParams) async throws -> (
    format: String,
    base64: String,
    durationMs: Int,
    hasAudio: Bool)
{
    let facing = params.facing ?? .front
    let durationMs = Self.clampDurationMs(params.durationMs)
    let includeAudio = params.includeAudio ?? true

    try await self.ensureAccess(for: .video)
    if includeAudio {
        try await self.ensureAccess(for: .audio)
    }

    let session = AVCaptureSession()
    session.sessionPreset = .high

    guard let camera = Self.pickCamera(facing: facing) else {
        throw CameraError.cameraUnavailable
    }
    let cameraInput = try AVCaptureDeviceInput(device: camera)
    guard session.canAddInput(cameraInput) else {
        throw CameraError.captureFailed("Failed to add camera input")
    }
    session.addInput(cameraInput)

    if includeAudio {
        guard let mic = AVCaptureDevice.default(for: .audio) else {
            throw CameraError.microphoneUnavailable
        }
        let micInput = try AVCaptureDeviceInput(device: mic)
        guard session.canAddInput(micInput) else {
            throw CameraError.captureFailed("Failed to add microphone input")
        }
        session.addInput(micInput)
    }

    let output = AVCaptureMovieFileOutput()
    guard session.canAddOutput(output) else {
        throw CameraError.captureFailed("Failed to add movie output")
    }
    session.addOutput(output)
    // Recording is ended by this limit; nothing in this method stops it explicitly.
    output.maxRecordedDuration = CMTime(value: Int64(durationMs), timescale: 1000)

    session.startRunning()
    defer { session.stopRunning() }

    let movURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mov")
    let mp4URL = FileManager.default.temporaryDirectory
        .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mp4")

    // Both temporary files are removed on every exit path.
    defer {
        try? FileManager.default.removeItem(at: movURL)
        try? FileManager.default.removeItem(at: mp4URL)
    }

    // Hold a strong reference to the recording delegate until the continuation has resumed,
    // rather than relying on the file output to keep it alive for the whole recording.
    var delegate: MovieFileDelegate?
    let recordedURL: URL = try await withCheckedThrowingContinuation { cont in
        let recordingDelegate = MovieFileDelegate(cont)
        delegate = recordingDelegate
        output.startRecording(to: movURL, recordingDelegate: recordingDelegate)
    }
    withExtendedLifetime(delegate) {}

    // Transcode .mov -> .mp4 for easier downstream handling.
    try await Self.exportToMP4(inputURL: recordedURL, outputURL: mp4URL)

    let data = try Data(contentsOf: mp4URL)
    return (format: "mp4", base64: data.base64EncodedString(), durationMs: durationMs, hasAudio: includeAudio)
}
|
||||
|
||||
/// Ensures capture access for `mediaType`, prompting when the status is not yet determined.
///
/// - Parameter mediaType: The media type to check; `.video` is reported as "Camera",
///   anything else as "Microphone".
/// - Throws: `CameraError.permissionDenied` when access is denied, restricted, refused at the
///   prompt, or the status is unknown.
private func ensureAccess(for mediaType: AVMediaType) async throws {
    let denied = CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone")

    switch AVCaptureDevice.authorizationStatus(for: mediaType) {
    case .authorized:
        return
    case .notDetermined:
        let granted = await withCheckedContinuation(isolation: nil) { cont in
            AVCaptureDevice.requestAccess(for: mediaType) { granted in
                cont.resume(returning: granted)
            }
        }
        guard granted else { throw denied }
    case .denied, .restricted:
        throw denied
    @unknown default:
        throw denied
    }
}
|
||||
|
||||
/// Returns the built-in wide-angle camera for the requested facing, if one exists.
///
/// - Parameter facing: `.front` selects the front position; any other value selects the back.
private nonisolated static func pickCamera(facing: ClawdisCameraFacing) -> AVCaptureDevice? {
    let position: AVCaptureDevice.Position = if facing == .front { .front } else { .back }
    return AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position)
}
|
||||
|
||||
/// Clamps a JPEG quality to the range 0.05...1.0, defaulting to 0.9 when `quality` is nil.
private nonisolated static func clampQuality(_ quality: Double?) -> Double {
    let requested = quality ?? 0.9
    let atLeastMinimum = max(0.05, requested)
    return min(1.0, atLeastMinimum)
}
|
||||
|
||||
/// Clamps a clip duration to 250...15000 ms, defaulting to 3000 ms when `ms` is nil.
private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
    // Keep clips short by default; avoid huge base64 payloads on the bridge.
    let requested = ms ?? 3000
    let atLeastMinimum = max(250, requested)
    return min(15000, atLeastMinimum)
}
|
||||
|
||||
/// Decodes captured image data, optionally downscales it, and re-encodes it as JPEG.
///
/// - Parameters:
///   - imageData: Encoded image bytes from the capture.
///   - maxWidth: When present and positive, the image is passed through `downscale`.
///   - quality: JPEG compression quality handed to `jpegData(compressionQuality:)`.
/// - Returns: The JPEG bytes and the `size` of the image that was encoded.
/// - Throws: `CameraError.captureFailed` when decoding or encoding fails.
private nonisolated static func reencodeJPEG(
    imageData: Data,
    maxWidth: Int?,
    quality: Double) throws -> (data: Data, size: CGSize)
{
    guard let decoded = UIImage(data: imageData) else {
        throw CameraError.captureFailed("Failed to decode captured image")
    }

    var result = decoded
    if let maxWidth, maxWidth > 0 {
        result = Self.downscale(image: decoded, maxWidth: CGFloat(maxWidth))
    }

    guard let encoded = result.jpegData(compressionQuality: quality) else {
        throw CameraError.captureFailed("Failed to encode JPEG")
    }

    return (encoded, result.size)
}
|
||||
|
||||
/// Scales `image` down so its width equals `maxWidth`, preserving the aspect ratio.
///
/// Images with a non-positive dimension, or that are already no wider than `maxWidth`,
/// are returned unchanged.
///
/// - Parameters:
///   - image: The image to scale.
///   - maxWidth: The target width.
/// - Returns: The scaled image, or `image` itself when no scaling is needed.
private nonisolated static func downscale(image: UIImage, maxWidth: CGFloat) -> UIImage {
    let w = image.size.width
    let h = image.size.height
    guard w > 0, h > 0 else { return image }
    guard w > maxWidth else { return image }

    let scale = maxWidth / w
    let target = CGSize(width: maxWidth, height: max(1, h * scale))

    let format = UIGraphicsImageRendererFormat.default()
    format.opaque = false
    // The default format renders at the screen scale, which would make the bitmap
    // `maxWidth * screenScale` pixels wide while `size` still reports `maxWidth`.
    // Render at 1x so the encoded pixel size matches the requested width and the reported size.
    format.scale = 1
    let renderer = UIGraphicsImageRenderer(size: target, format: format)
    return renderer.image { _ in
        image.draw(in: CGRect(origin: .zero, size: target))
    }
}
|
||||
|
||||
/// Exports the asset at `inputURL` to an MP4 file at `outputURL`.
///
/// Uses `AVAssetExportPresetHighestQuality` with network optimisation enabled.
///
/// - Throws: `CameraError.exportFailed` when the export session cannot be created, is cancelled,
///   or ends in any state other than completed; the session's own error when it reports failure.
private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws {
    let source = AVAsset(url: inputURL)
    guard let exportSession = AVAssetExportSession(
        asset: source,
        presetName: AVAssetExportPresetHighestQuality)
    else {
        throw CameraError.exportFailed("Failed to create export session")
    }
    exportSession.outputURL = outputURL
    exportSession.outputFileType = .mp4
    exportSession.shouldOptimizeForNetworkUse = true

    try await withCheckedThrowingContinuation(isolation: nil) { (cont: CheckedContinuation<Void, Error>) in
        exportSession.exportAsynchronously {
            // Map the final session status onto exactly one continuation resume.
            switch exportSession.status {
            case .completed:
                cont.resume(returning: ())
            case .failed:
                let failure = exportSession.error ?? CameraError.exportFailed("Export failed")
                cont.resume(throwing: failure)
            case .cancelled:
                cont.resume(throwing: CameraError.exportFailed("Export cancelled"))
            default:
                cont.resume(throwing: CameraError.exportFailed("Export did not complete"))
            }
        }
    }
}
|
||||
}
|
||||
|
||||
/// Bridges the photo capture delegate callback to a checked continuation.
///
/// The continuation is resumed at most once; later callbacks are ignored.
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {
    private let continuation: CheckedContinuation<Data, Error>
    private var didResume = false

    init(_ continuation: CheckedContinuation<Data, Error>) {
        self.continuation = continuation
    }

    /// Resumes with the photo's file data, the capture error, or a "photo data missing" error.
    func photoOutput(
        _ output: AVCapturePhotoOutput,
        didFinishProcessingPhoto photo: AVCapturePhoto,
        error: Error?)
    {
        if self.didResume { return }
        self.didResume = true

        if let error {
            self.continuation.resume(throwing: error)
            return
        }

        if let data = photo.fileDataRepresentation() {
            self.continuation.resume(returning: data)
        } else {
            let missing = NSError(domain: "Camera", code: 1, userInfo: [
                NSLocalizedDescriptionKey: "photo data missing",
            ])
            self.continuation.resume(throwing: missing)
        }
    }
}
|
||||
|
||||
/// Bridges the movie recording delegate callback to a checked continuation.
///
/// The continuation is resumed at most once; later callbacks are ignored.
private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate {
    private let continuation: CheckedContinuation<URL, Error>
    private var didResume = false

    init(_ continuation: CheckedContinuation<URL, Error>) {
        self.continuation = continuation
    }

    /// Resumes with the recorded file URL, or with the error when the recording is unusable.
    ///
    /// A non-nil `error` does not always mean failure: AVFoundation also reports an error when
    /// a recording stops because a limit such as `maxRecordedDuration` was reached, and flags
    /// a usable file with `AVErrorRecordingSuccessfullyFinishedKey`. Those cases are treated
    /// as success so that duration-limited clips are not rejected.
    func fileOutput(
        _ output: AVCaptureFileOutput,
        didFinishRecordingTo outputFileURL: URL,
        from connections: [AVCaptureConnection],
        error: Error?)
    {
        guard !self.didResume else { return }
        self.didResume = true

        if let error, !Self.recordingIsUsable(despite: error) {
            self.continuation.resume(throwing: error)
            return
        }
        self.continuation.resume(returning: outputFileURL)
    }

    /// Returns `true` when `error` still describes a successfully written recording.
    private static func recordingIsUsable(despite error: Error) -> Bool {
        let nsError = error as NSError
        if (nsError.userInfo[AVErrorRecordingSuccessfullyFinishedKey] as? Bool) == true {
            return true
        }
        return nsError.domain == AVFoundationErrorDomain
            && nsError.code == AVError.Code.maximumDurationReached.rawValue
    }
}
|
||||
@@ -26,6 +26,8 @@
|
||||
</array>
|
||||
<key>NSLocalNetworkUsageDescription</key>
|
||||
<string>Clawdis discovers and connects to your Clawdis bridge on the local network.</string>
|
||||
<key>NSCameraUsageDescription</key>
|
||||
<string>Clawdis can capture photos or short video clips when requested via the bridge.</string>
|
||||
<key>NSMicrophoneUsageDescription</key>
|
||||
<string>Clawdis needs microphone access for voice wake.</string>
|
||||
<key>NSSpeechRecognitionUsageDescription</key>
|
||||
|
||||
@@ -6,6 +6,7 @@ import SwiftUI
|
||||
final class NodeAppModel: ObservableObject {
|
||||
@Published var isBackgrounded: Bool = false
|
||||
let screen = ScreenController()
|
||||
let camera = CameraController()
|
||||
@Published var bridgeStatusText: String = "Not connected"
|
||||
@Published var bridgeServerName: String?
|
||||
@Published var bridgeRemoteAddress: String?
|
||||
@@ -182,13 +183,22 @@ final class NodeAppModel: ObservableObject {
|
||||
}
|
||||
|
||||
private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse {
|
||||
if req.command.hasPrefix("screen."), self.isBackgrounded {
|
||||
if req.command.hasPrefix("screen.") || req.command.hasPrefix("camera."), self.isBackgrounded {
|
||||
return BridgeInvokeResponse(
|
||||
id: req.id,
|
||||
ok: false,
|
||||
error: ClawdisNodeError(
|
||||
code: .backgroundUnavailable,
|
||||
message: "NODE_BACKGROUND_UNAVAILABLE: screen commands require foreground"))
|
||||
message: "NODE_BACKGROUND_UNAVAILABLE: screen/camera commands require foreground"))
|
||||
}
|
||||
|
||||
if req.command.hasPrefix("camera."), !self.isCameraEnabled() {
|
||||
return BridgeInvokeResponse(
|
||||
id: req.id,
|
||||
ok: false,
|
||||
error: ClawdisNodeError(
|
||||
code: .unavailable,
|
||||
message: "CAMERA_DISABLED: enable Camera in iOS Settings → Camera → Allow Camera"))
|
||||
}
|
||||
|
||||
do {
|
||||
@@ -222,6 +232,46 @@ final class NodeAppModel: ObservableObject {
|
||||
let payload = try Self.encodePayload(["format": "png", "base64": base64])
|
||||
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
||||
|
||||
case ClawdisCameraCommand.snap.rawValue:
|
||||
let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ??
|
||||
ClawdisCameraSnapParams()
|
||||
let res = try await self.camera.snap(params: params)
|
||||
|
||||
struct Payload: Codable {
|
||||
var format: String
|
||||
var base64: String
|
||||
var width: Int
|
||||
var height: Int
|
||||
}
|
||||
let payload = try Self.encodePayload(Payload(
|
||||
format: res.format,
|
||||
base64: res.base64,
|
||||
width: res.width,
|
||||
height: res.height))
|
||||
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
||||
|
||||
case ClawdisCameraCommand.clip.rawValue:
|
||||
let params = (try? Self.decodeParams(ClawdisCameraClipParams.self, from: req.paramsJSON)) ??
|
||||
ClawdisCameraClipParams()
|
||||
|
||||
let suspended = (params.includeAudio ?? true) ? self.voiceWake.suspendForExternalAudioCapture() : false
|
||||
defer { self.voiceWake.resumeAfterExternalAudioCapture(wasSuspended: suspended) }
|
||||
|
||||
let res = try await self.camera.clip(params: params)
|
||||
|
||||
struct Payload: Codable {
|
||||
var format: String
|
||||
var base64: String
|
||||
var durationMs: Int
|
||||
var hasAudio: Bool
|
||||
}
|
||||
let payload = try Self.encodePayload(Payload(
|
||||
format: res.format,
|
||||
base64: res.base64,
|
||||
durationMs: res.durationMs,
|
||||
hasAudio: res.hasAudio))
|
||||
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
||||
|
||||
default:
|
||||
return BridgeInvokeResponse(
|
||||
id: req.id,
|
||||
@@ -254,4 +304,10 @@ final class NodeAppModel: ObservableObject {
|
||||
}
|
||||
return json
|
||||
}
|
||||
|
||||
/// Reports whether the camera feature is enabled via the `camera.enabled` user default.
///
/// - Returns: `true` when the key has never been set (default-on); otherwise the stored Boolean.
private func isCameraEnabled() -> Bool {
    let defaults = UserDefaults.standard
    // Default-on: if the key doesn't exist yet, treat it as enabled.
    guard defaults.object(forKey: "camera.enabled") != nil else { return true }
    return defaults.bool(forKey: "camera.enabled")
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import SwiftUI
|
||||
|
||||
struct RootTabs: View {
|
||||
@EnvironmentObject private var appModel: NodeAppModel
|
||||
@State private var isConnectingPulse: Bool = false
|
||||
|
||||
var body: some View {
|
||||
TabView {
|
||||
@@ -27,12 +28,18 @@ struct RootTabs: View {
|
||||
radius: self.settingsIndicatorGlowRadius,
|
||||
x: 0,
|
||||
y: 0)
|
||||
.scaleEffect(self.settingsIndicatorScale)
|
||||
.opacity(self.settingsIndicatorOpacity)
|
||||
.offset(x: 7, y: -2)
|
||||
}
|
||||
Text("Settings")
|
||||
}
|
||||
}
|
||||
}
|
||||
.onAppear { self.updateConnectingPulse(for: self.bridgeIndicatorState) }
|
||||
.onChange(of: self.bridgeIndicatorState) { _, newValue in
|
||||
self.updateConnectingPulse(for: newValue)
|
||||
}
|
||||
}
|
||||
|
||||
private enum BridgeIndicatorState {
|
||||
@@ -74,9 +81,31 @@ struct RootTabs: View {
|
||||
case .connected:
|
||||
6
|
||||
case .connecting:
|
||||
4
|
||||
self.isConnectingPulse ? 6 : 3
|
||||
case .disconnected:
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// Scale applied to the settings indicator: 1 unless connecting, where it alternates
/// between 1.12 and 0.96 with `isConnectingPulse`.
private var settingsIndicatorScale: CGFloat {
    if self.bridgeIndicatorState != .connecting { return 1 }
    if self.isConnectingPulse { return 1.12 }
    return 0.96
}
|
||||
|
||||
/// Opacity applied to the settings indicator: 1 unless connecting, where it alternates
/// between 1.0 and 0.75 with `isConnectingPulse`.
private var settingsIndicatorOpacity: Double {
    if self.bridgeIndicatorState != .connecting { return 1 }
    if self.isConnectingPulse { return 1.0 }
    return 0.75
}
|
||||
|
||||
/// Starts or stops the repeating "connecting" pulse to match `state`.
///
/// Any state other than `.connecting` animates `isConnectingPulse` back to `false`.
/// While connecting, the repeating animation is started only if the pulse is not already on.
private func updateConnectingPulse(for state: BridgeIndicatorState) {
    if state != .connecting {
        withAnimation(.easeOut(duration: 0.2)) { self.isConnectingPulse = false }
        return
    }

    if self.isConnectingPulse { return }

    let pulse = Animation.easeInOut(duration: 0.9).repeatForever(autoreverses: true)
    withAnimation(pulse) {
        self.isConnectingPulse = true
    }
}
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ struct SettingsTab: View {
|
||||
@AppStorage("voiceWake.enabled") private var voiceWakeEnabled: Bool = false
|
||||
@AppStorage("camera.enabled") private var cameraEnabled: Bool = true
|
||||
@AppStorage("bridge.preferredStableID") private var preferredBridgeStableID: String = ""
|
||||
@AppStorage("bridge.lastDiscoveredStableID") private var lastDiscoveredBridgeStableID: String = ""
|
||||
@StateObject private var connectStatus = ConnectStatusStore()
|
||||
@State private var connectingBridgeID: String?
|
||||
@State private var localIPAddress: String?
|
||||
@@ -207,6 +208,8 @@ struct SettingsTab: View {
|
||||
self.connectingBridgeID = bridge.id
|
||||
self.preferredBridgeStableID = bridge.stableID
|
||||
BridgeSettingsStore.savePreferredBridgeStableID(bridge.stableID)
|
||||
self.lastDiscoveredBridgeStableID = bridge.stableID
|
||||
BridgeSettingsStore.saveLastDiscoveredBridgeStableID(bridge.stableID)
|
||||
defer { self.connectingBridgeID = nil }
|
||||
|
||||
do {
|
||||
|
||||
@@ -205,6 +205,37 @@ final class VoiceWakeManager: NSObject, ObservableObject {
|
||||
try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
|
||||
}
|
||||
|
||||
/// Temporarily releases the microphone so other subsystems (e.g. camera video capture) can record audio.
///
/// Tears down the tap drain task and queue, the recognition task and request, stops the audio
/// engine if it is running, and deactivates the shared audio session.
///
/// - Returns: `true` when listening was active and was suspended; `false` when voice wake is
///   disabled or not currently listening (nothing is changed in that case).
func suspendForExternalAudioCapture() -> Bool {
    let isActivelyListening = self.isEnabled && self.isListening
    if !isActivelyListening { return false }

    self.isListening = false
    self.statusText = "Paused"

    // Stop draining tap buffers and drop the queue.
    self.tapDrainTask?.cancel()
    self.tapDrainTask = nil
    self.tapQueue?.clear()
    self.tapQueue = nil

    // Cancel any in-flight recognition.
    self.recognitionTask?.cancel()
    self.recognitionTask = nil
    self.recognitionRequest = nil

    if self.audioEngine.isRunning {
        self.audioEngine.stop()
        self.audioEngine.inputNode.removeTap(onBus: 0)
    }

    // Best effort: a failure to deactivate the session is ignored.
    try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    return true
}
|
||||
|
||||
/// Restarts voice wake after an external audio capture, if it had been suspended for it.
///
/// - Parameter wasSuspended: The value returned by `suspendForExternalAudioCapture()`;
///   when `false` this does nothing.
func resumeAfterExternalAudioCapture(wasSuspended: Bool) {
    if wasSuspended {
        Task { await self.start() }
    }
}
|
||||
|
||||
private func startRecognition() throws {
|
||||
self.recognitionTask?.cancel()
|
||||
self.recognitionTask = nil
|
||||
|
||||
@@ -54,5 +54,6 @@ targets:
|
||||
NSLocalNetworkUsageDescription: Clawdis discovers and connects to your Clawdis bridge on the local network.
|
||||
NSBonjourServices:
|
||||
- _clawdis-bridge._tcp
|
||||
NSCameraUsageDescription: Clawdis can capture photos or short video clips when requested via the bridge.
|
||||
NSMicrophoneUsageDescription: Clawdis needs microphone access for voice wake.
|
||||
NSSpeechRecognitionUsageDescription: Clawdis uses on-device speech recognition for voice wake.
|
||||
|
||||
341
apps/macos/Sources/Clawdis/CameraCaptureService.swift
Normal file
341
apps/macos/Sources/Clawdis/CameraCaptureService.swift
Normal file
@@ -0,0 +1,341 @@
|
||||
import AVFoundation
|
||||
import ClawdisIPC
|
||||
import CoreGraphics
|
||||
import Foundation
|
||||
import ImageIO
|
||||
import OSLog
|
||||
import UniformTypeIdentifiers
|
||||
|
||||
actor CameraCaptureService {
|
||||
/// Errors thrown by the camera capture and export paths.
enum CameraError: LocalizedError, Sendable {
    case cameraUnavailable
    case microphoneUnavailable
    case permissionDenied(kind: String)
    case captureFailed(String)
    case exportFailed(String)

    /// Message exposed through `LocalizedError`; cases carrying a message return it verbatim.
    var errorDescription: String? {
        switch self {
        case .cameraUnavailable:
            return "Camera unavailable"
        case .microphoneUnavailable:
            return "Microphone unavailable"
        case let .permissionDenied(kind):
            return "\(kind) permission denied"
        case let .captureFailed(message), let .exportFailed(message):
            return message
        }
    }
}
|
||||
|
||||
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "camera")
|
||||
|
||||
/// Captures a single still photo and returns it re-encoded as JPEG.
///
/// - Parameters:
///   - facing: Requested camera facing; front when nil.
///   - maxWidth: Optional maximum width; nil or non-positive values disable downscaling.
///   - quality: JPEG quality, clamped by `clampQuality`.
/// - Returns: The JPEG bytes and the size of the encoded image.
/// - Throws: `CameraError` when camera access is denied, no camera is available, or the session
///   cannot be configured or the image re-encoded; any error reported by the capture itself.
func snap(facing: CameraFacing?, maxWidth: Int?, quality: Double?) async throws -> (data: Data, size: CGSize) {
    let facing = facing ?? .front
    let maxWidth = maxWidth.flatMap { $0 > 0 ? $0 : nil }
    let quality = Self.clampQuality(quality)

    try await self.ensureAccess(for: .video)

    let session = AVCaptureSession()
    session.sessionPreset = .photo

    guard let device = Self.pickCamera(facing: facing) else {
        throw CameraError.cameraUnavailable
    }

    let input = try AVCaptureDeviceInput(device: device)
    guard session.canAddInput(input) else {
        throw CameraError.captureFailed("Failed to add camera input")
    }
    session.addInput(input)

    let output = AVCapturePhotoOutput()
    guard session.canAddOutput(output) else {
        throw CameraError.captureFailed("Failed to add photo output")
    }
    session.addOutput(output)
    output.maxPhotoQualityPrioritization = .quality

    session.startRunning()
    defer { session.stopRunning() }

    // Prefer JPEG when the output offers it; otherwise use the default settings.
    let settings: AVCapturePhotoSettings = {
        if output.availablePhotoCodecTypes.contains(.jpeg) {
            return AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg])
        }
        return AVCapturePhotoSettings()
    }()
    settings.photoQualityPrioritization = .quality

    // The photo output does not keep its capture delegate alive (Apple's AVCam sample holds
    // its own strong reference for this reason). Keep one here until the continuation has
    // resumed; otherwise the delegate can be deallocated before the callback fires and the
    // continuation would never resume.
    var delegate: PhotoCaptureDelegate?
    let rawData: Data = try await withCheckedThrowingContinuation(isolation: nil) { cont in
        let captureDelegate = PhotoCaptureDelegate(cont)
        delegate = captureDelegate
        output.capturePhoto(with: settings, delegate: captureDelegate)
    }
    withExtendedLifetime(delegate) {}

    return try Self.reencodeJPEG(imageData: rawData, maxWidth: maxWidth, quality: quality)
}
|
||||
|
||||
/// Records a short video clip and exports it as an MP4 file.
///
/// - Parameters:
///   - facing: Requested camera facing; front when nil.
///   - durationMs: Clip length in milliseconds, clamped by `clampDurationMs`.
///   - includeAudio: Whether to add the default microphone to the recording.
///   - outPath: Destination path for the MP4. When nil or blank, a uniquely named file in the
///     temporary directory is used. Any existing file at the destination is removed first.
/// - Returns: The path of the exported MP4, the clamped requested duration and whether audio
///   was requested.
/// - Throws: `CameraError` when access is denied, a device is missing, or the session or export
///   cannot be set up; any error reported by recording or export.
func clip(
    facing: CameraFacing?,
    durationMs: Int?,
    includeAudio: Bool,
    outPath: String?) async throws -> (path: String, durationMs: Int, hasAudio: Bool)
{
    let facing = facing ?? .front
    let durationMs = Self.clampDurationMs(durationMs)

    try await self.ensureAccess(for: .video)
    if includeAudio {
        try await self.ensureAccess(for: .audio)
    }

    let session = AVCaptureSession()
    session.sessionPreset = .high

    guard let camera = Self.pickCamera(facing: facing) else {
        throw CameraError.cameraUnavailable
    }
    let cameraInput = try AVCaptureDeviceInput(device: camera)
    guard session.canAddInput(cameraInput) else {
        throw CameraError.captureFailed("Failed to add camera input")
    }
    session.addInput(cameraInput)

    if includeAudio {
        guard let mic = AVCaptureDevice.default(for: .audio) else {
            throw CameraError.microphoneUnavailable
        }
        let micInput = try AVCaptureDeviceInput(device: mic)
        guard session.canAddInput(micInput) else {
            throw CameraError.captureFailed("Failed to add microphone input")
        }
        session.addInput(micInput)
    }

    let output = AVCaptureMovieFileOutput()
    guard session.canAddOutput(output) else {
        throw CameraError.captureFailed("Failed to add movie output")
    }
    session.addOutput(output)
    // Recording is ended by this limit; nothing in this method stops it explicitly.
    output.maxRecordedDuration = CMTime(value: Int64(durationMs), timescale: 1000)

    session.startRunning()
    defer { session.stopRunning() }

    // The intermediate .mov is always removed; the exported MP4 is left for the caller.
    let tmpMovURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mov")
    defer { try? FileManager.default.removeItem(at: tmpMovURL) }

    let outputURL: URL = {
        if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
            return URL(fileURLWithPath: outPath)
        }
        return FileManager.default.temporaryDirectory
            .appendingPathComponent("clawdis-camera-\(UUID().uuidString).mp4")
    }()

    // Ensure we don't fail exporting due to an existing file.
    try? FileManager.default.removeItem(at: outputURL)

    let logger = self.logger
    // Hold a strong reference to the recording delegate until the continuation has resumed,
    // rather than relying on the file output to keep it alive for the whole recording.
    var delegate: MovieFileDelegate?
    let recordedURL: URL = try await withCheckedThrowingContinuation(isolation: nil) { cont in
        let recordingDelegate = MovieFileDelegate(cont, logger: logger)
        delegate = recordingDelegate
        output.startRecording(to: tmpMovURL, recordingDelegate: recordingDelegate)
    }
    withExtendedLifetime(delegate) {}

    try await Self.exportToMP4(inputURL: recordedURL, outputURL: outputURL)
    return (path: outputURL.path, durationMs: durationMs, hasAudio: includeAudio)
}
|
||||
|
||||
/// Ensures capture access for `mediaType`, prompting when the status is not yet determined.
///
/// - Parameter mediaType: The media type to check; `.video` is reported as "Camera",
///   anything else as "Microphone".
/// - Throws: `CameraError.permissionDenied` when access is denied, restricted, refused at the
///   prompt, or the status is unknown.
private func ensureAccess(for mediaType: AVMediaType) async throws {
    let denied = CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone")

    switch AVCaptureDevice.authorizationStatus(for: mediaType) {
    case .authorized:
        return
    case .notDetermined:
        let granted = await withCheckedContinuation(isolation: nil) { cont in
            AVCaptureDevice.requestAccess(for: mediaType) { granted in
                cont.resume(returning: granted)
            }
        }
        guard granted else { throw denied }
    case .denied, .restricted:
        throw denied
    @unknown default:
        throw denied
    }
}
|
||||
|
||||
/// Returns a camera for the requested facing, falling back to the default video device.
///
/// - Parameter facing: `.front` selects the front position; any other value selects the back.
private nonisolated static func pickCamera(facing: CameraFacing) -> AVCaptureDevice? {
    let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
    let positioned = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position)

    // Many macOS cameras report `unspecified` position; fall back to any default.
    return positioned ?? AVCaptureDevice.default(for: .video)
}
|
||||
|
||||
/// Normalizes a requested JPEG quality.
///
/// - Parameter quality: Requested quality; `nil` selects the default of `0.9`.
/// - Returns: The quality limited to the range `0.05...1.0`.
private nonisolated static func clampQuality(_ quality: Double?) -> Double {
    let requested = quality ?? 0.9
    let raisedToFloor = max(0.05, requested)
    return min(1.0, raisedToFloor)
}
|
||||
|
||||
/// Normalizes a requested clip duration in milliseconds.
///
/// - Parameter ms: Requested duration; `nil` selects the default of `3000`.
/// - Returns: The duration limited to the range `250...15_000`.
private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
    let requested = ms ?? 3000
    let raisedToFloor = max(250, requested)
    return min(15_000, raisedToFloor)
}
|
||||
|
||||
/// Decodes captured image data, optionally downscales it, and re-encodes it as JPEG.
///
/// - Parameters:
///   - imageData: Encoded image bytes; the first image in the data is used.
///   - maxWidth: When non-nil and smaller than the decoded width, the image is
///     downscaled to this width before encoding.
///   - quality: Lossy compression quality passed to the JPEG encoder.
/// - Returns: The JPEG bytes and the pixel size of the encoded image.
/// - Throws: `CameraError.captureFailed` when decoding, downscaling, destination
///   creation, or encoding fails.
private nonisolated static func reencodeJPEG(
    imageData: Data,
    maxWidth: Int?,
    quality: Double) throws -> (data: Data, size: CGSize)
{
    guard let source = CGImageSourceCreateWithData(imageData as CFData, nil),
          let decoded = CGImageSourceCreateImageAtIndex(source, 0, nil)
    else {
        throw CameraError.captureFailed("Failed to decode captured image")
    }

    // Start from the decoded image and swap in a scaled copy only when it is too wide.
    var encodedImage = decoded
    if let maxWidth, decoded.width > maxWidth {
        guard let scaled = self.downscale(image: decoded, maxWidth: maxWidth) else {
            throw CameraError.captureFailed("Failed to downscale image")
        }
        encodedImage = scaled
    }

    let buffer = NSMutableData()
    guard let destination = CGImageDestinationCreateWithData(
        buffer,
        UTType.jpeg.identifier as CFString,
        1,
        nil)
    else {
        throw CameraError.captureFailed("Failed to create JPEG destination")
    }

    let encodeOptions = [kCGImageDestinationLossyCompressionQuality: quality] as CFDictionary
    CGImageDestinationAddImage(destination, encodedImage, encodeOptions)
    guard CGImageDestinationFinalize(destination) else {
        throw CameraError.captureFailed("Failed to encode JPEG")
    }

    let pixelSize = CGSize(width: encodedImage.width, height: encodedImage.height)
    return (buffer as Data, pixelSize)
}
|
||||
|
||||
/// Scales `image` down to `maxWidth` pixels wide, keeping its aspect ratio.
///
/// - Parameters:
///   - image: The image to scale.
///   - maxWidth: Target width in pixels.
/// - Returns: `image` itself when it has a non-positive dimension or is already no
///   wider than `maxWidth`; a scaled copy otherwise; `nil` when `maxWidth` is not
///   positive or the bitmap context / output image cannot be created.
private nonisolated static func downscale(image: CGImage, maxWidth: Int) -> CGImage? {
    guard image.width > 0, image.height > 0 else { return image }
    guard image.width > maxWidth else { return image }
    // A zero or negative target width can never yield a bitmap; fail here rather
    // than handing CoreGraphics an invalid context size.
    guard maxWidth > 0 else { return nil }

    let scale = Double(maxWidth) / Double(image.width)
    let targetW = maxWidth
    // Never let rounding collapse the height to zero for very wide images.
    let targetH = max(1, Int((Double(image.height) * scale).rounded()))

    let cs = CGColorSpaceCreateDeviceRGB()
    let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue
    guard let ctx = CGContext(
        data: nil,
        width: targetW,
        height: targetH,
        bitsPerComponent: 8,
        bytesPerRow: 0,
        space: cs,
        bitmapInfo: bitmapInfo)
    else { return nil }

    ctx.interpolationQuality = .high
    ctx.draw(image, in: CGRect(x: 0, y: 0, width: targetW, height: targetH))
    return ctx.makeImage()
}
|
||||
|
||||
/// Transcodes the movie at `inputURL` into an MP4 file at `outputURL`.
///
/// Uses the medium-quality export preset and enables network optimization.
///
/// - Parameters:
///   - inputURL: Location of the recorded movie file.
///   - outputURL: Destination for the exported MP4.
/// - Throws: `CameraError.exportFailed` when the export session cannot be created
///   or finishes in any state other than `.completed`.
private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws {
    // Construct the URL-backed asset directly; the `AVAsset(url:)` convenience
    // initializer is deprecated in current SDKs.
    let asset = AVURLAsset(url: inputURL)
    guard let export = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetMediumQuality) else {
        throw CameraError.exportFailed("Failed to create export session")
    }
    export.outputURL = outputURL
    export.outputFileType = .mp4
    export.shouldOptimizeForNetworkUse = true

    // The completion handler fires for success and failure alike; the outcome is
    // read from `export.status` afterwards.
    await withCheckedContinuation { cont in
        export.exportAsynchronously {
            cont.resume()
        }
    }

    switch export.status {
    case .completed:
        return
    case .failed:
        throw CameraError.exportFailed(export.error?.localizedDescription ?? "export failed")
    case .cancelled:
        throw CameraError.exportFailed("export cancelled")
    default:
        throw CameraError.exportFailed("export did not complete (\(export.status.rawValue))")
    }
}
|
||||
}
|
||||
|
||||
/// Bridges `AVCapturePhotoCaptureDelegate` callbacks to a one-shot continuation
/// that yields the captured photo's file data.
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {
    /// Pending continuation; cleared before resuming so it is resumed at most once.
    private var cont: CheckedContinuation<Data, Error>?

    /// - Parameter cont: Continuation resumed with the photo data or a capture error.
    init(_ cont: CheckedContinuation<Data, Error>) {
        self.cont = cont
    }

    /// Resumes the continuation with the processed photo's data, or with the
    /// reported error / a `captureFailed` error when no data is available.
    func photoOutput(
        _ output: AVCapturePhotoOutput,
        didFinishProcessingPhoto photo: AVCapturePhoto,
        error: Error?)
    {
        guard let cont else { return }
        self.cont = nil
        if let error {
            cont.resume(throwing: error)
            return
        }
        guard let data = photo.fileDataRepresentation() else {
            cont.resume(throwing: CameraCaptureService.CameraError.captureFailed("No photo data"))
            return
        }
        cont.resume(returning: data)
    }

    /// Backstop for captures that end without a processed photo being delivered.
    ///
    /// This is the final callback of a capture request. If the continuation is
    /// still pending here, nothing else will resume it, so fail it instead of
    /// leaving the awaiting task suspended forever.
    func photoOutput(
        _ output: AVCapturePhotoOutput,
        didFinishCaptureFor resolvedSettings: AVCaptureResolvedPhotoSettings,
        error: Error?)
    {
        guard let cont else { return }
        self.cont = nil
        if let error {
            cont.resume(throwing: error)
        } else {
            cont.resume(throwing: CameraCaptureService.CameraError.captureFailed("No photo data"))
        }
    }
}
|
||||
|
||||
/// Bridges `AVCaptureFileOutputRecordingDelegate` to a one-shot continuation that
/// yields the URL of the finished recording.
private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate {
    /// Pending continuation; cleared before resuming so it is resumed at most once.
    private var cont: CheckedContinuation<URL, Error>?
    private let logger: Logger

    /// - Parameters:
    ///   - cont: Continuation resumed when recording finishes.
    ///   - logger: Destination for recording-failure messages.
    init(_ cont: CheckedContinuation<URL, Error>, logger: Logger) {
        self.cont = cont
        self.logger = logger
    }

    /// Resumes the continuation with the recorded file URL, or with the error when
    /// recording failed for a reason other than reaching the maximum duration.
    func fileOutput(
        _ output: AVCaptureFileOutput,
        didFinishRecordingTo outputFileURL: URL,
        from connections: [AVCaptureConnection],
        error: Error?)
    {
        guard let pending = self.cont else { return }
        self.cont = nil

        guard let error else {
            pending.resume(returning: outputFileURL)
            return
        }

        // Reaching the maximum duration is reported as an error but the file is
        // still handed back as a successful recording.
        let nsError = error as NSError
        let reachedMaxDuration = nsError.domain == AVFoundationErrorDomain
            && nsError.code == AVError.maximumDurationReached.rawValue
        if reachedMaxDuration {
            pending.resume(returning: outputFileURL)
            return
        }

        self.logger.error("camera record failed: \(error.localizedDescription, privacy: .public)")
        pending.resume(throwing: error)
    }
}
|
||||
@@ -24,6 +24,7 @@ let webChatEnabledKey = "clawdis.webChatEnabled"
|
||||
let webChatSwiftUIEnabledKey = "clawdis.webChatSwiftUIEnabled"
|
||||
let webChatPortKey = "clawdis.webChatPort"
|
||||
let canvasEnabledKey = "clawdis.canvasEnabled"
|
||||
let cameraEnabledKey = "clawdis.cameraEnabled"
|
||||
let peekabooBridgeEnabledKey = "clawdis.peekabooBridgeEnabled"
|
||||
let deepLinkAgentEnabledKey = "clawdis.deepLinkAgentEnabled"
|
||||
let deepLinkKeyKey = "clawdis.deepLinkKey"
|
||||
|
||||
@@ -3,6 +3,8 @@ import Foundation
|
||||
import OSLog
|
||||
|
||||
enum ControlRequestHandler {
|
||||
private static let cameraCapture = CameraCaptureService()
|
||||
|
||||
static func process(
|
||||
request: Request,
|
||||
notifier: NotificationManager = NotificationManager(),
|
||||
@@ -77,6 +79,16 @@ enum ControlRequestHandler {
|
||||
command: command,
|
||||
paramsJSON: paramsJSON,
|
||||
logger: logger)
|
||||
|
||||
case let .cameraSnap(facing, maxWidth, quality, outPath):
|
||||
return await self.handleCameraSnap(facing: facing, maxWidth: maxWidth, quality: quality, outPath: outPath)
|
||||
|
||||
case let .cameraClip(facing, durationMs, includeAudio, outPath):
|
||||
return await self.handleCameraClip(
|
||||
facing: facing,
|
||||
durationMs: durationMs,
|
||||
includeAudio: includeAudio,
|
||||
outPath: outPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,6 +185,10 @@ enum ControlRequestHandler {
|
||||
UserDefaults.standard.object(forKey: canvasEnabledKey) as? Bool ?? true
|
||||
}
|
||||
|
||||
/// Reads the user's camera opt-in from `UserDefaults`.
///
/// - Returns: The stored Boolean for `cameraEnabledKey`, or `false` when the key is
///   missing or does not hold a `Bool`.
private static func cameraEnabled() -> Bool {
    guard let stored = UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool else {
        return false
    }
    return stored
}
|
||||
|
||||
private static func handleCanvasShow(
|
||||
session: String,
|
||||
path: String?,
|
||||
@@ -254,4 +270,46 @@ enum ControlRequestHandler {
|
||||
return Response(ok: false, message: error.localizedDescription)
|
||||
}
|
||||
}
|
||||
|
||||
/// Captures a photo and writes it to disk, returning the written path in the response.
///
/// - Parameters:
///   - facing: Camera facing forwarded to the capture service.
///   - maxWidth: Maximum width forwarded to the capture service.
///   - quality: JPEG quality forwarded to the capture service.
///   - outPath: Destination path; when `nil` or blank, a uniquely named file in the
///     temporary directory is used instead.
/// - Returns: `ok: true` with the file path on success; `ok: false` with a message
///   when the camera is disabled or capture/writing fails.
private static func handleCameraSnap(
    facing: CameraFacing?,
    maxWidth: Int?,
    quality: Double?,
    outPath: String?) async -> Response
{
    if !self.cameraEnabled() {
        return Response(ok: false, message: "Camera disabled by user")
    }

    do {
        let snapshot = try await self.cameraCapture.snap(facing: facing, maxWidth: maxWidth, quality: quality)

        let destination: URL
        if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
            // The blank check trims, but the path itself is used exactly as supplied.
            destination = URL(fileURLWithPath: outPath)
        } else {
            let fileName = "clawdis-camera-snap-\(UUID().uuidString).jpg"
            destination = FileManager.default.temporaryDirectory.appendingPathComponent(fileName)
        }

        try snapshot.data.write(to: destination, options: [.atomic])
        return Response(ok: true, message: destination.path)
    } catch {
        return Response(ok: false, message: error.localizedDescription)
    }
}
|
||||
|
||||
/// Records a short video clip via the capture service and returns its path.
///
/// - Parameters:
///   - facing: Camera facing forwarded to the capture service.
///   - durationMs: Requested clip length in milliseconds, forwarded as-is.
///   - includeAudio: Whether audio should be recorded.
///   - outPath: Destination path forwarded to the capture service.
/// - Returns: `ok: true` with the clip path on success; `ok: false` with a message
///   when the camera is disabled or recording fails.
private static func handleCameraClip(
    facing: CameraFacing?,
    durationMs: Int?,
    includeAudio: Bool,
    outPath: String?) async -> Response
{
    if !self.cameraEnabled() {
        return Response(ok: false, message: "Camera disabled by user")
    }

    let outcome: Response
    do {
        let clip = try await self.cameraCapture.clip(
            facing: facing,
            durationMs: durationMs,
            includeAudio: includeAudio,
            outPath: outPath)
        outcome = Response(ok: true, message: clip.path)
    } catch {
        outcome = Response(ok: false, message: error.localizedDescription)
    }
    return outcome
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ struct DebugSettings: View {
|
||||
@AppStorage(modelCatalogReloadKey) private var modelCatalogReloadBump: Int = 0
|
||||
@AppStorage(iconOverrideKey) private var iconOverrideRaw: String = IconOverrideSelection.system.rawValue
|
||||
@AppStorage(canvasEnabledKey) private var canvasEnabled: Bool = true
|
||||
@AppStorage(cameraEnabledKey) private var cameraEnabled: Bool = false
|
||||
@AppStorage(deepLinkAgentEnabledKey) private var deepLinkAgentEnabled: Bool = false
|
||||
@State private var modelsCount: Int?
|
||||
@State private var modelsLoading = false
|
||||
@@ -48,6 +49,7 @@ struct DebugSettings: View {
|
||||
self.pathsSection
|
||||
self.quickActionsSection
|
||||
self.canvasSection
|
||||
self.cameraSection
|
||||
self.experimentsSection
|
||||
|
||||
Spacer(minLength: 0)
|
||||
@@ -571,6 +573,20 @@ struct DebugSettings: View {
|
||||
}
|
||||
}
|
||||
|
||||
/// Debug-settings group holding the camera opt-in checkbox and its explanation.
private var cameraSection: some View {
    GroupBox("Camera") {
        VStack(alignment: .leading, spacing: 10) {
            // Bound to the `cameraEnabled` app-storage value.
            Toggle("Allow Camera (agent)", isOn: $cameraEnabled)
                .toggleStyle(.checkbox)
                .help("When off, camera requests return “Camera disabled by user”.")

            Text("Allows Clawdis to capture a photo or short video via the built-in camera.")
                .font(.caption)
                .foregroundStyle(.secondary)
        }
    }
}
|
||||
|
||||
private var experimentsSection: some View {
|
||||
GroupBox("Experiments") {
|
||||
Grid(alignment: .leadingFirstTextBaseline, horizontalSpacing: 14, verticalSpacing: 10) {
|
||||
|
||||
@@ -52,6 +52,7 @@ struct ClawdisCLI {
|
||||
|
||||
/// How the CLI presents a response's message in text mode.
enum Kind {
    /// `generic`: the message is written to standard output as-is.
    /// `mediaPath`: the message is a file path, printed with a `MEDIA:` prefix.
    case generic, mediaPath
}
|
||||
}
|
||||
|
||||
@@ -91,6 +92,9 @@ struct ClawdisCLI {
|
||||
case "canvas":
|
||||
return try self.parseCanvas(args: &args)
|
||||
|
||||
case "camera":
|
||||
return try self.parseCamera(args: &args)
|
||||
|
||||
default:
|
||||
throw CLIError.help
|
||||
}
|
||||
@@ -292,6 +296,62 @@ struct ClawdisCLI {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses `camera snap` / `camera clip` arguments into a control request.
///
/// Parsing is lenient: unknown flags are skipped, an unrecognized `--facing` value
/// is consumed but leaves the facing unchanged, and a numeric flag whose value is
/// missing or unparsable resets that option to `nil`.
///
/// - Parameter args: Remaining CLI arguments; consumed as they are parsed.
/// - Returns: A `.mediaPath` request for the chosen subcommand.
/// - Throws: `CLIError.help` when the subcommand is missing or unknown.
private static func parseCamera(args: inout [String]) throws -> ParsedCLIRequest {
    guard let subcommand = args.popFirst() else { throw CLIError.help }

    // Options understood by both subcommands.
    var facing: CameraFacing?
    var outPath: String?

    switch subcommand {
    case "snap":
        var maxWidth: Int?
        var quality: Double?

        while !args.isEmpty {
            let flag = args.removeFirst()
            switch flag {
            case "--facing":
                if let raw = args.popFirst(), let parsed = CameraFacing(rawValue: raw) {
                    facing = parsed
                }
            case "--max-width":
                maxWidth = args.popFirst().flatMap { Int($0) }
            case "--quality":
                quality = args.popFirst().flatMap { Double($0) }
            case "--out":
                outPath = args.popFirst()
            default:
                break
            }
        }

        let request: Request = .cameraSnap(
            facing: facing,
            maxWidth: maxWidth,
            quality: quality,
            outPath: outPath)
        return ParsedCLIRequest(request: request, kind: .mediaPath)

    case "clip":
        var durationMs: Int?
        var includeAudio = true

        while !args.isEmpty {
            let flag = args.removeFirst()
            switch flag {
            case "--facing":
                if let raw = args.popFirst(), let parsed = CameraFacing(rawValue: raw) {
                    facing = parsed
                }
            case "--duration-ms":
                durationMs = args.popFirst().flatMap { Int($0) }
            case "--no-audio":
                includeAudio = false
            case "--out":
                outPath = args.popFirst()
            default:
                break
            }
        }

        let request: Request = .cameraClip(
            facing: facing,
            durationMs: durationMs,
            includeAudio: includeAudio,
            outPath: outPath)
        return ParsedCLIRequest(request: request, kind: .mediaPath)

    default:
        throw CLIError.help
    }
}
|
||||
|
||||
private static func parseCanvasPlacement(
|
||||
args: inout [String],
|
||||
session: inout String,
|
||||
@@ -334,6 +394,10 @@ struct ClawdisCLI {
|
||||
if let message = response.message, !message.isEmpty {
|
||||
FileHandle.standardOutput.write(Data((message + "\n").utf8))
|
||||
}
|
||||
case .mediaPath:
|
||||
if let message = response.message, !message.isEmpty {
|
||||
print("MEDIA:\(message)")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -352,6 +416,8 @@ struct ClawdisCLI {
|
||||
output["payload"] = text
|
||||
}
|
||||
}
|
||||
case .mediaPath:
|
||||
break
|
||||
}
|
||||
|
||||
let json = try JSONSerialization.data(withJSONObject: output, options: [.prettyPrinted])
|
||||
@@ -406,6 +472,10 @@ struct ClawdisCLI {
|
||||
clawdis-mac canvas eval --js <code> [--session <key>]
|
||||
clawdis-mac canvas snapshot [--out <path>] [--session <key>]
|
||||
|
||||
Camera:
|
||||
clawdis-mac camera snap [--facing <front|back>] [--max-width <px>] [--quality <0-1>] [--out <path>]
|
||||
clawdis-mac camera clip [--facing <front|back>] [--duration-ms <ms>] [--no-audio] [--out <path>]
|
||||
|
||||
Browser (clawd):
|
||||
clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot|eval|query|dom|snapshot
|
||||
|
||||
@@ -433,6 +503,7 @@ struct ClawdisCLI {
|
||||
Output:
|
||||
Default output is text. Use --json for machine-readable output.
|
||||
In text mode, `browser screenshot` prints MEDIA:<path>.
|
||||
In text mode, `camera snap` and `camera clip` print MEDIA:<path>.
|
||||
"""
|
||||
print(usage)
|
||||
}
|
||||
|
||||
@@ -13,6 +13,11 @@ public enum Capability: String, Codable, CaseIterable, Sendable {
|
||||
case speechRecognition
|
||||
}
|
||||
|
||||
/// Which camera a capture request targets; raw values are `"front"` and `"back"`.
public enum CameraFacing: String, Codable, Sendable {
    case front, back
}
|
||||
|
||||
// MARK: - Requests
|
||||
|
||||
/// Notification interruption level (maps to UNNotificationInterruptionLevel)
|
||||
@@ -74,6 +79,8 @@ public enum Request: Sendable {
|
||||
case canvasSnapshot(session: String, outPath: String?)
|
||||
case nodeList
|
||||
case nodeInvoke(nodeId: String, command: String, paramsJSON: String?)
|
||||
case cameraSnap(facing: CameraFacing?, maxWidth: Int?, quality: Double?, outPath: String?)
|
||||
case cameraClip(facing: CameraFacing?, durationMs: Int?, includeAudio: Bool, outPath: String?)
|
||||
}
|
||||
|
||||
// MARK: - Responses
|
||||
@@ -104,6 +111,11 @@ extension Request: Codable {
|
||||
case path
|
||||
case javaScript
|
||||
case outPath
|
||||
case facing
|
||||
case maxWidth
|
||||
case quality
|
||||
case durationMs
|
||||
case includeAudio
|
||||
case placement
|
||||
case nodeId
|
||||
case nodeCommand
|
||||
@@ -124,6 +136,8 @@ extension Request: Codable {
|
||||
case canvasSnapshot
|
||||
case nodeList
|
||||
case nodeInvoke
|
||||
case cameraSnap
|
||||
case cameraClip
|
||||
}
|
||||
|
||||
public func encode(to encoder: Encoder) throws {
|
||||
@@ -198,6 +212,20 @@ extension Request: Codable {
|
||||
try container.encode(nodeId, forKey: .nodeId)
|
||||
try container.encode(command, forKey: .nodeCommand)
|
||||
try container.encodeIfPresent(paramsJSON, forKey: .paramsJSON)
|
||||
|
||||
case let .cameraSnap(facing, maxWidth, quality, outPath):
|
||||
try container.encode(Kind.cameraSnap, forKey: .type)
|
||||
try container.encodeIfPresent(facing, forKey: .facing)
|
||||
try container.encodeIfPresent(maxWidth, forKey: .maxWidth)
|
||||
try container.encodeIfPresent(quality, forKey: .quality)
|
||||
try container.encodeIfPresent(outPath, forKey: .outPath)
|
||||
|
||||
case let .cameraClip(facing, durationMs, includeAudio, outPath):
|
||||
try container.encode(Kind.cameraClip, forKey: .type)
|
||||
try container.encodeIfPresent(facing, forKey: .facing)
|
||||
try container.encodeIfPresent(durationMs, forKey: .durationMs)
|
||||
try container.encode(includeAudio, forKey: .includeAudio)
|
||||
try container.encodeIfPresent(outPath, forKey: .outPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -274,6 +302,20 @@ extension Request: Codable {
|
||||
let command = try container.decode(String.self, forKey: .nodeCommand)
|
||||
let paramsJSON = try container.decodeIfPresent(String.self, forKey: .paramsJSON)
|
||||
self = .nodeInvoke(nodeId: nodeId, command: command, paramsJSON: paramsJSON)
|
||||
|
||||
case .cameraSnap:
|
||||
let facing = try container.decodeIfPresent(CameraFacing.self, forKey: .facing)
|
||||
let maxWidth = try container.decodeIfPresent(Int.self, forKey: .maxWidth)
|
||||
let quality = try container.decodeIfPresent(Double.self, forKey: .quality)
|
||||
let outPath = try container.decodeIfPresent(String.self, forKey: .outPath)
|
||||
self = .cameraSnap(facing: facing, maxWidth: maxWidth, quality: quality, outPath: outPath)
|
||||
|
||||
case .cameraClip:
|
||||
let facing = try container.decodeIfPresent(CameraFacing.self, forKey: .facing)
|
||||
let durationMs = try container.decodeIfPresent(Int.self, forKey: .durationMs)
|
||||
let includeAudio = (try? container.decode(Bool.self, forKey: .includeAudio)) ?? true
|
||||
let outPath = try container.decodeIfPresent(String.self, forKey: .outPath)
|
||||
self = .cameraClip(facing: facing, durationMs: durationMs, includeAudio: includeAudio, outPath: outPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
62
apps/macos/Tests/ClawdisIPCTests/CameraIPCTests.swift
Normal file
62
apps/macos/Tests/ClawdisIPCTests/CameraIPCTests.swift
Normal file
@@ -0,0 +1,62 @@
|
||||
import ClawdisIPC
|
||||
import Foundation
|
||||
import Testing
|
||||
|
||||
/// Codable coverage for the camera IPC requests.
@Suite struct CameraIPCTests {
    /// A fully populated `cameraSnap` request survives a JSON encode/decode cycle.
    @Test func cameraSnapCodableRoundtrip() throws {
        let original: Request = .cameraSnap(
            facing: .front,
            maxWidth: 640,
            quality: 0.85,
            outPath: "/tmp/test.jpg")

        let encoded = try JSONEncoder().encode(original)
        let decoded = try JSONDecoder().decode(Request.self, from: encoded)

        guard case let .cameraSnap(facing, maxWidth, quality, outPath) = decoded else {
            Issue.record("expected cameraSnap, got \(decoded)")
            return
        }
        #expect(facing == .front)
        #expect(maxWidth == 640)
        #expect(quality == 0.85)
        #expect(outPath == "/tmp/test.jpg")
    }

    /// A fully populated `cameraClip` request survives a JSON encode/decode cycle.
    @Test func cameraClipCodableRoundtrip() throws {
        let original: Request = .cameraClip(
            facing: .back,
            durationMs: 3000,
            includeAudio: false,
            outPath: "/tmp/test.mp4")

        let encoded = try JSONEncoder().encode(original)
        let decoded = try JSONDecoder().decode(Request.self, from: encoded)

        guard case let .cameraClip(facing, durationMs, includeAudio, outPath) = decoded else {
            Issue.record("expected cameraClip, got \(decoded)")
            return
        }
        #expect(facing == .back)
        #expect(durationMs == 3000)
        #expect(includeAudio == false)
        #expect(outPath == "/tmp/test.mp4")
    }

    /// Decoding a `cameraClip` payload without `includeAudio` yields `true`.
    @Test func cameraClipDefaultsIncludeAudioToTrueWhenMissing() throws {
        let json = """
        {"type":"cameraClip","durationMs":1234}
        """
        let decoded = try JSONDecoder().decode(Request.self, from: Data(json.utf8))

        guard case let .cameraClip(_, durationMs, includeAudio, _) = decoded else {
            Issue.record("expected cameraClip, got \(decoded)")
            return
        }
        #expect(durationMs == 1234)
        #expect(includeAudio == true)
    }
}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
import Foundation
|
||||
|
||||
/// Camera command identifiers; the raw values are the wire names
/// `"camera.snap"` and `"camera.clip"`.
public enum ClawdisCameraCommand: String, Codable, Sendable {
    case snap = "camera.snap", clip = "camera.clip"
}
|
||||
|
||||
/// Camera facing for camera command parameters; raw values are `"back"` and `"front"`.
public enum ClawdisCameraFacing: String, Codable, Sendable {
    case back, front
}
|
||||
|
||||
/// Still-image format names accepted for snapshots; both `"jpg"` and `"jpeg"`
/// spellings are distinct cases.
public enum ClawdisCameraImageFormat: String, Codable, Sendable {
    case jpg, jpeg
}
|
||||
|
||||
/// Video container format names accepted for clips; currently only `"mp4"`.
public enum ClawdisCameraVideoFormat: String, Codable, Sendable {
    /// MPEG-4 container, raw value `"mp4"`.
    case mp4
}
|
||||
|
||||
/// Optional parameters for a camera snapshot command.
///
/// Every field may be omitted; this type assigns no defaults of its own.
public struct ClawdisCameraSnapParams: Codable, Sendable, Equatable {
    /// Requested camera facing.
    public var facing: ClawdisCameraFacing?
    /// Requested maximum image width.
    public var maxWidth: Int?
    /// Requested image quality.
    public var quality: Double?
    /// Requested image format.
    public var format: ClawdisCameraImageFormat?

    /// Creates snapshot parameters; every argument defaults to `nil`.
    public init(
        facing: ClawdisCameraFacing? = nil,
        maxWidth: Int? = nil,
        quality: Double? = nil,
        format: ClawdisCameraImageFormat? = nil)
    {
        // Image-shaping options first, then the device selection.
        self.format = format
        self.quality = quality
        self.maxWidth = maxWidth
        self.facing = facing
    }
}
|
||||
|
||||
/// Optional parameters for a camera clip command.
///
/// Every field may be omitted; this type assigns no defaults of its own.
public struct ClawdisCameraClipParams: Codable, Sendable, Equatable {
    /// Requested camera facing.
    public var facing: ClawdisCameraFacing?
    /// Requested clip duration in milliseconds.
    public var durationMs: Int?
    /// Whether audio should be recorded.
    public var includeAudio: Bool?
    /// Requested video format.
    public var format: ClawdisCameraVideoFormat?

    /// Creates clip parameters; every argument defaults to `nil`.
    public init(
        facing: ClawdisCameraFacing? = nil,
        durationMs: Int? = nil,
        includeAudio: Bool? = nil,
        format: ClawdisCameraVideoFormat? = nil)
    {
        // Recording options first, then the device selection.
        self.format = format
        self.includeAudio = includeAudio
        self.durationMs = durationMs
        self.facing = facing
    }
}
|
||||
@@ -9,7 +9,7 @@ read_when:
|
||||
## What Clawdis Does
|
||||
- Runs WhatsApp gateway + Pi coding agent so the assistant can read/write chats, fetch context, and run tools via the host Mac.
|
||||
- macOS app manages permissions (screen recording, notifications, microphone) and exposes a CLI helper `clawdis-mac` for scripts.
|
||||
- Sessions are per-sender; heartbeats keep background tasks alive.
|
||||
- Direct chats collapse into the shared `main` session by default; groups stay isolated as `group:<jid>`; heartbeats keep background tasks alive.
|
||||
|
||||
## Core Tools (enable in Settings → Tools)
|
||||
- **mcporter** — MCP runtime/CLI to list, call, and sync Model Context Protocol servers.
|
||||
|
||||
@@ -122,8 +122,8 @@
|
||||
<span class="footer__sep">·</span>
|
||||
<a href="https://github.com/steipete/clawdis">source</a>
|
||||
<span class="footer__sep">·</span>
|
||||
<a href="https://www.npmjs.com/package/clawdis">npm</a>
|
||||
</div>
|
||||
<a href="https://github.com/steipete/clawdis/releases">releases</a>
|
||||
</div>
|
||||
<div class="footer__hint" aria-hidden="true">
|
||||
tip: press <kbd>F2</kbd> (Mac: <kbd>fn</kbd>+<kbd>F2</kbd>) to flip
|
||||
the universe
|
||||
|
||||
98
docs/camera.md
Normal file
98
docs/camera.md
Normal file
@@ -0,0 +1,98 @@
|
||||
---
|
||||
summary: "Camera capture (iOS node + macOS app) for agent use: photos (jpg) and short video clips (mp4)"
|
||||
read_when:
|
||||
- Adding or modifying camera capture on iOS nodes or macOS
|
||||
- Extending agent-accessible MEDIA temp-file workflows
|
||||
---
|
||||
|
||||
# Camera capture (agent)
|
||||
|
||||
Clawdis supports **camera capture** for agent workflows:
|
||||
|
||||
- **iOS node** (paired via Gateway): capture a **photo** (`jpg`) or **short video clip** (`mp4`, with optional audio) via `node.invoke`.
|
||||
- **macOS app** (local control socket): capture a **photo** (`jpg`) or **short video clip** (`mp4`, with optional audio) via `clawdis-mac`.
|
||||
|
||||
All camera access is gated behind **user-controlled settings**.
|
||||
|
||||
## iOS node
|
||||
|
||||
### User setting (default on)
|
||||
|
||||
- iOS Settings tab → **Camera** → **Allow Camera** (`camera.enabled`)
|
||||
- Default: **on** (missing key is treated as enabled).
|
||||
- When off: `camera.*` commands return `CAMERA_DISABLED`.
|
||||
|
||||
### Commands (via Gateway `node.invoke`)
|
||||
|
||||
- `camera.snap`
|
||||
- Params:
|
||||
- `facing`: `front|back` (default: `front`)
|
||||
- `maxWidth`: number (optional)
|
||||
- `quality`: `0..1` (optional; default `0.9`)
|
||||
- `format`: currently `jpg`
|
||||
- Response payload:
|
||||
- `format: "jpg"`
|
||||
- `base64: "<...>"`
|
||||
- `width`, `height`
|
||||
|
||||
- `camera.clip`
|
||||
- Params:
|
||||
- `facing`: `front|back` (default: `front`)
|
||||
- `durationMs`: number (default `3000`, clamped to a max)
|
||||
- `includeAudio`: boolean (default `true`)
|
||||
- `format`: currently `mp4`
|
||||
- Response payload:
|
||||
- `format: "mp4"`
|
||||
- `base64: "<...>"`
|
||||
- `durationMs`
|
||||
- `hasAudio`
|
||||
|
||||
### Foreground requirement
|
||||
|
||||
Like `screen.*`, the iOS node only allows `camera.*` commands in the **foreground**. Background invocations return `NODE_BACKGROUND_UNAVAILABLE`.
|
||||
|
||||
### CLI helper (temp files + MEDIA)
|
||||
|
||||
The easiest way to get attachments is via the CLI helper, which writes decoded media to a temp file and prints `MEDIA:<path>`.
|
||||
|
||||
Examples:
|
||||
|
||||
```bash
|
||||
clawdis nodes camera snap --node <id> # default: both front + back (2 MEDIA lines)
|
||||
clawdis nodes camera snap --node <id> --facing front
|
||||
clawdis nodes camera clip --node <id> --duration 3000
|
||||
clawdis nodes camera clip --node <id> --no-audio
|
||||
```
|
||||
|
||||
Notes:
|
||||
- `nodes camera snap` defaults to **both** facings to give the agent both views.
|
||||
- Output files are written to the OS temp directory; copy them elsewhere (or build your own wrapper) if you need to keep them.
|
||||
|
||||
## macOS app
|
||||
|
||||
### User setting (default off)
|
||||
|
||||
The macOS companion app exposes a checkbox:
|
||||
|
||||
- **Settings → Debug → Camera → Allow Camera (agent)** (`clawdis.cameraEnabled`)
|
||||
- Default: **off**
|
||||
- When off: camera requests return “Camera disabled by user”.
|
||||
|
||||
### CLI helper (local control socket)
|
||||
|
||||
The `clawdis-mac` helper talks to the running menu bar app over the local control socket.
|
||||
|
||||
Examples:
|
||||
|
||||
```bash
|
||||
clawdis-mac camera snap # prints MEDIA:<path>
|
||||
clawdis-mac camera snap --max-width 1280
|
||||
clawdis-mac camera clip --duration-ms 3000 # prints MEDIA:<path>
|
||||
clawdis-mac camera clip --no-audio
|
||||
```
|
||||
|
||||
## Safety + practical limits
|
||||
|
||||
- Camera and microphone access trigger the usual OS permission prompts (and require usage strings in Info.plist).
|
||||
- Video clips are intentionally short to avoid oversized bridge payloads (base64 overhead + WebSocket message limits).
|
||||
|
||||
@@ -24,9 +24,17 @@ Start conservative:
|
||||
## Prerequisites
|
||||
|
||||
- Node **22+**
|
||||
- CLAWDIS installed: `npm install -g clawdis`
|
||||
- CLAWDIS available on PATH (recommended during development: from source + global link)
|
||||
- A second phone number (SIM/eSIM/prepaid) for the assistant
|
||||
|
||||
From source (recommended while the npm package is still settling):
|
||||
|
||||
```bash
|
||||
pnpm install
|
||||
pnpm build
|
||||
pnpm link --global
|
||||
```
|
||||
|
||||
## The two-phone setup (recommended)
|
||||
|
||||
You want this:
|
||||
@@ -121,7 +129,7 @@ Example:
|
||||
## Sessions and memory
|
||||
|
||||
- Session files: `~/.clawdis/sessions/{{SessionId}}.jsonl`
|
||||
- Session metadata (token usage, last route, etc): `~/.clawdis/sessions.json`
|
||||
- Session metadata (token usage, last route, etc): `~/.clawdis/sessions/sessions.json` (legacy: `~/.clawdis/sessions.json`)
|
||||
- `/new` starts a fresh session for that chat (configurable via `resetTriggers`)
|
||||
|
||||
## Heartbeats (proactive mode)
|
||||
|
||||
@@ -5,9 +5,10 @@ read_when:
|
||||
---
|
||||
# Control channel API (newline-delimited JSON)
|
||||
|
||||
**Deprecated:** superseded by the WebSocket Gateway protocol (`clawdis gateway`, see `docs/architecture.md` and `docs/gateway.md`). Use only for legacy builds predating the Gateway rollout.
|
||||
**Deprecated (historical):** superseded by the WebSocket Gateway protocol (`clawdis gateway`, see `docs/architecture.md` and `docs/gateway.md`).
|
||||
Current builds use a WebSocket server on `ws://127.0.0.1:18789` and do **not** expose this TCP control channel.
|
||||
|
||||
Endpoint: `127.0.0.1:18789` (TCP, localhost only). Clients reach it via SSH port forward in remote mode.
|
||||
Legacy endpoint (if present in an older build): `127.0.0.1:18789` (TCP, localhost only), typically reached via SSH port forward in remote mode.
|
||||
|
||||
## Frame format
|
||||
Each line is a JSON object. Two shapes exist:
|
||||
@@ -45,4 +46,4 @@ Each line is a JSON object. Two shapes exist:
|
||||
4) For user toggles, send `set-heartbeats` and await response.
|
||||
|
||||
## Backward compatibility
|
||||
- If the control port is unavailable (older gateway), the client may fall back to the legacy CLI path, but the intended path is to rely solely on this API.
|
||||
- If the control channel is unavailable: that’s expected on modern builds. Use the Gateway WS protocol instead.
|
||||
|
||||
@@ -56,4 +56,4 @@ Notes:
|
||||
## Known considerations
|
||||
- Heartbeats are intentionally skipped for groups to avoid noisy broadcasts.
|
||||
- Echo suppression uses the combined batch string; if you send identical text twice without mentions, only the first will get a response.
|
||||
- Session store entries will appear as `group:<jid>` in `sessions.json`; a missing entry just means the group hasn’t triggered a run yet.
|
||||
- Session store entries will appear as `group:<jid>` in the session store (`~/.clawdis/sessions/sessions.json` by default); a missing entry just means the group hasn’t triggered a run yet.
|
||||
|
||||
@@ -16,7 +16,7 @@ Short guide to verify the WhatsApp Web / Baileys stack without guessing.
|
||||
|
||||
## Deep diagnostics
|
||||
- Creds on disk: `ls -l ~/.clawdis/credentials/creds.json` (mtime should be recent).
|
||||
- Session store: `ls -l ~/.clawdis/sessions.json` (path can be overridden in config). Count and recent recipients are surfaced via `status`.
|
||||
- Session store: `ls -l ~/.clawdis/sessions/sessions.json` (legacy: `~/.clawdis/sessions.json`; path can be overridden in config). Count and recent recipients are surfaced via `status`.
|
||||
- Relink flow: `clawdis logout && clawdis login --verbose` when status codes 409–515 or `loggedOut` appear in logs.
|
||||
|
||||
## When something fails
|
||||
|
||||
@@ -19,7 +19,7 @@ read_when:
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/steipete/clawdis">GitHub</a> ·
|
||||
<a href="https://www.npmjs.com/package/clawdis">npm</a> ·
|
||||
<a href="https://github.com/steipete/clawdis/releases">Releases</a> ·
|
||||
<a href="./clawd">Clawd setup</a>
|
||||
</p>
|
||||
|
||||
@@ -29,25 +29,41 @@ It’s built for [Clawd](https://clawd.me), a space lobster who needed a TARDIS.
|
||||
## How it works
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌──────────┐ ┌─────────────┐
|
||||
│ WhatsApp │ ───▶ │ CLAWDIS │ ───▶ │ AI Agent │
|
||||
│ Telegram │ ───▶ │ 🦞⏱️💙 │ ◀─── │ (Pi) │
|
||||
│ (You) │ ◀─── │ │ │ │
|
||||
└─────────────┘ └──────────┘ └─────────────┘
|
||||
WhatsApp / Telegram
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────┐
|
||||
│ Gateway │ ws://127.0.0.1:18789 (loopback-only)
|
||||
│ (single source) │ tcp://0.0.0.0:18790 (optional Bridge)
|
||||
└───────────┬───────────────┘
|
||||
│
|
||||
├─ Pi agent (RPC)
|
||||
├─ CLI (clawdis …)
|
||||
├─ WebChat (loopback UI)
|
||||
├─ macOS app (Clawdis.app)
|
||||
└─ iOS node (Iris) via Bridge + pairing
|
||||
```
|
||||
|
||||
Most operations flow through the **Gateway** (`clawdis gateway`), a single long-running process that owns provider connections and the WebSocket control plane.
|
||||
|
||||
## Network model
|
||||
|
||||
- **One Gateway per host**: it is the only process allowed to own the WhatsApp Web session.
|
||||
- **Loopback-first**: Gateway WS is `ws://127.0.0.1:18789` (not exposed on the LAN).
|
||||
- **Bridge for nodes**: optional LAN/tailnet-facing bridge on `tcp://0.0.0.0:18790` for paired nodes (Bonjour-discoverable).
|
||||
- **Remote use**: SSH tunnel or tailnet/VPN; see `docs/remote.md` and `docs/discovery.md`.
|
||||
|
||||
## Features (high level)
|
||||
|
||||
- 📱 **WhatsApp Integration** — Uses Baileys for WhatsApp Web protocol
|
||||
- ✈️ **Telegram Bot** — DMs + groups via grammY
|
||||
- 🤖 **Agent bridge** — Pi (RPC mode) with tool streaming
|
||||
- 💬 **Sessions** — Per-sender (or shared `main`) conversation context
|
||||
- 💬 **Sessions** — Direct chats collapse into shared `main` (default); groups are isolated
|
||||
- 👥 **Group Chat Support** — Mention-based triggering in group chats
|
||||
- 📎 **Media Support** — Send and receive images, audio, documents
|
||||
- 🎤 **Voice notes** — Optional transcription hook
|
||||
- 🖥️ **WebChat + macOS app** — A local UI + menu bar companion for ops and voice wake
|
||||
- 🖥️ **WebChat + macOS app** — Local UI + menu bar companion for ops and voice wake
|
||||
- 📱 **iOS node (Iris)** — Pairs as a node and exposes a Canvas surface
|
||||
|
||||
Note: legacy Claude/Codex/Gemini/Opencode paths have been removed; Pi is the only coding-agent path.
|
||||
|
||||
@@ -56,8 +72,10 @@ Note: legacy Claude/Codex/Gemini/Opencode paths have been removed; Pi is the onl
|
||||
Runtime requirement: **Node ≥ 22**.
|
||||
|
||||
```bash
|
||||
# Install
|
||||
npm install -g clawdis
|
||||
# From source (recommended while the npm package is still settling)
|
||||
pnpm install
|
||||
pnpm build
|
||||
pnpm link --global
|
||||
|
||||
# Pair WhatsApp Web (shows QR)
|
||||
clawdis login
|
||||
@@ -95,18 +113,23 @@ Example:
|
||||
|
||||
## Docs
|
||||
|
||||
- [Configuration](./configuration.md)
|
||||
- [Gateway runbook](./gateway.md)
|
||||
- [WebChat](./webchat.md)
|
||||
- [Agent integration](./agents.md)
|
||||
- [Telegram](./telegram.md)
|
||||
- [Group messages](./group-messages.md)
|
||||
- [Media: images](./images.md)
|
||||
- [Media: audio](./audio.md)
|
||||
- [Sessions](./session.md)
|
||||
- [Cron + wakeups](./cron.md)
|
||||
- [Security](./security.md)
|
||||
- [Troubleshooting](./troubleshooting.md)
|
||||
- Start here:
|
||||
- [Configuration](./configuration.md)
|
||||
- [Clawd personal assistant setup](./clawd.md)
|
||||
- [Gateway runbook](./gateway.md)
|
||||
- [Discovery + transports](./discovery.md)
|
||||
- [Remote access](./remote.md)
|
||||
- Providers and UX:
|
||||
- [WebChat](./webchat.md)
|
||||
- [Telegram](./telegram.md)
|
||||
- [Group messages](./group-messages.md)
|
||||
- [Media: images](./images.md)
|
||||
- [Media: audio](./audio.md)
|
||||
- Ops and safety:
|
||||
- [Sessions](./session.md)
|
||||
- [Cron + wakeups](./cron.md)
|
||||
- [Security](./security.md)
|
||||
- [Troubleshooting](./troubleshooting.md)
|
||||
|
||||
## The name
|
||||
|
||||
|
||||
@@ -54,13 +54,13 @@ More debugging notes: `docs/bonjour.md`.
|
||||
In Iris:
|
||||
- Pick the discovered bridge (or hit refresh).
|
||||
- If not paired yet, Iris will initiate pairing automatically.
|
||||
- After the first successful pairing, Iris will auto-reconnect to the **last bridge** on launch (including after reinstall), as long as the iOS Keychain entry is still present.
|
||||
- After the first successful pairing, Iris will auto-reconnect **strictly to the last discovered gateway** on launch (including after reinstall), as long as the iOS Keychain entry is still present.
|
||||
|
||||
### Connection indicator (always visible)
|
||||
|
||||
The Settings tab icon shows a small status dot:
|
||||
- **Green**: connected to the bridge
|
||||
- **Yellow**: connecting
|
||||
- **Yellow**: connecting (subtle pulse)
|
||||
- **Red**: not connected / error
|
||||
|
||||
## 4) Approve pairing (CLI)
|
||||
|
||||
@@ -10,7 +10,7 @@ Context: web chat currently lives in a WKWebView that loads the pi-web bundle. S
|
||||
|
||||
## Target state
|
||||
- Gateway WS adds methods:
|
||||
- `chat.history { sessionKey }` → `{ sessionKey, messages[], thinkingLevel }` (reads the existing JSONL + sessions.json).
|
||||
- `chat.history { sessionKey }` → `{ sessionKey, messages[], thinkingLevel }` (reads the existing JSONL + session store).
|
||||
- `chat.send { sessionKey, message, attachments?, thinking?, deliver?, timeoutMs<=30000, idempotencyKey }` → `res { runId, status:"accepted" }` or `res ok:false` on validation/timeout.
|
||||
- Gateway WS emits `chat` events `{ runId, sessionKey, seq, state:"delta"|"final"|"error", message?, errorMessage?, usage?, stopReason? }`. Streaming is optional; minimum is a single `state:"final"` per send.
|
||||
- Client consumes only WS: bootstrap via `chat.history`, send via `chat.send`, live updates via `chat` events. No file watchers.
|
||||
|
||||
@@ -3,48 +3,50 @@ summary: "Remote mode topology using SSH control channels between gateway and ma
|
||||
read_when:
|
||||
- Running or troubleshooting remote gateway setups
|
||||
---
|
||||
# Remote mode with control channel
|
||||
# Remote access (SSH, tunnels, and tailnets)
|
||||
|
||||
This repo supports “remote over SSH” by keeping a single gateway (the master) running on a host (e.g., your Mac Studio) and connecting one or more macOS menu bar clients to it. The menu app no longer shells out to `pnpm clawdis …`; it talks to the gateway over a persistent control channel that is tunneled through SSH.
|
||||
This repo supports “remote over SSH” by keeping a single Gateway (the master) running on a host (e.g., your Mac Studio) and connecting clients to it.
|
||||
|
||||
Remote mode is the SSH fallback transport. As Clawdis adds a direct “bridge” transport for LAN/tailnet setups, SSH remains supported for universal reach.
|
||||
See `docs/discovery.md` for how clients choose between direct vs SSH.
|
||||
- For **operators (you / the macOS app)**: SSH tunneling is the universal fallback.
|
||||
- For **nodes (Iris/iOS and future devices)**: prefer the Gateway **Bridge** when on the same LAN/tailnet (see `docs/discovery.md`).
|
||||
|
||||
## Topology
|
||||
- Master: runs the gateway + control server on `127.0.0.1:18789` (in-process TCP server).
|
||||
- Clients: when “Remote over SSH” is selected, the app opens one SSH tunnel:
|
||||
- `ssh -N -L <localPort>:127.0.0.1:18789 <user>@<host>`
|
||||
- The app then connects to `localhost:<localPort>` and keeps that socket open.
|
||||
- Messages are newline-delimited JSON (documented in `docs/control-api.md`).
|
||||
## The core idea
|
||||
|
||||
## Connection flow (clients)
|
||||
1) Establish SSH tunnel.
|
||||
2) Open TCP socket to the local forwarded port.
|
||||
3) Send `ping` to verify connectivity.
|
||||
4) Issue `health`, `status`, and `last-heartbeat` requests to seed UI.
|
||||
5) Listen for `event` frames (heartbeat updates, gateway status).
|
||||
- The Gateway WebSocket binds to **loopback**: `ws://127.0.0.1:18789`.
|
||||
- For remote use, you forward that loopback port over SSH (or use a tailnet/VPN and rely less on tunnels).
|
||||
|
||||
## Heartbeats
|
||||
- Heartbeats always run on the master gateway.
|
||||
- The control server emits `event: "heartbeat"` after each heartbeat attempt and keeps the latest in memory for `last-heartbeat` requests.
|
||||
- No file-based heartbeat logs/state are required when the control stream is available.
|
||||
## SSH tunnel (CLI + tools)
|
||||
|
||||
## Local mode
|
||||
- The menu app skips SSH and connects directly to `127.0.0.1:18789` with the same protocol.
|
||||
Create a local tunnel to the remote Gateway WS:
|
||||
|
||||
## Failure handling
|
||||
- If the tunnel drops, the client reconnects and re-issues `ping`, `health`, and `last-heartbeat` to refresh state (the mac app shows “Control channel disconnected”).
|
||||
- If the control port is unavailable (older gateway), the app can optionally fall back to the legacy CLI path, but the goal is to rely solely on the control channel.
|
||||
```bash
|
||||
ssh -N -L 18789:127.0.0.1:18789 user@host
|
||||
```
|
||||
|
||||
## Test Remote (in the mac app)
|
||||
1) SSH reachability check (`ssh -o BatchMode=yes … echo ok`).
|
||||
2) If SSH succeeds, the app opens the control tunnel and issues a `health` request; success marks the remote as ready.
|
||||
With the tunnel up:
|
||||
- `clawdis health` and `clawdis status --deep` now reach the remote gateway via `ws://127.0.0.1:18789`.
|
||||
- `clawdis gateway {status,health,send,agent,call}` can also target the forwarded URL via `--url` when needed.
|
||||
|
||||
## Security
|
||||
- Control server listens only on localhost.
|
||||
- SSH tunneling reuses existing keys/agent; no additional auth is added by the control server.
|
||||
## WebChat over SSH
|
||||
|
||||
## Files to keep in sync
|
||||
- Protocol definition: `docs/control-api.md`.
|
||||
- App connection logic: macOS `Remote over SSH` plumbing.
|
||||
- Gateway control server: lives inside the Node gateway process.
|
||||
Forward both the WebChat HTTP port and the Gateway WS port:
|
||||
|
||||
```bash
|
||||
ssh -N \
|
||||
-L 18788:127.0.0.1:18788 \
|
||||
-L 18789:127.0.0.1:18789 \
|
||||
user@host
|
||||
```
|
||||
|
||||
Then open `http://127.0.0.1:18788/webchat/` locally. (Details: `docs/webchat.md`.)
|
||||
|
||||
## macOS app “Remote over SSH”
|
||||
|
||||
The macOS menu bar app can drive the same setup end-to-end (remote status checks, WebChat, and Voice Wake forwarding).
|
||||
|
||||
Runbook: `docs/mac/remote.md`.
|
||||
|
||||
## Legacy control channel
|
||||
|
||||
Older builds experimented with a newline-delimited TCP control channel on the same port.
|
||||
That API is deprecated and should not be relied on. (Historical reference: `docs/control-api.md`.)
|
||||
|
||||
@@ -7,7 +7,7 @@ read_when:
|
||||
|
||||
Updated: 2025-12-07
|
||||
|
||||
Status: ready for bot-mode use with grammY (long-poll + webhook). Text + media send, proxy, and webhook helpers all ship in-tree.
|
||||
Status: ready for bot-mode use with grammY (long-polling by default; webhook supported when configured). Text + media send, mention-gated group replies, and optional proxy support are implemented.
|
||||
|
||||
## Goals
|
||||
- Let you talk to Clawdis via a Telegram bot in DMs and groups.
|
||||
@@ -17,7 +17,11 @@ Status: ready for bot-mode use with grammY (long-poll + webhook). Text + media s
|
||||
## How it will work (Bot API)
|
||||
1) Create a bot with @BotFather and grab the token.
|
||||
2) Configure Clawdis with `TELEGRAM_BOT_TOKEN` (or `telegram.botToken` in `~/.clawdis/clawdis.json`).
|
||||
3) Run the gateway; it auto-starts Telegram when the bot token is set. To force Telegram-only: `clawdis gateway --provider telegram`. Webhook mode: `clawdis gateway --provider telegram --webhook --port 8787 --webhook-secret <secret>` (optionally `--webhook-url` when the public URL differs).
|
||||
3) Run the gateway; it auto-starts Telegram when the bot token is set.
|
||||
- **Long-polling** is the default.
|
||||
- **Webhook mode** is enabled by setting `telegram.webhookUrl` (optionally `telegram.webhookSecret` / `telegram.webhookPath`).
|
||||
- The webhook listener currently binds to `0.0.0.0:8787` and serves `POST /telegram-webhook` by default.
|
||||
- If you need a different public port/host, set `telegram.webhookUrl` to the externally reachable URL and use a reverse proxy to forward to `:8787`.
|
||||
4) Direct chats: user sends the first message; all subsequent turns land in the shared `main` session (default, no extra config).
|
||||
5) Groups: add the bot, disable privacy mode (or make it admin) so it can read messages; group threads stay on `group:<chatId>` and require mention/command to trigger replies.
|
||||
6) Optional allowlist: reuse `inbound.allowFrom` for direct chats by chat id (`123456789` or `telegram:123456789`).
|
||||
@@ -32,7 +36,7 @@ Status: ready for bot-mode use with grammY (long-poll + webhook). Text + media s
|
||||
- Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits.
|
||||
- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, and `Timestamp`; groups require @bot mention by default.
|
||||
- Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort.
|
||||
- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.requireMention`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl` supported.
|
||||
- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.requireMention`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
|
||||
|
||||
Example config:
|
||||
```json5
|
||||
@@ -44,6 +48,7 @@ Example config:
|
||||
mediaMaxMb: 5,
|
||||
proxy: "socks5://localhost:9050",
|
||||
webhookSecret: "mysecret",
|
||||
webhookPath: "/telegram-webhook",
|
||||
webhookUrl: "https://yourdomain.com/telegram-webhook"
|
||||
}
|
||||
}
|
||||
@@ -62,6 +67,6 @@ Example config:
|
||||
- ⏳ Add more grammY coverage (webhook payloads, media edge cases)
|
||||
|
||||
## Safety & ops
|
||||
- Treat the bot token as a secret (equivalent to account control); store under `~/.clawdis/credentials/` with 0600 perms.
|
||||
- Respect Telegram rate limits (429s); we’ll add throttling in the provider to stay below flood thresholds.
|
||||
- Treat the bot token as a secret (equivalent to account control); prefer `TELEGRAM_BOT_TOKEN` or a locked-down config file (`chmod 600 ~/.clawdis/clawdis.json`).
|
||||
- Respect Telegram rate limits (429s); grammY throttling is enabled by default.
|
||||
- Use a test bot for development to avoid hitting production chats.
|
||||
|
||||
@@ -98,6 +98,8 @@ cat > "$APP_ROOT/Contents/Info.plist" <<PLIST
|
||||
<string>Clawdis needs notification permission to show alerts for agent actions.</string>
|
||||
<key>NSScreenCaptureDescription</key>
|
||||
<string>Clawdis captures the screen when the agent needs screenshots for context.</string>
|
||||
<key>NSCameraUsageDescription</key>
|
||||
<string>Clawdis can capture photos or short video clips when requested by the agent.</string>
|
||||
<key>NSMicrophoneUsageDescription</key>
|
||||
<string>Clawdis needs the mic for Voice Wake tests and agent audio capture.</string>
|
||||
<key>NSSpeechRecognitionUsageDescription</key>
|
||||
|
||||
64
src/cli/nodes-camera.test.ts
Normal file
64
src/cli/nodes-camera.test.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
cameraTempPath,
|
||||
parseCameraClipPayload,
|
||||
parseCameraSnapPayload,
|
||||
writeBase64ToFile,
|
||||
} from "./nodes-camera.js";
|
||||
|
||||
// Unit tests for the pure helpers in ./nodes-camera.js: payload parsing,
// temp-path construction, and base64 file output.
describe("nodes camera helpers", () => {
  it("parses camera.snap payload", () => {
    expect(
      parseCameraSnapPayload({
        format: "jpg",
        base64: "aGk=",
        width: 10,
        height: 20,
      }),
    ).toEqual({ format: "jpg", base64: "aGk=", width: 10, height: 20 });
  });

  it("rejects invalid camera.snap payload", () => {
    expect(() => parseCameraSnapPayload({ format: "jpg" })).toThrow(
      /invalid camera\.snap payload/i,
    );
  });

  it("parses camera.clip payload", () => {
    expect(
      parseCameraClipPayload({
        format: "mp4",
        base64: "AAEC",
        durationMs: 1234,
        hasAudio: true,
      }),
    ).toEqual({
      format: "mp4",
      base64: "AAEC",
      durationMs: 1234,
      hasAudio: true,
    });
  });

  // Mirrors the camera.snap rejection case so both parsers have a negative test.
  it("rejects invalid camera.clip payload", () => {
    expect(() =>
      parseCameraClipPayload({ format: "mp4", base64: "AAEC" }),
    ).toThrow(/invalid camera\.clip payload/i);
  });

  it("builds stable temp paths when id provided", () => {
    const p = cameraTempPath({
      kind: "snap",
      facing: "front",
      ext: "jpg",
      tmpDir: "/tmp",
      id: "id1",
    });
    expect(p).toBe(path.join("/tmp", "clawdis-camera-snap-front-id1.jpg"));
  });

  it("writes base64 to file", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-test-"));
    // Clean up in `finally` so a failing assertion does not leak the temp dir.
    try {
      const out = path.join(dir, "x.bin");
      await writeBase64ToFile(out, "aGk=");
      await expect(fs.readFile(out, "utf8")).resolves.toBe("hi");
    } finally {
      await fs.rm(dir, { recursive: true, force: true });
    }
  });
});
|
||||
92
src/cli/nodes-camera.ts
Normal file
92
src/cli/nodes-camera.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
|
||||
/** Which physical camera on the node to use. */
export type CameraFacing = "front" | "back";

/** Validated shape of a `camera.snap` response payload. */
export type CameraSnapPayload = {
  /** Image format string as reported in the payload (e.g. "jpg"). */
  format: string;
  /** Base64-encoded image data. */
  base64: string;
  width: number;
  height: number;
};

/** Validated shape of a `camera.clip` response payload. */
export type CameraClipPayload = {
  /** Video format string as reported in the payload (e.g. "mp4"). */
  format: string;
  /** Base64-encoded clip data. */
  base64: string;
  durationMs: number;
  hasAudio: boolean;
};
|
||||
|
||||
/**
 * Views an unknown value as a string-keyed record.
 *
 * Any non-null value whose `typeof` is "object" (arrays included) is returned
 * as-is; everything else yields a fresh empty object so callers can read
 * properties without further checks.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return {};
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/** Returns `value` when it is a string (including ""), otherwise `undefined`. */
function asString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  return value;
}
|
||||
|
||||
/**
 * Returns `value` when it is a finite number, otherwise `undefined`.
 * `NaN` and `±Infinity` are rejected; numeric strings are not converted.
 */
function asNumber(value: unknown): number | undefined {
  if (typeof value !== "number") {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/** Returns `value` when it is strictly a boolean, otherwise `undefined`. */
function asBoolean(value: unknown): boolean | undefined {
  if (typeof value !== "boolean") {
    return undefined;
  }
  return value;
}
|
||||
|
||||
/**
 * Validates an untyped `camera.snap` payload.
 *
 * @param value - Raw payload; anything that is not an object is treated as `{}`.
 * @returns The payload narrowed to {@link CameraSnapPayload}.
 * @throws Error("invalid camera.snap payload") when `format` or `base64` is
 *   missing, empty, or not a string, or when `width`/`height` is not a finite
 *   number.
 */
export function parseCameraSnapPayload(value: unknown): CameraSnapPayload {
  const fields = asRecord(value);
  // Read every field up front, then validate them together.
  const fmt = asString(fields.format);
  const data = asString(fields.base64);
  const w = asNumber(fields.width);
  const h = asNumber(fields.height);

  if (fmt && data && w !== undefined && h !== undefined) {
    return { format: fmt, base64: data, width: w, height: h };
  }
  throw new Error("invalid camera.snap payload");
}
|
||||
|
||||
/**
 * Validates an untyped `camera.clip` payload.
 *
 * @param value - Raw payload; anything that is not an object is treated as `{}`.
 * @returns The payload narrowed to {@link CameraClipPayload}.
 * @throws Error("invalid camera.clip payload") when `format` or `base64` is
 *   missing, empty, or not a string, when `durationMs` is not a finite number,
 *   or when `hasAudio` is not a boolean.
 */
export function parseCameraClipPayload(value: unknown): CameraClipPayload {
  const fields = asRecord(value);
  // Read every field up front, then validate them together.
  const fmt = asString(fields.format);
  const data = asString(fields.base64);
  const duration = asNumber(fields.durationMs);
  const audio = asBoolean(fields.hasAudio);

  if (fmt && data && duration !== undefined && audio !== undefined) {
    return { format: fmt, base64: data, durationMs: duration, hasAudio: audio };
  }
  throw new Error("invalid camera.clip payload");
}
|
||||
|
||||
/**
 * Replaces path separators so the value can only ever be part of a single
 * file name and never introduce extra path components.
 */
function toSafePathSegment(segment: string): string {
  return segment.replace(/[\\/]/g, "_");
}

/**
 * Builds the output path for a captured camera file:
 * `<tmpDir>/clawdis-camera-<kind>[-<facing>]-<id><.ext>`.
 *
 * @param opts.kind - "snap" or "clip"; becomes part of the file name.
 * @param opts.facing - Optional camera facing; omitted from the name when unset.
 * @param opts.ext - File extension, with or without a leading dot.
 * @param opts.tmpDir - Target directory; defaults to `os.tmpdir()`.
 * @param opts.id - Unique part of the name; defaults to a random UUID.
 * @returns The joined path. The file itself is not created.
 *
 * `ext` is typically taken from a payload returned by a remote node, so both
 * `ext` and `id` have `/` and `\` replaced with `_`. Without this, a value
 * such as `/../../x` would be normalised by `path.join` into a location
 * outside `tmpDir`.
 */
export function cameraTempPath(opts: {
  kind: "snap" | "clip";
  facing?: CameraFacing;
  ext: string;
  tmpDir?: string;
  id?: string;
}) {
  const tmpDir = opts.tmpDir ?? os.tmpdir();
  const id = toSafePathSegment(opts.id ?? randomUUID());
  const facingPart = opts.facing ? `-${opts.facing}` : "";
  const dottedExt = opts.ext.startsWith(".") ? opts.ext : `.${opts.ext}`;
  const ext = toSafePathSegment(dottedExt);
  return path.join(
    tmpDir,
    `clawdis-camera-${opts.kind}${facingPart}-${id}${ext}`,
  );
}
|
||||
|
||||
/**
 * Decodes a base64 string and writes the resulting bytes to `filePath`.
 *
 * @param filePath - Destination file path.
 * @param base64 - Base64-encoded content.
 * @returns The path written and the number of decoded bytes.
 */
export async function writeBase64ToFile(filePath: string, base64: string) {
  const decoded = Buffer.from(base64, "base64");
  const summary = { path: filePath, bytes: decoded.length };
  await fs.writeFile(filePath, decoded);
  return summary;
}
|
||||
@@ -1,6 +1,13 @@
|
||||
import type { Command } from "commander";
|
||||
import { callGateway, randomIdempotencyKey } from "../gateway/call.js";
|
||||
import { defaultRuntime } from "../runtime.js";
|
||||
import {
|
||||
type CameraFacing,
|
||||
cameraTempPath,
|
||||
parseCameraClipPayload,
|
||||
parseCameraSnapPayload,
|
||||
writeBase64ToFile,
|
||||
} from "./nodes-camera.js";
|
||||
|
||||
type NodesRpcOpts = {
|
||||
url?: string;
|
||||
@@ -12,6 +19,11 @@ type NodesRpcOpts = {
|
||||
params?: string;
|
||||
invokeTimeout?: string;
|
||||
idempotencyKey?: string;
|
||||
facing?: string;
|
||||
maxWidth?: string;
|
||||
quality?: string;
|
||||
duration?: string;
|
||||
audio?: boolean;
|
||||
};
|
||||
|
||||
type NodeListNode = {
|
||||
@@ -340,4 +352,203 @@ export function registerNodesCli(program: Command) {
|
||||
}),
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
|
||||
// Normalises a --facing option value (trimmed, case-insensitive) to a
// CameraFacing; anything other than "front" or "back" is rejected.
const parseFacing = (value: string): CameraFacing => {
  const normalized = String(value ?? "")
    .trim()
    .toLowerCase();
  switch (normalized) {
    case "front":
    case "back":
      return normalized;
    default:
      // Report the value as the user typed it, not the normalised form.
      throw new Error(`invalid facing: ${value} (expected front|back)`);
  }
};
|
||||
|
||||
const camera = nodes
|
||||
.command("camera")
|
||||
.description("Capture camera media from a paired node");
|
||||
|
||||
nodesCallOpts(
|
||||
camera
|
||||
.command("snap")
|
||||
.description("Capture a photo from a node camera (prints MEDIA:<path>)")
|
||||
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
|
||||
.option("--facing <front|back|both>", "Camera facing", "both")
|
||||
.option("--max-width <px>", "Max width in px (optional)")
|
||||
.option("--quality <0-1>", "JPEG quality (default 0.9)")
|
||||
.option(
|
||||
"--invoke-timeout <ms>",
|
||||
"Node invoke timeout in ms (default 20000)",
|
||||
"20000",
|
||||
)
|
||||
.action(async (opts: NodesRpcOpts) => {
|
||||
try {
|
||||
const nodeId = await resolveNodeId(opts, String(opts.node ?? ""));
|
||||
const facingOpt = String(opts.facing ?? "both")
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
const facings: CameraFacing[] =
|
||||
facingOpt === "both"
|
||||
? ["front", "back"]
|
||||
: facingOpt === "front" || facingOpt === "back"
|
||||
? [facingOpt]
|
||||
: (() => {
|
||||
throw new Error(
|
||||
`invalid facing: ${String(opts.facing)} (expected front|back|both)`,
|
||||
);
|
||||
})();
|
||||
|
||||
const maxWidth = opts.maxWidth
|
||||
? Number.parseInt(String(opts.maxWidth), 10)
|
||||
: undefined;
|
||||
const quality = opts.quality
|
||||
? Number.parseFloat(String(opts.quality))
|
||||
: undefined;
|
||||
const timeoutMs = opts.invokeTimeout
|
||||
? Number.parseInt(String(opts.invokeTimeout), 10)
|
||||
: undefined;
|
||||
|
||||
const results: Array<{
|
||||
facing: CameraFacing;
|
||||
path: string;
|
||||
width: number;
|
||||
height: number;
|
||||
}> = [];
|
||||
|
||||
for (const facing of facings) {
|
||||
const invokeParams: Record<string, unknown> = {
|
||||
nodeId,
|
||||
command: "camera.snap",
|
||||
params: {
|
||||
facing,
|
||||
maxWidth: Number.isFinite(maxWidth) ? maxWidth : undefined,
|
||||
quality: Number.isFinite(quality) ? quality : undefined,
|
||||
format: "jpg",
|
||||
},
|
||||
idempotencyKey: randomIdempotencyKey(),
|
||||
};
|
||||
if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
|
||||
invokeParams.timeoutMs = timeoutMs;
|
||||
}
|
||||
|
||||
const raw = (await callGatewayCli(
|
||||
"node.invoke",
|
||||
opts,
|
||||
invokeParams,
|
||||
)) as unknown;
|
||||
|
||||
const res =
|
||||
typeof raw === "object" && raw !== null
|
||||
? (raw as { payload?: unknown })
|
||||
: {};
|
||||
const payload = parseCameraSnapPayload(res.payload);
|
||||
const filePath = cameraTempPath({
|
||||
kind: "snap",
|
||||
facing,
|
||||
ext: payload.format === "jpeg" ? "jpg" : payload.format,
|
||||
});
|
||||
await writeBase64ToFile(filePath, payload.base64);
|
||||
results.push({
|
||||
facing,
|
||||
path: filePath,
|
||||
width: payload.width,
|
||||
height: payload.height,
|
||||
});
|
||||
}
|
||||
|
||||
if (opts.json) {
|
||||
defaultRuntime.log(JSON.stringify({ files: results }, null, 2));
|
||||
return;
|
||||
}
|
||||
defaultRuntime.log(results.map((r) => `MEDIA:${r.path}`).join("\n"));
|
||||
} catch (err) {
|
||||
defaultRuntime.error(`nodes camera snap failed: ${String(err)}`);
|
||||
defaultRuntime.exit(1);
|
||||
}
|
||||
}),
|
||||
{ timeoutMs: 60_000 },
|
||||
);
|
||||
|
||||
nodesCallOpts(
|
||||
camera
|
||||
.command("clip")
|
||||
.description(
|
||||
"Capture a short video clip from a node camera (prints MEDIA:<path>)",
|
||||
)
|
||||
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
|
||||
.option("--facing <front|back>", "Camera facing", "front")
|
||||
.option("--duration <ms>", "Duration in ms (default 3000)", "3000")
|
||||
.option("--no-audio", "Disable audio capture")
|
||||
.option(
|
||||
"--invoke-timeout <ms>",
|
||||
"Node invoke timeout in ms (default 45000)",
|
||||
"45000",
|
||||
)
|
||||
.action(async (opts: NodesRpcOpts & { audio?: boolean }) => {
|
||||
try {
|
||||
const nodeId = await resolveNodeId(opts, String(opts.node ?? ""));
|
||||
const facing = parseFacing(String(opts.facing ?? "front"));
|
||||
const durationMs = Number.parseInt(
|
||||
String(opts.duration ?? "3000"),
|
||||
10,
|
||||
);
|
||||
const includeAudio = opts.audio !== false;
|
||||
const timeoutMs = opts.invokeTimeout
|
||||
? Number.parseInt(String(opts.invokeTimeout), 10)
|
||||
: undefined;
|
||||
|
||||
const invokeParams: Record<string, unknown> = {
|
||||
nodeId,
|
||||
command: "camera.clip",
|
||||
params: {
|
||||
facing,
|
||||
durationMs: Number.isFinite(durationMs) ? durationMs : undefined,
|
||||
includeAudio,
|
||||
format: "mp4",
|
||||
},
|
||||
idempotencyKey: randomIdempotencyKey(),
|
||||
};
|
||||
if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
|
||||
invokeParams.timeoutMs = timeoutMs;
|
||||
}
|
||||
|
||||
const raw = (await callGatewayCli(
|
||||
"node.invoke",
|
||||
opts,
|
||||
invokeParams,
|
||||
)) as unknown;
|
||||
const res =
|
||||
typeof raw === "object" && raw !== null
|
||||
? (raw as { payload?: unknown })
|
||||
: {};
|
||||
const payload = parseCameraClipPayload(res.payload);
|
||||
const filePath = cameraTempPath({
|
||||
kind: "clip",
|
||||
facing,
|
||||
ext: payload.format,
|
||||
});
|
||||
await writeBase64ToFile(filePath, payload.base64);
|
||||
|
||||
if (opts.json) {
|
||||
defaultRuntime.log(
|
||||
JSON.stringify(
|
||||
{
|
||||
file: {
|
||||
facing,
|
||||
path: filePath,
|
||||
durationMs: payload.durationMs,
|
||||
hasAudio: payload.hasAudio,
|
||||
},
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
defaultRuntime.log(`MEDIA:${filePath}`);
|
||||
} catch (err) {
|
||||
defaultRuntime.error(`nodes camera clip failed: ${String(err)}`);
|
||||
defaultRuntime.exit(1);
|
||||
}
|
||||
}),
|
||||
{ timeoutMs: 90_000 },
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import * as fs from "node:fs/promises";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const sendCommand = vi.fn();
|
||||
@@ -148,4 +149,145 @@ describe("cli program", () => {
|
||||
);
|
||||
expect(runtime.log).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("runs nodes camera snap and prints two MEDIA paths", async () => {
|
||||
callGateway
|
||||
.mockResolvedValueOnce({
|
||||
ts: Date.now(),
|
||||
nodes: [
|
||||
{
|
||||
nodeId: "ios-node",
|
||||
displayName: "iOS Node",
|
||||
remoteIp: "192.168.0.88",
|
||||
connected: true,
|
||||
},
|
||||
],
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
nodeId: "ios-node",
|
||||
command: "camera.snap",
|
||||
payload: { format: "jpg", base64: "aGk=", width: 1, height: 1 },
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
nodeId: "ios-node",
|
||||
command: "camera.snap",
|
||||
payload: { format: "jpg", base64: "aGk=", width: 1, height: 1 },
|
||||
});
|
||||
|
||||
const program = buildProgram();
|
||||
runtime.log.mockClear();
|
||||
await program.parseAsync(
|
||||
["nodes", "camera", "snap", "--node", "ios-node"],
|
||||
{
|
||||
from: "user",
|
||||
},
|
||||
);
|
||||
|
||||
expect(callGateway).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
method: "node.invoke",
|
||||
params: expect.objectContaining({
|
||||
nodeId: "ios-node",
|
||||
command: "camera.snap",
|
||||
timeoutMs: 20000,
|
||||
idempotencyKey: "idem-test",
|
||||
params: expect.objectContaining({ facing: "front", format: "jpg" }),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(callGateway).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
expect.objectContaining({
|
||||
method: "node.invoke",
|
||||
params: expect.objectContaining({
|
||||
nodeId: "ios-node",
|
||||
command: "camera.snap",
|
||||
timeoutMs: 20000,
|
||||
idempotencyKey: "idem-test",
|
||||
params: expect.objectContaining({ facing: "back", format: "jpg" }),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
const out = String(runtime.log.mock.calls[0]?.[0] ?? "");
|
||||
const mediaPaths = out
|
||||
.split("\n")
|
||||
.filter((l) => l.startsWith("MEDIA:"))
|
||||
.map((l) => l.replace(/^MEDIA:/, ""))
|
||||
.filter(Boolean);
|
||||
expect(mediaPaths).toHaveLength(2);
|
||||
|
||||
try {
|
||||
for (const p of mediaPaths) {
|
||||
await expect(fs.readFile(p, "utf8")).resolves.toBe("hi");
|
||||
}
|
||||
} finally {
|
||||
await Promise.all(mediaPaths.map((p) => fs.unlink(p).catch(() => {})));
|
||||
}
|
||||
});
|
||||
|
||||
it("runs nodes camera clip and prints one MEDIA path", async () => {
|
||||
callGateway
|
||||
.mockResolvedValueOnce({
|
||||
ts: Date.now(),
|
||||
nodes: [
|
||||
{
|
||||
nodeId: "ios-node",
|
||||
displayName: "iOS Node",
|
||||
remoteIp: "192.168.0.88",
|
||||
connected: true,
|
||||
},
|
||||
],
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
nodeId: "ios-node",
|
||||
command: "camera.clip",
|
||||
payload: {
|
||||
format: "mp4",
|
||||
base64: "aGk=",
|
||||
durationMs: 3000,
|
||||
hasAudio: true,
|
||||
},
|
||||
});
|
||||
|
||||
const program = buildProgram();
|
||||
runtime.log.mockClear();
|
||||
await program.parseAsync(
|
||||
["nodes", "camera", "clip", "--node", "ios-node", "--duration", "3000"],
|
||||
{ from: "user" },
|
||||
);
|
||||
|
||||
expect(callGateway).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
method: "node.invoke",
|
||||
params: expect.objectContaining({
|
||||
nodeId: "ios-node",
|
||||
command: "camera.clip",
|
||||
timeoutMs: 45000,
|
||||
idempotencyKey: "idem-test",
|
||||
params: expect.objectContaining({
|
||||
facing: "front",
|
||||
durationMs: 3000,
|
||||
includeAudio: true,
|
||||
format: "mp4",
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
const out = String(runtime.log.mock.calls[0]?.[0] ?? "");
|
||||
const mediaPath = out.replace(/^MEDIA:/, "").trim();
|
||||
expect(mediaPath).toMatch(/clawdis-camera-clip-front-.*\.mp4$/);
|
||||
|
||||
try {
|
||||
await expect(fs.readFile(mediaPath, "utf8")).resolves.toBe("hi");
|
||||
} finally {
|
||||
await fs.unlink(mediaPath).catch(() => {});
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user