feat: add camera list and device selection
This commit is contained in:
@@ -79,6 +79,8 @@
|
|||||||
- Agent tools: map `camera.snap` JPEG payloads to `image/jpeg` to avoid MIME mismatch errors.
|
- Agent tools: map `camera.snap` JPEG payloads to `image/jpeg` to avoid MIME mismatch errors.
|
||||||
- Tests: cover `camera.snap` MIME mapping to prevent image/png vs image/jpeg mismatches.
|
- Tests: cover `camera.snap` MIME mapping to prevent image/png vs image/jpeg mismatches.
|
||||||
- macOS camera: wait for exposure/white balance to settle before capturing a snap to avoid dark images.
|
- macOS camera: wait for exposure/white balance to settle before capturing a snap to avoid dark images.
|
||||||
|
- Camera snap: add `delayMs` parameter (default 2000ms on macOS) to improve exposure reliability.
|
||||||
|
- Camera: add `camera.list` and optional `deviceId` selection for snaps/clips.
|
||||||
- macOS packaging: move rpath config into swift build for reliability (#69) — thanks @petter-b
|
- macOS packaging: move rpath config into swift build for reliability (#69) — thanks @petter-b
|
||||||
- macOS: prioritize main bundle for device resources to prevent crash (#73) — thanks @petter-b
|
- macOS: prioritize main bundle for device resources to prevent crash (#73) — thanks @petter-b
|
||||||
- macOS remote: route settings through gateway config and avoid local config reads in remote mode.
|
- macOS remote: route settings through gateway config and avoid local config reads in remote mode.
|
||||||
|
|||||||
@@ -247,6 +247,7 @@ final class BridgeConnectionController {
|
|||||||
|
|
||||||
let caps = Set(self.currentCaps())
|
let caps = Set(self.currentCaps())
|
||||||
if caps.contains(ClawdisCapability.camera.rawValue) {
|
if caps.contains(ClawdisCapability.camera.rawValue) {
|
||||||
|
commands.append(ClawdisCameraCommand.list.rawValue)
|
||||||
commands.append(ClawdisCameraCommand.snap.rawValue)
|
commands.append(ClawdisCameraCommand.snap.rawValue)
|
||||||
commands.append(ClawdisCameraCommand.clip.rawValue)
|
commands.append(ClawdisCameraCommand.clip.rawValue)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,13 @@ import ClawdisKit
|
|||||||
import Foundation
|
import Foundation
|
||||||
|
|
||||||
actor CameraController {
|
actor CameraController {
|
||||||
|
struct CameraDeviceInfo: Codable, Sendable {
|
||||||
|
var id: String
|
||||||
|
var name: String
|
||||||
|
var position: String
|
||||||
|
var deviceType: String
|
||||||
|
}
|
||||||
|
|
||||||
enum CameraError: LocalizedError, Sendable {
|
enum CameraError: LocalizedError, Sendable {
|
||||||
case cameraUnavailable
|
case cameraUnavailable
|
||||||
case microphoneUnavailable
|
case microphoneUnavailable
|
||||||
@@ -41,13 +48,14 @@ actor CameraController {
|
|||||||
// If you need the full-res photo, explicitly request a larger maxWidth.
|
// If you need the full-res photo, explicitly request a larger maxWidth.
|
||||||
let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600
|
let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600
|
||||||
let quality = Self.clampQuality(params.quality)
|
let quality = Self.clampQuality(params.quality)
|
||||||
|
let delayMs = max(0, params.delayMs ?? 0)
|
||||||
|
|
||||||
try await self.ensureAccess(for: .video)
|
try await self.ensureAccess(for: .video)
|
||||||
|
|
||||||
let session = AVCaptureSession()
|
let session = AVCaptureSession()
|
||||||
session.sessionPreset = .photo
|
session.sessionPreset = .photo
|
||||||
|
|
||||||
guard let device = Self.pickCamera(facing: facing) else {
|
guard let device = Self.pickCamera(facing: facing, deviceId: params.deviceId) else {
|
||||||
throw CameraError.cameraUnavailable
|
throw CameraError.cameraUnavailable
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,6 +75,7 @@ actor CameraController {
|
|||||||
session.startRunning()
|
session.startRunning()
|
||||||
defer { session.stopRunning() }
|
defer { session.stopRunning() }
|
||||||
await Self.warmUpCaptureSession()
|
await Self.warmUpCaptureSession()
|
||||||
|
await Self.sleepDelayMs(delayMs)
|
||||||
|
|
||||||
let settings: AVCapturePhotoSettings = {
|
let settings: AVCapturePhotoSettings = {
|
||||||
if output.availablePhotoCodecTypes.contains(.jpeg) {
|
if output.availablePhotoCodecTypes.contains(.jpeg) {
|
||||||
@@ -119,7 +128,7 @@ actor CameraController {
|
|||||||
let session = AVCaptureSession()
|
let session = AVCaptureSession()
|
||||||
session.sessionPreset = .high
|
session.sessionPreset = .high
|
||||||
|
|
||||||
guard let camera = Self.pickCamera(facing: facing) else {
|
guard let camera = Self.pickCamera(facing: facing, deviceId: params.deviceId) else {
|
||||||
throw CameraError.cameraUnavailable
|
throw CameraError.cameraUnavailable
|
||||||
}
|
}
|
||||||
let cameraInput = try AVCaptureDeviceInput(device: camera)
|
let cameraInput = try AVCaptureDeviceInput(device: camera)
|
||||||
@@ -180,6 +189,24 @@ actor CameraController {
|
|||||||
hasAudio: includeAudio)
|
hasAudio: includeAudio)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func listDevices() -> [CameraDeviceInfo] {
|
||||||
|
let types: [AVCaptureDevice.DeviceType] = [
|
||||||
|
.builtInWideAngleCamera,
|
||||||
|
.externalUnknown,
|
||||||
|
]
|
||||||
|
let session = AVCaptureDevice.DiscoverySession(
|
||||||
|
deviceTypes: types,
|
||||||
|
mediaType: .video,
|
||||||
|
position: .unspecified)
|
||||||
|
return session.devices.map { device in
|
||||||
|
CameraDeviceInfo(
|
||||||
|
id: device.uniqueID,
|
||||||
|
name: device.localizedName,
|
||||||
|
position: Self.positionLabel(device.position),
|
||||||
|
deviceType: device.deviceType.rawValue)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private func ensureAccess(for mediaType: AVMediaType) async throws {
|
private func ensureAccess(for mediaType: AVMediaType) async throws {
|
||||||
let status = AVCaptureDevice.authorizationStatus(for: mediaType)
|
let status = AVCaptureDevice.authorizationStatus(for: mediaType)
|
||||||
switch status {
|
switch status {
|
||||||
@@ -201,7 +228,15 @@ actor CameraController {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private nonisolated static func pickCamera(facing: ClawdisCameraFacing) -> AVCaptureDevice? {
|
private nonisolated static func pickCamera(
|
||||||
|
facing: ClawdisCameraFacing,
|
||||||
|
deviceId: String?) -> AVCaptureDevice?
|
||||||
|
{
|
||||||
|
if let deviceId, !deviceId.isEmpty {
|
||||||
|
if let match = AVCaptureDevice.devices(for: .video).first(where: { $0.uniqueID == deviceId }) {
|
||||||
|
return match
|
||||||
|
}
|
||||||
|
}
|
||||||
let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
|
let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
|
||||||
if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) {
|
if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) {
|
||||||
return device
|
return device
|
||||||
@@ -210,6 +245,14 @@ actor CameraController {
|
|||||||
return AVCaptureDevice.default(for: .video)
|
return AVCaptureDevice.default(for: .video)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private nonisolated static func positionLabel(_ position: AVCaptureDevice.Position) -> String {
|
||||||
|
switch position {
|
||||||
|
case .front: "front"
|
||||||
|
case .back: "back"
|
||||||
|
default: "unspecified"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nonisolated static func clampQuality(_ quality: Double?) -> Double {
|
nonisolated static func clampQuality(_ quality: Double?) -> Double {
|
||||||
let q = quality ?? 0.9
|
let q = quality ?? 0.9
|
||||||
return min(1.0, max(0.05, q))
|
return min(1.0, max(0.05, q))
|
||||||
@@ -262,6 +305,12 @@ actor CameraController {
|
|||||||
// A short delay after `startRunning()` significantly reduces "blank first frame" captures on some devices.
|
// A short delay after `startRunning()` significantly reduces "blank first frame" captures on some devices.
|
||||||
try? await Task.sleep(nanoseconds: 150_000_000) // 150ms
|
try? await Task.sleep(nanoseconds: 150_000_000) // 150ms
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private nonisolated static func sleepDelayMs(_ delayMs: Int) async {
|
||||||
|
guard delayMs > 0 else { return }
|
||||||
|
let ns = UInt64(min(delayMs, 10_000)) * 1_000_000
|
||||||
|
try? await Task.sleep(nanoseconds: ns)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {
|
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {
|
||||||
|
|||||||
@@ -589,6 +589,14 @@ final class NodeAppModel {
|
|||||||
let resultJSON = try await self.screen.eval(javaScript: js)
|
let resultJSON = try await self.screen.eval(javaScript: js)
|
||||||
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: resultJSON)
|
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: resultJSON)
|
||||||
|
|
||||||
|
case ClawdisCameraCommand.list.rawValue:
|
||||||
|
let devices = await self.camera.listDevices()
|
||||||
|
struct Payload: Codable {
|
||||||
|
var devices: [CameraController.CameraDeviceInfo]
|
||||||
|
}
|
||||||
|
let payload = try Self.encodePayload(Payload(devices: devices))
|
||||||
|
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
||||||
|
|
||||||
case ClawdisCameraCommand.snap.rawValue:
|
case ClawdisCameraCommand.snap.rawValue:
|
||||||
self.showCameraHUD(text: "Taking photo…", kind: .photo)
|
self.showCameraHUD(text: "Taking photo…", kind: .photo)
|
||||||
self.triggerCameraFlash()
|
self.triggerCameraFlash()
|
||||||
|
|||||||
@@ -333,6 +333,7 @@ struct SettingsTab: View {
|
|||||||
|
|
||||||
let caps = Set(self.currentCaps())
|
let caps = Set(self.currentCaps())
|
||||||
if caps.contains(ClawdisCapability.camera.rawValue) {
|
if caps.contains(ClawdisCapability.camera.rawValue) {
|
||||||
|
commands.append(ClawdisCameraCommand.list.rawValue)
|
||||||
commands.append(ClawdisCameraCommand.snap.rawValue)
|
commands.append(ClawdisCameraCommand.snap.rawValue)
|
||||||
commands.append(ClawdisCameraCommand.clip.rawValue)
|
commands.append(ClawdisCameraCommand.clip.rawValue)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,13 @@ import Foundation
|
|||||||
import OSLog
|
import OSLog
|
||||||
|
|
||||||
actor CameraCaptureService {
|
actor CameraCaptureService {
|
||||||
|
struct CameraDeviceInfo: Encodable, Sendable {
|
||||||
|
let id: String
|
||||||
|
let name: String
|
||||||
|
let position: String
|
||||||
|
let deviceType: String
|
||||||
|
}
|
||||||
|
|
||||||
enum CameraError: LocalizedError, Sendable {
|
enum CameraError: LocalizedError, Sendable {
|
||||||
case cameraUnavailable
|
case cameraUnavailable
|
||||||
case microphoneUnavailable
|
case microphoneUnavailable
|
||||||
@@ -31,18 +38,36 @@ actor CameraCaptureService {
|
|||||||
|
|
||||||
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "camera")
|
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "camera")
|
||||||
|
|
||||||
func snap(facing: CameraFacing?, maxWidth: Int?, quality: Double?) async throws -> (data: Data, size: CGSize) {
|
func listDevices() -> [CameraDeviceInfo] {
|
||||||
|
Self.availableCameras().map { device in
|
||||||
|
CameraDeviceInfo(
|
||||||
|
id: device.uniqueID,
|
||||||
|
name: device.localizedName,
|
||||||
|
position: Self.positionLabel(device.position),
|
||||||
|
deviceType: device.deviceType.rawValue)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func snap(
|
||||||
|
facing: CameraFacing?,
|
||||||
|
maxWidth: Int?,
|
||||||
|
quality: Double?,
|
||||||
|
deviceId: String?,
|
||||||
|
delayMs: Int) async throws -> (data: Data, size: CGSize)
|
||||||
|
{
|
||||||
let facing = facing ?? .front
|
let facing = facing ?? .front
|
||||||
let normalized = Self.normalizeSnap(maxWidth: maxWidth, quality: quality)
|
let normalized = Self.normalizeSnap(maxWidth: maxWidth, quality: quality)
|
||||||
let maxWidth = normalized.maxWidth
|
let maxWidth = normalized.maxWidth
|
||||||
let quality = normalized.quality
|
let quality = normalized.quality
|
||||||
|
let delayMs = max(0, delayMs)
|
||||||
|
let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
|
|
||||||
try await self.ensureAccess(for: .video)
|
try await self.ensureAccess(for: .video)
|
||||||
|
|
||||||
let session = AVCaptureSession()
|
let session = AVCaptureSession()
|
||||||
session.sessionPreset = .photo
|
session.sessionPreset = .photo
|
||||||
|
|
||||||
guard let device = Self.pickCamera(facing: facing) else {
|
guard let device = Self.pickCamera(facing: facing, deviceId: deviceId) else {
|
||||||
throw CameraError.cameraUnavailable
|
throw CameraError.cameraUnavailable
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,6 +88,7 @@ actor CameraCaptureService {
|
|||||||
defer { session.stopRunning() }
|
defer { session.stopRunning() }
|
||||||
await Self.warmUpCaptureSession()
|
await Self.warmUpCaptureSession()
|
||||||
await self.waitForExposureAndWhiteBalance(device: device)
|
await self.waitForExposureAndWhiteBalance(device: device)
|
||||||
|
await self.sleepDelayMs(delayMs)
|
||||||
|
|
||||||
let settings: AVCapturePhotoSettings = {
|
let settings: AVCapturePhotoSettings = {
|
||||||
if output.availablePhotoCodecTypes.contains(.jpeg) {
|
if output.availablePhotoCodecTypes.contains(.jpeg) {
|
||||||
@@ -95,10 +121,12 @@ actor CameraCaptureService {
|
|||||||
facing: CameraFacing?,
|
facing: CameraFacing?,
|
||||||
durationMs: Int?,
|
durationMs: Int?,
|
||||||
includeAudio: Bool,
|
includeAudio: Bool,
|
||||||
|
deviceId: String?,
|
||||||
outPath: String?) async throws -> (path: String, durationMs: Int, hasAudio: Bool)
|
outPath: String?) async throws -> (path: String, durationMs: Int, hasAudio: Bool)
|
||||||
{
|
{
|
||||||
let facing = facing ?? .front
|
let facing = facing ?? .front
|
||||||
let durationMs = Self.clampDurationMs(durationMs)
|
let durationMs = Self.clampDurationMs(durationMs)
|
||||||
|
let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
|
|
||||||
try await self.ensureAccess(for: .video)
|
try await self.ensureAccess(for: .video)
|
||||||
if includeAudio {
|
if includeAudio {
|
||||||
@@ -108,7 +136,7 @@ actor CameraCaptureService {
|
|||||||
let session = AVCaptureSession()
|
let session = AVCaptureSession()
|
||||||
session.sessionPreset = .high
|
session.sessionPreset = .high
|
||||||
|
|
||||||
guard let camera = Self.pickCamera(facing: facing) else {
|
guard let camera = Self.pickCamera(facing: facing, deviceId: deviceId) else {
|
||||||
throw CameraError.cameraUnavailable
|
throw CameraError.cameraUnavailable
|
||||||
}
|
}
|
||||||
let cameraInput = try AVCaptureDeviceInput(device: camera)
|
let cameraInput = try AVCaptureDeviceInput(device: camera)
|
||||||
@@ -188,7 +216,28 @@ actor CameraCaptureService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private nonisolated static func pickCamera(facing: CameraFacing) -> AVCaptureDevice? {
|
private nonisolated static func availableCameras() -> [AVCaptureDevice] {
|
||||||
|
let types: [AVCaptureDevice.DeviceType] = [
|
||||||
|
.builtInWideAngleCamera,
|
||||||
|
.externalUnknown,
|
||||||
|
.continuityCamera,
|
||||||
|
]
|
||||||
|
let session = AVCaptureDevice.DiscoverySession(
|
||||||
|
deviceTypes: types,
|
||||||
|
mediaType: .video,
|
||||||
|
position: .unspecified)
|
||||||
|
return session.devices
|
||||||
|
}
|
||||||
|
|
||||||
|
private nonisolated static func pickCamera(
|
||||||
|
facing: CameraFacing,
|
||||||
|
deviceId: String?) -> AVCaptureDevice?
|
||||||
|
{
|
||||||
|
if let deviceId, !deviceId.isEmpty {
|
||||||
|
if let match = Self.availableCameras().first(where: { $0.uniqueID == deviceId }) {
|
||||||
|
return match
|
||||||
|
}
|
||||||
|
}
|
||||||
let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
|
let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
|
||||||
|
|
||||||
if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) {
|
if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) {
|
||||||
@@ -269,6 +318,20 @@ actor CameraCaptureService {
|
|||||||
try? await Task.sleep(nanoseconds: stepNs)
|
try? await Task.sleep(nanoseconds: stepNs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private func sleepDelayMs(_ delayMs: Int) async {
|
||||||
|
guard delayMs > 0 else { return }
|
||||||
|
let ns = UInt64(min(delayMs, 10_000)) * 1_000_000
|
||||||
|
try? await Task.sleep(nanoseconds: ns)
|
||||||
|
}
|
||||||
|
|
||||||
|
private nonisolated static func positionLabel(_ position: AVCaptureDevice.Position) -> String {
|
||||||
|
switch position {
|
||||||
|
case .front: "front"
|
||||||
|
case .back: "back"
|
||||||
|
default: "unspecified"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {
|
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {
|
||||||
|
|||||||
@@ -135,6 +135,7 @@ final class MacNodeModeCoordinator {
|
|||||||
|
|
||||||
let capsSet = Set(caps)
|
let capsSet = Set(caps)
|
||||||
if capsSet.contains(ClawdisCapability.camera.rawValue) {
|
if capsSet.contains(ClawdisCapability.camera.rawValue) {
|
||||||
|
commands.append(ClawdisCameraCommand.list.rawValue)
|
||||||
commands.append(ClawdisCameraCommand.snap.rawValue)
|
commands.append(ClawdisCameraCommand.snap.rawValue)
|
||||||
commands.append(ClawdisCameraCommand.clip.rawValue)
|
commands.append(ClawdisCameraCommand.clip.rawValue)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -103,10 +103,13 @@ actor MacNodeRuntime {
|
|||||||
}
|
}
|
||||||
let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ??
|
let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ??
|
||||||
ClawdisCameraSnapParams()
|
ClawdisCameraSnapParams()
|
||||||
|
let delayMs = min(10_000, max(0, params.delayMs ?? 2000))
|
||||||
let res = try await self.cameraCapture.snap(
|
let res = try await self.cameraCapture.snap(
|
||||||
facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front,
|
facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front,
|
||||||
maxWidth: params.maxWidth,
|
maxWidth: params.maxWidth,
|
||||||
quality: params.quality)
|
quality: params.quality,
|
||||||
|
deviceId: params.deviceId,
|
||||||
|
delayMs: delayMs)
|
||||||
struct SnapPayload: Encodable {
|
struct SnapPayload: Encodable {
|
||||||
var format: String
|
var format: String
|
||||||
var base64: String
|
var base64: String
|
||||||
@@ -135,6 +138,7 @@ actor MacNodeRuntime {
|
|||||||
facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front,
|
facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front,
|
||||||
durationMs: params.durationMs,
|
durationMs: params.durationMs,
|
||||||
includeAudio: params.includeAudio ?? true,
|
includeAudio: params.includeAudio ?? true,
|
||||||
|
deviceId: params.deviceId,
|
||||||
outPath: nil)
|
outPath: nil)
|
||||||
defer { try? FileManager.default.removeItem(atPath: res.path) }
|
defer { try? FileManager.default.removeItem(atPath: res.path) }
|
||||||
let data = try Data(contentsOf: URL(fileURLWithPath: res.path))
|
let data = try Data(contentsOf: URL(fileURLWithPath: res.path))
|
||||||
@@ -151,6 +155,19 @@ actor MacNodeRuntime {
|
|||||||
hasAudio: res.hasAudio))
|
hasAudio: res.hasAudio))
|
||||||
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
||||||
|
|
||||||
|
case ClawdisCameraCommand.list.rawValue:
|
||||||
|
guard Self.cameraEnabled() else {
|
||||||
|
return BridgeInvokeResponse(
|
||||||
|
id: req.id,
|
||||||
|
ok: false,
|
||||||
|
error: ClawdisNodeError(
|
||||||
|
code: .unavailable,
|
||||||
|
message: "CAMERA_DISABLED: enable Camera in Settings"))
|
||||||
|
}
|
||||||
|
let devices = await self.cameraCapture.listDevices()
|
||||||
|
let payload = try Self.encodePayload(["devices": devices])
|
||||||
|
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
||||||
|
|
||||||
case MacNodeScreenCommand.record.rawValue:
|
case MacNodeScreenCommand.record.rawValue:
|
||||||
let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ??
|
let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ??
|
||||||
MacNodeScreenRecordParams()
|
MacNodeScreenRecordParams()
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
|
|
||||||
public enum ClawdisCameraCommand: String, Codable, Sendable {
|
public enum ClawdisCameraCommand: String, Codable, Sendable {
|
||||||
|
case list = "camera.list"
|
||||||
case snap = "camera.snap"
|
case snap = "camera.snap"
|
||||||
case clip = "camera.clip"
|
case clip = "camera.clip"
|
||||||
}
|
}
|
||||||
@@ -24,17 +25,23 @@ public struct ClawdisCameraSnapParams: Codable, Sendable, Equatable {
|
|||||||
public var maxWidth: Int?
|
public var maxWidth: Int?
|
||||||
public var quality: Double?
|
public var quality: Double?
|
||||||
public var format: ClawdisCameraImageFormat?
|
public var format: ClawdisCameraImageFormat?
|
||||||
|
public var deviceId: String?
|
||||||
|
public var delayMs: Int?
|
||||||
|
|
||||||
public init(
|
public init(
|
||||||
facing: ClawdisCameraFacing? = nil,
|
facing: ClawdisCameraFacing? = nil,
|
||||||
maxWidth: Int? = nil,
|
maxWidth: Int? = nil,
|
||||||
quality: Double? = nil,
|
quality: Double? = nil,
|
||||||
format: ClawdisCameraImageFormat? = nil)
|
format: ClawdisCameraImageFormat? = nil,
|
||||||
|
deviceId: String? = nil,
|
||||||
|
delayMs: Int? = nil)
|
||||||
{
|
{
|
||||||
self.facing = facing
|
self.facing = facing
|
||||||
self.maxWidth = maxWidth
|
self.maxWidth = maxWidth
|
||||||
self.quality = quality
|
self.quality = quality
|
||||||
self.format = format
|
self.format = format
|
||||||
|
self.deviceId = deviceId
|
||||||
|
self.delayMs = delayMs
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,16 +50,19 @@ public struct ClawdisCameraClipParams: Codable, Sendable, Equatable {
|
|||||||
public var durationMs: Int?
|
public var durationMs: Int?
|
||||||
public var includeAudio: Bool?
|
public var includeAudio: Bool?
|
||||||
public var format: ClawdisCameraVideoFormat?
|
public var format: ClawdisCameraVideoFormat?
|
||||||
|
public var deviceId: String?
|
||||||
|
|
||||||
public init(
|
public init(
|
||||||
facing: ClawdisCameraFacing? = nil,
|
facing: ClawdisCameraFacing? = nil,
|
||||||
durationMs: Int? = nil,
|
durationMs: Int? = nil,
|
||||||
includeAudio: Bool? = nil,
|
includeAudio: Bool? = nil,
|
||||||
format: ClawdisCameraVideoFormat? = nil)
|
format: ClawdisCameraVideoFormat? = nil,
|
||||||
|
deviceId: String? = nil)
|
||||||
{
|
{
|
||||||
self.facing = facing
|
self.facing = facing
|
||||||
self.durationMs = durationMs
|
self.durationMs = durationMs
|
||||||
self.includeAudio = includeAudio
|
self.includeAudio = includeAudio
|
||||||
self.format = format
|
self.format = format
|
||||||
|
self.deviceId = deviceId
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,12 +25,18 @@ All camera access is gated behind **user-controlled settings**.
|
|||||||
|
|
||||||
### Commands (via Gateway `node.invoke`)
|
### Commands (via Gateway `node.invoke`)
|
||||||
|
|
||||||
|
- `camera.list`
|
||||||
|
- Response payload:
|
||||||
|
- `devices`: array of `{ id, name, position, deviceType }`
|
||||||
|
|
||||||
- `camera.snap`
|
- `camera.snap`
|
||||||
- Params:
|
- Params:
|
||||||
- `facing`: `front|back` (default: `front`)
|
- `facing`: `front|back` (default: `front`)
|
||||||
- `maxWidth`: number (optional; default `1600` on the iOS node)
|
- `maxWidth`: number (optional; default `1600` on the iOS node)
|
||||||
- `quality`: `0..1` (optional; default `0.9`)
|
- `quality`: `0..1` (optional; default `0.9`)
|
||||||
- `format`: currently `jpg`
|
- `format`: currently `jpg`
|
||||||
|
- `delayMs`: number (optional; default `0`)
|
||||||
|
- `deviceId`: string (optional; from `camera.list`)
|
||||||
- Response payload:
|
- Response payload:
|
||||||
- `format: "jpg"`
|
- `format: "jpg"`
|
||||||
- `base64: "<...>"`
|
- `base64: "<...>"`
|
||||||
@@ -43,6 +49,7 @@ All camera access is gated behind **user-controlled settings**.
|
|||||||
- `durationMs`: number (default `3000`, clamped to a max of `60000`)
|
- `durationMs`: number (default `3000`, clamped to a max of `60000`)
|
||||||
- `includeAudio`: boolean (default `true`)
|
- `includeAudio`: boolean (default `true`)
|
||||||
- `format`: currently `mp4`
|
- `format`: currently `mp4`
|
||||||
|
- `deviceId`: string (optional; from `camera.list`)
|
||||||
- Response payload:
|
- Response payload:
|
||||||
- `format: "mp4"`
|
- `format: "mp4"`
|
||||||
- `base64: "<...>"`
|
- `base64: "<...>"`
|
||||||
@@ -112,15 +119,20 @@ Use the main `clawdis` CLI to invoke camera commands on the macOS node.
|
|||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
clawdis nodes camera list --node <id> # list camera ids
|
||||||
clawdis nodes camera snap --node <id> # prints MEDIA:<path>
|
clawdis nodes camera snap --node <id> # prints MEDIA:<path>
|
||||||
clawdis nodes camera snap --node <id> --max-width 1280
|
clawdis nodes camera snap --node <id> --max-width 1280
|
||||||
|
clawdis nodes camera snap --node <id> --delay-ms 2000
|
||||||
|
clawdis nodes camera snap --node <id> --device-id <id>
|
||||||
clawdis nodes camera clip --node <id> --duration 10s # prints MEDIA:<path>
|
clawdis nodes camera clip --node <id> --duration 10s # prints MEDIA:<path>
|
||||||
clawdis nodes camera clip --node <id> --duration-ms 3000 # prints MEDIA:<path> (legacy flag)
|
clawdis nodes camera clip --node <id> --duration-ms 3000 # prints MEDIA:<path> (legacy flag)
|
||||||
|
clawdis nodes camera clip --node <id> --device-id <id>
|
||||||
clawdis nodes camera clip --node <id> --no-audio
|
clawdis nodes camera clip --node <id> --no-audio
|
||||||
```
|
```
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
- `clawdis nodes camera snap` defaults to `maxWidth=1600` unless overridden.
|
- `clawdis nodes camera snap` defaults to `maxWidth=1600` unless overridden.
|
||||||
|
- On macOS, `camera.snap` waits `delayMs` (default 2000ms) after warm-up/exposure settle before capturing.
|
||||||
- Photo payloads are recompressed to keep base64 under 5 MB.
|
- Photo payloads are recompressed to keep base64 under 5 MB.
|
||||||
|
|
||||||
## Safety + practical limits
|
## Safety + practical limits
|
||||||
|
|||||||
@@ -746,6 +746,7 @@ const CanvasToolSchema = Type.Union([
|
|||||||
),
|
),
|
||||||
maxWidth: Type.Optional(Type.Number()),
|
maxWidth: Type.Optional(Type.Number()),
|
||||||
quality: Type.Optional(Type.Number()),
|
quality: Type.Optional(Type.Number()),
|
||||||
|
delayMs: Type.Optional(Type.Number()),
|
||||||
}),
|
}),
|
||||||
Type.Object({
|
Type.Object({
|
||||||
action: Type.Literal("a2ui_push"),
|
action: Type.Literal("a2ui_push"),
|
||||||
@@ -864,6 +865,20 @@ function createCanvasTool(): AnyAgentTool {
|
|||||||
Number.isFinite(params.quality)
|
Number.isFinite(params.quality)
|
||||||
? params.quality
|
? params.quality
|
||||||
: undefined;
|
: undefined;
|
||||||
|
const delayMs =
|
||||||
|
typeof params.delayMs === "number" &&
|
||||||
|
Number.isFinite(params.delayMs)
|
||||||
|
? params.delayMs
|
||||||
|
: undefined;
|
||||||
|
const deviceId =
|
||||||
|
typeof params.deviceId === "string" && params.deviceId.trim()
|
||||||
|
? params.deviceId.trim()
|
||||||
|
: undefined;
|
||||||
|
const delayMs =
|
||||||
|
typeof params.delayMs === "number" &&
|
||||||
|
Number.isFinite(params.delayMs)
|
||||||
|
? params.delayMs
|
||||||
|
: undefined;
|
||||||
const raw = (await invoke("canvas.snapshot", {
|
const raw = (await invoke("canvas.snapshot", {
|
||||||
format,
|
format,
|
||||||
maxWidth,
|
maxWidth,
|
||||||
@@ -978,6 +993,15 @@ const NodesToolSchema = Type.Union([
|
|||||||
),
|
),
|
||||||
maxWidth: Type.Optional(Type.Number()),
|
maxWidth: Type.Optional(Type.Number()),
|
||||||
quality: Type.Optional(Type.Number()),
|
quality: Type.Optional(Type.Number()),
|
||||||
|
delayMs: Type.Optional(Type.Number()),
|
||||||
|
deviceId: Type.Optional(Type.String()),
|
||||||
|
}),
|
||||||
|
Type.Object({
|
||||||
|
action: Type.Literal("camera_list"),
|
||||||
|
gatewayUrl: Type.Optional(Type.String()),
|
||||||
|
gatewayToken: Type.Optional(Type.String()),
|
||||||
|
timeoutMs: Type.Optional(Type.Number()),
|
||||||
|
node: Type.String(),
|
||||||
}),
|
}),
|
||||||
Type.Object({
|
Type.Object({
|
||||||
action: Type.Literal("camera_clip"),
|
action: Type.Literal("camera_clip"),
|
||||||
@@ -991,6 +1015,7 @@ const NodesToolSchema = Type.Union([
|
|||||||
duration: Type.Optional(Type.String()),
|
duration: Type.Optional(Type.String()),
|
||||||
durationMs: Type.Optional(Type.Number()),
|
durationMs: Type.Optional(Type.Number()),
|
||||||
includeAudio: Type.Optional(Type.Boolean()),
|
includeAudio: Type.Optional(Type.Boolean()),
|
||||||
|
deviceId: Type.Optional(Type.String()),
|
||||||
}),
|
}),
|
||||||
Type.Object({
|
Type.Object({
|
||||||
action: Type.Literal("screen_record"),
|
action: Type.Literal("screen_record"),
|
||||||
@@ -1127,6 +1152,8 @@ function createNodesTool(): AnyAgentTool {
|
|||||||
maxWidth,
|
maxWidth,
|
||||||
quality,
|
quality,
|
||||||
format: "jpg",
|
format: "jpg",
|
||||||
|
delayMs,
|
||||||
|
deviceId,
|
||||||
},
|
},
|
||||||
idempotencyKey: crypto.randomUUID(),
|
idempotencyKey: crypto.randomUUID(),
|
||||||
})) as { payload?: unknown };
|
})) as { payload?: unknown };
|
||||||
@@ -1155,6 +1182,21 @@ function createNodesTool(): AnyAgentTool {
|
|||||||
const result: AgentToolResult<unknown> = { content, details };
|
const result: AgentToolResult<unknown> = { content, details };
|
||||||
return await sanitizeToolResultImages(result, "nodes:camera_snap");
|
return await sanitizeToolResultImages(result, "nodes:camera_snap");
|
||||||
}
|
}
|
||||||
|
case "camera_list": {
|
||||||
|
const node = readStringParam(params, "node", { required: true });
|
||||||
|
const nodeId = await resolveNodeId(gatewayOpts, node);
|
||||||
|
const raw = (await callGatewayTool("node.invoke", gatewayOpts, {
|
||||||
|
nodeId,
|
||||||
|
command: "camera.list",
|
||||||
|
params: {},
|
||||||
|
idempotencyKey: crypto.randomUUID(),
|
||||||
|
})) as { payload?: unknown };
|
||||||
|
const payload =
|
||||||
|
raw && typeof raw.payload === "object" && raw.payload !== null
|
||||||
|
? raw.payload
|
||||||
|
: {};
|
||||||
|
return jsonResult(payload);
|
||||||
|
}
|
||||||
case "camera_clip": {
|
case "camera_clip": {
|
||||||
const node = readStringParam(params, "node", { required: true });
|
const node = readStringParam(params, "node", { required: true });
|
||||||
const nodeId = await resolveNodeId(gatewayOpts, node);
|
const nodeId = await resolveNodeId(gatewayOpts, node);
|
||||||
@@ -1176,6 +1218,10 @@ function createNodesTool(): AnyAgentTool {
|
|||||||
typeof params.includeAudio === "boolean"
|
typeof params.includeAudio === "boolean"
|
||||||
? params.includeAudio
|
? params.includeAudio
|
||||||
: true;
|
: true;
|
||||||
|
const deviceId =
|
||||||
|
typeof params.deviceId === "string" && params.deviceId.trim()
|
||||||
|
? params.deviceId.trim()
|
||||||
|
: undefined;
|
||||||
const raw = (await callGatewayTool("node.invoke", gatewayOpts, {
|
const raw = (await callGatewayTool("node.invoke", gatewayOpts, {
|
||||||
nodeId,
|
nodeId,
|
||||||
command: "camera.clip",
|
command: "camera.clip",
|
||||||
@@ -1184,6 +1230,7 @@ function createNodesTool(): AnyAgentTool {
|
|||||||
durationMs,
|
durationMs,
|
||||||
includeAudio,
|
includeAudio,
|
||||||
format: "mp4",
|
format: "mp4",
|
||||||
|
deviceId,
|
||||||
},
|
},
|
||||||
idempotencyKey: crypto.randomUUID(),
|
idempotencyKey: crypto.randomUUID(),
|
||||||
})) as { payload?: unknown };
|
})) as { payload?: unknown };
|
||||||
|
|||||||
@@ -846,14 +846,69 @@ export function registerNodesCli(program: Command) {
|
|||||||
{ timeoutMs: 60_000 },
|
{ timeoutMs: 60_000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
|
nodesCallOpts(
|
||||||
|
camera
|
||||||
|
.command("list")
|
||||||
|
.description("List available cameras on a node")
|
||||||
|
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
|
||||||
|
.action(async (opts: NodesRpcOpts) => {
|
||||||
|
try {
|
||||||
|
const nodeId = await resolveNodeId(opts, String(opts.node ?? ""));
|
||||||
|
const raw = (await callGatewayCli("node.invoke", opts, {
|
||||||
|
nodeId,
|
||||||
|
command: "camera.list",
|
||||||
|
params: {},
|
||||||
|
idempotencyKey: randomIdempotencyKey(),
|
||||||
|
})) as unknown;
|
||||||
|
|
||||||
|
const res =
|
||||||
|
typeof raw === "object" && raw !== null
|
||||||
|
? (raw as { payload?: unknown })
|
||||||
|
: {};
|
||||||
|
const payload =
|
||||||
|
typeof res.payload === "object" && res.payload !== null
|
||||||
|
? (res.payload as { devices?: unknown })
|
||||||
|
: {};
|
||||||
|
const devices = Array.isArray(payload.devices)
|
||||||
|
? (payload.devices as Array<Record<string, unknown>>)
|
||||||
|
: [];
|
||||||
|
|
||||||
|
if (opts.json) {
|
||||||
|
defaultRuntime.log(JSON.stringify({ devices }, null, 2));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devices.length === 0) {
|
||||||
|
defaultRuntime.log("No cameras reported.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const device of devices) {
|
||||||
|
const id = typeof device.id === "string" ? device.id : "";
|
||||||
|
const name =
|
||||||
|
typeof device.name === "string" ? device.name : "Unknown Camera";
|
||||||
|
const position =
|
||||||
|
typeof device.position === "string" ? device.position : "unspecified";
|
||||||
|
defaultRuntime.log(`${name} (${position})${id ? ` — ${id}` : ""}`);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
defaultRuntime.error(`nodes camera list failed: ${String(err)}`);
|
||||||
|
defaultRuntime.exit(1);
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
{ timeoutMs: 60_000 },
|
||||||
|
);
|
||||||
|
|
||||||
nodesCallOpts(
|
nodesCallOpts(
|
||||||
camera
|
camera
|
||||||
.command("snap")
|
.command("snap")
|
||||||
.description("Capture a photo from a node camera (prints MEDIA:<path>)")
|
.description("Capture a photo from a node camera (prints MEDIA:<path>)")
|
||||||
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
|
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
|
||||||
.option("--facing <front|back|both>", "Camera facing", "both")
|
.option("--facing <front|back|both>", "Camera facing", "both")
|
||||||
|
.option("--device-id <id>", "Camera device id (from nodes camera list)")
|
||||||
.option("--max-width <px>", "Max width in px (optional)")
|
.option("--max-width <px>", "Max width in px (optional)")
|
||||||
.option("--quality <0-1>", "JPEG quality (default 0.9)")
|
.option("--quality <0-1>", "JPEG quality (default 0.9)")
|
||||||
|
.option("--delay-ms <ms>", "Delay before capture in ms (macOS default 2000)")
|
||||||
.option(
|
.option(
|
||||||
"--invoke-timeout <ms>",
|
"--invoke-timeout <ms>",
|
||||||
"Node invoke timeout in ms (default 20000)",
|
"Node invoke timeout in ms (default 20000)",
|
||||||
@@ -882,6 +937,10 @@ export function registerNodesCli(program: Command) {
|
|||||||
const quality = opts.quality
|
const quality = opts.quality
|
||||||
? Number.parseFloat(String(opts.quality))
|
? Number.parseFloat(String(opts.quality))
|
||||||
: undefined;
|
: undefined;
|
||||||
|
const delayMs = opts.delayMs
|
||||||
|
? Number.parseInt(String(opts.delayMs), 10)
|
||||||
|
: undefined;
|
||||||
|
const deviceId = opts.deviceId ? String(opts.deviceId).trim() : undefined;
|
||||||
const timeoutMs = opts.invokeTimeout
|
const timeoutMs = opts.invokeTimeout
|
||||||
? Number.parseInt(String(opts.invokeTimeout), 10)
|
? Number.parseInt(String(opts.invokeTimeout), 10)
|
||||||
: undefined;
|
: undefined;
|
||||||
@@ -902,6 +961,8 @@ export function registerNodesCli(program: Command) {
|
|||||||
maxWidth: Number.isFinite(maxWidth) ? maxWidth : undefined,
|
maxWidth: Number.isFinite(maxWidth) ? maxWidth : undefined,
|
||||||
quality: Number.isFinite(quality) ? quality : undefined,
|
quality: Number.isFinite(quality) ? quality : undefined,
|
||||||
format: "jpg",
|
format: "jpg",
|
||||||
|
delayMs: Number.isFinite(delayMs) ? delayMs : undefined,
|
||||||
|
deviceId: deviceId || undefined,
|
||||||
},
|
},
|
||||||
idempotencyKey: randomIdempotencyKey(),
|
idempotencyKey: randomIdempotencyKey(),
|
||||||
};
|
};
|
||||||
@@ -955,6 +1016,7 @@ export function registerNodesCli(program: Command) {
|
|||||||
)
|
)
|
||||||
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
|
.requiredOption("--node <idOrNameOrIp>", "Node id, name, or IP")
|
||||||
.option("--facing <front|back>", "Camera facing", "front")
|
.option("--facing <front|back>", "Camera facing", "front")
|
||||||
|
.option("--device-id <id>", "Camera device id (from nodes camera list)")
|
||||||
.option(
|
.option(
|
||||||
"--duration <ms|10s|1m>",
|
"--duration <ms|10s|1m>",
|
||||||
"Duration (default 3000ms; supports ms/s/m, e.g. 10s)",
|
"Duration (default 3000ms; supports ms/s/m, e.g. 10s)",
|
||||||
@@ -975,18 +1037,20 @@ export function registerNodesCli(program: Command) {
|
|||||||
const timeoutMs = opts.invokeTimeout
|
const timeoutMs = opts.invokeTimeout
|
||||||
? Number.parseInt(String(opts.invokeTimeout), 10)
|
? Number.parseInt(String(opts.invokeTimeout), 10)
|
||||||
: undefined;
|
: undefined;
|
||||||
|
const deviceId = opts.deviceId ? String(opts.deviceId).trim() : undefined;
|
||||||
|
|
||||||
const invokeParams: Record<string, unknown> = {
|
const invokeParams: Record<string, unknown> = {
|
||||||
nodeId,
|
nodeId,
|
||||||
command: "camera.clip",
|
command: "camera.clip",
|
||||||
params: {
|
params: {
|
||||||
facing,
|
facing,
|
||||||
durationMs: Number.isFinite(durationMs) ? durationMs : undefined,
|
durationMs: Number.isFinite(durationMs) ? durationMs : undefined,
|
||||||
includeAudio,
|
includeAudio,
|
||||||
format: "mp4",
|
format: "mp4",
|
||||||
},
|
deviceId: deviceId || undefined,
|
||||||
idempotencyKey: randomIdempotencyKey(),
|
},
|
||||||
};
|
idempotencyKey: randomIdempotencyKey(),
|
||||||
|
};
|
||||||
if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
|
if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
|
||||||
invokeParams.timeoutMs = timeoutMs;
|
invokeParams.timeoutMs = timeoutMs;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user