feat: add camera list and device selection

This commit is contained in:
Peter Steinberger
2026-01-02 18:23:26 +01:00
parent 2b34bf08da
commit 74db53d939
12 changed files with 293 additions and 18 deletions

View File

@@ -247,6 +247,7 @@ final class BridgeConnectionController {
let caps = Set(self.currentCaps())
if caps.contains(ClawdisCapability.camera.rawValue) {
commands.append(ClawdisCameraCommand.list.rawValue)
commands.append(ClawdisCameraCommand.snap.rawValue)
commands.append(ClawdisCameraCommand.clip.rawValue)
}

View File

@@ -3,6 +3,13 @@ import ClawdisKit
import Foundation
actor CameraController {
/// Serializable description of one capture device, as produced by `listDevices()`
/// and returned in the `camera.list` payload.
struct CameraDeviceInfo: Codable, Sendable {
/// The device's `uniqueID`; `pickCamera` compares a requested `deviceId` against this value.
var id: String
/// The device's `localizedName`.
var name: String
/// Position label from `positionLabel(_:)`: "front", "back", or "unspecified".
var position: String
/// Raw value of the device's `AVCaptureDevice.DeviceType`.
var deviceType: String
}
enum CameraError: LocalizedError, Sendable {
case cameraUnavailable
case microphoneUnavailable
@@ -41,13 +48,14 @@ actor CameraController {
// If you need the full-res photo, explicitly request a larger maxWidth.
let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600
let quality = Self.clampQuality(params.quality)
let delayMs = max(0, params.delayMs ?? 0)
try await self.ensureAccess(for: .video)
let session = AVCaptureSession()
session.sessionPreset = .photo
guard let device = Self.pickCamera(facing: facing) else {
guard let device = Self.pickCamera(facing: facing, deviceId: params.deviceId) else {
throw CameraError.cameraUnavailable
}
@@ -67,6 +75,7 @@ actor CameraController {
session.startRunning()
defer { session.stopRunning() }
await Self.warmUpCaptureSession()
await Self.sleepDelayMs(delayMs)
let settings: AVCapturePhotoSettings = {
if output.availablePhotoCodecTypes.contains(.jpeg) {
@@ -119,7 +128,7 @@ actor CameraController {
let session = AVCaptureSession()
session.sessionPreset = .high
guard let camera = Self.pickCamera(facing: facing) else {
guard let camera = Self.pickCamera(facing: facing, deviceId: params.deviceId) else {
throw CameraError.cameraUnavailable
}
let cameraInput = try AVCaptureDeviceInput(device: camera)
@@ -180,6 +189,24 @@ actor CameraController {
hasAudio: includeAudio)
}
/// Lists the video capture devices reported by a discovery session.
///
/// The session is queried for built-in wide-angle and `externalUnknown` device types
/// with position `.unspecified`; every device found is converted to a `CameraDeviceInfo`.
///
/// - Returns: One entry per discovered device, in the order the session reports them.
func listDevices() -> [CameraDeviceInfo] {
    let discovery = AVCaptureDevice.DiscoverySession(
        deviceTypes: [.builtInWideAngleCamera, .externalUnknown],
        mediaType: .video,
        position: .unspecified)

    var infos: [CameraDeviceInfo] = []
    for camera in discovery.devices {
        let info = CameraDeviceInfo(
            id: camera.uniqueID,
            name: camera.localizedName,
            position: Self.positionLabel(camera.position),
            deviceType: camera.deviceType.rawValue)
        infos.append(info)
    }
    return infos
}
private func ensureAccess(for mediaType: AVMediaType) async throws {
let status = AVCaptureDevice.authorizationStatus(for: mediaType)
switch status {
@@ -201,7 +228,15 @@ actor CameraController {
}
}
private nonisolated static func pickCamera(facing: ClawdisCameraFacing) -> AVCaptureDevice? {
private nonisolated static func pickCamera(
facing: ClawdisCameraFacing,
deviceId: String?) -> AVCaptureDevice?
{
if let deviceId, !deviceId.isEmpty {
if let match = AVCaptureDevice.devices(for: .video).first(where: { $0.uniqueID == deviceId }) {
return match
}
}
let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) {
return device
@@ -210,6 +245,14 @@ actor CameraController {
return AVCaptureDevice.default(for: .video)
}
/// Converts a capture-device position into the string label used in `CameraDeviceInfo.position`.
///
/// - Parameter position: The device position to describe.
/// - Returns: "front" or "back" for those positions; "unspecified" for anything else.
private nonisolated static func positionLabel(_ position: AVCaptureDevice.Position) -> String {
    if position == .front {
        return "front"
    }
    if position == .back {
        return "back"
    }
    return "unspecified"
}
nonisolated static func clampQuality(_ quality: Double?) -> Double {
let q = quality ?? 0.9
return min(1.0, max(0.05, q))
@@ -262,6 +305,12 @@ actor CameraController {
// A short delay after `startRunning()` significantly reduces "blank first frame" captures on some devices.
try? await Task.sleep(nanoseconds: 150_000_000) // 150ms
}
/// Suspends the caller for `delayMs` milliseconds, capped at 10 000 ms.
///
/// Zero or negative delays return immediately. Any error thrown by `Task.sleep`
/// is discarded, so the function itself never throws.
///
/// - Parameter delayMs: Requested delay in milliseconds.
private nonisolated static func sleepDelayMs(_ delayMs: Int) async {
    if delayMs <= 0 {
        return
    }
    // Cap before converting so the multiplication below stays small.
    let cappedMs = min(delayMs, 10_000)
    let nanoseconds = UInt64(cappedMs) * 1_000_000
    try? await Task.sleep(nanoseconds: nanoseconds)
}
}
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {

View File

@@ -589,6 +589,14 @@ final class NodeAppModel {
let resultJSON = try await self.screen.eval(javaScript: js)
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: resultJSON)
case ClawdisCameraCommand.list.rawValue:
let devices = await self.camera.listDevices()
struct Payload: Codable {
var devices: [CameraController.CameraDeviceInfo]
}
let payload = try Self.encodePayload(Payload(devices: devices))
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisCameraCommand.snap.rawValue:
self.showCameraHUD(text: "Taking photo…", kind: .photo)
self.triggerCameraFlash()

View File

@@ -333,6 +333,7 @@ struct SettingsTab: View {
let caps = Set(self.currentCaps())
if caps.contains(ClawdisCapability.camera.rawValue) {
commands.append(ClawdisCameraCommand.list.rawValue)
commands.append(ClawdisCameraCommand.snap.rawValue)
commands.append(ClawdisCameraCommand.clip.rawValue)
}

View File

@@ -6,6 +6,13 @@ import Foundation
import OSLog
actor CameraCaptureService {
/// Encodable description of one capture device, as produced by `listDevices()`.
struct CameraDeviceInfo: Encodable, Sendable {
/// The device's `uniqueID`; `pickCamera` compares a requested `deviceId` against this value.
let id: String
/// The device's `localizedName`.
let name: String
/// Position label from `positionLabel(_:)`: "front", "back", or "unspecified".
let position: String
/// Raw value of the device's `AVCaptureDevice.DeviceType`.
let deviceType: String
}
enum CameraError: LocalizedError, Sendable {
case cameraUnavailable
case microphoneUnavailable
@@ -31,18 +38,36 @@ actor CameraCaptureService {
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "camera")
func snap(facing: CameraFacing?, maxWidth: Int?, quality: Double?) async throws -> (data: Data, size: CGSize) {
/// Describes every camera returned by `availableCameras()`.
///
/// - Returns: One `CameraDeviceInfo` per device, in the order `availableCameras()` yields them.
func listDevices() -> [CameraDeviceInfo] {
    let cameras = Self.availableCameras()
    var infos: [CameraDeviceInfo] = []
    for camera in cameras {
        let label = Self.positionLabel(camera.position)
        infos.append(
            CameraDeviceInfo(
                id: camera.uniqueID,
                name: camera.localizedName,
                position: label,
                deviceType: camera.deviceType.rawValue))
    }
    return infos
}
func snap(
facing: CameraFacing?,
maxWidth: Int?,
quality: Double?,
deviceId: String?,
delayMs: Int) async throws -> (data: Data, size: CGSize)
{
let facing = facing ?? .front
let normalized = Self.normalizeSnap(maxWidth: maxWidth, quality: quality)
let maxWidth = normalized.maxWidth
let quality = normalized.quality
let delayMs = max(0, delayMs)
let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines)
try await self.ensureAccess(for: .video)
let session = AVCaptureSession()
session.sessionPreset = .photo
guard let device = Self.pickCamera(facing: facing) else {
guard let device = Self.pickCamera(facing: facing, deviceId: deviceId) else {
throw CameraError.cameraUnavailable
}
@@ -63,6 +88,7 @@ actor CameraCaptureService {
defer { session.stopRunning() }
await Self.warmUpCaptureSession()
await self.waitForExposureAndWhiteBalance(device: device)
await self.sleepDelayMs(delayMs)
let settings: AVCapturePhotoSettings = {
if output.availablePhotoCodecTypes.contains(.jpeg) {
@@ -95,10 +121,12 @@ actor CameraCaptureService {
facing: CameraFacing?,
durationMs: Int?,
includeAudio: Bool,
deviceId: String?,
outPath: String?) async throws -> (path: String, durationMs: Int, hasAudio: Bool)
{
let facing = facing ?? .front
let durationMs = Self.clampDurationMs(durationMs)
let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines)
try await self.ensureAccess(for: .video)
if includeAudio {
@@ -108,7 +136,7 @@ actor CameraCaptureService {
let session = AVCaptureSession()
session.sessionPreset = .high
guard let camera = Self.pickCamera(facing: facing) else {
guard let camera = Self.pickCamera(facing: facing, deviceId: deviceId) else {
throw CameraError.cameraUnavailable
}
let cameraInput = try AVCaptureDeviceInput(device: camera)
@@ -188,7 +216,28 @@ actor CameraCaptureService {
}
}
private nonisolated static func pickCamera(facing: CameraFacing) -> AVCaptureDevice? {
/// Returns the video capture devices found by a discovery session covering
/// built-in wide-angle, `externalUnknown`, and Continuity Camera device types
/// with position `.unspecified`.
private nonisolated static func availableCameras() -> [AVCaptureDevice] {
    AVCaptureDevice.DiscoverySession(
        deviceTypes: [.builtInWideAngleCamera, .externalUnknown, .continuityCamera],
        mediaType: .video,
        position: .unspecified)
        .devices
}
private nonisolated static func pickCamera(
facing: CameraFacing,
deviceId: String?) -> AVCaptureDevice?
{
if let deviceId, !deviceId.isEmpty {
if let match = Self.availableCameras().first(where: { $0.uniqueID == deviceId }) {
return match
}
}
let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) {
@@ -269,6 +318,20 @@ actor CameraCaptureService {
try? await Task.sleep(nanoseconds: stepNs)
}
}
/// Waits for `delayMs` milliseconds (at most 10 000 ms) before returning.
///
/// Non-positive delays are a no-op. Any error thrown by `Task.sleep` is discarded.
///
/// - Parameter delayMs: Requested delay in milliseconds.
private func sleepDelayMs(_ delayMs: Int) async {
    if delayMs <= 0 {
        return
    }
    let cappedMs = UInt64(min(delayMs, 10_000))
    let nanosPerMilli: UInt64 = 1_000_000
    try? await Task.sleep(nanoseconds: cappedMs * nanosPerMilli)
}
/// Produces the string label reported for a capture-device position.
///
/// - Parameter position: The device position to describe.
/// - Returns: "front", "back", or "unspecified" (also used for any position not listed here).
private nonisolated static func positionLabel(_ position: AVCaptureDevice.Position) -> String {
    switch position {
    case .front:
        return "front"
    case .back:
        return "back"
    case .unspecified:
        return "unspecified"
    @unknown default:
        // Positions added by future SDKs keep the same fallback label.
        return "unspecified"
    }
}
}
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {

View File

@@ -135,6 +135,7 @@ final class MacNodeModeCoordinator {
let capsSet = Set(caps)
if capsSet.contains(ClawdisCapability.camera.rawValue) {
commands.append(ClawdisCameraCommand.list.rawValue)
commands.append(ClawdisCameraCommand.snap.rawValue)
commands.append(ClawdisCameraCommand.clip.rawValue)
}

View File

@@ -103,10 +103,13 @@ actor MacNodeRuntime {
}
let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ??
ClawdisCameraSnapParams()
let delayMs = min(10_000, max(0, params.delayMs ?? 2000))
let res = try await self.cameraCapture.snap(
facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front,
maxWidth: params.maxWidth,
quality: params.quality)
quality: params.quality,
deviceId: params.deviceId,
delayMs: delayMs)
struct SnapPayload: Encodable {
var format: String
var base64: String
@@ -135,6 +138,7 @@ actor MacNodeRuntime {
facing: CameraFacing(rawValue: params.facing?.rawValue ?? "") ?? .front,
durationMs: params.durationMs,
includeAudio: params.includeAudio ?? true,
deviceId: params.deviceId,
outPath: nil)
defer { try? FileManager.default.removeItem(atPath: res.path) }
let data = try Data(contentsOf: URL(fileURLWithPath: res.path))
@@ -151,6 +155,19 @@ actor MacNodeRuntime {
hasAudio: res.hasAudio))
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisCameraCommand.list.rawValue:
guard Self.cameraEnabled() else {
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .unavailable,
message: "CAMERA_DISABLED: enable Camera in Settings"))
}
let devices = await self.cameraCapture.listDevices()
let payload = try Self.encodePayload(["devices": devices])
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case MacNodeScreenCommand.record.rawValue:
let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ??
MacNodeScreenRecordParams()

View File

@@ -1,6 +1,7 @@
import Foundation
/// Camera command identifiers exchanged over the bridge.
///
/// The raw values are the strings nodes append to their advertised command list
/// and that invoke handlers switch on.
public enum ClawdisCameraCommand: String, Codable, Sendable {
/// Returns the available camera devices as a `devices` payload.
case list = "camera.list"
/// Takes a photo.
case snap = "camera.snap"
/// Captures a clip (presumably video; parameters include a duration and an audio flag).
case clip = "camera.clip"
}
@@ -24,17 +25,23 @@ public struct ClawdisCameraSnapParams: Codable, Sendable, Equatable {
public var maxWidth: Int?
public var quality: Double?
public var format: ClawdisCameraImageFormat?
public var deviceId: String?
public var delayMs: Int?
public init(
facing: ClawdisCameraFacing? = nil,
maxWidth: Int? = nil,
quality: Double? = nil,
format: ClawdisCameraImageFormat? = nil)
format: ClawdisCameraImageFormat? = nil,
deviceId: String? = nil,
delayMs: Int? = nil)
{
self.facing = facing
self.maxWidth = maxWidth
self.quality = quality
self.format = format
self.deviceId = deviceId
self.delayMs = delayMs
}
}
@@ -43,16 +50,19 @@ public struct ClawdisCameraClipParams: Codable, Sendable, Equatable {
public var durationMs: Int?
public var includeAudio: Bool?
public var format: ClawdisCameraVideoFormat?
public var deviceId: String?
public init(
facing: ClawdisCameraFacing? = nil,
durationMs: Int? = nil,
includeAudio: Bool? = nil,
format: ClawdisCameraVideoFormat? = nil)
format: ClawdisCameraVideoFormat? = nil,
deviceId: String? = nil)
{
self.facing = facing
self.durationMs = durationMs
self.includeAudio = includeAudio
self.format = format
self.deviceId = deviceId
}
}