macOS: add screen record + safer camera defaults

This commit is contained in:
Peter Steinberger
2025-12-19 00:29:38 +01:00
parent 7831e0040e
commit 3772a29557
6 changed files with 364 additions and 7 deletions

View File

@@ -206,7 +206,7 @@ actor CameraCaptureService {
private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
let v = ms ?? 3000
return min(15000, max(250, v))
return min(60_000, max(250, v))
}
private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws {

View File

@@ -5,6 +5,7 @@ import OSLog
enum ControlRequestHandler {
private static let cameraCapture = CameraCaptureService()
@MainActor private static let screenRecorder = ScreenRecordService()
struct NodeListNode: Codable {
var nodeId: String
@@ -133,6 +134,13 @@ enum ControlRequestHandler {
durationMs: durationMs,
includeAudio: includeAudio,
outPath: outPath)
case let .screenRecord(screenIndex, durationMs, fps, outPath):
return await self.handleScreenRecord(
screenIndex: screenIndex,
durationMs: durationMs,
fps: fps,
outPath: outPath)
}
}
@@ -225,7 +233,7 @@ enum ControlRequestHandler {
}
private static func cameraEnabled() -> Bool {
UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? true
UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false
}
private static func handleCanvasShow(
@@ -534,4 +542,28 @@ enum ControlRequestHandler {
return Response(ok: false, message: error.localizedDescription)
}
}
/// Handles a `screenRecord` control request.
///
/// Checks the screen-recording permission non-interactively, then asks the
/// shared `screenRecorder` to capture the requested display.
///
/// - Parameters:
///   - screenIndex: Display index forwarded unchanged to the recorder.
///   - durationMs: Requested duration in milliseconds, forwarded unchanged.
///   - fps: Requested frame rate, forwarded unchanged.
///   - outPath: Optional destination path, forwarded unchanged.
/// - Returns: `ok: true` with the output file path as the message on success;
///   `ok: false` with a description of the failure otherwise.
private static func handleScreenRecord(
    screenIndex: Int?,
    durationMs: Int?,
    fps: Double?,
    outPath: String?) async -> Response
{
    let permissions = await PermissionManager.ensure([.screenRecording], interactive: false)
    guard permissions[.screenRecording] ?? false else {
        return Response(ok: false, message: "screen recording permission missing")
    }

    do {
        // `screenRecorder` is main-actor isolated, so awaiting the call hops to the
        // main actor by itself. Calling it directly (instead of through an
        // unstructured `Task`) keeps the recording tied to the caller's task, so
        // cancelling the request also cancels the recording.
        let path = try await self.screenRecorder.record(
            screenIndex: screenIndex,
            durationMs: durationMs,
            fps: fps,
            outPath: outPath)
        return Response(ok: true, message: path)
    } catch {
        return Response(ok: false, message: error.localizedDescription)
    }
}
}

View File

@@ -4,7 +4,7 @@ import SwiftUI
struct GeneralSettings: View {
@Bindable var state: AppState
@AppStorage(cameraEnabledKey) private var cameraEnabled: Bool = true
@AppStorage(cameraEnabledKey) private var cameraEnabled: Bool = false
private let healthStore = HealthStore.shared
private let gatewayManager = GatewayProcessManager.shared
// swiftlint:disable:next inclusive_language

View File

@@ -0,0 +1,209 @@
import AVFoundation
import Foundation
import OSLog
@preconcurrency import ScreenCaptureKit
/// Records a single display to an MP4 file using ScreenCaptureKit.
@MainActor
final class ScreenRecordService {
    /// Failures reported by a recording attempt.
    enum ScreenRecordError: LocalizedError {
        case noDisplays
        case invalidScreenIndex(Int)
        case noFramesCaptured
        case writeFailed(String)

        var errorDescription: String? {
            switch self {
            case .noDisplays:
                "No displays available for screen recording"
            case let .invalidScreenIndex(idx):
                "Invalid screen index \(idx)"
            case .noFramesCaptured:
                "No frames captured"
            case let .writeFailed(msg):
                msg
            }
        }
    }

    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "screenRecord")

    /// Captures one display for a fixed duration and writes it to an MP4 file.
    ///
    /// - Parameters:
    ///   - screenIndex: Index into the shareable displays sorted by `displayID`; defaults to 0.
    ///   - durationMs: Recording length in milliseconds; defaults to 10 000 and is clamped to 250...60 000.
    ///   - fps: Target frame rate; defaults to 10 and is clamped to 1...60.
    ///   - outPath: Destination file. When `nil` or blank, a uniquely named file in the
    ///     temporary directory is used.
    /// - Returns: The path of the written file.
    /// - Throws: `ScreenRecordError` for display-selection and writer failures, or whatever
    ///   ScreenCaptureKit / `Task.sleep` throw (including cancellation).
    func record(
        screenIndex: Int?,
        durationMs: Int?,
        fps: Double?,
        outPath: String?) async throws -> String
    {
        let durationMs = Self.clampDurationMs(durationMs)
        let fps = Self.clampFps(fps)

        let outURL: URL
        if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
            outURL = URL(fileURLWithPath: outPath)
        } else {
            outURL = FileManager.default.temporaryDirectory
                .appendingPathComponent("clawdis-screen-record-\(UUID().uuidString).mp4")
        }

        let content = try await SCShareableContent.current
        let displays = content.displays.sorted { $0.displayID < $1.displayID }
        guard !displays.isEmpty else { throw ScreenRecordError.noDisplays }

        let idx = screenIndex ?? 0
        guard displays.indices.contains(idx) else { throw ScreenRecordError.invalidScreenIndex(idx) }
        let display = displays[idx]

        let config = SCStreamConfiguration()
        config.width = display.width
        config.height = display.height
        config.queueDepth = 8
        config.showsCursor = true
        config.minimumFrameInterval = CMTime(value: 1, timescale: CMTimeScale(max(1, Int32(fps.rounded()))))

        // Only clear a previous file once the request is known to be valid, so a bad
        // screen index does not destroy an existing recording.
        try? FileManager.default.removeItem(at: outURL)

        let recorder = try StreamRecorder(
            outputURL: outURL,
            width: display.width,
            height: display.height,
            logger: self.logger)

        let filter = SCContentFilter(display: display, excludingWindows: [])
        let stream = SCStream(filter: filter, configuration: config, delegate: recorder)
        try stream.addStreamOutput(recorder, type: .screen, sampleHandlerQueue: recorder.queue)

        self.logger.info(
            "screen record start idx=\(idx) durationMs=\(durationMs) fps=\(fps) out=\(outURL.path, privacy: .public)")

        do {
            try await self.capture(stream, forMs: durationMs)
            try await recorder.finish()
        } catch {
            // A failed or cancelled recording leaves at best an unfinalised MP4;
            // remove it so temporary files do not accumulate.
            try? FileManager.default.removeItem(at: outURL)
            throw error
        }
        return outURL.path
    }

    /// Runs `stream` for `durationMs` milliseconds, stopping it on every exit path once started.
    private func capture(_ stream: SCStream, forMs durationMs: Int) async throws {
        try await stream.startCapture()
        do {
            try await Task.sleep(nanoseconds: UInt64(durationMs) * 1_000_000)
            try await stream.stopCapture()
        } catch {
            // Best effort: the stream may already be stopped.
            try? await stream.stopCapture()
            throw error
        }
    }

    /// Clamps a requested duration to 250...60 000 ms (default 10 000 ms).
    private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
        let v = ms ?? 10_000
        return min(60_000, max(250, v))
    }

    /// Clamps a requested frame rate to 1...60 fps (default 10; non-finite values fall back to 10).
    private nonisolated static func clampFps(_ fps: Double?) -> Double {
        let v = fps ?? 10
        if !v.isFinite { return 10 }
        return min(60, max(1, v))
    }
}
/// Receives ScreenCaptureKit sample buffers and encodes them to an H.264 MP4
/// through `AVAssetWriter`.
///
/// All mutable state is confined to `queue`: sample callbacks are delivered on it
/// (it is passed as the `sampleHandlerQueue`), and the delegate callback and
/// `finish()` dispatch onto it. That confinement is what backs `@unchecked Sendable`.
private final class StreamRecorder: NSObject, SCStreamOutput, SCStreamDelegate, @unchecked Sendable {
    let queue = DispatchQueue(label: "com.steipete.clawdis.screenRecord.writer")

    private let logger: Logger
    private let writer: AVAssetWriter
    private let input: AVAssetWriterInput
    private var started = false
    private var sawFrame = false
    private var didFinish = false
    private var pendingErrorMessage: String?

    /// Creates a writer for `outputURL` with a single real-time H.264 video input.
    ///
    /// - Throws: Errors from `AVAssetWriter.init`, or `writeFailed` when the video
    ///   input cannot be added to the writer.
    init(outputURL: URL, width: Int, height: Int, logger: Logger) throws {
        let writer = try AVAssetWriter(outputURL: outputURL, fileType: .mp4)
        let settings: [String: Any] = [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey: width,
            AVVideoHeightKey: height,
        ]
        let input = AVAssetWriterInput(mediaType: .video, outputSettings: settings)
        input.expectsMediaDataInRealTime = true
        guard writer.canAdd(input) else {
            throw ScreenRecordService.ScreenRecordError.writeFailed("Cannot add video input")
        }
        writer.add(input)

        self.logger = logger
        self.writer = writer
        self.input = input
        super.init()
    }

    // MARK: - SCStreamDelegate

    func stream(_ stream: SCStream, didStopWithError error: any Error) {
        let msg = String(describing: error)
        self.queue.async {
            self.pendingErrorMessage = msg
            self.logger.error("screen record stream stopped with error: \(msg, privacy: .public)")
        }
    }

    // MARK: - SCStreamOutput

    func stream(
        _ stream: SCStream,
        didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
        of type: SCStreamOutputType)
    {
        guard type == .screen else { return }
        guard CMSampleBufferDataIsReady(sampleBuffer) else { return }
        // Callback runs on `sampleHandlerQueue` (`self.queue`).
        self.handle(sampleBuffer: sampleBuffer)
    }

    // MARK: - Writing

    /// Returns whether `sampleBuffer` carries a rendered frame that can be encoded.
    ///
    /// ScreenCaptureKit also delivers status-only buffers (idle, blank, ...) that have
    /// no pixel data; those must not start the session or be appended.
    private static func isWritableFrame(_ sampleBuffer: CMSampleBuffer) -> Bool {
        guard CMSampleBufferGetImageBuffer(sampleBuffer) != nil else { return false }
        if let attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, createIfNecessary: false)
            as? [[SCStreamFrameInfo: Any]],
            let rawStatus = attachments.first?[SCStreamFrameInfo.status] as? Int,
            let status = SCFrameStatus(rawValue: rawStatus)
        {
            return status == .complete
        }
        // No readable status attachment: the pixel buffer check above is sufficient.
        return true
    }

    /// Records the first write failure and logs it once. Must run on `queue`.
    private func fail(_ msg: String) {
        self.pendingErrorMessage = msg
        self.logger.error("screen record aborting due to prior error: \(msg, privacy: .public)")
    }

    /// Cancels an in-progress writer exactly once. Must run on `queue`.
    private func abandonWriting() {
        guard self.started, !self.didFinish else { return }
        self.didFinish = true
        // Per AVFoundation, cancelling removes any file the writer created and is
        // a no-op when the writer has already failed.
        self.writer.cancelWriting()
    }

    /// Appends one frame, lazily starting the writer session at the first frame's timestamp.
    private func handle(sampleBuffer: CMSampleBuffer) {
        // After a failure or after finishing, further frames are dropped silently;
        // the failure itself was already logged when it was recorded.
        guard self.pendingErrorMessage == nil, !self.didFinish else { return }
        guard Self.isWritableFrame(sampleBuffer) else { return }

        if !self.started {
            guard self.writer.startWriting() else {
                self.fail(self.writer.error?.localizedDescription ?? "Failed to start writer")
                return
            }
            let pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
            self.writer.startSession(atSourceTime: pts)
            self.started = true
        }

        // Real-time input: drop the frame rather than block when the encoder is busy.
        guard self.input.isReadyForMoreMediaData else { return }

        if self.input.append(sampleBuffer) {
            self.sawFrame = true
        } else {
            self.fail(self.writer.error?.localizedDescription ?? "Failed to append video frame")
        }
    }

    /// Finalises the output file.
    ///
    /// - Throws: `writeFailed` when the stream or writer reported an error,
    ///   `noFramesCaptured` when no frame was written. In both cases an in-progress
    ///   writer is cancelled first.
    func finish() async throws {
        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
            self.queue.async {
                if let msg = self.pendingErrorMessage {
                    self.abandonWriting()
                    cont.resume(throwing: ScreenRecordService.ScreenRecordError.writeFailed(msg))
                    return
                }
                guard self.started, self.sawFrame else {
                    self.abandonWriting()
                    cont.resume(throwing: ScreenRecordService.ScreenRecordError.noFramesCaptured)
                    return
                }
                if self.didFinish {
                    cont.resume()
                    return
                }
                self.didFinish = true
                self.input.markAsFinished()
                self.writer.finishWriting {
                    if let err = self.writer.error {
                        cont.resume(throwing: ScreenRecordService.ScreenRecordError.writeFailed(err.localizedDescription))
                    } else if self.writer.status != .completed {
                        cont.resume(throwing: ScreenRecordService.ScreenRecordError.writeFailed("Failed to finalize video"))
                    } else {
                        cont.resume()
                    }
                }
            }
        }
    }
}

View File

@@ -96,11 +96,62 @@ struct ClawdisCLI {
case "camera":
return try self.parseCamera(args: &args)
case "screen":
return try self.parseScreen(args: &args)
default:
throw CLIError.help
}
}
/// Parses a human-friendly duration argument into whole milliseconds.
///
/// Accepts a non-negative decimal number with an optional `ms`, `s` or `m` suffix
/// (case-insensitive, surrounding whitespace ignored). A bare number is taken as
/// milliseconds. The result is rounded to the nearest millisecond.
///
/// - Parameter raw: The raw argument, or `nil` when the flag had no value.
/// - Returns: The duration in milliseconds, or `nil` when `raw` is `nil` or blank.
/// - Throws: An `NSError` (domain `ClawdisCLI`, code 3) when the text is not a valid
///   duration or the value does not fit in an `Int`.
private static func parseDurationMsArg(_ raw: String?) throws -> Int? {
    guard let raw else { return nil }
    let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    guard !trimmed.isEmpty else { return nil }

    let invalid = NSError(domain: "ClawdisCLI", code: 3, userInfo: [
        NSLocalizedDescriptionKey: "invalid duration: \(raw) (expected 1000, 10s, 1m)",
    ])

    let regex = try NSRegularExpression(pattern: "^(\\d+(?:\\.\\d+)?)(ms|s|m)?$")
    let fullRange = NSRange(trimmed.startIndex..<trimmed.endIndex, in: trimmed)
    guard let match = regex.firstMatch(in: trimmed, range: fullRange),
          let valueRange = Range(match.range(at: 1), in: trimmed),
          let value = Double(trimmed[valueRange]),
          value.isFinite, value >= 0
    else {
        throw invalid
    }

    // The unit group is optional; an absent group yields no range and means "ms".
    let unit = Range(match.range(at: 2), in: trimmed).map { String(trimmed[$0]) } ?? "ms"
    let multiplier: Double = switch unit {
    case "s": 1000
    case "m": 60_000
    default: 1
    }

    // `Int(_: Double)` traps when the value is out of range; the regex allows
    // arbitrarily many digits, so convert with the failable initializer instead.
    guard let ms = Int(exactly: (value * multiplier).rounded()), ms >= 0 else {
        throw invalid
    }
    return ms
}
private static func parseNotify(args: inout [String]) throws -> ParsedCLIRequest {
var title: String?
var body: String?
@@ -392,6 +443,8 @@ struct ClawdisCLI {
switch arg {
case "--facing":
if let val = args.popFirst(), let f = CameraFacing(rawValue: val) { facing = f }
case "--duration":
durationMs = try self.parseDurationMsArg(args.popFirst())
case "--duration-ms":
durationMs = args.popFirst().flatMap(Int.init)
case "--no-audio":
@@ -415,6 +468,40 @@ struct ClawdisCLI {
}
}
/// Parses the arguments following `screen`.
///
/// Only the `record` subcommand is recognised; anything else (or a missing
/// subcommand) raises `CLIError.help`. Unknown flags are ignored, and flag values
/// that fail to parse leave the corresponding option unset.
private static func parseScreen(args: inout [String]) throws -> ParsedCLIRequest {
    guard let subcommand = args.popFirst(), subcommand == "record" else {
        throw CLIError.help
    }

    var screenIndex: Int?
    var durationMs: Int?
    var fps: Double?
    var outPath: String?

    while let flag = args.popFirst() {
        if flag == "--screen" {
            screenIndex = args.popFirst().flatMap { Int($0) }
        } else if flag == "--duration" {
            durationMs = try self.parseDurationMsArg(args.popFirst())
        } else if flag == "--duration-ms" {
            durationMs = args.popFirst().flatMap { Int($0) }
        } else if flag == "--fps" {
            fps = args.popFirst().flatMap { Double($0) }
        } else if flag == "--out" {
            outPath = args.popFirst()
        }
        // Any other token is skipped.
    }

    let request = Request.screenRecord(
        screenIndex: screenIndex,
        durationMs: durationMs,
        fps: fps,
        outPath: outPath)
    return ParsedCLIRequest(request: request, kind: .mediaPath)
}
private static func parseCanvasPlacement(
args: inout [String],
session: inout String,
@@ -674,7 +761,12 @@ struct ClawdisCLI {
Camera:
clawdis-mac camera snap [--facing <front|back>] [--max-width <px>] [--quality <0-1>] [--out <path>]
clawdis-mac camera clip [--facing <front|back>] [--duration-ms <ms>] [--no-audio] [--out <path>]
clawdis-mac camera clip [--facing <front|back>]
[--duration <ms|10s|1m>|--duration-ms <ms>] [--no-audio] [--out <path>]
Screen:
clawdis-mac screen record [--screen <index>]
[--duration <ms|10s|1m>|--duration-ms <ms>] [--fps <n>] [--out <path>]
Browser (clawd):
clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot|eval|query|dom|snapshot
@@ -703,7 +795,7 @@ struct ClawdisCLI {
Output:
Default output is text. Use --json for machine-readable output.
In text mode, `browser screenshot` prints MEDIA:<path>.
In text mode, `camera snap` and `camera clip` print MEDIA:<path>.
In text mode, `camera snap`, `camera clip`, and `screen record` print MEDIA:<path>.
"""
print(usage)
}
@@ -904,10 +996,16 @@ struct ClawdisCLI {
switch request {
case let .runShell(_, _, _, timeoutSec, _):
// Allow longer for commands; still cap overall to a sane bound.
min(300, max(10, (timeoutSec ?? 10) + 2))
return min(300, max(10, (timeoutSec ?? 10) + 2))
case let .cameraClip(_, durationMs, _, _):
let ms = durationMs ?? 3000
return min(180, max(10, TimeInterval(ms) / 1000.0 + 10))
case let .screenRecord(_, durationMs, _, _):
let ms = durationMs ?? 10_000
return min(180, max(10, TimeInterval(ms) / 1000.0 + 10))
default:
// Fail-fast so callers (incl. SSH tool calls) don't hang forever.
10
return 10
}
}

View File

@@ -132,6 +132,7 @@ public enum Request: Sendable {
case nodeInvoke(nodeId: String, command: String, paramsJSON: String?)
case cameraSnap(facing: CameraFacing?, maxWidth: Int?, quality: Double?, outPath: String?)
case cameraClip(facing: CameraFacing?, durationMs: Int?, includeAudio: Bool, outPath: String?)
case screenRecord(screenIndex: Int?, durationMs: Int?, fps: Double?, outPath: String?)
}
// MARK: - Responses
@@ -162,6 +163,8 @@ extension Request: Codable {
case path
case javaScript
case outPath
case screenIndex
case fps
case canvasA2UICommand
case jsonl
case facing
@@ -192,6 +195,7 @@ extension Request: Codable {
case nodeInvoke
case cameraSnap
case cameraClip
case screenRecord
}
public func encode(to encoder: Encoder) throws {
@@ -284,6 +288,13 @@ extension Request: Codable {
try container.encodeIfPresent(durationMs, forKey: .durationMs)
try container.encode(includeAudio, forKey: .includeAudio)
try container.encodeIfPresent(outPath, forKey: .outPath)
case let .screenRecord(screenIndex, durationMs, fps, outPath):
try container.encode(Kind.screenRecord, forKey: .type)
try container.encodeIfPresent(screenIndex, forKey: .screenIndex)
try container.encodeIfPresent(durationMs, forKey: .durationMs)
try container.encodeIfPresent(fps, forKey: .fps)
try container.encodeIfPresent(outPath, forKey: .outPath)
}
}
@@ -378,6 +389,13 @@ extension Request: Codable {
let includeAudio = (try? container.decode(Bool.self, forKey: .includeAudio)) ?? true
let outPath = try container.decodeIfPresent(String.self, forKey: .outPath)
self = .cameraClip(facing: facing, durationMs: durationMs, includeAudio: includeAudio, outPath: outPath)
case .screenRecord:
let screenIndex = try container.decodeIfPresent(Int.self, forKey: .screenIndex)
let durationMs = try container.decodeIfPresent(Int.self, forKey: .durationMs)
let fps = try container.decodeIfPresent(Double.self, forKey: .fps)
let outPath = try container.decodeIfPresent(String.self, forKey: .outPath)
self = .screenRecord(screenIndex: screenIndex, durationMs: durationMs, fps: fps, outPath: outPath)
}
}
}