macOS: add screen record + safer camera defaults
This commit is contained in:
@@ -206,7 +206,7 @@ actor CameraCaptureService {
|
||||
|
||||
/// Clamps a requested clip duration to the supported range.
///
/// - Parameter ms: Requested duration in milliseconds; `nil` selects the 3000 ms default.
/// - Returns: The duration limited to 250...60_000 ms.
private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
    let v = ms ?? 3000
    // Single return: the stale 15000 ms cap used to shadow the 60_000 ms cap below it.
    return min(60_000, max(250, v))
}
|
||||
|
||||
private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws {
|
||||
|
||||
@@ -5,6 +5,7 @@ import OSLog
|
||||
|
||||
enum ControlRequestHandler {
|
||||
private static let cameraCapture = CameraCaptureService()
|
||||
@MainActor private static let screenRecorder = ScreenRecordService()
|
||||
|
||||
struct NodeListNode: Codable {
|
||||
var nodeId: String
|
||||
@@ -133,6 +134,13 @@ enum ControlRequestHandler {
|
||||
durationMs: durationMs,
|
||||
includeAudio: includeAudio,
|
||||
outPath: outPath)
|
||||
|
||||
case let .screenRecord(screenIndex, durationMs, fps, outPath):
|
||||
return await self.handleScreenRecord(
|
||||
screenIndex: screenIndex,
|
||||
durationMs: durationMs,
|
||||
fps: fps,
|
||||
outPath: outPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,7 +233,7 @@ enum ControlRequestHandler {
|
||||
}
|
||||
|
||||
/// Reports whether camera capture has been switched on in user defaults.
///
/// Camera access is opt-in: when no Bool is stored under `cameraEnabledKey` the result is `false`.
private static func cameraEnabled() -> Bool {
    // Only the opt-in fallback is kept; the conflicting `?? true` expression is removed.
    UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false
}
|
||||
|
||||
private static func handleCanvasShow(
|
||||
@@ -534,4 +542,28 @@ enum ControlRequestHandler {
|
||||
return Response(ok: false, message: error.localizedDescription)
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles a screen-record control request.
///
/// Checks the screen-recording permission non-interactively, then runs the recording on the
/// main actor and reports the resulting file path.
///
/// - Parameters:
///   - screenIndex: Index of the display to record, forwarded to the recorder.
///   - durationMs: Requested duration in milliseconds, forwarded to the recorder.
///   - fps: Requested frame rate, forwarded to the recorder.
///   - outPath: Optional output path, forwarded to the recorder.
/// - Returns: `ok: true` with the recorded file path as the message, or `ok: false` with an
///   error message when the permission is missing or recording fails.
private static func handleScreenRecord(
    screenIndex: Int?,
    durationMs: Int?,
    fps: Double?,
    outPath: String?) async -> Response
{
    let permissions = await PermissionManager.ensure([.screenRecording], interactive: false)
    let granted = permissions[.screenRecording] ?? false
    if !granted {
        return Response(ok: false, message: "screen recording permission missing")
    }

    // The recorder is main-actor isolated, so the recording runs in a main-actor task.
    let recording = Task { @MainActor in
        try await self.screenRecorder.record(
            screenIndex: screenIndex,
            durationMs: durationMs,
            fps: fps,
            outPath: outPath)
    }

    do {
        let savedPath = try await recording.value
        return Response(ok: true, message: savedPath)
    } catch {
        return Response(ok: false, message: error.localizedDescription)
    }
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ import SwiftUI
|
||||
|
||||
struct GeneralSettings: View {
|
||||
@Bindable var state: AppState
|
||||
@AppStorage(cameraEnabledKey) private var cameraEnabled: Bool = true
|
||||
@AppStorage(cameraEnabledKey) private var cameraEnabled: Bool = false
|
||||
private let healthStore = HealthStore.shared
|
||||
private let gatewayManager = GatewayProcessManager.shared
|
||||
// swiftlint:disable:next inclusive_language
|
||||
|
||||
209
apps/macos/Sources/Clawdis/ScreenRecordService.swift
Normal file
209
apps/macos/Sources/Clawdis/ScreenRecordService.swift
Normal file
@@ -0,0 +1,209 @@
|
||||
import AVFoundation
|
||||
import Foundation
|
||||
import OSLog
|
||||
@preconcurrency import ScreenCaptureKit
|
||||
|
||||
/// Records a single display to an MP4 file using ScreenCaptureKit.
@MainActor
final class ScreenRecordService {
    /// Failures reported by `record(screenIndex:durationMs:fps:outPath:)` and its writer.
    enum ScreenRecordError: LocalizedError {
        case noDisplays
        case invalidScreenIndex(Int)
        case noFramesCaptured
        case writeFailed(String)

        var errorDescription: String? {
            switch self {
            case .noDisplays:
                "No displays available for screen recording"
            case let .invalidScreenIndex(idx):
                "Invalid screen index \(idx)"
            case .noFramesCaptured:
                "No frames captured"
            case let .writeFailed(msg):
                msg
            }
        }
    }

    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "screenRecord")

    /// Records one display for a fixed duration and returns the path of the written file.
    ///
    /// - Parameters:
    ///   - screenIndex: Index into the shareable displays sorted by display ID; `nil` means 0.
    ///   - durationMs: Recording length in milliseconds; `nil` means 10_000. Clamped to 250...60_000.
    ///   - fps: Target frame rate; `nil` or a non-finite value means 10. Clamped to 1...60.
    ///   - outPath: Destination file path. When `nil` or blank, a uniquely named file in the
    ///     temporary directory is used. An existing file at the destination is replaced.
    /// - Returns: The path of the recorded file.
    /// - Throws: `ScreenRecordError` for missing displays, an out-of-range index, or writer
    ///   failures, plus any error thrown by ScreenCaptureKit or by the sleep being cancelled.
    func record(
        screenIndex: Int?,
        durationMs: Int?,
        fps: Double?,
        outPath: String?) async throws -> String
    {
        let durationMs = Self.clampDurationMs(durationMs)
        let fps = Self.clampFps(fps)

        let outURL: URL = {
            if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                return URL(fileURLWithPath: outPath)
            }
            return FileManager.default.temporaryDirectory
                .appendingPathComponent("clawdis-screen-record-\(UUID().uuidString).mp4")
        }()

        // Validate the request before touching the filesystem, so a bad screen index
        // cannot delete a file that already exists at the output path.
        let content = try await SCShareableContent.current
        let displays = content.displays.sorted { $0.displayID < $1.displayID }
        guard !displays.isEmpty else { throw ScreenRecordError.noDisplays }

        let idx = screenIndex ?? 0
        guard displays.indices.contains(idx) else { throw ScreenRecordError.invalidScreenIndex(idx) }
        let display = displays[idx]

        let filter = SCContentFilter(display: display, excludingWindows: [])
        let config = SCStreamConfiguration()
        config.width = display.width
        config.height = display.height
        config.queueDepth = 8
        config.showsCursor = true
        // `fps` is already clamped to 1...60, so the Int32 conversion cannot overflow.
        config.minimumFrameInterval = CMTime(value: 1, timescale: CMTimeScale(max(1, Int32(fps.rounded()))))

        // Clear any previous file at the destination before the writer is created.
        try? FileManager.default.removeItem(at: outURL)

        do {
            let recorder = try StreamRecorder(
                outputURL: outURL,
                width: display.width,
                height: display.height,
                logger: self.logger)

            let stream = SCStream(filter: filter, configuration: config, delegate: recorder)
            try stream.addStreamOutput(recorder, type: .screen, sampleHandlerQueue: recorder.queue)

            self.logger.info(
                "screen record start idx=\(idx) durationMs=\(durationMs) fps=\(fps) out=\(outURL.path, privacy: .public)")

            var started = false
            do {
                try await stream.startCapture()
                started = true
                try await Task.sleep(nanoseconds: UInt64(durationMs) * 1_000_000)
                try await stream.stopCapture()
            } catch {
                // Best-effort stop so a running capture does not outlive a failed recording.
                if started { try? await stream.stopCapture() }
                throw error
            }

            try await recorder.finish()
        } catch {
            // The file was never finalized; remove it so no partial output is left behind.
            try? FileManager.default.removeItem(at: outURL)
            throw error
        }

        return outURL.path
    }

    /// Clamps the requested duration to 250...60_000 ms, defaulting to 10_000 ms.
    private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
        let v = ms ?? 10_000
        return min(60_000, max(250, v))
    }

    /// Clamps the requested frame rate to 1...60, defaulting to 10 for `nil` or non-finite input.
    private nonisolated static func clampFps(_ fps: Double?) -> Double {
        let v = fps ?? 10
        if !v.isFinite { return 10 }
        return min(60, max(1, v))
    }
}
|
||||
|
||||
/// Receives ScreenCaptureKit frames and encodes them into an H.264 MP4 with `AVAssetWriter`.
///
/// Mutable state is only touched on `queue`: sample buffers are handled in the stream output
/// callback (expected to run on `queue`), and both `finish()` and the stop delegate hop onto
/// `queue` before reading or writing state. The `@unchecked Sendable` conformance relies on that.
private final class StreamRecorder: NSObject, SCStreamOutput, SCStreamDelegate, @unchecked Sendable {
    /// Serial queue intended to be passed as the stream's sample handler queue.
    let queue = DispatchQueue(label: "com.steipete.clawdis.screenRecord.writer")

    private let logger: Logger
    private let writer: AVAssetWriter
    private let input: AVAssetWriterInput

    /// True once the writer session has been started with the first frame's timestamp.
    private var started = false
    /// True once at least one frame has actually been appended to the writer input.
    private var sawFrame = false
    /// True once the writer has been finalized or cancelled; later frames are ignored.
    private var didFinish = false
    /// First error observed from the stream or the writer, reported by `finish()`.
    private var pendingErrorMessage: String?

    /// Creates the writer and its single real-time video input.
    ///
    /// - Parameters:
    ///   - outputURL: Destination of the MP4 file.
    ///   - width: Encoded video width.
    ///   - height: Encoded video height.
    ///   - logger: Logger used for stream and writer errors.
    /// - Throws: Errors from `AVAssetWriter` creation, or `writeFailed` when the input cannot be added.
    init(outputURL: URL, width: Int, height: Int, logger: Logger) throws {
        self.logger = logger
        self.writer = try AVAssetWriter(outputURL: outputURL, fileType: .mp4)

        let settings: [String: Any] = [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey: width,
            AVVideoHeightKey: height,
        ]
        self.input = AVAssetWriterInput(mediaType: .video, outputSettings: settings)
        self.input.expectsMediaDataInRealTime = true

        guard self.writer.canAdd(self.input) else {
            throw ScreenRecordService.ScreenRecordError.writeFailed("Cannot add video input")
        }
        self.writer.add(self.input)
        super.init()
    }

    /// Records the stream's stop error so `finish()` reports it.
    func stream(_ stream: SCStream, didStopWithError error: any Error) {
        self.queue.async {
            let msg = String(describing: error)
            self.pendingErrorMessage = msg
            self.logger.error("screen record stream stopped with error: \(msg, privacy: .public)")
            _ = stream
        }
    }

    /// Forwards ready screen sample buffers to the writer.
    func stream(
        _ stream: SCStream,
        didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
        of type: SCStreamOutputType)
    {
        guard type == .screen else { return }
        guard CMSampleBufferDataIsReady(sampleBuffer) else { return }
        // Callback runs on `sampleHandlerQueue` (`self.queue`).
        self.handle(sampleBuffer: sampleBuffer)
        _ = stream
    }

    /// Returns whether a buffer carries encodable frame content.
    ///
    /// Buffers without pixel data are rejected. When the frame-status attachment is readable,
    /// only `.complete` frames are accepted; otherwise the pixel-data check alone decides.
    private static func isCompleteFrame(_ sampleBuffer: CMSampleBuffer) -> Bool {
        guard CMSampleBufferGetImageBuffer(sampleBuffer) != nil else { return false }
        guard let attachments = CMSampleBufferGetSampleAttachmentsArray(
            sampleBuffer,
            createIfNecessary: false) as? [[SCStreamFrameInfo: Any]],
            let rawStatus = attachments.first?[SCStreamFrameInfo.status] as? Int,
            let status = SCFrameStatus(rawValue: rawStatus)
        else {
            return true
        }
        return status == .complete
    }

    /// Starts the writer session on the first usable frame and appends frames to the input.
    private func handle(sampleBuffer: CMSampleBuffer) {
        if let msg = self.pendingErrorMessage {
            self.logger.error("screen record aborting due to prior error: \(msg, privacy: .public)")
            return
        }
        if self.didFinish { return }
        // Skip buffers that carry no new image content instead of handing them to the writer.
        guard Self.isCompleteFrame(sampleBuffer) else { return }

        if !self.started {
            guard self.writer.startWriting() else {
                self.pendingErrorMessage = self.writer.error?.localizedDescription ?? "Failed to start writer"
                return
            }
            let pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
            self.writer.startSession(atSourceTime: pts)
            self.started = true
        }

        // Real-time input: drop the frame when the encoder is not ready for more data.
        guard self.input.isReadyForMoreMediaData else { return }
        if self.input.append(sampleBuffer) {
            self.sawFrame = true
        } else {
            // Surface append failures instead of silently producing a broken file.
            self.pendingErrorMessage = self.writer.error?.localizedDescription ?? "Failed to append video frame"
        }
    }

    /// Stops accepting frames and cancels an in-progress writer so no partial file is kept.
    /// Must be called on `queue`.
    private func discardPartialOutput() {
        self.didFinish = true
        if self.writer.status == .writing {
            self.writer.cancelWriting()
        }
    }

    /// Finalizes the file.
    ///
    /// - Throws: `writeFailed` when the stream or writer reported an error or finalization
    ///   fails, and `noFramesCaptured` when no frame was appended. In both of those early
    ///   failure cases an in-progress writer is cancelled first.
    func finish() async throws {
        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
            self.queue.async {
                if let msg = self.pendingErrorMessage {
                    self.discardPartialOutput()
                    cont.resume(throwing: ScreenRecordService.ScreenRecordError.writeFailed(msg))
                    return
                }
                guard self.started, self.sawFrame else {
                    self.discardPartialOutput()
                    cont.resume(throwing: ScreenRecordService.ScreenRecordError.noFramesCaptured)
                    return
                }
                if self.didFinish {
                    cont.resume()
                    return
                }
                self.didFinish = true

                self.input.markAsFinished()
                self.writer.finishWriting {
                    if let err = self.writer.error {
                        cont.resume(throwing: ScreenRecordService.ScreenRecordError.writeFailed(err.localizedDescription))
                    } else if self.writer.status != .completed {
                        cont.resume(throwing: ScreenRecordService.ScreenRecordError.writeFailed("Failed to finalize video"))
                    } else {
                        cont.resume()
                    }
                }
            }
        }
    }
}
|
||||
|
||||
@@ -96,11 +96,62 @@ struct ClawdisCLI {
|
||||
case "camera":
|
||||
return try self.parseCamera(args: &args)
|
||||
|
||||
case "screen":
|
||||
return try self.parseScreen(args: &args)
|
||||
|
||||
default:
|
||||
throw CLIError.help
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a `--duration` argument into whole milliseconds.
///
/// Accepts a non-negative decimal number with an optional unit suffix: `ms` (default when no
/// suffix is given), `s`, or `m`. Surrounding whitespace and letter case are ignored.
///
/// - Parameter raw: The raw argument text; `nil` or blank yields `nil`.
/// - Returns: The duration rounded to the nearest millisecond, or `nil` when no value was given.
/// - Throws: An `NSError` (domain `ClawdisCLI`, code 3) when the text is not a valid duration
///   or the result does not fit in `Int`.
private static func parseDurationMsArg(_ raw: String?) throws -> Int? {
    guard let raw else { return nil }
    let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    if trimmed.isEmpty { return nil }

    /// Builds the single error used for every rejected input.
    func invalidDuration() -> NSError {
        NSError(domain: "ClawdisCLI", code: 3, userInfo: [
            NSLocalizedDescriptionKey: "invalid duration: \(raw) (expected 1000, 10s, 1m)",
        ])
    }

    let regex = try NSRegularExpression(pattern: "^(\\d+(?:\\.\\d+)?)(ms|s|m)?$")
    let range = NSRange(trimmed.startIndex..<trimmed.endIndex, in: trimmed)
    guard let match = regex.firstMatch(in: trimmed, range: range),
          let valueRange = Range(match.range(at: 1), in: trimmed),
          let value = Double(trimmed[valueRange]),
          value.isFinite, value >= 0
    else {
        throw invalidDuration()
    }

    // A missing unit suffix means the value is already in milliseconds.
    let unit = Range(match.range(at: 2), in: trimmed).map { String(trimmed[$0]) } ?? "ms"
    let multiplier: Double = switch unit {
    case "s": 1000
    case "m": 60_000
    default: 1
    }

    // `Int(_:)` traps on values outside the Int range; `Int(exactly:)` rejects them instead.
    guard let ms = Int(exactly: (value * multiplier).rounded()) else {
        throw invalidDuration()
    }
    return ms
}
|
||||
|
||||
private static func parseNotify(args: inout [String]) throws -> ParsedCLIRequest {
|
||||
var title: String?
|
||||
var body: String?
|
||||
@@ -392,6 +443,8 @@ struct ClawdisCLI {
|
||||
switch arg {
|
||||
case "--facing":
|
||||
if let val = args.popFirst(), let f = CameraFacing(rawValue: val) { facing = f }
|
||||
case "--duration":
|
||||
durationMs = try self.parseDurationMsArg(args.popFirst())
|
||||
case "--duration-ms":
|
||||
durationMs = args.popFirst().flatMap(Int.init)
|
||||
case "--no-audio":
|
||||
@@ -415,6 +468,40 @@ struct ClawdisCLI {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the arguments of the `screen` command.
///
/// Only the `record` subcommand is recognized. Its options are `--screen`, `--duration`,
/// `--duration-ms`, `--fps` and `--out`; unrecognized arguments are skipped, and option
/// values that fail to convert are treated as absent.
///
/// - Parameter args: Remaining command-line arguments; consumed while parsing.
/// - Returns: A screen-record request whose result is reported as a media path.
/// - Throws: `CLIError.help` when the subcommand is missing or unknown, or the error from
///   `parseDurationMsArg` for a malformed `--duration` value.
private static func parseScreen(args: inout [String]) throws -> ParsedCLIRequest {
    guard let subcommand = args.popFirst(), subcommand == "record" else {
        throw CLIError.help
    }

    var screenIndex: Int?
    var durationMs: Int?
    var fps: Double?
    var outPath: String?

    while let option = args.popFirst() {
        switch option {
        case "--screen":
            screenIndex = args.popFirst().flatMap { Int($0) }
        case "--duration":
            durationMs = try self.parseDurationMsArg(args.popFirst())
        case "--duration-ms":
            durationMs = args.popFirst().flatMap { Int($0) }
        case "--fps":
            fps = args.popFirst().flatMap { Double($0) }
        case "--out":
            outPath = args.popFirst()
        default:
            continue
        }
    }

    let request = Request.screenRecord(
        screenIndex: screenIndex,
        durationMs: durationMs,
        fps: fps,
        outPath: outPath)
    return ParsedCLIRequest(request: request, kind: .mediaPath)
}
|
||||
|
||||
private static func parseCanvasPlacement(
|
||||
args: inout [String],
|
||||
session: inout String,
|
||||
@@ -674,7 +761,12 @@ struct ClawdisCLI {
|
||||
|
||||
Camera:
|
||||
clawdis-mac camera snap [--facing <front|back>] [--max-width <px>] [--quality <0-1>] [--out <path>]
|
||||
clawdis-mac camera clip [--facing <front|back>] [--duration-ms <ms>] [--no-audio] [--out <path>]
|
||||
clawdis-mac camera clip [--facing <front|back>]
|
||||
[--duration <ms|10s|1m>|--duration-ms <ms>] [--no-audio] [--out <path>]
|
||||
|
||||
Screen:
|
||||
clawdis-mac screen record [--screen <index>]
|
||||
[--duration <ms|10s|1m>|--duration-ms <ms>] [--fps <n>] [--out <path>]
|
||||
|
||||
Browser (clawd):
|
||||
clawdis-mac browser status|start|stop|tabs|open|focus|close|screenshot|eval|query|dom|snapshot
|
||||
@@ -703,7 +795,7 @@ struct ClawdisCLI {
|
||||
Output:
|
||||
Default output is text. Use --json for machine-readable output.
|
||||
In text mode, `browser screenshot` prints MEDIA:<path>.
|
||||
In text mode, `camera snap` and `camera clip` print MEDIA:<path>.
|
||||
In text mode, `camera snap`, `camera clip`, and `screen record` print MEDIA:<path>.
|
||||
"""
|
||||
print(usage)
|
||||
}
|
||||
@@ -904,10 +996,16 @@ struct ClawdisCLI {
|
||||
switch request {
|
||||
case let .runShell(_, _, _, timeoutSec, _):
|
||||
// Allow longer for commands; still cap overall to a sane bound.
|
||||
min(300, max(10, (timeoutSec ?? 10) + 2))
|
||||
return min(300, max(10, (timeoutSec ?? 10) + 2))
|
||||
case let .cameraClip(_, durationMs, _, _):
|
||||
let ms = durationMs ?? 3000
|
||||
return min(180, max(10, TimeInterval(ms) / 1000.0 + 10))
|
||||
case let .screenRecord(_, durationMs, _, _):
|
||||
let ms = durationMs ?? 10_000
|
||||
return min(180, max(10, TimeInterval(ms) / 1000.0 + 10))
|
||||
default:
|
||||
// Fail-fast so callers (incl. SSH tool calls) don't hang forever.
|
||||
10
|
||||
return 10
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -132,6 +132,7 @@ public enum Request: Sendable {
|
||||
case nodeInvoke(nodeId: String, command: String, paramsJSON: String?)
|
||||
case cameraSnap(facing: CameraFacing?, maxWidth: Int?, quality: Double?, outPath: String?)
|
||||
case cameraClip(facing: CameraFacing?, durationMs: Int?, includeAudio: Bool, outPath: String?)
|
||||
case screenRecord(screenIndex: Int?, durationMs: Int?, fps: Double?, outPath: String?)
|
||||
}
|
||||
|
||||
// MARK: - Responses
|
||||
@@ -162,6 +163,8 @@ extension Request: Codable {
|
||||
case path
|
||||
case javaScript
|
||||
case outPath
|
||||
case screenIndex
|
||||
case fps
|
||||
case canvasA2UICommand
|
||||
case jsonl
|
||||
case facing
|
||||
@@ -192,6 +195,7 @@ extension Request: Codable {
|
||||
case nodeInvoke
|
||||
case cameraSnap
|
||||
case cameraClip
|
||||
case screenRecord
|
||||
}
|
||||
|
||||
public func encode(to encoder: Encoder) throws {
|
||||
@@ -284,6 +288,13 @@ extension Request: Codable {
|
||||
try container.encodeIfPresent(durationMs, forKey: .durationMs)
|
||||
try container.encode(includeAudio, forKey: .includeAudio)
|
||||
try container.encodeIfPresent(outPath, forKey: .outPath)
|
||||
|
||||
case let .screenRecord(screenIndex, durationMs, fps, outPath):
|
||||
try container.encode(Kind.screenRecord, forKey: .type)
|
||||
try container.encodeIfPresent(screenIndex, forKey: .screenIndex)
|
||||
try container.encodeIfPresent(durationMs, forKey: .durationMs)
|
||||
try container.encodeIfPresent(fps, forKey: .fps)
|
||||
try container.encodeIfPresent(outPath, forKey: .outPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -378,6 +389,13 @@ extension Request: Codable {
|
||||
let includeAudio = (try? container.decode(Bool.self, forKey: .includeAudio)) ?? true
|
||||
let outPath = try container.decodeIfPresent(String.self, forKey: .outPath)
|
||||
self = .cameraClip(facing: facing, durationMs: durationMs, includeAudio: includeAudio, outPath: outPath)
|
||||
|
||||
case .screenRecord:
|
||||
let screenIndex = try container.decodeIfPresent(Int.self, forKey: .screenIndex)
|
||||
let durationMs = try container.decodeIfPresent(Int.self, forKey: .durationMs)
|
||||
let fps = try container.decodeIfPresent(Double.self, forKey: .fps)
|
||||
let outPath = try container.decodeIfPresent(String.self, forKey: .outPath)
|
||||
self = .screenRecord(screenIndex: screenIndex, durationMs: durationMs, fps: fps, outPath: outPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user