feat: add node screen recording across apps

This commit is contained in:
Peter Steinberger
2025-12-19 02:56:48 +01:00
parent b8012a2281
commit 7f3be083c1
20 changed files with 837 additions and 22 deletions

View File

@@ -156,7 +156,7 @@ final class BridgeConnectionController {
}
private func currentCaps() -> [String] {
var caps = [ClawdisCapability.canvas.rawValue]
var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
// Default-on: if the key doesn't exist yet, treat it as enabled.
let cameraEnabled =
@@ -181,6 +181,7 @@ final class BridgeConnectionController {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())

View File

@@ -17,6 +17,7 @@ final class NodeAppModel {
var isBackgrounded: Bool = false
let screen = ScreenController()
let camera = CameraController()
private let screenRecorder = ScreenRecordService()
var bridgeStatusText: String = "Offline"
var bridgeServerName: String?
var bridgeRemoteAddress: String?
@@ -364,13 +365,15 @@ final class NodeAppModel {
private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse {
let command = req.command
if command.hasPrefix("canvas.") || command.hasPrefix("camera."), self.isBackgrounded {
if (command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.")),
self.isBackgrounded
{
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .backgroundUnavailable,
message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground"))
message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground"))
}
if command.hasPrefix("camera."), !self.isCameraEnabled() {
@@ -524,6 +527,36 @@ final class NodeAppModel {
self.showCameraHUD(text: "Clip captured", kind: .success, autoHideSeconds: 1.8)
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisScreenCommand.record.rawValue:
let params = (try? Self.decodeParams(ClawdisScreenRecordParams.self, from: req.paramsJSON)) ??
ClawdisScreenRecordParams()
if let format = params.format, format.lowercased() != "mp4" {
throw NSError(domain: "Screen", code: 30, userInfo: [
NSLocalizedDescriptionKey: "INVALID_REQUEST: screen format must be mp4",
])
}
let path = try await self.screenRecorder.record(
screenIndex: params.screenIndex,
durationMs: params.durationMs,
fps: params.fps,
outPath: nil)
defer { try? FileManager.default.removeItem(atPath: path) }
let data = try Data(contentsOf: URL(fileURLWithPath: path))
struct Payload: Codable {
var format: String
var base64: String
var durationMs: Int?
var fps: Double?
var screenIndex: Int?
}
let payload = try Self.encodePayload(Payload(
format: "mp4",
base64: data.base64EncodedString(),
durationMs: params.durationMs,
fps: params.fps,
screenIndex: params.screenIndex))
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
default:
return BridgeInvokeResponse(
id: req.id,

View File

@@ -0,0 +1,205 @@
import AVFoundation
import UIKit
/// Records the app's own window to an H.264 MP4 file.
///
/// Frames are produced by repeatedly snapshotting a `UIWindow` view hierarchy and appending the
/// resulting pixel buffers to an `AVAssetWriter` with synthesized presentation timestamps.
@MainActor
final class ScreenRecordService {
    /// Failures surfaced by `record(screenIndex:durationMs:fps:outPath:)`.
    enum ScreenRecordError: LocalizedError {
        /// No `UIWindow` could be found in any connected scene.
        case noWindow
        /// A screen index other than `0` was requested.
        case invalidScreenIndex(Int)
        /// A frame could not be snapshotted or rendered into a pixel buffer.
        case captureFailed(String)
        /// The asset writer could not be configured, fed, or finalized.
        case writeFailed(String)

        var errorDescription: String? {
            switch self {
            case .noWindow:
                return "Screen capture unavailable"
            case let .invalidScreenIndex(idx):
                return "Invalid screen index \(idx)"
            case let .captureFailed(msg):
                return msg
            case let .writeFailed(msg):
                return msg
            }
        }
    }

    /// Captures the resolved window for a fixed duration and writes it to an MP4 file.
    ///
    /// - Parameters:
    ///   - screenIndex: Only `nil` or `0` is accepted.
    ///   - durationMs: Requested length in milliseconds; defaults to 10 000 and is clamped to
    ///     250...60 000.
    ///   - fps: Requested frame rate; defaults to 10, is clamped to 1...30 and rounded to a whole
    ///     number.
    ///   - outPath: Destination file path. When `nil` or blank, a uniquely named file in the
    ///     temporary directory is used. Any existing file at the destination is removed first.
    /// - Returns: The file-system path of the finished recording.
    /// - Throws: `ScreenRecordError`, an `AVAssetWriter` initialization error, or
    ///   `CancellationError` if the task is cancelled while sleeping between frames. On any
    ///   failure the writer is cancelled and the partial output file is deleted.
    func record(
        screenIndex: Int?,
        durationMs: Int?,
        fps: Double?,
        outPath: String?) async throws -> String
    {
        let durationMs = Self.clampDurationMs(durationMs)
        let fps = Self.clampFps(fps)
        let fpsInt = Int32(fps.rounded())
        let fpsValue = Double(fpsInt)

        let outURL: URL = {
            if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                return URL(fileURLWithPath: outPath)
            }
            return FileManager.default.temporaryDirectory
                .appendingPathComponent("clawdis-screen-record-\(UUID().uuidString).mp4")
        }()
        // AVAssetWriter refuses to overwrite, so clear any stale file at the destination.
        try? FileManager.default.removeItem(at: outURL)

        if let idx = screenIndex, idx != 0 {
            throw ScreenRecordError.invalidScreenIndex(idx)
        }

        guard let window = Self.resolveKeyWindow() else {
            throw ScreenRecordError.noWindow
        }

        // Window bounds are in points; the encoder and pixel buffers work in pixels.
        let size = window.bounds.size
        let scale = window.screen.scale
        let widthPx = max(1, Int(size.width * scale))
        let heightPx = max(1, Int(size.height * scale))

        let writer = try AVAssetWriter(outputURL: outURL, fileType: .mp4)

        // Until the recording is finalized, every exit path must cancel the writer and remove
        // whatever was written so far; otherwise failed recordings leak partial files.
        var didFinish = false
        defer {
            if !didFinish {
                if writer.status == .writing {
                    writer.cancelWriting()
                }
                try? FileManager.default.removeItem(at: outURL)
            }
        }

        let settings: [String: Any] = [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey: widthPx,
            AVVideoHeightKey: heightPx,
        ]
        let input = AVAssetWriterInput(mediaType: .video, outputSettings: settings)
        input.expectsMediaDataInRealTime = false

        let attrs: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA,
            kCVPixelBufferWidthKey as String: widthPx,
            kCVPixelBufferHeightKey as String: heightPx,
            kCVPixelBufferCGImageCompatibilityKey as String: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey as String: true,
        ]
        let adaptor = AVAssetWriterInputPixelBufferAdaptor(
            assetWriterInput: input,
            sourcePixelBufferAttributes: attrs)

        guard writer.canAdd(input) else {
            throw ScreenRecordError.writeFailed("Cannot add video input")
        }
        writer.add(input)

        guard writer.startWriting() else {
            throw ScreenRecordError.writeFailed(writer.error?.localizedDescription ?? "Failed to start writer")
        }
        writer.startSession(atSourceTime: .zero)

        let frameCount = max(1, Int((Double(durationMs) / 1000.0 * fpsValue).rounded(.up)))
        let frameDuration = CMTime(value: 1, timescale: fpsInt)
        let frameSleepNs = UInt64(1_000_000_000.0 / fpsValue)

        for frame in 0..<frameCount {
            while !input.isReadyForMoreMediaData {
                // A writer that has left the `.writing` state will never become ready again;
                // bail out instead of polling forever.
                if writer.status != .writing {
                    throw ScreenRecordError.writeFailed(
                        writer.error?.localizedDescription ?? "Writer stopped unexpectedly")
                }
                try await Task.sleep(nanoseconds: 10_000_000)
            }

            // Drain per-frame temporaries (snapshot image, pixel buffer) each iteration.
            try autoreleasepool {
                guard let image = Self.captureImage(window: window, size: size) else {
                    throw ScreenRecordError.captureFailed("Failed to capture frame")
                }
                guard let buffer = Self.pixelBuffer(from: image, width: widthPx, height: heightPx) else {
                    throw ScreenRecordError.captureFailed("Failed to render frame")
                }
                // Timestamps are synthesized from the frame index, not from the wall clock.
                let time = CMTimeMultiply(frameDuration, multiplier: Int32(frame))
                if !adaptor.append(buffer, withPresentationTime: time) {
                    throw ScreenRecordError.writeFailed("Failed to append frame")
                }
            }

            if frame < frameCount - 1 {
                try await Task.sleep(nanoseconds: frameSleepNs)
            }
        }

        input.markAsFinished()
        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
            writer.finishWriting {
                if let err = writer.error {
                    cont.resume(throwing: ScreenRecordError.writeFailed(err.localizedDescription))
                } else if writer.status != .completed {
                    cont.resume(throwing: ScreenRecordError.writeFailed("Failed to finalize video"))
                } else {
                    cont.resume()
                }
            }
        }

        didFinish = true
        return outURL.path
    }

    /// Returns the requested duration limited to 250...60 000 ms (10 000 ms when `nil`).
    private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
        let v = ms ?? 10_000
        return min(60_000, max(250, v))
    }

    /// Returns the requested frame rate limited to 1...30 (10 when `nil`, NaN, or infinite).
    private nonisolated static func clampFps(_ fps: Double?) -> Double {
        let v = fps ?? 10
        if !v.isFinite { return 10 }
        return min(30, max(1, v))
    }

    /// Returns the key window of the first connected window scene, or that scene's first window
    /// when none is key; `nil` when no window scene has any window.
    ///
    /// Main-actor isolated (inherited from the class) because it reads UIKit scene state.
    private static func resolveKeyWindow() -> UIWindow? {
        let scenes = UIApplication.shared.connectedScenes
        for scene in scenes {
            guard let windowScene = scene as? UIWindowScene else { continue }
            if let window = windowScene.windows.first(where: { $0.isKeyWindow }) {
                return window
            }
            if let window = windowScene.windows.first {
                return window
            }
        }
        return nil
    }

    /// Snapshots `window`'s view hierarchy at the window's screen scale.
    ///
    /// Main-actor isolated (inherited from the class) because it draws a UIKit view hierarchy.
    private static func captureImage(window: UIWindow, size: CGSize) -> CGImage? {
        let format = UIGraphicsImageRendererFormat()
        format.scale = window.screen.scale
        let renderer = UIGraphicsImageRenderer(size: size, format: format)
        let image = renderer.image { _ in
            window.drawHierarchy(in: CGRect(origin: .zero, size: size), afterScreenUpdates: false)
        }
        return image.cgImage
    }

    /// Renders `image` into a newly created 32BGRA pixel buffer of `width` x `height` pixels.
    ///
    /// Returns `nil` when the buffer or the bitmap context cannot be created.
    private nonisolated static func pixelBuffer(from image: CGImage, width: Int, height: Int) -> CVPixelBuffer? {
        var buffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32BGRA,
            [
                kCVPixelBufferCGImageCompatibilityKey: true,
                kCVPixelBufferCGBitmapContextCompatibilityKey: true,
            ] as CFDictionary,
            &buffer)
        guard status == kCVReturnSuccess, let buffer else { return nil }

        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        // 32BGRA stores bytes as B,G,R,A. Core Graphics only produces that layout for
        // alpha-first pixels combined with little-endian 32-bit byte order; alpha-first alone
        // writes A,R,G,B and swaps the colour channels in the encoded video.
        let bitmapInfo = CGBitmapInfo.byteOrder32Little.rawValue
            | CGImageAlphaInfo.premultipliedFirst.rawValue

        guard let context = CGContext(
            data: CVPixelBufferGetBaseAddress(buffer),
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: CVPixelBufferGetBytesPerRow(buffer),
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: bitmapInfo
        ) else {
            return nil
        }

        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
        return buffer
    }
}

View File

@@ -283,7 +283,7 @@ struct SettingsTab: View {
}
private func currentCaps() -> [String] {
var caps = [ClawdisCapability.canvas.rawValue]
var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
let cameraEnabled =
UserDefaults.standard.object(forKey: "camera.enabled") == nil
@@ -307,6 +307,7 @@ struct SettingsTab: View {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())