feat: add node screen recording across apps

This commit is contained in:
Peter Steinberger
2025-12-19 02:56:48 +01:00
parent b8012a2281
commit 7f3be083c1
20 changed files with 837 additions and 22 deletions

View File

@@ -4,6 +4,7 @@
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_DATA_SYNC" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION" />
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
<uses-permission
android:name="android.permission.NEARBY_WIFI_DEVICES"
@@ -26,7 +27,7 @@
<service
android:name=".NodeForegroundService"
android:exported="false"
android:foregroundServiceType="dataSync|microphone" />
android:foregroundServiceType="dataSync|microphone|mediaProjection" />
<activity
android:name=".MainActivity"
android:exported="true">

View File

@@ -25,6 +25,7 @@ import kotlinx.coroutines.launch
class MainActivity : ComponentActivity() {
private val viewModel: MainViewModel by viewModels()
private lateinit var permissionRequester: PermissionRequester
private lateinit var screenCaptureRequester: ScreenCaptureRequester
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
@@ -35,8 +36,10 @@ class MainActivity : ComponentActivity() {
requestNotificationPermissionIfNeeded()
NodeForegroundService.start(this)
permissionRequester = PermissionRequester(this)
screenCaptureRequester = ScreenCaptureRequester(this)
viewModel.camera.attachLifecycleOwner(this)
viewModel.camera.attachPermissionRequester(permissionRequester)
viewModel.screenRecorder.attachScreenCaptureRequester(screenCaptureRequester)
lifecycleScope.launch {
repeatOnLifecycle(Lifecycle.State.STARTED) {

View File

@@ -6,6 +6,7 @@ import com.steipete.clawdis.node.bridge.BridgeEndpoint
import com.steipete.clawdis.node.chat.OutgoingAttachment
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
import com.steipete.clawdis.node.node.ScreenRecordManager
import kotlinx.coroutines.flow.StateFlow
class MainViewModel(app: Application) : AndroidViewModel(app) {
@@ -13,6 +14,7 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
val canvas: CanvasController = runtime.canvas
val camera: CameraCaptureManager = runtime.camera
val screenRecorder: ScreenRecordManager = runtime.screenRecorder
val bridges: StateFlow<List<BridgeEndpoint>> = runtime.bridges
val discoveryStatusText: StateFlow<String> = runtime.discoveryStatusText

View File

@@ -17,11 +17,13 @@ import com.steipete.clawdis.node.bridge.BridgePairingClient
import com.steipete.clawdis.node.bridge.BridgeSession
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
import com.steipete.clawdis.node.node.ScreenRecordManager
import com.steipete.clawdis.node.protocol.ClawdisCapability
import com.steipete.clawdis.node.protocol.ClawdisCameraCommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UIAction
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UICommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasCommand
import com.steipete.clawdis.node.protocol.ClawdisScreenCommand
import com.steipete.clawdis.node.voice.VoiceWakeManager
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
@@ -51,6 +53,7 @@ class NodeRuntime(context: Context) {
val prefs = SecurePrefs(appContext)
val canvas = CanvasController()
val camera = CameraCaptureManager(appContext)
val screenRecorder = ScreenRecordManager(appContext)
private val json = Json { ignoreUnknownKeys = true }
private val externalAudioCaptureActive = MutableStateFlow(false)
@@ -287,6 +290,7 @@ class NodeRuntime(context: Context) {
add(ClawdisCanvasA2UICommand.Push.rawValue)
add(ClawdisCanvasA2UICommand.PushJSONL.rawValue)
add(ClawdisCanvasA2UICommand.Reset.rawValue)
add(ClawdisScreenCommand.Record.rawValue)
if (cameraEnabled.value) {
add(ClawdisCameraCommand.Snap.rawValue)
add(ClawdisCameraCommand.Clip.rawValue)
@@ -294,17 +298,18 @@ class NodeRuntime(context: Context) {
}
val resolved =
if (storedToken.isNullOrBlank()) {
_statusText.value = "Pairing…"
val caps = buildList {
add(ClawdisCapability.Canvas.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
}
}
BridgePairingClient().pairAndHello(
endpoint = endpoint,
hello =
_statusText.value = "Pairing…"
val caps = buildList {
add(ClawdisCapability.Canvas.rawValue)
add(ClawdisCapability.Screen.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
}
}
BridgePairingClient().pairAndHello(
endpoint = endpoint,
hello =
BridgePairingClient.Hello(
nodeId = instanceId.value,
displayName = displayName.value,
@@ -342,6 +347,7 @@ class NodeRuntime(context: Context) {
caps =
buildList {
add(ClawdisCapability.Canvas.rawValue)
add(ClawdisCapability.Screen.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
@@ -534,12 +540,13 @@ class NodeRuntime(context: Context) {
if (
command.startsWith(ClawdisCanvasCommand.NamespacePrefix) ||
command.startsWith(ClawdisCanvasA2UICommand.NamespacePrefix) ||
command.startsWith(ClawdisCameraCommand.NamespacePrefix)
command.startsWith(ClawdisCameraCommand.NamespacePrefix) ||
command.startsWith(ClawdisScreenCommand.NamespacePrefix)
) {
if (!isForeground.value) {
return BridgeSession.InvokeResult.error(
code = "NODE_BACKGROUND_UNAVAILABLE",
message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground",
message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground",
)
}
}
@@ -649,6 +656,16 @@ class NodeRuntime(context: Context) {
if (includeAudio) externalAudioCaptureActive.value = false
}
}
ClawdisScreenCommand.Record.rawValue -> {
val res =
try {
screenRecorder.record(paramsJson)
} catch (err: Throwable) {
val (code, message) = invokeErrorFromThrowable(err)
return BridgeSession.InvokeResult.error(code = code, message = message)
}
BridgeSession.InvokeResult.ok(res.payloadJson)
}
else ->
BridgeSession.InvokeResult.error(
code = "INVALID_REQUEST",

View File

@@ -2,6 +2,7 @@ package com.steipete.clawdis.node
import android.content.pm.PackageManager
import android.content.Intent
import android.Manifest
import android.net.Uri
import android.provider.Settings
import androidx.appcompat.app.AlertDialog

View File

@@ -0,0 +1,65 @@
package com.steipete.clawdis.node
import android.app.Activity
import android.content.Context
import android.content.Intent
import android.media.projection.MediaProjectionManager
import androidx.activity.ComponentActivity
import androidx.activity.result.ActivityResultLauncher
import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.app.AlertDialog
import kotlinx.coroutines.CompletableDeferred
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext
import kotlinx.coroutines.withTimeout
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlin.coroutines.resume
/**
 * Obtains the user's consent for screen capture by showing an in-app rationale dialog followed by
 * the system screen-capture intent created by [MediaProjectionManager].
 *
 * The activity-result launcher is registered in a property initializer, so an instance has to be
 * created while [activity] still accepts `registerForActivityResult` calls.
 */
class ScreenCaptureRequester(private val activity: ComponentActivity) {
    /** Result code and intent handed back by the system consent activity. */
    data class CaptureResult(val resultCode: Int, val data: Intent)

    // Serialises requests so at most one consent flow is in flight.
    private val mutex = Mutex()

    // Written by requestCapture (which may run on any dispatcher) and consumed by the launcher
    // callback, hence @Volatile.
    @Volatile private var pending: CompletableDeferred<CaptureResult?>? = null

    private val launcher: ActivityResultLauncher<Intent> =
        activity.registerForActivityResult(ActivityResultContracts.StartActivityForResult()) { result ->
            val waiter = pending
            pending = null
            val data = result.data
            if (result.resultCode == Activity.RESULT_OK && data != null) {
                waiter?.complete(CaptureResult(result.resultCode, data))
            } else {
                waiter?.complete(null)
            }
        }

    /**
     * Runs the consent flow and suspends until the user answers.
     *
     * @param timeoutMs how long to wait for the system consent activity to return a result.
     * @return the granted [CaptureResult], or `null` when the user declines the rationale dialog
     *   or the system activity does not return `RESULT_OK` with data.
     * @throws kotlinx.coroutines.TimeoutCancellationException when no result arrives within
     *   [timeoutMs].
     */
    suspend fun requestCapture(timeoutMs: Long = 20_000): CaptureResult? =
        mutex.withLock {
            if (!showRationaleDialog()) return@withLock null

            val mgr = activity.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
            val intent = mgr.createScreenCaptureIntent()
            val deferred = CompletableDeferred<CaptureResult?>()
            pending = deferred
            try {
                withContext(Dispatchers.Main) { launcher.launch(intent) }
                withContext(Dispatchers.Default) { withTimeout(timeoutMs) { deferred.await() } }
            } finally {
                // On timeout/cancellation forget this waiter so a late activity result is not
                // matched against a request that has already given up.
                if (pending === deferred) pending = null
            }
        }

    /** Shows the rationale dialog; resumes with `true` only when the user taps "Continue". */
    private suspend fun showRationaleDialog(): Boolean =
        withContext(Dispatchers.Main) {
            suspendCancellableCoroutine { cont ->
                val dialog =
                    AlertDialog.Builder(activity)
                        .setTitle("Screen recording required")
                        .setMessage("Clawdis needs to record the screen for this command.")
                        .setPositiveButton("Continue") { _, _ -> if (cont.isActive) cont.resume(true) }
                        .setNegativeButton("Not now") { _, _ -> if (cont.isActive) cont.resume(false) }
                        .setOnCancelListener { if (cont.isActive) cont.resume(false) }
                        .show()
                // Do not leave an orphaned dialog on screen when the request is cancelled.
                cont.invokeOnCancellation { dialog.dismiss() }
            }
        }
}

View File

@@ -0,0 +1,147 @@
package com.steipete.clawdis.node.node
import android.content.Context
import android.hardware.display.DisplayManager
import android.media.MediaRecorder
import android.media.projection.MediaProjectionManager
import android.util.Base64
import com.steipete.clawdis.node.ScreenCaptureRequester
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
import java.io.File
import kotlin.math.roundToInt
/**
 * Records the device screen into a temporary MP4 via MediaProjection + [MediaRecorder] and returns
 * the file base64-encoded inside a JSON payload.
 *
 * A [ScreenCaptureRequester] must be attached before [record] is called; it is used to obtain the
 * user's capture consent for every recording.
 */
class ScreenRecordManager(private val context: Context) {
    /** JSON document returned by [record]. */
    data class Payload(val payloadJson: String)

    @Volatile private var screenCaptureRequester: ScreenCaptureRequester? = null

    /** Stores the requester used by subsequent [record] calls. */
    fun attachScreenCaptureRequester(requester: ScreenCaptureRequester) {
        screenCaptureRequester = requester
    }

    /**
     * Records the screen according to [paramsJson] and returns the encoded clip.
     *
     * Recognised keys: `durationMs` (default 10000, clamped to 250..60000), `fps` (default 10,
     * clamped to 1..60), `screenIndex` (must be absent or 0) and `format` (must be absent or mp4).
     *
     * @throws IllegalStateException with a `SCREEN_PERMISSION_REQUIRED:` message when no requester
     *   is attached or consent is not granted, or an `UNAVAILABLE:` message when capture fails.
     * @throws IllegalArgumentException with an `INVALID_REQUEST:` message for unsupported params.
     */
    suspend fun record(paramsJson: String?): Payload =
        withContext(Dispatchers.Default) {
            val requester =
                screenCaptureRequester
                    ?: throw IllegalStateException(
                        "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
                    )
            val durationMs = (parseDurationMs(paramsJson) ?: 10_000).coerceIn(250, 60_000)
            val fps = (parseFps(paramsJson) ?: 10.0).coerceIn(1.0, 60.0)
            val fpsInt = fps.roundToInt().coerceIn(1, 60)
            val screenIndex = parseScreenIndex(paramsJson)
            val format = parseString(paramsJson, key = "format")
            if (format != null && format.lowercase() != "mp4") {
                throw IllegalArgumentException("INVALID_REQUEST: screen format must be mp4")
            }
            if (screenIndex != null && screenIndex != 0) {
                throw IllegalArgumentException("INVALID_REQUEST: screenIndex must be 0 on Android")
            }

            val capture =
                requester.requestCapture()
                    ?: throw IllegalStateException(
                        "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
                    )
            val mgr =
                context.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
            val projection =
                mgr.getMediaProjection(capture.resultCode, capture.data)
                    ?: throw IllegalStateException("UNAVAILABLE: screen capture unavailable")

            val metrics = context.resources.displayMetrics
            val width = metrics.widthPixels
            val height = metrics.heightPixels
            val densityDpi = metrics.densityDpi

            val file =
                try {
                    File.createTempFile("clawdis-screen-", ".mp4")
                } catch (err: Throwable) {
                    // The projection is already live; do not leak it when no output file exists.
                    projection.stop()
                    throw err
                }
            try {
                captureToFile(projection, file, width, height, densityDpi, fpsInt, durationMs)
                val bytes = withContext(Dispatchers.IO) { file.readBytes() }
                val base64 = Base64.encodeToString(bytes, Base64.NO_WRAP)
                Payload(
                    """{"format":"mp4","base64":"$base64","durationMs":$durationMs,"fps":$fpsInt,"screenIndex":0}""",
                )
            } finally {
                // Runs on success, failure and cancellation so clips never pile up on disk.
                file.delete()
            }
        }

    /**
     * Mirrors the screen into [file] for [durationMs] milliseconds, then finalises the recording.
     * Always releases the recorder and virtual display and stops [projection] before returning.
     */
    private suspend fun captureToFile(
        projection: android.media.projection.MediaProjection,
        file: File,
        width: Int,
        height: Int,
        densityDpi: Int,
        fps: Int,
        durationMs: Int,
    ) {
        val projectionCallback = object : android.media.projection.MediaProjection.Callback() {}
        try {
            // Android 14+ rejects createVirtualDisplay unless a callback was registered first.
            // A main-looper handler is passed because this code runs on a thread without a Looper.
            projection.registerCallback(
                projectionCallback,
                android.os.Handler(android.os.Looper.getMainLooper()),
            )
            val recorder = MediaRecorder()
            var virtualDisplay: android.hardware.display.VirtualDisplay? = null
            var recording = false
            try {
                recorder.setVideoSource(MediaRecorder.VideoSource.SURFACE)
                recorder.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4)
                recorder.setVideoEncoder(MediaRecorder.VideoEncoder.H264)
                recorder.setVideoSize(width, height)
                recorder.setVideoFrameRate(fps)
                recorder.setVideoEncodingBitRate(estimateBitrate(width, height, fps))
                recorder.setOutputFile(file.absolutePath)
                recorder.prepare()
                virtualDisplay =
                    projection.createVirtualDisplay(
                        "clawdis-screen",
                        width,
                        height,
                        densityDpi,
                        DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
                        recorder.surface,
                        null,
                        null,
                    )
                recorder.start()
                recording = true
                delay(durationMs.toLong())
                recording = false
                try {
                    recorder.stop()
                } catch (err: RuntimeException) {
                    // stop() throws when no valid data was received; the file is then unusable,
                    // so report the failure instead of returning a broken clip as success.
                    throw IllegalStateException("UNAVAILABLE: screen recording failed", err)
                }
            } finally {
                if (recording) {
                    // Failure/cancellation path: best-effort stop, the clip is discarded anyway.
                    try {
                        recorder.stop()
                    } catch (_: Throwable) {
                        // ignore
                    }
                }
                recorder.reset()
                recorder.release()
                virtualDisplay?.release()
            }
        } finally {
            projection.unregisterCallback(projectionCallback)
            projection.stop()
        }
    }

    private fun parseDurationMs(paramsJson: String?): Int? =
        parseNumber(paramsJson, key = "durationMs")?.toIntOrNull()

    private fun parseFps(paramsJson: String?): Double? =
        parseNumber(paramsJson, key = "fps")?.toDoubleOrNull()

    private fun parseScreenIndex(paramsJson: String?): Int? =
        parseNumber(paramsJson, key = "screenIndex")?.toIntOrNull()

    /**
     * Returns the text following the first `"key":` occurrence with leading whitespace removed, or
     * `null` when the key or its colon is missing. This is a plain substring scan, not a JSON
     * parser: the first textual occurrence of the quoted key wins.
     */
    private fun valueTail(paramsJson: String?, key: String): String? {
        val raw = paramsJson ?: return null
        val needle = "\"$key\""
        val idx = raw.indexOf(needle)
        if (idx < 0) return null
        val colon = raw.indexOf(':', idx + needle.length)
        if (colon < 0) return null
        return raw.substring(colon + 1).trimStart()
    }

    /** Extracts the leading run of digits, `.` and `-` of the value for [key] (may be empty). */
    private fun parseNumber(paramsJson: String?, key: String): String? =
        valueTail(paramsJson, key)?.takeWhile { it.isDigit() || it == '.' || it == '-' }

    /** Extracts a double-quoted value for [key]; escape sequences are not interpreted. */
    private fun parseString(paramsJson: String?, key: String): String? {
        val tail = valueTail(paramsJson, key) ?: return null
        if (!tail.startsWith('"')) return null
        val rest = tail.drop(1)
        val end = rest.indexOf('"')
        if (end < 0) return null
        return rest.substring(0, end)
    }

    /** Two bits per pixel per frame, clamped to 1..12 Mbit/s. */
    private fun estimateBitrate(width: Int, height: Int, fps: Int): Int {
        val raw = width.toLong() * height.toLong() * fps.toLong() * 2L
        // Clamp while still a Long: narrowing first could wrap for very large surfaces.
        return raw.coerceIn(1_000_000L, 12_000_000L).toInt()
    }
}

View File

@@ -3,6 +3,7 @@ package com.steipete.clawdis.node.protocol
/**
 * Capabilities a node can advertise. [rawValue] is the string form of each capability; the
 * protocol constants test pins these strings, so treat them as stable.
 */
enum class ClawdisCapability(val rawValue: String) {
Canvas("canvas"),
Camera("camera"),
Screen("screen"),
VoiceWake("voiceWake"),
}
@@ -39,3 +40,12 @@ enum class ClawdisCameraCommand(val rawValue: String) {
const val NamespacePrefix: String = "camera."
}
}
/**
 * Commands in the screen namespace. [rawValue] is the command's string identifier.
 */
enum class ClawdisScreenCommand(val rawValue: String) {
    Record(rawValue = "screen.record");

    companion object {
        /** Prefix shared by the commands of this namespace. */
        const val NamespacePrefix = "screen."
    }
}

View File

@@ -24,6 +24,12 @@ class ClawdisProtocolConstantsTest {
fun capabilitiesUseStableStrings() {
assertEquals("canvas", ClawdisCapability.Canvas.rawValue)
assertEquals("camera", ClawdisCapability.Camera.rawValue)
assertEquals("screen", ClawdisCapability.Screen.rawValue)
assertEquals("voiceWake", ClawdisCapability.VoiceWake.rawValue)
}
// Pins the string identifier of the screen record command so an accidental rename fails the build.
@Test
fun screenCommandsUseStableStrings() {
assertEquals("screen.record", ClawdisScreenCommand.Record.rawValue)
}
}

View File

@@ -156,7 +156,7 @@ final class BridgeConnectionController {
}
private func currentCaps() -> [String] {
var caps = [ClawdisCapability.canvas.rawValue]
var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
// Default-on: if the key doesn't exist yet, treat it as enabled.
let cameraEnabled =
@@ -181,6 +181,7 @@ final class BridgeConnectionController {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())

View File

@@ -17,6 +17,7 @@ final class NodeAppModel {
var isBackgrounded: Bool = false
let screen = ScreenController()
let camera = CameraController()
private let screenRecorder = ScreenRecordService()
var bridgeStatusText: String = "Offline"
var bridgeServerName: String?
var bridgeRemoteAddress: String?
@@ -364,13 +365,15 @@ final class NodeAppModel {
private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse {
let command = req.command
if command.hasPrefix("canvas.") || command.hasPrefix("camera."), self.isBackgrounded {
if (command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.")),
self.isBackgrounded
{
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .backgroundUnavailable,
message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground"))
message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground"))
}
if command.hasPrefix("camera."), !self.isCameraEnabled() {
@@ -524,6 +527,36 @@ final class NodeAppModel {
self.showCameraHUD(text: "Clip captured", kind: .success, autoHideSeconds: 1.8)
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisScreenCommand.record.rawValue:
let params = (try? Self.decodeParams(ClawdisScreenRecordParams.self, from: req.paramsJSON)) ??
ClawdisScreenRecordParams()
if let format = params.format, format.lowercased() != "mp4" {
throw NSError(domain: "Screen", code: 30, userInfo: [
NSLocalizedDescriptionKey: "INVALID_REQUEST: screen format must be mp4",
])
}
let path = try await self.screenRecorder.record(
screenIndex: params.screenIndex,
durationMs: params.durationMs,
fps: params.fps,
outPath: nil)
defer { try? FileManager.default.removeItem(atPath: path) }
let data = try Data(contentsOf: URL(fileURLWithPath: path))
struct Payload: Codable {
var format: String
var base64: String
var durationMs: Int?
var fps: Double?
var screenIndex: Int?
}
let payload = try Self.encodePayload(Payload(
format: "mp4",
base64: data.base64EncodedString(),
durationMs: params.durationMs,
fps: params.fps,
screenIndex: params.screenIndex))
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
default:
return BridgeInvokeResponse(
id: req.id,

View File

@@ -0,0 +1,205 @@
import AVFoundation
import UIKit
/// Records the app's window to an H.264 MP4 by repeatedly snapshotting the view hierarchy and
/// feeding the frames to an `AVAssetWriter`.
@MainActor
final class ScreenRecordService {
    enum ScreenRecordError: LocalizedError {
        case noWindow
        case invalidScreenIndex(Int)
        case captureFailed(String)
        case writeFailed(String)

        var errorDescription: String? {
            switch self {
            case .noWindow:
                return "Screen capture unavailable"
            case let .invalidScreenIndex(idx):
                return "Invalid screen index \(idx)"
            case let .captureFailed(msg):
                return msg
            case let .writeFailed(msg):
                return msg
            }
        }
    }

    /// Records the window and returns the path of the written MP4.
    ///
    /// - Parameters:
    ///   - screenIndex: Must be `nil` or `0`; any other value throws `invalidScreenIndex`.
    ///   - durationMs: Requested length; defaults to 10000 and is clamped to 250...60000.
    ///   - fps: Requested frame rate; defaults to 10 and is clamped to 1...30.
    ///   - outPath: Destination path; a unique file in the temporary directory when `nil`/blank.
    /// - Returns: The file system path of the finished recording.
    /// - Throws: `ScreenRecordError`, or `CancellationError` when the task is cancelled. On any
    ///   failure after writing has started the partial output file is removed.
    func record(
        screenIndex: Int?,
        durationMs: Int?,
        fps: Double?,
        outPath: String?) async throws -> String
    {
        let durationMs = Self.clampDurationMs(durationMs)
        let fps = Self.clampFps(fps)
        let fpsInt = Int32(fps.rounded())
        let fpsValue = Double(fpsInt)

        let outURL: URL = {
            if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                return URL(fileURLWithPath: outPath)
            }
            return FileManager.default.temporaryDirectory
                .appendingPathComponent("clawdis-screen-record-\(UUID().uuidString).mp4")
        }()
        try? FileManager.default.removeItem(at: outURL)

        if let idx = screenIndex, idx != 0 {
            throw ScreenRecordError.invalidScreenIndex(idx)
        }
        guard let window = Self.resolveKeyWindow() else {
            throw ScreenRecordError.noWindow
        }

        let size = window.bounds.size
        let scale = window.screen.scale
        let widthPx = max(1, Int(size.width * scale))
        let heightPx = max(1, Int(size.height * scale))

        let writer = try AVAssetWriter(outputURL: outURL, fileType: .mp4)
        let settings: [String: Any] = [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey: widthPx,
            AVVideoHeightKey: heightPx,
        ]
        let input = AVAssetWriterInput(mediaType: .video, outputSettings: settings)
        input.expectsMediaDataInRealTime = false
        let attrs: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA,
            kCVPixelBufferWidthKey as String: widthPx,
            kCVPixelBufferHeightKey as String: heightPx,
            kCVPixelBufferCGImageCompatibilityKey as String: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey as String: true,
        ]
        let adaptor = AVAssetWriterInputPixelBufferAdaptor(
            assetWriterInput: input,
            sourcePixelBufferAttributes: attrs)
        guard writer.canAdd(input) else {
            throw ScreenRecordError.writeFailed("Cannot add video input")
        }
        writer.add(input)
        guard writer.startWriting() else {
            throw ScreenRecordError.writeFailed(writer.error?.localizedDescription ?? "Failed to start writer")
        }
        writer.startSession(atSourceTime: .zero)

        do {
            let frameCount = max(1, Int((Double(durationMs) / 1000.0 * fpsValue).rounded(.up)))
            let frameDuration = CMTime(value: 1, timescale: fpsInt)
            let frameSleepNs = UInt64(1_000_000_000.0 / fpsValue)

            for frame in 0..<frameCount {
                while !input.isReadyForMoreMediaData {
                    // A writer that left the `.writing` state never becomes ready again; bail out
                    // instead of polling forever.
                    guard writer.status == .writing else {
                        throw ScreenRecordError.writeFailed(
                            writer.error?.localizedDescription ?? "Writer stopped before all frames were written")
                    }
                    try await Task.sleep(nanoseconds: 10_000_000)
                }
                var frameError: Error?
                autoreleasepool {
                    do {
                        guard let image = Self.captureImage(window: window, size: size) else {
                            throw ScreenRecordError.captureFailed("Failed to capture frame")
                        }
                        guard let buffer = Self.pixelBuffer(from: image, width: widthPx, height: heightPx) else {
                            throw ScreenRecordError.captureFailed("Failed to render frame")
                        }
                        // Presentation times are synthetic: frame index times 1/fps.
                        let time = CMTimeMultiply(frameDuration, multiplier: Int32(frame))
                        if !adaptor.append(buffer, withPresentationTime: time) {
                            throw ScreenRecordError.writeFailed("Failed to append frame")
                        }
                    } catch {
                        frameError = error
                    }
                }
                if let frameError { throw frameError }
                if frame < frameCount - 1 {
                    try await Task.sleep(nanoseconds: frameSleepNs)
                }
            }

            input.markAsFinished()
            try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
                writer.finishWriting {
                    if let err = writer.error {
                        cont.resume(throwing: ScreenRecordError.writeFailed(err.localizedDescription))
                    } else if writer.status != .completed {
                        cont.resume(throwing: ScreenRecordError.writeFailed("Failed to finalize video"))
                    } else {
                        cont.resume()
                    }
                }
            }
        } catch {
            // Abort the session and drop the partial file so failed or cancelled recordings do
            // not leave junk behind.
            if writer.status == .writing {
                writer.cancelWriting()
            }
            try? FileManager.default.removeItem(at: outURL)
            throw error
        }

        return outURL.path
    }

    /// Applies the default (10000 ms) and clamps to 250...60000 ms.
    private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
        let v = ms ?? 10_000
        return min(60_000, max(250, v))
    }

    /// Applies the default (10 fps), replaces non-finite values with it and clamps to 1...30.
    private nonisolated static func clampFps(_ fps: Double?) -> Double {
        let v = fps ?? 10
        if !v.isFinite { return 10 }
        return min(30, max(1, v))
    }

    /// Returns the key window of the first window scene that has one, else that scene's first
    /// window. Main-actor isolated (via the enclosing class) because it reads UIKit state.
    private static func resolveKeyWindow() -> UIWindow? {
        let scenes = UIApplication.shared.connectedScenes
        for scene in scenes {
            guard let windowScene = scene as? UIWindowScene else { continue }
            if let window = windowScene.windows.first(where: { $0.isKeyWindow }) {
                return window
            }
            if let window = windowScene.windows.first {
                return window
            }
        }
        return nil
    }

    /// Snapshots `window` at its screen scale. Main-actor isolated (via the enclosing class)
    /// because it draws the view hierarchy.
    private static func captureImage(window: UIWindow, size: CGSize) -> CGImage? {
        let format = UIGraphicsImageRendererFormat()
        format.scale = window.screen.scale
        let renderer = UIGraphicsImageRenderer(size: size, format: format)
        let image = renderer.image { _ in
            window.drawHierarchy(in: CGRect(origin: .zero, size: size), afterScreenUpdates: false)
        }
        return image.cgImage
    }

    /// Renders `image` into a newly created 32BGRA pixel buffer of the given pixel size.
    private nonisolated static func pixelBuffer(from image: CGImage, width: Int, height: Int) -> CVPixelBuffer? {
        var buffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32BGRA,
            [
                kCVPixelBufferCGImageCompatibilityKey: true,
                kCVPixelBufferCGBitmapContextCompatibilityKey: true,
            ] as CFDictionary,
            &buffer)
        guard status == kCVReturnSuccess, let buffer else { return nil }

        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        // BGRA in memory is little-endian ARGB: without byteOrder32Little the context would lay
        // the bytes out as ARGB and the encoded video would have swapped colour channels.
        let bitmapInfo = CGBitmapInfo.byteOrder32Little.rawValue
            | CGImageAlphaInfo.premultipliedFirst.rawValue
        guard let context = CGContext(
            data: CVPixelBufferGetBaseAddress(buffer),
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: CVPixelBufferGetBytesPerRow(buffer),
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: bitmapInfo
        ) else {
            return nil
        }
        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
        return buffer
    }
}

View File

@@ -283,7 +283,7 @@ struct SettingsTab: View {
}
private func currentCaps() -> [String] {
var caps = [ClawdisCapability.canvas.rawValue]
var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
let cameraEnabled =
UserDefaults.standard.object(forKey: "camera.enabled") == nil
@@ -307,6 +307,7 @@ struct SettingsTab: View {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())

View File

@@ -98,7 +98,7 @@ final class MacNodeModeCoordinator {
}
private func currentCaps() -> [String] {
var caps: [String] = [ClawdisCapability.canvas.rawValue]
var caps: [String] = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
if UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false {
caps.append(ClawdisCapability.camera.rawValue)
}

View File

@@ -74,6 +74,14 @@ actor MacNodeRuntime {
return try await self.handleA2UIPush(req)
case ClawdisCameraCommand.snap.rawValue:
guard Self.cameraEnabled() else {
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .unavailable,
message: "CAMERA_DISABLED: enable Camera in Settings"))
}
let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ??
ClawdisCameraSnapParams()
let res = try await self.cameraCapture.snap(
@@ -94,6 +102,14 @@ actor MacNodeRuntime {
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisCameraCommand.clip.rawValue:
guard Self.cameraEnabled() else {
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .unavailable,
message: "CAMERA_DISABLED: enable Camera in Settings"))
}
let params = (try? Self.decodeParams(ClawdisCameraClipParams.self, from: req.paramsJSON)) ??
ClawdisCameraClipParams()
let res = try await self.cameraCapture.clip(
@@ -119,6 +135,12 @@ actor MacNodeRuntime {
case MacNodeScreenCommand.record.rawValue:
let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ??
MacNodeScreenRecordParams()
if let format = params.format?.lowercased(), !format.isEmpty, format != "mp4" {
return Self.errorResponse(
req,
code: .invalidRequest,
message: "INVALID_REQUEST: screen format must be mp4")
}
let path = try await self.screenRecorder.record(
screenIndex: params.screenIndex,
durationMs: params.durationMs,
@@ -134,7 +156,7 @@ actor MacNodeRuntime {
var screenIndex: Int?
}
let payload = try Self.encodePayload(ScreenPayload(
format: params.format ?? "mp4",
format: "mp4",
base64: data.base64EncodedString(),
durationMs: params.durationMs,
fps: params.fps,
@@ -224,6 +246,10 @@ actor MacNodeRuntime {
return json
}
/// Reads the camera toggle stored under `cameraEnabledKey` in `UserDefaults.standard`.
/// A missing value, or one that is not a `Bool`, counts as disabled.
private nonisolated static func cameraEnabled() -> Bool {
    let stored = UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool
    return stored ?? false
}
private static func errorResponse(
_ req: BridgeInvokeRequest,
code: ClawdisNodeErrorCode,

View File

@@ -3,5 +3,6 @@ import Foundation
/// Capabilities a node can advertise. The raw value of each case is its case name, so the
/// encoded strings are "canvas", "camera", "screen" and "voiceWake".
public enum ClawdisCapability: String, Codable, Sendable {
case canvas
case camera
case screen
case voiceWake
}