feat: add node screen recording across apps

This commit is contained in:
Peter Steinberger
2025-12-19 02:56:48 +01:00
parent b8012a2281
commit 7f3be083c1
20 changed files with 837 additions and 22 deletions

View File

@@ -4,6 +4,7 @@
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_DATA_SYNC" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION" />
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
<uses-permission
android:name="android.permission.NEARBY_WIFI_DEVICES"
@@ -26,7 +27,7 @@
<service
android:name=".NodeForegroundService"
android:exported="false"
android:foregroundServiceType="dataSync|microphone" />
android:foregroundServiceType="dataSync|microphone|mediaProjection" />
<activity
android:name=".MainActivity"
android:exported="true">

View File

@@ -25,6 +25,7 @@ import kotlinx.coroutines.launch
class MainActivity : ComponentActivity() {
private val viewModel: MainViewModel by viewModels()
private lateinit var permissionRequester: PermissionRequester
private lateinit var screenCaptureRequester: ScreenCaptureRequester
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
@@ -35,8 +36,10 @@ class MainActivity : ComponentActivity() {
requestNotificationPermissionIfNeeded()
NodeForegroundService.start(this)
permissionRequester = PermissionRequester(this)
screenCaptureRequester = ScreenCaptureRequester(this)
viewModel.camera.attachLifecycleOwner(this)
viewModel.camera.attachPermissionRequester(permissionRequester)
viewModel.screenRecorder.attachScreenCaptureRequester(screenCaptureRequester)
lifecycleScope.launch {
repeatOnLifecycle(Lifecycle.State.STARTED) {

View File

@@ -6,6 +6,7 @@ import com.steipete.clawdis.node.bridge.BridgeEndpoint
import com.steipete.clawdis.node.chat.OutgoingAttachment
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
import com.steipete.clawdis.node.node.ScreenRecordManager
import kotlinx.coroutines.flow.StateFlow
class MainViewModel(app: Application) : AndroidViewModel(app) {
@@ -13,6 +14,7 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
val canvas: CanvasController = runtime.canvas
val camera: CameraCaptureManager = runtime.camera
val screenRecorder: ScreenRecordManager = runtime.screenRecorder
val bridges: StateFlow<List<BridgeEndpoint>> = runtime.bridges
val discoveryStatusText: StateFlow<String> = runtime.discoveryStatusText

View File

@@ -17,11 +17,13 @@ import com.steipete.clawdis.node.bridge.BridgePairingClient
import com.steipete.clawdis.node.bridge.BridgeSession
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
import com.steipete.clawdis.node.node.ScreenRecordManager
import com.steipete.clawdis.node.protocol.ClawdisCapability
import com.steipete.clawdis.node.protocol.ClawdisCameraCommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UIAction
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UICommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasCommand
import com.steipete.clawdis.node.protocol.ClawdisScreenCommand
import com.steipete.clawdis.node.voice.VoiceWakeManager
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
@@ -51,6 +53,7 @@ class NodeRuntime(context: Context) {
val prefs = SecurePrefs(appContext)
val canvas = CanvasController()
val camera = CameraCaptureManager(appContext)
val screenRecorder = ScreenRecordManager(appContext)
private val json = Json { ignoreUnknownKeys = true }
private val externalAudioCaptureActive = MutableStateFlow(false)
@@ -287,6 +290,7 @@ class NodeRuntime(context: Context) {
add(ClawdisCanvasA2UICommand.Push.rawValue)
add(ClawdisCanvasA2UICommand.PushJSONL.rawValue)
add(ClawdisCanvasA2UICommand.Reset.rawValue)
add(ClawdisScreenCommand.Record.rawValue)
if (cameraEnabled.value) {
add(ClawdisCameraCommand.Snap.rawValue)
add(ClawdisCameraCommand.Clip.rawValue)
@@ -294,17 +298,18 @@ class NodeRuntime(context: Context) {
}
val resolved =
if (storedToken.isNullOrBlank()) {
_statusText.value = "Pairing…"
val caps = buildList {
add(ClawdisCapability.Canvas.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
}
}
BridgePairingClient().pairAndHello(
endpoint = endpoint,
hello =
_statusText.value = "Pairing…"
val caps = buildList {
add(ClawdisCapability.Canvas.rawValue)
add(ClawdisCapability.Screen.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
}
}
BridgePairingClient().pairAndHello(
endpoint = endpoint,
hello =
BridgePairingClient.Hello(
nodeId = instanceId.value,
displayName = displayName.value,
@@ -342,6 +347,7 @@ class NodeRuntime(context: Context) {
caps =
buildList {
add(ClawdisCapability.Canvas.rawValue)
add(ClawdisCapability.Screen.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
@@ -534,12 +540,13 @@ class NodeRuntime(context: Context) {
if (
command.startsWith(ClawdisCanvasCommand.NamespacePrefix) ||
command.startsWith(ClawdisCanvasA2UICommand.NamespacePrefix) ||
command.startsWith(ClawdisCameraCommand.NamespacePrefix)
command.startsWith(ClawdisCameraCommand.NamespacePrefix) ||
command.startsWith(ClawdisScreenCommand.NamespacePrefix)
) {
if (!isForeground.value) {
return BridgeSession.InvokeResult.error(
code = "NODE_BACKGROUND_UNAVAILABLE",
message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground",
message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground",
)
}
}
@@ -649,6 +656,16 @@ class NodeRuntime(context: Context) {
if (includeAudio) externalAudioCaptureActive.value = false
}
}
ClawdisScreenCommand.Record.rawValue -> {
val res =
try {
screenRecorder.record(paramsJson)
} catch (err: Throwable) {
val (code, message) = invokeErrorFromThrowable(err)
return BridgeSession.InvokeResult.error(code = code, message = message)
}
BridgeSession.InvokeResult.ok(res.payloadJson)
}
else ->
BridgeSession.InvokeResult.error(
code = "INVALID_REQUEST",

View File

@@ -2,6 +2,7 @@ package com.steipete.clawdis.node
import android.content.pm.PackageManager
import android.content.Intent
import android.Manifest
import android.net.Uri
import android.provider.Settings
import androidx.appcompat.app.AlertDialog

View File

@@ -0,0 +1,65 @@
package com.steipete.clawdis.node
import android.app.Activity
import android.content.Context
import android.content.Intent
import android.media.projection.MediaProjectionManager
import androidx.activity.ComponentActivity
import androidx.activity.result.ActivityResultLauncher
import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.app.AlertDialog
import kotlin.coroutines.resume
import kotlinx.coroutines.CompletableDeferred
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext
import kotlinx.coroutines.withTimeout
import kotlinx.coroutines.withTimeoutOrNull
class ScreenCaptureRequester(private val activity: ComponentActivity) {
    /** A successful MediaProjection consent grant from the system prompt. */
    data class CaptureResult(val resultCode: Int, val data: Intent)

    // Serializes requests so only one consent prompt is in flight at a time.
    private val mutex = Mutex()

    // Completed (possibly with null on denial) by the activity-result callback.
    private var pending: CompletableDeferred<CaptureResult?>? = null

    private val launcher: ActivityResultLauncher<Intent> =
        activity.registerForActivityResult(ActivityResultContracts.StartActivityForResult()) { result ->
            val waiter = pending
            pending = null
            val data = result.data
            if (result.resultCode == Activity.RESULT_OK && data != null) {
                waiter?.complete(CaptureResult(result.resultCode, data))
            } else {
                // User denied or dismissed the system prompt.
                waiter?.complete(null)
            }
        }

    /**
     * Shows an in-app rationale dialog, then the system screen-capture consent
     * prompt.
     *
     * @param timeoutMs how long to wait for the user before giving up.
     * @return the consent grant, or null when the user declines the rationale,
     *   denies the system prompt, or does not respond within [timeoutMs].
     */
    suspend fun requestCapture(timeoutMs: Long = 20_000): CaptureResult? =
        mutex.withLock {
            val proceed = showRationaleDialog()
            if (!proceed) return null
            val mgr = activity.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
            val intent = mgr.createScreenCaptureIntent()
            val deferred = CompletableDeferred<CaptureResult?>()
            pending = deferred
            withContext(Dispatchers.Main) { launcher.launch(intent) }
            try {
                // withTimeoutOrNull (not withTimeout) so an ignored prompt yields
                // null to the caller instead of throwing TimeoutCancellationException.
                withContext(Dispatchers.Default) { withTimeoutOrNull(timeoutMs) { deferred.await() } }
            } finally {
                // Clear the stale waiter so a late activity result from this
                // request cannot complete a future request's deferred.
                if (pending === deferred) pending = null
            }
        }

    /** Asks the user (on the main thread) whether to proceed; false on "Not now" or cancel. */
    private suspend fun showRationaleDialog(): Boolean =
        withContext(Dispatchers.Main) {
            suspendCancellableCoroutine { cont ->
                AlertDialog.Builder(activity)
                    .setTitle("Screen recording required")
                    .setMessage("Clawdis needs to record the screen for this command.")
                    .setPositiveButton("Continue") { _, _ -> cont.resume(true) }
                    .setNegativeButton("Not now") { _, _ -> cont.resume(false) }
                    .setOnCancelListener { cont.resume(false) }
                    .show()
            }
        }
}

View File

@@ -0,0 +1,147 @@
package com.steipete.clawdis.node.node
import android.content.Context
import android.hardware.display.DisplayManager
import android.media.MediaRecorder
import android.media.projection.MediaProjectionManager
import android.util.Base64
import com.steipete.clawdis.node.ScreenCaptureRequester
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
import java.io.File
import kotlin.math.roundToInt
class ScreenRecordManager(private val context: Context) {
    /** Wrapper for the JSON payload returned to the bridge. */
    data class Payload(val payloadJson: String)

    // Set by the activity; needed to show the system capture-consent prompt.
    @Volatile private var screenCaptureRequester: ScreenCaptureRequester? = null

    fun attachScreenCaptureRequester(requester: ScreenCaptureRequester) {
        screenCaptureRequester = requester
    }

    /**
     * Records the device screen for the requested duration and returns an mp4
     * as a base64 JSON payload.
     *
     * Params hand-parsed from [paramsJson]: `durationMs` (clamped 250..60000,
     * default 10000), `fps` (clamped 1..60, default 10), `screenIndex` (must
     * be 0 on Android), `format` (must be "mp4" when present).
     *
     * @throws IllegalStateException when consent is missing/denied or the
     *   projection cannot be obtained (message carries the bridge error code).
     * @throws IllegalArgumentException on invalid format/screenIndex.
     */
    suspend fun record(paramsJson: String?): Payload =
        withContext(Dispatchers.Default) {
            val requester =
                screenCaptureRequester
                    ?: throw IllegalStateException(
                        "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
                    )
            val durationMs = (parseDurationMs(paramsJson) ?: 10_000).coerceIn(250, 60_000)
            val fps = (parseFps(paramsJson) ?: 10.0).coerceIn(1.0, 60.0)
            val fpsInt = fps.roundToInt().coerceIn(1, 60)
            val screenIndex = parseScreenIndex(paramsJson)
            val format = parseString(paramsJson, key = "format")
            if (format != null && format.lowercase() != "mp4") {
                throw IllegalArgumentException("INVALID_REQUEST: screen format must be mp4")
            }
            if (screenIndex != null && screenIndex != 0) {
                throw IllegalArgumentException("INVALID_REQUEST: screenIndex must be 0 on Android")
            }
            val capture = requester.requestCapture()
                ?: throw IllegalStateException(
                    "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
                )
            val mgr =
                context.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
            val projection = mgr.getMediaProjection(capture.resultCode, capture.data)
                ?: throw IllegalStateException("UNAVAILABLE: screen capture unavailable")
            val metrics = context.resources.displayMetrics
            val width = metrics.widthPixels
            val height = metrics.heightPixels
            val densityDpi = metrics.densityDpi
            val file = File.createTempFile("clawdis-screen-", ".mp4")
            val recorder = MediaRecorder()
            var virtualDisplay: android.hardware.display.VirtualDisplay? = null
            try {
                // Android 14+ requires a callback to be registered on the
                // projection before it is used to create a virtual display;
                // without it createVirtualDisplay throws.
                projection.registerCallback(
                    object : android.media.projection.MediaProjection.Callback() {},
                    android.os.Handler(android.os.Looper.getMainLooper()),
                )
                recorder.setVideoSource(MediaRecorder.VideoSource.SURFACE)
                recorder.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4)
                recorder.setVideoEncoder(MediaRecorder.VideoEncoder.H264)
                recorder.setVideoSize(width, height)
                recorder.setVideoFrameRate(fpsInt)
                recorder.setVideoEncodingBitRate(estimateBitrate(width, height, fpsInt))
                recorder.setOutputFile(file.absolutePath)
                recorder.prepare()
                val surface = recorder.surface
                virtualDisplay =
                    projection.createVirtualDisplay(
                        "clawdis-screen",
                        width,
                        height,
                        densityDpi,
                        DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
                        surface,
                        null,
                        null,
                    )
                recorder.start()
                // Record for the requested wall-clock duration.
                delay(durationMs.toLong())
            } finally {
                try {
                    recorder.stop()
                } catch (_: Throwable) {
                    // stop() throws if start() never ran or nothing was written; ignore.
                }
                recorder.reset()
                recorder.release()
                virtualDisplay?.release()
                projection.stop()
            }
            val bytes =
                try {
                    withContext(Dispatchers.IO) { file.readBytes() }
                } finally {
                    // Always remove the temp file, even if reading it fails.
                    file.delete()
                }
            val base64 = Base64.encodeToString(bytes, Base64.NO_WRAP)
            Payload(
                """{"format":"mp4","base64":"$base64","durationMs":$durationMs,"fps":$fpsInt,"screenIndex":0}""",
            )
        }

    // --- Minimal hand-rolled param extraction (avoids a JSON dependency). ---

    private fun parseDurationMs(paramsJson: String?): Int? =
        parseNumber(paramsJson, key = "durationMs")?.toIntOrNull()

    private fun parseFps(paramsJson: String?): Double? =
        parseNumber(paramsJson, key = "fps")?.toDoubleOrNull()

    private fun parseScreenIndex(paramsJson: String?): Int? =
        parseNumber(paramsJson, key = "screenIndex")?.toIntOrNull()

    /**
     * Returns the raw numeric token following `"key":`, or null. This is a
     * minimal scan, not a JSON parser; it assumes the key appears at most once
     * and its value is an unquoted number.
     */
    private fun parseNumber(paramsJson: String?, key: String): String? {
        val raw = paramsJson ?: return null
        val needle = "\"$key\""
        val idx = raw.indexOf(needle)
        if (idx < 0) return null
        val colon = raw.indexOf(':', idx + needle.length)
        if (colon < 0) return null
        val tail = raw.substring(colon + 1).trimStart()
        return tail.takeWhile { it.isDigit() || it == '.' || it == '-' }
    }

    /** Returns the quoted string value following `"key":`, or null. Same caveats as [parseNumber]. */
    private fun parseString(paramsJson: String?, key: String): String? {
        val raw = paramsJson ?: return null
        val needle = "\"$key\""
        val idx = raw.indexOf(needle)
        if (idx < 0) return null
        val colon = raw.indexOf(':', idx + needle.length)
        if (colon < 0) return null
        val tail = raw.substring(colon + 1).trimStart()
        if (!tail.startsWith('\"')) return null
        val rest = tail.drop(1)
        val end = rest.indexOf('\"')
        if (end < 0) return null
        return rest.substring(0, end)
    }

    /**
     * Estimates ~2 bits per pixel per frame, clamped to 1–12 Mbps.
     * Clamping happens in Long space so width*height*fps cannot overflow Int
     * (e.g. 4K at 60fps) and wrap negative before the clamp.
     */
    private fun estimateBitrate(width: Int, height: Int, fps: Int): Int {
        val pixels = width.toLong() * height.toLong()
        val raw = pixels * fps.toLong() * 2L
        return raw.coerceIn(1_000_000L, 12_000_000L).toInt()
    }
}

View File

@@ -3,6 +3,7 @@ package com.steipete.clawdis.node.protocol
/** Capability identifiers advertised in the bridge hello; raw values are wire-stable. */
enum class ClawdisCapability(val rawValue: String) {
    Canvas("canvas"),
    Camera("camera"),
    Screen("screen"),
    VoiceWake("voiceWake"),
}
@@ -39,3 +40,12 @@ enum class ClawdisCameraCommand(val rawValue: String) {
const val NamespacePrefix: String = "camera."
}
}
/** Screen-namespace bridge commands; raw values are wire-stable. */
enum class ClawdisScreenCommand(val rawValue: String) {
    // Capture an mp4 recording of the device screen.
    Record("screen.record"),
    ;
    companion object {
        /** Prefix shared by all screen commands; used for command routing. */
        const val NamespacePrefix: String = "screen."
    }
}

View File

@@ -24,6 +24,12 @@ class ClawdisProtocolConstantsTest {
fun capabilitiesUseStableStrings() {
assertEquals("canvas", ClawdisCapability.Canvas.rawValue)
assertEquals("camera", ClawdisCapability.Camera.rawValue)
assertEquals("screen", ClawdisCapability.Screen.rawValue)
assertEquals("voiceWake", ClawdisCapability.VoiceWake.rawValue)
}
@Test
fun screenCommandsUseStableStrings() {
assertEquals("screen.record", ClawdisScreenCommand.Record.rawValue)
}
}

View File

@@ -156,7 +156,7 @@ final class BridgeConnectionController {
}
private func currentCaps() -> [String] {
var caps = [ClawdisCapability.canvas.rawValue]
var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
// Default-on: if the key doesn't exist yet, treat it as enabled.
let cameraEnabled =
@@ -181,6 +181,7 @@ final class BridgeConnectionController {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())

View File

@@ -17,6 +17,7 @@ final class NodeAppModel {
var isBackgrounded: Bool = false
let screen = ScreenController()
let camera = CameraController()
private let screenRecorder = ScreenRecordService()
var bridgeStatusText: String = "Offline"
var bridgeServerName: String?
var bridgeRemoteAddress: String?
@@ -364,13 +365,15 @@ final class NodeAppModel {
private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse {
let command = req.command
if command.hasPrefix("canvas.") || command.hasPrefix("camera."), self.isBackgrounded {
if (command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.")),
self.isBackgrounded
{
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .backgroundUnavailable,
message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground"))
message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground"))
}
if command.hasPrefix("camera."), !self.isCameraEnabled() {
@@ -524,6 +527,36 @@ final class NodeAppModel {
self.showCameraHUD(text: "Clip captured", kind: .success, autoHideSeconds: 1.8)
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisScreenCommand.record.rawValue:
let params = (try? Self.decodeParams(ClawdisScreenRecordParams.self, from: req.paramsJSON)) ??
ClawdisScreenRecordParams()
if let format = params.format, format.lowercased() != "mp4" {
throw NSError(domain: "Screen", code: 30, userInfo: [
NSLocalizedDescriptionKey: "INVALID_REQUEST: screen format must be mp4",
])
}
let path = try await self.screenRecorder.record(
screenIndex: params.screenIndex,
durationMs: params.durationMs,
fps: params.fps,
outPath: nil)
defer { try? FileManager.default.removeItem(atPath: path) }
let data = try Data(contentsOf: URL(fileURLWithPath: path))
struct Payload: Codable {
var format: String
var base64: String
var durationMs: Int?
var fps: Double?
var screenIndex: Int?
}
let payload = try Self.encodePayload(Payload(
format: "mp4",
base64: data.base64EncodedString(),
durationMs: params.durationMs,
fps: params.fps,
screenIndex: params.screenIndex))
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
default:
return BridgeInvokeResponse(
id: req.id,

View File

@@ -0,0 +1,205 @@
import AVFoundation
import UIKit
@MainActor
final class ScreenRecordService {
    /// Failure modes surfaced to the bridge invoke handler.
    enum ScreenRecordError: LocalizedError {
        case noWindow
        case invalidScreenIndex(Int)
        case captureFailed(String)
        case writeFailed(String)
        var errorDescription: String? {
            switch self {
            case .noWindow:
                return "Screen capture unavailable"
            case let .invalidScreenIndex(idx):
                return "Invalid screen index \(idx)"
            case let .captureFailed(msg):
                return msg
            case let .writeFailed(msg):
                return msg
            }
        }
    }
    /// Records the app's key window by repeatedly snapshotting the view
    /// hierarchy (`drawHierarchy`) at ~`fps` frames per second for
    /// `durationMs`, encoding the frames to an H.264 mp4 with AVAssetWriter.
    ///
    /// - Parameters:
    ///   - screenIndex: must be nil or 0 (only one screen is supported).
    ///   - durationMs: clamped to 250...60_000; defaults to 10_000.
    ///   - fps: clamped to 1...30; defaults to 10.
    ///   - outPath: destination path; a temp file is used when nil/blank.
    /// - Returns: the path of the written mp4 (caller owns/deletes the file).
    /// - Throws: `ScreenRecordError` on capture or write failure.
    ///
    /// NOTE(review): if a frame capture or append throws mid-loop, the writer
    /// is never cancelled (`cancelWriting()`), leaving a partial file at the
    /// output URL — confirm whether cleanup on the error path is intended.
    func record(
        screenIndex: Int?,
        durationMs: Int?,
        fps: Double?,
        outPath: String?) async throws -> String
    {
        let durationMs = Self.clampDurationMs(durationMs)
        let fps = Self.clampFps(fps)
        let fpsInt = Int32(fps.rounded())
        let fpsValue = Double(fpsInt)
        // Resolve destination: explicit path wins, otherwise a unique temp file.
        let outURL: URL = {
            if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                return URL(fileURLWithPath: outPath)
            }
            return FileManager.default.temporaryDirectory
                .appendingPathComponent("clawdis-screen-record-\(UUID().uuidString).mp4")
        }()
        // AVAssetWriter refuses to overwrite an existing file.
        try? FileManager.default.removeItem(at: outURL)
        if let idx = screenIndex, idx != 0 {
            throw ScreenRecordError.invalidScreenIndex(idx)
        }
        guard let window = Self.resolveKeyWindow() else {
            throw ScreenRecordError.noWindow
        }
        // Encode at native pixel resolution (points * screen scale).
        let size = window.bounds.size
        let scale = window.screen.scale
        let widthPx = max(1, Int(size.width * scale))
        let heightPx = max(1, Int(size.height * scale))
        let writer = try AVAssetWriter(outputURL: outURL, fileType: .mp4)
        let settings: [String: Any] = [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey: widthPx,
            AVVideoHeightKey: heightPx,
        ]
        let input = AVAssetWriterInput(mediaType: .video, outputSettings: settings)
        // Offline-style encoding: frames are pushed as fast as they're captured.
        input.expectsMediaDataInRealTime = false
        let attrs: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA,
            kCVPixelBufferWidthKey as String: widthPx,
            kCVPixelBufferHeightKey as String: heightPx,
            kCVPixelBufferCGImageCompatibilityKey as String: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey as String: true,
        ]
        let adaptor = AVAssetWriterInputPixelBufferAdaptor(
            assetWriterInput: input,
            sourcePixelBufferAttributes: attrs)
        guard writer.canAdd(input) else {
            throw ScreenRecordError.writeFailed("Cannot add video input")
        }
        writer.add(input)
        guard writer.startWriting() else {
            throw ScreenRecordError.writeFailed(writer.error?.localizedDescription ?? "Failed to start writer")
        }
        writer.startSession(atSourceTime: .zero)
        // One frame per 1/fps of requested duration, rounded up so short
        // durations still produce at least one frame.
        let frameCount = max(1, Int((Double(durationMs) / 1000.0 * fpsValue).rounded(.up)))
        let frameDuration = CMTime(value: 1, timescale: fpsInt)
        let frameSleepNs = UInt64(1_000_000_000.0 / fpsValue)
        for frame in 0..<frameCount {
            // Back off until the encoder can accept another frame.
            while !input.isReadyForMoreMediaData {
                try await Task.sleep(nanoseconds: 10_000_000)
            }
            // autoreleasepool cannot throw, so the error is smuggled out.
            var frameError: Error?
            autoreleasepool {
                do {
                    guard let image = Self.captureImage(window: window, size: size) else {
                        throw ScreenRecordError.captureFailed("Failed to capture frame")
                    }
                    guard let buffer = Self.pixelBuffer(from: image, width: widthPx, height: heightPx) else {
                        throw ScreenRecordError.captureFailed("Failed to render frame")
                    }
                    // Presentation time = frame index / fps.
                    let time = CMTimeMultiply(frameDuration, multiplier: Int32(frame))
                    if !adaptor.append(buffer, withPresentationTime: time) {
                        throw ScreenRecordError.writeFailed("Failed to append frame")
                    }
                } catch {
                    frameError = error
                }
            }
            if let frameError { throw frameError }
            // Pace capture to roughly real time (no sleep after the last frame).
            if frame < frameCount - 1 {
                try await Task.sleep(nanoseconds: frameSleepNs)
            }
        }
        input.markAsFinished()
        // Bridge AVAssetWriter's completion callback into async/await.
        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
            writer.finishWriting {
                if let err = writer.error {
                    cont.resume(throwing: ScreenRecordError.writeFailed(err.localizedDescription))
                } else if writer.status != .completed {
                    cont.resume(throwing: ScreenRecordError.writeFailed("Failed to finalize video"))
                } else {
                    cont.resume()
                }
            }
        }
        return outURL.path
    }
    /// Clamps the requested duration to 250ms...60s (default 10s).
    private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
        let v = ms ?? 10_000
        return min(60_000, max(250, v))
    }
    /// Clamps the requested frame rate to 1...30 fps (default 10; non-finite → 10).
    private nonisolated static func clampFps(_ fps: Double?) -> Double {
        let v = fps ?? 10
        if !v.isFinite { return 10 }
        return min(30, max(1, v))
    }
    /// Finds a window to snapshot: the key window of any connected scene,
    /// falling back to a scene's first window.
    private nonisolated static func resolveKeyWindow() -> UIWindow? {
        let scenes = UIApplication.shared.connectedScenes
        for scene in scenes {
            guard let windowScene = scene as? UIWindowScene else { continue }
            if let window = windowScene.windows.first(where: { $0.isKeyWindow }) {
                return window
            }
            if let window = windowScene.windows.first {
                return window
            }
        }
        return nil
    }
    /// Snapshots the window's view hierarchy into a CGImage at screen scale.
    /// `afterScreenUpdates: false` captures the currently displayed contents.
    private nonisolated static func captureImage(window: UIWindow, size: CGSize) -> CGImage? {
        let format = UIGraphicsImageRendererFormat()
        format.scale = window.screen.scale
        let renderer = UIGraphicsImageRenderer(size: size, format: format)
        let image = renderer.image { _ in
            window.drawHierarchy(in: CGRect(origin: .zero, size: size), afterScreenUpdates: false)
        }
        return image.cgImage
    }
    /// Draws a CGImage into a newly created 32BGRA CVPixelBuffer sized
    /// width×height, returning nil if buffer or context creation fails.
    private nonisolated static func pixelBuffer(from image: CGImage, width: Int, height: Int) -> CVPixelBuffer? {
        var buffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32BGRA,
            [
                kCVPixelBufferCGImageCompatibilityKey: true,
                kCVPixelBufferCGBitmapContextCompatibilityKey: true,
            ] as CFDictionary,
            &buffer)
        guard status == kCVReturnSuccess, let buffer else { return nil }
        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
        guard let context = CGContext(
            data: CVPixelBufferGetBaseAddress(buffer),
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: CVPixelBufferGetBytesPerRow(buffer),
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
        ) else {
            return nil
        }
        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
        return buffer
    }
}

View File

@@ -283,7 +283,7 @@ struct SettingsTab: View {
}
private func currentCaps() -> [String] {
var caps = [ClawdisCapability.canvas.rawValue]
var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
let cameraEnabled =
UserDefaults.standard.object(forKey: "camera.enabled") == nil
@@ -307,6 +307,7 @@ struct SettingsTab: View {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())

View File

@@ -98,7 +98,7 @@ final class MacNodeModeCoordinator {
}
private func currentCaps() -> [String] {
var caps: [String] = [ClawdisCapability.canvas.rawValue]
var caps: [String] = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
if UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false {
caps.append(ClawdisCapability.camera.rawValue)
}

View File

@@ -74,6 +74,14 @@ actor MacNodeRuntime {
return try await self.handleA2UIPush(req)
case ClawdisCameraCommand.snap.rawValue:
guard Self.cameraEnabled() else {
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .unavailable,
message: "CAMERA_DISABLED: enable Camera in Settings"))
}
let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ??
ClawdisCameraSnapParams()
let res = try await self.cameraCapture.snap(
@@ -94,6 +102,14 @@ actor MacNodeRuntime {
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisCameraCommand.clip.rawValue:
guard Self.cameraEnabled() else {
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .unavailable,
message: "CAMERA_DISABLED: enable Camera in Settings"))
}
let params = (try? Self.decodeParams(ClawdisCameraClipParams.self, from: req.paramsJSON)) ??
ClawdisCameraClipParams()
let res = try await self.cameraCapture.clip(
@@ -119,6 +135,12 @@ actor MacNodeRuntime {
case MacNodeScreenCommand.record.rawValue:
let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ??
MacNodeScreenRecordParams()
if let format = params.format?.lowercased(), !format.isEmpty, format != "mp4" {
return Self.errorResponse(
req,
code: .invalidRequest,
message: "INVALID_REQUEST: screen format must be mp4")
}
let path = try await self.screenRecorder.record(
screenIndex: params.screenIndex,
durationMs: params.durationMs,
@@ -134,7 +156,7 @@ actor MacNodeRuntime {
var screenIndex: Int?
}
let payload = try Self.encodePayload(ScreenPayload(
format: params.format ?? "mp4",
format: "mp4",
base64: data.base64EncodedString(),
durationMs: params.durationMs,
fps: params.fps,
@@ -224,6 +246,10 @@ actor MacNodeRuntime {
return json
}
private nonisolated static func cameraEnabled() -> Bool {
UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false
}
private static func errorResponse(
_ req: BridgeInvokeRequest,
code: ClawdisNodeErrorCode,

View File

@@ -3,5 +3,6 @@ import Foundation
public enum ClawdisCapability: String, Codable, Sendable {
case canvas
case camera
case screen
case voiceWake
}

View File

@@ -47,6 +47,13 @@ clawdis nodes canvas snapshot --node <idOrNameOrIp> --format png
clawdis nodes canvas snapshot --node <idOrNameOrIp> --format jpg --max-width 1200 --quality 0.9
```
Simple shortcut (auto-picks a single connected node if possible):
```bash
clawdis canvas snapshot --format png
clawdis canvas snapshot --format jpg --max-width 1200 --quality 0.9
```
## Photos + videos (node camera)
Photos (`jpg`):
@@ -68,14 +75,19 @@ Notes:
- Clip duration is clamped (currently `<= 60s`) to avoid oversized base64 payloads.
- Android will prompt for `CAMERA`/`RECORD_AUDIO` permissions when possible; denied permissions fail with `*_PERMISSION_REQUIRED`.
## Screen recordings (mac node)
## Screen recordings (nodes)
Mac node mode exposes `screen.record` (mp4). Example:
Nodes expose `screen.record` (mp4). Example:
```bash
clawdis nodes screen record --node <idOrNameOrIp> --duration 10s --fps 10
```
Notes:
- `screen.record` requires the node app to be foregrounded.
- Android will show the system screen-capture prompt before recording.
- Screen recordings are clamped to `<= 60s`.
## Mac node mode
- The macOS menubar app connects to the Gateway bridge as a node (so `clawdis nodes …` works against this Mac).

244
src/cli/canvas-cli.ts Normal file
View File

@@ -0,0 +1,244 @@
import type { Command } from "commander";
import { callGateway, randomIdempotencyKey } from "../gateway/call.js";
import { defaultRuntime } from "../runtime.js";
import { writeBase64ToFile } from "./nodes-camera.js";
import {
canvasSnapshotTempPath,
parseCanvasSnapshotPayload,
} from "./nodes-canvas.js";
// Flags accepted by `clawdis canvas …` subcommands (gateway connection + output).
type CanvasOpts = {
  url?: string;
  token?: string;
  timeout?: string;
  json?: boolean;
  node?: string;
  format?: string;
  maxWidth?: string;
  quality?: string;
};
// Shape of one entry in a gateway node.list response.
type NodeListNode = {
  nodeId: string;
  displayName?: string;
  platform?: string;
  remoteIp?: string;
  caps?: string[];
  connected?: boolean;
};
// A not-yet-approved pairing request, as returned by node.pair.list.
type PendingRequest = {
  requestId: string;
  nodeId: string;
  displayName?: string;
  remoteIp?: string;
};
// An approved (paired) node, as returned by node.pair.list.
type PairedNode = {
  nodeId: string;
  displayName?: string;
  remoteIp?: string;
};
// Combined node.pair.list response shape.
type PairingList = {
  pending: PendingRequest[];
  paired: PairedNode[];
};
// Attaches the shared gateway-connection flags to a canvas subcommand.
const canvasCallOpts = (cmd: Command) =>
  cmd
    .option("--url <url>", "Gateway WebSocket URL", "ws://127.0.0.1:18789")
    .option("--token <token>", "Gateway token (if required)")
    .option("--timeout <ms>", "Timeout in ms", "10000")
    .option("--json", "Output JSON", false);
// Thin wrapper over callGateway applying the CLI's connection flags and
// identifying the caller as the CLI client.
const callGatewayCli = async (
  method: string,
  opts: CanvasOpts,
  params?: unknown,
) =>
  callGateway({
    url: opts.url,
    token: opts.token,
    method,
    params,
    timeoutMs: Number(opts.timeout ?? 10_000),
    clientName: "cli",
    mode: "cli",
  });
/** Extracts the `nodes` array from a node.list response, tolerating malformed input. */
function parseNodeList(value: unknown): NodeListNode[] {
  if (typeof value !== "object" || value === null) return [];
  const nodes = (value as Record<string, unknown>).nodes;
  return Array.isArray(nodes) ? (nodes as NodeListNode[]) : [];
}
/** Extracts pending/paired arrays from a node.pair.list response, tolerating malformed input. */
function parsePairingList(value: unknown): PairingList {
  const record: Record<string, unknown> =
    typeof value === "object" && value !== null
      ? (value as Record<string, unknown>)
      : {};
  return {
    pending: Array.isArray(record.pending) ? (record.pending as PendingRequest[]) : [],
    paired: Array.isArray(record.paired) ? (record.paired as PairedNode[]) : [],
  };
}
/** Canonicalizes a name for fuzzy matching: lowercase, non-alnum runs collapsed to single dashes, edges trimmed. */
function normalizeNodeKey(value: string) {
  const dashed = value.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
/** Lists nodes via node.list, falling back to the paired list on gateways without it. */
async function loadNodes(opts: CanvasOpts): Promise<NodeListNode[]> {
  try {
    return parseNodeList(await callGatewayCli("node.list", opts, {}));
  } catch {
    const fallback = (await callGatewayCli("node.pair.list", opts, {})) as unknown;
    return parsePairingList(fallback).paired.map((node) => ({
      nodeId: node.nodeId,
      displayName: node.displayName,
      remoteIp: node.remoteIp,
    }));
  }
}
/**
 * Auto-picks a node when no --node flag was given. Preference order:
 * canvas-capable nodes (nodes without a caps list are assumed capable),
 * then connected ones. A single candidate wins outright; otherwise a lone
 * local Mac node breaks the tie. Returns null when no unambiguous choice exists.
 */
function pickDefaultNode(nodes: NodeListNode[]): NodeListNode | null {
  const canvasCapable = nodes.filter((node) =>
    Array.isArray(node.caps) ? node.caps.includes("canvas") : true,
  );
  if (canvasCapable.length === 0) return null;
  const online = canvasCapable.filter((node) => node.connected);
  const pool = online.length > 0 ? online : canvasCapable;
  if (pool.length === 1) return pool[0];
  const localMacs = pool.filter(
    (node) =>
      node.platform?.toLowerCase().startsWith("mac") &&
      typeof node.nodeId === "string" &&
      node.nodeId.startsWith("mac-"),
  );
  return localMacs.length === 1 ? localMacs[0] : null;
}
/**
 * Resolves a user-supplied node query (exact id, display name, IP, or a
 * >=6-char id prefix) to a node id. With no query, auto-picks when exactly
 * one sensible candidate exists. Throws on unknown or ambiguous queries,
 * listing known nodes in the message.
 */
async function resolveNodeId(opts: CanvasOpts, query?: string) {
  const nodes = await loadNodes(opts);
  const q = String(query ?? "").trim();
  if (!q) {
    const auto = pickDefaultNode(nodes);
    if (auto) return auto.nodeId;
    throw new Error(
      "node required (use --node or ensure only one connected node is available)",
    );
  }
  const qNorm = normalizeNodeKey(q);
  const matches = nodes.filter((candidate) => {
    if (candidate.nodeId === q) return true;
    if (typeof candidate.remoteIp === "string" && candidate.remoteIp === q) {
      return true;
    }
    const name =
      typeof candidate.displayName === "string" ? candidate.displayName : "";
    if (name && normalizeNodeKey(name) === qNorm) return true;
    return q.length >= 6 && candidate.nodeId.startsWith(q);
  });
  if (matches.length === 1) return matches[0].nodeId;
  if (matches.length === 0) {
    const known = nodes
      .map((n) => n.displayName || n.remoteIp || n.nodeId)
      .filter(Boolean)
      .join(", ");
    throw new Error(`unknown node: ${q}${known ? ` (known: ${known})` : ""}`);
  }
  throw new Error(
    `ambiguous node: ${q} (matches: ${matches
      .map((n) => n.displayName || n.remoteIp || n.nodeId)
      .join(", ")})`,
  );
}
/** Trims/lowercases a format flag and maps the "jpg" alias to "jpeg". */
function normalizeFormat(format: string) {
  const value = format.trim().toLowerCase();
  return value === "jpg" ? "jpeg" : value;
}
/**
 * Registers the `canvas` CLI command tree (currently `canvas snapshot`),
 * which resolves a target node and invokes canvas.snapshot over the gateway,
 * writing the returned image to a temp file and printing `MEDIA:<path>`.
 */
export function registerCanvasCli(program: Command) {
  const canvas = program
    .command("canvas")
    .description("Render the canvas to a snapshot via nodes");
  canvasCallOpts(
    canvas
      .command("snapshot")
      .description("Capture a canvas snapshot (prints MEDIA:<path>)")
      .option("--node <idOrNameOrIp>", "Node id, name, or IP")
      .option("--format <png|jpg>", "Output format", "png")
      .option("--max-width <px>", "Max width (px)")
      .option("--quality <0-1>", "JPEG quality (default 0.82)")
      .action(async (opts: CanvasOpts) => {
        try {
          const nodeId = await resolveNodeId(opts, opts.node);
          // "jpg" is normalized to the wire-format name "jpeg".
          const format = normalizeFormat(String(opts.format ?? "png"));
          if (format !== "png" && format !== "jpeg") {
            throw new Error("invalid format (use png or jpg)");
          }
          const maxWidth = opts.maxWidth
            ? Number.parseInt(String(opts.maxWidth), 10)
            : undefined;
          const quality = opts.quality
            ? Number.parseFloat(String(opts.quality))
            : undefined;
          const raw = (await callGatewayCli("node.invoke", opts, {
            nodeId,
            command: "canvas.snapshot",
            params: {
              format,
              // NaN from unparseable flags is dropped rather than sent.
              maxWidth: Number.isFinite(maxWidth) ? maxWidth : undefined,
              quality: Number.isFinite(quality) ? quality : undefined,
            },
            idempotencyKey: randomIdempotencyKey(),
          })) as unknown;
          const res =
            typeof raw === "object" && raw !== null
              ? (raw as { payload?: unknown })
              : {};
          const payload = parseCanvasSnapshotPayload(res.payload);
          // Decode the base64 image into a temp file and hand back its path.
          const filePath = canvasSnapshotTempPath({
            ext: payload.format === "jpeg" ? "jpg" : payload.format,
          });
          await writeBase64ToFile(filePath, payload.base64);
          if (opts.json) {
            defaultRuntime.log(
              JSON.stringify(
                {
                  file: {
                    path: filePath,
                  },
                },
                null,
                2,
              ),
            );
            return;
          }
          defaultRuntime.log(`MEDIA:${filePath}`);
        } catch (err) {
          // CLI contract: report the failure and exit non-zero.
          defaultRuntime.error(`canvas snapshot failed: ${String(err)}`);
          defaultRuntime.exit(1);
        }
      }),
  );
}

View File

@@ -607,6 +607,44 @@ describe("cli program", () => {
}
});
it("runs canvas snapshot and prints MEDIA path", async () => {
callGateway
.mockResolvedValueOnce({
ts: Date.now(),
nodes: [
{
nodeId: "mac-1",
displayName: "Mac Node",
platform: "macos",
connected: true,
caps: ["canvas"],
},
],
})
.mockResolvedValueOnce({
ok: true,
nodeId: "mac-1",
command: "canvas.snapshot",
payload: { format: "png", base64: "aGk=" },
});
const program = buildProgram();
runtime.log.mockClear();
await program.parseAsync(["canvas", "snapshot", "--format", "png"], {
from: "user",
});
const out = String(runtime.log.mock.calls[0]?.[0] ?? "");
const mediaPath = out.replace(/^MEDIA:/, "").trim();
expect(mediaPath).toMatch(/clawdis-canvas-snapshot-.*\.png$/);
try {
await expect(fs.readFile(mediaPath, "utf8")).resolves.toBe("hi");
} finally {
await fs.unlink(mediaPath).catch(() => {});
}
});
it("fails nodes camera snap on invalid facing", async () => {
callGateway.mockResolvedValueOnce({
ts: Date.now(),

View File

@@ -26,6 +26,7 @@ import { danger, info, setVerbose } from "../globals.js";
import { loginWeb, logoutWeb } from "../provider-web.js";
import { defaultRuntime } from "../runtime.js";
import { VERSION } from "../version.js";
import { registerCanvasCli } from "./canvas-cli.js";
import { registerCronCli } from "./cron-cli.js";
import { createDefaultDeps } from "./deps.js";
import { registerDnsCli } from "./dns-cli.js";
@@ -245,6 +246,7 @@ Examples:
}
});
registerCanvasCli(program);
registerGatewayCli(program);
registerNodesCli(program);
registerCronCli(program);