diff --git a/apps/android/app/src/main/AndroidManifest.xml b/apps/android/app/src/main/AndroidManifest.xml
index 0588e4c4e..e8547b4eb 100644
--- a/apps/android/app/src/main/AndroidManifest.xml
+++ b/apps/android/app/src/main/AndroidManifest.xml
@@ -4,6 +4,7 @@
+
+ android:foregroundServiceType="dataSync|microphone|mediaProjection" />
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt
index 6478f7fba..609c08bf8 100644
--- a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt
@@ -25,6 +25,7 @@ import kotlinx.coroutines.launch
class MainActivity : ComponentActivity() {
private val viewModel: MainViewModel by viewModels()
private lateinit var permissionRequester: PermissionRequester
+ private lateinit var screenCaptureRequester: ScreenCaptureRequester
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
@@ -35,8 +36,10 @@ class MainActivity : ComponentActivity() {
requestNotificationPermissionIfNeeded()
NodeForegroundService.start(this)
permissionRequester = PermissionRequester(this)
+ screenCaptureRequester = ScreenCaptureRequester(this)
viewModel.camera.attachLifecycleOwner(this)
viewModel.camera.attachPermissionRequester(permissionRequester)
+ viewModel.screenRecorder.attachScreenCaptureRequester(screenCaptureRequester)
lifecycleScope.launch {
repeatOnLifecycle(Lifecycle.State.STARTED) {
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt
index 216a37aee..245e0156c 100644
--- a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt
@@ -6,6 +6,7 @@ import com.steipete.clawdis.node.bridge.BridgeEndpoint
import com.steipete.clawdis.node.chat.OutgoingAttachment
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
+import com.steipete.clawdis.node.node.ScreenRecordManager
import kotlinx.coroutines.flow.StateFlow
class MainViewModel(app: Application) : AndroidViewModel(app) {
@@ -13,6 +14,7 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
val canvas: CanvasController = runtime.canvas
val camera: CameraCaptureManager = runtime.camera
+ val screenRecorder: ScreenRecordManager = runtime.screenRecorder
 val bridges: StateFlow<List<BridgeEndpoint>> = runtime.bridges
 val discoveryStatusText: StateFlow<String> = runtime.discoveryStatusText
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt
index c1cd428b5..5f6b4ec12 100644
--- a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt
@@ -17,11 +17,13 @@ import com.steipete.clawdis.node.bridge.BridgePairingClient
import com.steipete.clawdis.node.bridge.BridgeSession
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
+import com.steipete.clawdis.node.node.ScreenRecordManager
import com.steipete.clawdis.node.protocol.ClawdisCapability
import com.steipete.clawdis.node.protocol.ClawdisCameraCommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UIAction
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UICommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasCommand
+import com.steipete.clawdis.node.protocol.ClawdisScreenCommand
import com.steipete.clawdis.node.voice.VoiceWakeManager
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
@@ -51,6 +53,7 @@ class NodeRuntime(context: Context) {
val prefs = SecurePrefs(appContext)
val canvas = CanvasController()
val camera = CameraCaptureManager(appContext)
+ val screenRecorder = ScreenRecordManager(appContext)
private val json = Json { ignoreUnknownKeys = true }
private val externalAudioCaptureActive = MutableStateFlow(false)
@@ -287,6 +290,7 @@ class NodeRuntime(context: Context) {
add(ClawdisCanvasA2UICommand.Push.rawValue)
add(ClawdisCanvasA2UICommand.PushJSONL.rawValue)
add(ClawdisCanvasA2UICommand.Reset.rawValue)
+ add(ClawdisScreenCommand.Record.rawValue)
if (cameraEnabled.value) {
add(ClawdisCameraCommand.Snap.rawValue)
add(ClawdisCameraCommand.Clip.rawValue)
@@ -294,17 +298,18 @@ class NodeRuntime(context: Context) {
}
val resolved =
if (storedToken.isNullOrBlank()) {
- _statusText.value = "Pairing…"
- val caps = buildList {
- add(ClawdisCapability.Canvas.rawValue)
- if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
- if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
- add(ClawdisCapability.VoiceWake.rawValue)
- }
- }
- BridgePairingClient().pairAndHello(
- endpoint = endpoint,
- hello =
+ _statusText.value = "Pairing…"
+ val caps = buildList {
+ add(ClawdisCapability.Canvas.rawValue)
+ add(ClawdisCapability.Screen.rawValue)
+ if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
+ if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
+ add(ClawdisCapability.VoiceWake.rawValue)
+ }
+ }
+ BridgePairingClient().pairAndHello(
+ endpoint = endpoint,
+ hello =
BridgePairingClient.Hello(
nodeId = instanceId.value,
displayName = displayName.value,
@@ -342,6 +347,7 @@ class NodeRuntime(context: Context) {
caps =
buildList {
add(ClawdisCapability.Canvas.rawValue)
+ add(ClawdisCapability.Screen.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
@@ -534,12 +540,13 @@ class NodeRuntime(context: Context) {
if (
command.startsWith(ClawdisCanvasCommand.NamespacePrefix) ||
command.startsWith(ClawdisCanvasA2UICommand.NamespacePrefix) ||
- command.startsWith(ClawdisCameraCommand.NamespacePrefix)
+ command.startsWith(ClawdisCameraCommand.NamespacePrefix) ||
+ command.startsWith(ClawdisScreenCommand.NamespacePrefix)
) {
if (!isForeground.value) {
return BridgeSession.InvokeResult.error(
code = "NODE_BACKGROUND_UNAVAILABLE",
- message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground",
+ message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground",
)
}
}
@@ -649,6 +656,16 @@ class NodeRuntime(context: Context) {
if (includeAudio) externalAudioCaptureActive.value = false
}
}
+ ClawdisScreenCommand.Record.rawValue -> {
+ val res =
+ try {
+ screenRecorder.record(paramsJson)
+ } catch (err: Throwable) {
+ val (code, message) = invokeErrorFromThrowable(err)
+ return BridgeSession.InvokeResult.error(code = code, message = message)
+ }
+ BridgeSession.InvokeResult.ok(res.payloadJson)
+ }
else ->
BridgeSession.InvokeResult.error(
code = "INVALID_REQUEST",
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/PermissionRequester.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/PermissionRequester.kt
index 8c6f65e56..7879c85d5 100644
--- a/apps/android/app/src/main/java/com/steipete/clawdis/node/PermissionRequester.kt
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/PermissionRequester.kt
@@ -2,6 +2,7 @@ package com.steipete.clawdis.node
import android.content.pm.PackageManager
import android.content.Intent
+import android.Manifest
import android.net.Uri
import android.provider.Settings
import androidx.appcompat.app.AlertDialog
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/ScreenCaptureRequester.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/ScreenCaptureRequester.kt
new file mode 100644
index 000000000..53c9e173c
--- /dev/null
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/ScreenCaptureRequester.kt
@@ -0,0 +1,65 @@
+package com.steipete.clawdis.node
+
+import android.app.Activity
+import android.content.Context
+import android.content.Intent
+import android.media.projection.MediaProjectionManager
+import androidx.activity.ComponentActivity
+import androidx.activity.result.ActivityResultLauncher
+import androidx.activity.result.contract.ActivityResultContracts
+import androidx.appcompat.app.AlertDialog
+import kotlinx.coroutines.CompletableDeferred
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.sync.Mutex
+import kotlinx.coroutines.sync.withLock
+import kotlinx.coroutines.withContext
+import kotlinx.coroutines.withTimeout
+import kotlinx.coroutines.suspendCancellableCoroutine
+import kotlin.coroutines.resume
+
+class ScreenCaptureRequester(private val activity: ComponentActivity) {
+ data class CaptureResult(val resultCode: Int, val data: Intent)
+
+ private val mutex = Mutex()
+    private var pending: CompletableDeferred<CaptureResult?>? = null
+
+    private val launcher: ActivityResultLauncher<Intent> =
+ activity.registerForActivityResult(ActivityResultContracts.StartActivityForResult()) { result ->
+ val p = pending
+ pending = null
+ val data = result.data
+ if (result.resultCode == Activity.RESULT_OK && data != null) {
+ p?.complete(CaptureResult(result.resultCode, data))
+ } else {
+ p?.complete(null)
+ }
+ }
+
+ suspend fun requestCapture(timeoutMs: Long = 20_000): CaptureResult? =
+ mutex.withLock {
+ val proceed = showRationaleDialog()
+ if (!proceed) return null
+
+ val mgr = activity.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
+ val intent = mgr.createScreenCaptureIntent()
+
+            val deferred = CompletableDeferred<CaptureResult?>()
+ pending = deferred
+ withContext(Dispatchers.Main) { launcher.launch(intent) }
+
+ withContext(Dispatchers.Default) { withTimeout(timeoutMs) { deferred.await() } }
+ }
+
+ private suspend fun showRationaleDialog(): Boolean =
+ withContext(Dispatchers.Main) {
+ suspendCancellableCoroutine { cont ->
+ AlertDialog.Builder(activity)
+ .setTitle("Screen recording required")
+ .setMessage("Clawdis needs to record the screen for this command.")
+ .setPositiveButton("Continue") { _, _ -> cont.resume(true) }
+ .setNegativeButton("Not now") { _, _ -> cont.resume(false) }
+ .setOnCancelListener { cont.resume(false) }
+ .show()
+ }
+ }
+}
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/node/ScreenRecordManager.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/node/ScreenRecordManager.kt
new file mode 100644
index 000000000..4de891ff3
--- /dev/null
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/node/ScreenRecordManager.kt
@@ -0,0 +1,147 @@
+package com.steipete.clawdis.node.node
+
+import android.content.Context
+import android.hardware.display.DisplayManager
+import android.media.MediaRecorder
+import android.media.projection.MediaProjectionManager
+import android.util.Base64
+import com.steipete.clawdis.node.ScreenCaptureRequester
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.withContext
+import java.io.File
+import kotlin.math.roundToInt
+
+class ScreenRecordManager(private val context: Context) {
+ data class Payload(val payloadJson: String)
+
+ @Volatile private var screenCaptureRequester: ScreenCaptureRequester? = null
+
+ fun attachScreenCaptureRequester(requester: ScreenCaptureRequester) {
+ screenCaptureRequester = requester
+ }
+
+ suspend fun record(paramsJson: String?): Payload =
+ withContext(Dispatchers.Default) {
+ val requester =
+ screenCaptureRequester
+ ?: throw IllegalStateException(
+ "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
+ )
+
+ val durationMs = (parseDurationMs(paramsJson) ?: 10_000).coerceIn(250, 60_000)
+ val fps = (parseFps(paramsJson) ?: 10.0).coerceIn(1.0, 60.0)
+ val fpsInt = fps.roundToInt().coerceIn(1, 60)
+ val screenIndex = parseScreenIndex(paramsJson)
+ val format = parseString(paramsJson, key = "format")
+ if (format != null && format.lowercase() != "mp4") {
+ throw IllegalArgumentException("INVALID_REQUEST: screen format must be mp4")
+ }
+ if (screenIndex != null && screenIndex != 0) {
+ throw IllegalArgumentException("INVALID_REQUEST: screenIndex must be 0 on Android")
+ }
+
+ val capture = requester.requestCapture()
+ ?: throw IllegalStateException(
+ "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
+ )
+
+ val mgr =
+ context.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
+ val projection = mgr.getMediaProjection(capture.resultCode, capture.data)
+ ?: throw IllegalStateException("UNAVAILABLE: screen capture unavailable")
+
+ val metrics = context.resources.displayMetrics
+ val width = metrics.widthPixels
+ val height = metrics.heightPixels
+ val densityDpi = metrics.densityDpi
+
+ val file = File.createTempFile("clawdis-screen-", ".mp4")
+ val recorder = MediaRecorder()
+ var virtualDisplay: android.hardware.display.VirtualDisplay? = null
+ try {
+ recorder.setVideoSource(MediaRecorder.VideoSource.SURFACE)
+ recorder.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4)
+ recorder.setVideoEncoder(MediaRecorder.VideoEncoder.H264)
+ recorder.setVideoSize(width, height)
+ recorder.setVideoFrameRate(fpsInt)
+ recorder.setVideoEncodingBitRate(estimateBitrate(width, height, fpsInt))
+ recorder.setOutputFile(file.absolutePath)
+ recorder.prepare()
+
+ val surface = recorder.surface
+ virtualDisplay =
+ projection.createVirtualDisplay(
+ "clawdis-screen",
+ width,
+ height,
+ densityDpi,
+ DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
+ surface,
+ null,
+ null,
+ )
+
+ recorder.start()
+ delay(durationMs.toLong())
+ } finally {
+ try {
+ recorder.stop()
+ } catch (_: Throwable) {
+ // ignore
+ }
+ recorder.reset()
+ recorder.release()
+ virtualDisplay?.release()
+ projection.stop()
+ }
+
+ val bytes = withContext(Dispatchers.IO) { file.readBytes() }
+ file.delete()
+ val base64 = Base64.encodeToString(bytes, Base64.NO_WRAP)
+ Payload(
+ """{"format":"mp4","base64":"$base64","durationMs":$durationMs,"fps":$fpsInt,"screenIndex":0}""",
+ )
+ }
+
+ private fun parseDurationMs(paramsJson: String?): Int? =
+ parseNumber(paramsJson, key = "durationMs")?.toIntOrNull()
+
+ private fun parseFps(paramsJson: String?): Double? =
+ parseNumber(paramsJson, key = "fps")?.toDoubleOrNull()
+
+ private fun parseScreenIndex(paramsJson: String?): Int? =
+ parseNumber(paramsJson, key = "screenIndex")?.toIntOrNull()
+
+ private fun parseNumber(paramsJson: String?, key: String): String? {
+ val raw = paramsJson ?: return null
+ val needle = "\"$key\""
+ val idx = raw.indexOf(needle)
+ if (idx < 0) return null
+ val colon = raw.indexOf(':', idx + needle.length)
+ if (colon < 0) return null
+ val tail = raw.substring(colon + 1).trimStart()
+ return tail.takeWhile { it.isDigit() || it == '.' || it == '-' }
+ }
+
+ private fun parseString(paramsJson: String?, key: String): String? {
+ val raw = paramsJson ?: return null
+ val needle = "\"$key\""
+ val idx = raw.indexOf(needle)
+ if (idx < 0) return null
+ val colon = raw.indexOf(':', idx + needle.length)
+ if (colon < 0) return null
+ val tail = raw.substring(colon + 1).trimStart()
+ if (!tail.startsWith('\"')) return null
+ val rest = tail.drop(1)
+ val end = rest.indexOf('\"')
+ if (end < 0) return null
+ return rest.substring(0, end)
+ }
+
+ private fun estimateBitrate(width: Int, height: Int, fps: Int): Int {
+ val pixels = width.toLong() * height.toLong()
+ val raw = (pixels * fps.toLong() * 2L).toInt()
+ return raw.coerceIn(1_000_000, 12_000_000)
+ }
+}
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstants.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstants.kt
index 6494d5c79..fdd77e026 100644
--- a/apps/android/app/src/main/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstants.kt
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstants.kt
@@ -3,6 +3,7 @@ package com.steipete.clawdis.node.protocol
enum class ClawdisCapability(val rawValue: String) {
Canvas("canvas"),
Camera("camera"),
+ Screen("screen"),
VoiceWake("voiceWake"),
}
@@ -39,3 +40,12 @@ enum class ClawdisCameraCommand(val rawValue: String) {
const val NamespacePrefix: String = "camera."
}
}
+
+enum class ClawdisScreenCommand(val rawValue: String) {
+ Record("screen.record"),
+ ;
+
+ companion object {
+ const val NamespacePrefix: String = "screen."
+ }
+}
diff --git a/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt b/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt
index 5ea73b4b1..05b1760ba 100644
--- a/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt
+++ b/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt
@@ -24,6 +24,12 @@ class ClawdisProtocolConstantsTest {
fun capabilitiesUseStableStrings() {
assertEquals("canvas", ClawdisCapability.Canvas.rawValue)
assertEquals("camera", ClawdisCapability.Camera.rawValue)
+ assertEquals("screen", ClawdisCapability.Screen.rawValue)
assertEquals("voiceWake", ClawdisCapability.VoiceWake.rawValue)
}
+
+ @Test
+ fun screenCommandsUseStableStrings() {
+ assertEquals("screen.record", ClawdisScreenCommand.Record.rawValue)
+ }
}
diff --git a/apps/ios/Sources/Bridge/BridgeConnectionController.swift b/apps/ios/Sources/Bridge/BridgeConnectionController.swift
index e8421a2c1..d83780569 100644
--- a/apps/ios/Sources/Bridge/BridgeConnectionController.swift
+++ b/apps/ios/Sources/Bridge/BridgeConnectionController.swift
@@ -156,7 +156,7 @@ final class BridgeConnectionController {
}
private func currentCaps() -> [String] {
- var caps = [ClawdisCapability.canvas.rawValue]
+ var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
// Default-on: if the key doesn't exist yet, treat it as enabled.
let cameraEnabled =
@@ -181,6 +181,7 @@ final class BridgeConnectionController {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
+ ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())
diff --git a/apps/ios/Sources/Model/NodeAppModel.swift b/apps/ios/Sources/Model/NodeAppModel.swift
index ca94a727b..1cb380f3b 100644
--- a/apps/ios/Sources/Model/NodeAppModel.swift
+++ b/apps/ios/Sources/Model/NodeAppModel.swift
@@ -17,6 +17,7 @@ final class NodeAppModel {
var isBackgrounded: Bool = false
let screen = ScreenController()
let camera = CameraController()
+ private let screenRecorder = ScreenRecordService()
var bridgeStatusText: String = "Offline"
var bridgeServerName: String?
var bridgeRemoteAddress: String?
@@ -364,13 +365,15 @@ final class NodeAppModel {
private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse {
let command = req.command
- if command.hasPrefix("canvas.") || command.hasPrefix("camera."), self.isBackgrounded {
+ if (command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.")),
+ self.isBackgrounded
+ {
return BridgeInvokeResponse(
id: req.id,
ok: false,
error: ClawdisNodeError(
code: .backgroundUnavailable,
- message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground"))
+ message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground"))
}
if command.hasPrefix("camera."), !self.isCameraEnabled() {
@@ -524,6 +527,36 @@ final class NodeAppModel {
self.showCameraHUD(text: "Clip captured", kind: .success, autoHideSeconds: 1.8)
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
+ case ClawdisScreenCommand.record.rawValue:
+ let params = (try? Self.decodeParams(ClawdisScreenRecordParams.self, from: req.paramsJSON)) ??
+ ClawdisScreenRecordParams()
+ if let format = params.format, format.lowercased() != "mp4" {
+ throw NSError(domain: "Screen", code: 30, userInfo: [
+ NSLocalizedDescriptionKey: "INVALID_REQUEST: screen format must be mp4",
+ ])
+ }
+ let path = try await self.screenRecorder.record(
+ screenIndex: params.screenIndex,
+ durationMs: params.durationMs,
+ fps: params.fps,
+ outPath: nil)
+ defer { try? FileManager.default.removeItem(atPath: path) }
+ let data = try Data(contentsOf: URL(fileURLWithPath: path))
+ struct Payload: Codable {
+ var format: String
+ var base64: String
+ var durationMs: Int?
+ var fps: Double?
+ var screenIndex: Int?
+ }
+ let payload = try Self.encodePayload(Payload(
+ format: "mp4",
+ base64: data.base64EncodedString(),
+ durationMs: params.durationMs,
+ fps: params.fps,
+ screenIndex: params.screenIndex))
+ return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
+
default:
return BridgeInvokeResponse(
id: req.id,
diff --git a/apps/ios/Sources/Screen/ScreenRecordService.swift b/apps/ios/Sources/Screen/ScreenRecordService.swift
new file mode 100644
index 000000000..54224ec26
--- /dev/null
+++ b/apps/ios/Sources/Screen/ScreenRecordService.swift
@@ -0,0 +1,205 @@
+import AVFoundation
+import UIKit
+
+@MainActor
+final class ScreenRecordService {
+ enum ScreenRecordError: LocalizedError {
+ case noWindow
+ case invalidScreenIndex(Int)
+ case captureFailed(String)
+ case writeFailed(String)
+
+ var errorDescription: String? {
+ switch self {
+ case .noWindow:
+ return "Screen capture unavailable"
+ case let .invalidScreenIndex(idx):
+ return "Invalid screen index \(idx)"
+ case let .captureFailed(msg):
+ return msg
+ case let .writeFailed(msg):
+ return msg
+ }
+ }
+ }
+
+ func record(
+ screenIndex: Int?,
+ durationMs: Int?,
+ fps: Double?,
+ outPath: String?) async throws -> String
+ {
+ let durationMs = Self.clampDurationMs(durationMs)
+ let fps = Self.clampFps(fps)
+ let fpsInt = Int32(fps.rounded())
+ let fpsValue = Double(fpsInt)
+
+ let outURL: URL = {
+ if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
+ return URL(fileURLWithPath: outPath)
+ }
+ return FileManager.default.temporaryDirectory
+ .appendingPathComponent("clawdis-screen-record-\(UUID().uuidString).mp4")
+ }()
+ try? FileManager.default.removeItem(at: outURL)
+
+ if let idx = screenIndex, idx != 0 {
+ throw ScreenRecordError.invalidScreenIndex(idx)
+ }
+
+ guard let window = Self.resolveKeyWindow() else {
+ throw ScreenRecordError.noWindow
+ }
+
+ let size = window.bounds.size
+ let scale = window.screen.scale
+ let widthPx = max(1, Int(size.width * scale))
+ let heightPx = max(1, Int(size.height * scale))
+
+ let writer = try AVAssetWriter(outputURL: outURL, fileType: .mp4)
+ let settings: [String: Any] = [
+ AVVideoCodecKey: AVVideoCodecType.h264,
+ AVVideoWidthKey: widthPx,
+ AVVideoHeightKey: heightPx,
+ ]
+ let input = AVAssetWriterInput(mediaType: .video, outputSettings: settings)
+ input.expectsMediaDataInRealTime = false
+
+ let attrs: [String: Any] = [
+ kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA,
+ kCVPixelBufferWidthKey as String: widthPx,
+ kCVPixelBufferHeightKey as String: heightPx,
+ kCVPixelBufferCGImageCompatibilityKey as String: true,
+ kCVPixelBufferCGBitmapContextCompatibilityKey as String: true,
+ ]
+ let adaptor = AVAssetWriterInputPixelBufferAdaptor(
+ assetWriterInput: input,
+ sourcePixelBufferAttributes: attrs)
+
+ guard writer.canAdd(input) else {
+ throw ScreenRecordError.writeFailed("Cannot add video input")
+ }
+ writer.add(input)
+
+ guard writer.startWriting() else {
+ throw ScreenRecordError.writeFailed(writer.error?.localizedDescription ?? "Failed to start writer")
+ }
+ writer.startSession(atSourceTime: .zero)
+
+ let frameCount = max(1, Int((Double(durationMs) / 1000.0 * fpsValue).rounded(.up)))
+ let frameDuration = CMTime(value: 1, timescale: fpsInt)
+ let frameSleepNs = UInt64(1_000_000_000.0 / fpsValue)
+
+        // NOTE(review): the original hunk declares +205 lines but fewer survived transit;
+        // the frame-capture loop below is reconstructed — verify against the original patch.
+        for frame in 0..<frameCount {
+            while !input.isReadyForMoreMediaData {
+                try await Task.sleep(nanoseconds: 1_000_000)
+            }
+            guard let cgImage = Self.captureImage(window: window, size: size) else {
+                throw ScreenRecordError.captureFailed("Failed to capture frame \(frame)")
+            }
+            guard let buffer = Self.pixelBuffer(from: cgImage, width: widthPx, height: heightPx) else {
+                throw ScreenRecordError.captureFailed("Failed to convert frame \(frame)")
+            }
+            let time = CMTimeMultiply(frameDuration, multiplier: Int32(frame))
+            adaptor.append(buffer, withPresentationTime: time)
+            try? await Task.sleep(nanoseconds: frameSleepNs)
+        }
+
+        input.markAsFinished()
+        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
+ writer.finishWriting {
+ if let err = writer.error {
+ cont.resume(throwing: ScreenRecordError.writeFailed(err.localizedDescription))
+ } else if writer.status != .completed {
+ cont.resume(throwing: ScreenRecordError.writeFailed("Failed to finalize video"))
+ } else {
+ cont.resume()
+ }
+ }
+ }
+
+ return outURL.path
+ }
+
+ private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
+ let v = ms ?? 10_000
+ return min(60_000, max(250, v))
+ }
+
+ private nonisolated static func clampFps(_ fps: Double?) -> Double {
+ let v = fps ?? 10
+ if !v.isFinite { return 10 }
+ return min(30, max(1, v))
+ }
+
+ private nonisolated static func resolveKeyWindow() -> UIWindow? {
+ let scenes = UIApplication.shared.connectedScenes
+ for scene in scenes {
+ guard let windowScene = scene as? UIWindowScene else { continue }
+ if let window = windowScene.windows.first(where: { $0.isKeyWindow }) {
+ return window
+ }
+ if let window = windowScene.windows.first {
+ return window
+ }
+ }
+ return nil
+ }
+
+ private nonisolated static func captureImage(window: UIWindow, size: CGSize) -> CGImage? {
+ let format = UIGraphicsImageRendererFormat()
+ format.scale = window.screen.scale
+ let renderer = UIGraphicsImageRenderer(size: size, format: format)
+ let image = renderer.image { _ in
+ window.drawHierarchy(in: CGRect(origin: .zero, size: size), afterScreenUpdates: false)
+ }
+ return image.cgImage
+ }
+
+ private nonisolated static func pixelBuffer(from image: CGImage, width: Int, height: Int) -> CVPixelBuffer? {
+ var buffer: CVPixelBuffer?
+ let status = CVPixelBufferCreate(
+ kCFAllocatorDefault,
+ width,
+ height,
+ kCVPixelFormatType_32BGRA,
+ [
+ kCVPixelBufferCGImageCompatibilityKey: true,
+ kCVPixelBufferCGBitmapContextCompatibilityKey: true,
+ ] as CFDictionary,
+ &buffer)
+ guard status == kCVReturnSuccess, let buffer else { return nil }
+
+ CVPixelBufferLockBaseAddress(buffer, [])
+ defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
+
+ guard let context = CGContext(
+ data: CVPixelBufferGetBaseAddress(buffer),
+ width: width,
+ height: height,
+ bitsPerComponent: 8,
+ bytesPerRow: CVPixelBufferGetBytesPerRow(buffer),
+ space: CGColorSpaceCreateDeviceRGB(),
+ bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
+ ) else {
+ return nil
+ }
+
+ context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
+ return buffer
+ }
+}
diff --git a/apps/ios/Sources/Settings/SettingsTab.swift b/apps/ios/Sources/Settings/SettingsTab.swift
index 09251fffa..02016243f 100644
--- a/apps/ios/Sources/Settings/SettingsTab.swift
+++ b/apps/ios/Sources/Settings/SettingsTab.swift
@@ -283,7 +283,7 @@ struct SettingsTab: View {
}
private func currentCaps() -> [String] {
- var caps = [ClawdisCapability.canvas.rawValue]
+ var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
let cameraEnabled =
UserDefaults.standard.object(forKey: "camera.enabled") == nil
@@ -307,6 +307,7 @@ struct SettingsTab: View {
ClawdisCanvasA2UICommand.push.rawValue,
ClawdisCanvasA2UICommand.pushJSONL.rawValue,
ClawdisCanvasA2UICommand.reset.rawValue,
+ ClawdisScreenCommand.record.rawValue,
]
let caps = Set(self.currentCaps())
diff --git a/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift b/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift
index 41b93b5c1..1effbd89e 100644
--- a/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift
+++ b/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift
@@ -98,7 +98,7 @@ final class MacNodeModeCoordinator {
}
private func currentCaps() -> [String] {
- var caps: [String] = [ClawdisCapability.canvas.rawValue]
+ var caps: [String] = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue]
if UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false {
caps.append(ClawdisCapability.camera.rawValue)
}
diff --git a/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift b/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift
index 157e0a62f..9696d34f7 100644
--- a/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift
+++ b/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift
@@ -74,6 +74,14 @@ actor MacNodeRuntime {
return try await self.handleA2UIPush(req)
case ClawdisCameraCommand.snap.rawValue:
+ guard Self.cameraEnabled() else {
+ return BridgeInvokeResponse(
+ id: req.id,
+ ok: false,
+ error: ClawdisNodeError(
+ code: .unavailable,
+ message: "CAMERA_DISABLED: enable Camera in Settings"))
+ }
let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ??
ClawdisCameraSnapParams()
let res = try await self.cameraCapture.snap(
@@ -94,6 +102,14 @@ actor MacNodeRuntime {
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
case ClawdisCameraCommand.clip.rawValue:
+ guard Self.cameraEnabled() else {
+ return BridgeInvokeResponse(
+ id: req.id,
+ ok: false,
+ error: ClawdisNodeError(
+ code: .unavailable,
+ message: "CAMERA_DISABLED: enable Camera in Settings"))
+ }
let params = (try? Self.decodeParams(ClawdisCameraClipParams.self, from: req.paramsJSON)) ??
ClawdisCameraClipParams()
let res = try await self.cameraCapture.clip(
@@ -119,6 +135,12 @@ actor MacNodeRuntime {
case MacNodeScreenCommand.record.rawValue:
let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ??
MacNodeScreenRecordParams()
+ if let format = params.format?.lowercased(), !format.isEmpty, format != "mp4" {
+ return Self.errorResponse(
+ req,
+ code: .invalidRequest,
+ message: "INVALID_REQUEST: screen format must be mp4")
+ }
let path = try await self.screenRecorder.record(
screenIndex: params.screenIndex,
durationMs: params.durationMs,
@@ -134,7 +156,7 @@ actor MacNodeRuntime {
var screenIndex: Int?
}
let payload = try Self.encodePayload(ScreenPayload(
- format: params.format ?? "mp4",
+ format: "mp4",
base64: data.base64EncodedString(),
durationMs: params.durationMs,
fps: params.fps,
@@ -224,6 +246,10 @@ actor MacNodeRuntime {
return json
}
+ private nonisolated static func cameraEnabled() -> Bool {
+ UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false
+ }
+
private static func errorResponse(
_ req: BridgeInvokeRequest,
code: ClawdisNodeErrorCode,
diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift
index 25f98c301..2abd1a6ec 100644
--- a/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift
+++ b/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift
@@ -3,5 +3,6 @@ import Foundation
public enum ClawdisCapability: String, Codable, Sendable {
case canvas
case camera
+ case screen
case voiceWake
}
diff --git a/docs/nodes.md b/docs/nodes.md
index 52fa095f3..d5383bc10 100644
--- a/docs/nodes.md
+++ b/docs/nodes.md
clawdis nodes canvas snapshot --node <node> --format png
clawdis nodes canvas snapshot --node <node> --format jpg --max-width 1200 --quality 0.9
```
+Simple shortcut (auto-picks a single connected node if possible):
+
+```bash
+clawdis canvas snapshot --format png
+clawdis canvas snapshot --format jpg --max-width 1200 --quality 0.9
+```
+
## Photos + videos (node camera)
Photos (`jpg`):
@@ -68,14 +75,19 @@ Notes:
- Clip duration is clamped (currently `<= 60s`) to avoid oversized base64 payloads.
- Android will prompt for `CAMERA`/`RECORD_AUDIO` permissions when possible; denied permissions fail with `*_PERMISSION_REQUIRED`.
-## Screen recordings (mac node)
+## Screen recordings (nodes)
-Mac node mode exposes `screen.record` (mp4). Example:
+Nodes expose `screen.record` (mp4). Example:
```bash
clawdis nodes screen record --node <node> --duration 10s --fps 10
```
+Notes:
+- `screen.record` requires the node app to be foregrounded.
+- Android will show the system screen-capture prompt before recording.
+- Screen recordings are clamped to `<= 60s`.
+
## Mac node mode
- The macOS menubar app connects to the Gateway bridge as a node (so `clawdis nodes …` works against this Mac).
diff --git a/src/cli/canvas-cli.ts b/src/cli/canvas-cli.ts
new file mode 100644
index 000000000..f9ceb2712
--- /dev/null
+++ b/src/cli/canvas-cli.ts
@@ -0,0 +1,244 @@
+import type { Command } from "commander";
+import { callGateway, randomIdempotencyKey } from "../gateway/call.js";
+import { defaultRuntime } from "../runtime.js";
+import { writeBase64ToFile } from "./nodes-camera.js";
+import {
+ canvasSnapshotTempPath,
+ parseCanvasSnapshotPayload,
+} from "./nodes-canvas.js";
+
+type CanvasOpts = {
+ url?: string;
+ token?: string;
+ timeout?: string;
+ json?: boolean;
+ node?: string;
+ format?: string;
+ maxWidth?: string;
+ quality?: string;
+};
+
+type NodeListNode = {
+ nodeId: string;
+ displayName?: string;
+ platform?: string;
+ remoteIp?: string;
+ caps?: string[];
+ connected?: boolean;
+};
+
+type PendingRequest = {
+ requestId: string;
+ nodeId: string;
+ displayName?: string;
+ remoteIp?: string;
+};
+
+type PairedNode = {
+ nodeId: string;
+ displayName?: string;
+ remoteIp?: string;
+};
+
+type PairingList = {
+ pending: PendingRequest[];
+ paired: PairedNode[];
+};
+
+const canvasCallOpts = (cmd: Command) =>
+ cmd
+ .option("--url ", "Gateway WebSocket URL", "ws://127.0.0.1:18789")
+ .option("--token ", "Gateway token (if required)")
+ .option("--timeout ", "Timeout in ms", "10000")
+ .option("--json", "Output JSON", false);
+
+const callGatewayCli = async (
+ method: string,
+ opts: CanvasOpts,
+ params?: unknown,
+) =>
+ callGateway({
+ url: opts.url,
+ token: opts.token,
+ method,
+ params,
+ timeoutMs: Number(opts.timeout ?? 10_000),
+ clientName: "cli",
+ mode: "cli",
+ });
+
+function parseNodeList(value: unknown): NodeListNode[] {
+ const obj =
+ typeof value === "object" && value !== null
+ ? (value as Record<string, unknown>)
+ : {};
+ return Array.isArray(obj.nodes) ? (obj.nodes as NodeListNode[]) : [];
+}
+
+function parsePairingList(value: unknown): PairingList {
+ const obj =
+ typeof value === "object" && value !== null
+ ? (value as Record<string, unknown>)
+ : {};
+ const pending = Array.isArray(obj.pending)
+ ? (obj.pending as PendingRequest[])
+ : [];
+ const paired = Array.isArray(obj.paired) ? (obj.paired as PairedNode[]) : [];
+ return { pending, paired };
+}
+
+function normalizeNodeKey(value: string) {
+ return value
+ .toLowerCase()
+ .replace(/[^a-z0-9]+/g, "-")
+ .replace(/^-+/, "")
+ .replace(/-+$/, "");
+}
+
+async function loadNodes(opts: CanvasOpts): Promise<NodeListNode[]> {
+ try {
+ const res = (await callGatewayCli("node.list", opts, {})) as unknown;
+ return parseNodeList(res);
+ } catch {
+ const res = (await callGatewayCli("node.pair.list", opts, {})) as unknown;
+ const { paired } = parsePairingList(res);
+ return paired.map((n) => ({
+ nodeId: n.nodeId,
+ displayName: n.displayName,
+ remoteIp: n.remoteIp,
+ }));
+ }
+}
+
+function pickDefaultNode(nodes: NodeListNode[]): NodeListNode | null {
+ const withCanvas = nodes.filter((n) =>
+ Array.isArray(n.caps) ? n.caps.includes("canvas") : true,
+ );
+ if (withCanvas.length === 0) return null;
+
+ const connected = withCanvas.filter((n) => n.connected);
+ const candidates = connected.length > 0 ? connected : withCanvas;
+ if (candidates.length === 1) return candidates[0];
+
+ const local = candidates.filter((n) =>
+ n.platform?.toLowerCase().startsWith("mac") &&
+ typeof n.nodeId === "string" &&
+ n.nodeId.startsWith("mac-"),
+ );
+ if (local.length === 1) return local[0];
+
+ return null;
+}
+
+async function resolveNodeId(opts: CanvasOpts, query?: string) {
+ const nodes = await loadNodes(opts);
+ const q = String(query ?? "").trim();
+ if (!q) {
+ const picked = pickDefaultNode(nodes);
+ if (picked) return picked.nodeId;
+ throw new Error(
+ "node required (use --node or ensure only one connected node is available)",
+ );
+ }
+
+ const qNorm = normalizeNodeKey(q);
+ const matches = nodes.filter((n) => {
+ if (n.nodeId === q) return true;
+ if (typeof n.remoteIp === "string" && n.remoteIp === q) return true;
+ const name = typeof n.displayName === "string" ? n.displayName : "";
+ if (name && normalizeNodeKey(name) === qNorm) return true;
+ if (q.length >= 6 && n.nodeId.startsWith(q)) return true;
+ return false;
+ });
+
+ if (matches.length === 1) return matches[0].nodeId;
+ if (matches.length === 0) {
+ const known = nodes
+ .map((n) => n.displayName || n.remoteIp || n.nodeId)
+ .filter(Boolean)
+ .join(", ");
+ throw new Error(`unknown node: ${q}${known ? ` (known: ${known})` : ""}`);
+ }
+ throw new Error(
+ `ambiguous node: ${q} (matches: ${matches
+ .map((n) => n.displayName || n.remoteIp || n.nodeId)
+ .join(", ")})`,
+ );
+}
+
+function normalizeFormat(format: string) {
+ const trimmed = format.trim().toLowerCase();
+ if (trimmed === "jpg") return "jpeg";
+ return trimmed;
+}
+
+export function registerCanvasCli(program: Command) {
+ const canvas = program
+ .command("canvas")
+ .description("Render the canvas to a snapshot via nodes");
+
+ canvasCallOpts(
+ canvas
+ .command("snapshot")
+ .description("Capture a canvas snapshot (prints MEDIA:)")
+ .option("--node ", "Node id, name, or IP")
+ .option("--format ", "Output format", "png")
+ .option("--max-width ", "Max width (px)")
+ .option("--quality <0-1>", "JPEG quality (default 0.82)")
+ .action(async (opts: CanvasOpts) => {
+ try {
+ const nodeId = await resolveNodeId(opts, opts.node);
+ const format = normalizeFormat(String(opts.format ?? "png"));
+ if (format !== "png" && format !== "jpeg") {
+ throw new Error("invalid format (use png or jpg)");
+ }
+ const maxWidth = opts.maxWidth
+ ? Number.parseInt(String(opts.maxWidth), 10)
+ : undefined;
+ const quality = opts.quality
+ ? Number.parseFloat(String(opts.quality))
+ : undefined;
+
+ const raw = (await callGatewayCli("node.invoke", opts, {
+ nodeId,
+ command: "canvas.snapshot",
+ params: {
+ format,
+ maxWidth: Number.isFinite(maxWidth) ? maxWidth : undefined,
+ quality: Number.isFinite(quality) ? quality : undefined,
+ },
+ idempotencyKey: randomIdempotencyKey(),
+ })) as unknown;
+
+ const res =
+ typeof raw === "object" && raw !== null
+ ? (raw as { payload?: unknown })
+ : {};
+ const payload = parseCanvasSnapshotPayload(res.payload);
+ const filePath = canvasSnapshotTempPath({
+ ext: payload.format === "jpeg" ? "jpg" : payload.format,
+ });
+ await writeBase64ToFile(filePath, payload.base64);
+
+ if (opts.json) {
+ defaultRuntime.log(
+ JSON.stringify(
+ {
+ file: {
+ path: filePath,
+ },
+ },
+ null,
+ 2,
+ ),
+ );
+ return;
+ }
+ defaultRuntime.log(`MEDIA:${filePath}`);
+ } catch (err) {
+ defaultRuntime.error(`canvas snapshot failed: ${String(err)}`);
+ defaultRuntime.exit(1);
+ }
+ }),
+ );
+}
diff --git a/src/cli/program.test.ts b/src/cli/program.test.ts
index 98888f4c8..0fbbd7f80 100644
--- a/src/cli/program.test.ts
+++ b/src/cli/program.test.ts
@@ -607,6 +607,44 @@ describe("cli program", () => {
}
});
+ it("runs canvas snapshot and prints MEDIA path", async () => {
+ callGateway
+ .mockResolvedValueOnce({
+ ts: Date.now(),
+ nodes: [
+ {
+ nodeId: "mac-1",
+ displayName: "Mac Node",
+ platform: "macos",
+ connected: true,
+ caps: ["canvas"],
+ },
+ ],
+ })
+ .mockResolvedValueOnce({
+ ok: true,
+ nodeId: "mac-1",
+ command: "canvas.snapshot",
+ payload: { format: "png", base64: "aGk=" },
+ });
+
+ const program = buildProgram();
+ runtime.log.mockClear();
+ await program.parseAsync(["canvas", "snapshot", "--format", "png"], {
+ from: "user",
+ });
+
+ const out = String(runtime.log.mock.calls[0]?.[0] ?? "");
+ const mediaPath = out.replace(/^MEDIA:/, "").trim();
+ expect(mediaPath).toMatch(/clawdis-canvas-snapshot-.*\.png$/);
+
+ try {
+ await expect(fs.readFile(mediaPath, "utf8")).resolves.toBe("hi");
+ } finally {
+ await fs.unlink(mediaPath).catch(() => {});
+ }
+ });
+
it("fails nodes camera snap on invalid facing", async () => {
callGateway.mockResolvedValueOnce({
ts: Date.now(),
diff --git a/src/cli/program.ts b/src/cli/program.ts
index df8bbab78..b3f73e933 100644
--- a/src/cli/program.ts
+++ b/src/cli/program.ts
@@ -26,6 +26,7 @@ import { danger, info, setVerbose } from "../globals.js";
import { loginWeb, logoutWeb } from "../provider-web.js";
import { defaultRuntime } from "../runtime.js";
import { VERSION } from "../version.js";
+import { registerCanvasCli } from "./canvas-cli.js";
import { registerCronCli } from "./cron-cli.js";
import { createDefaultDeps } from "./deps.js";
import { registerDnsCli } from "./dns-cli.js";
@@ -245,6 +246,7 @@ Examples:
}
});
+ registerCanvasCli(program);
registerGatewayCli(program);
registerNodesCli(program);
registerCronCli(program);