diff --git a/apps/android/app/src/main/AndroidManifest.xml b/apps/android/app/src/main/AndroidManifest.xml index 0588e4c4e..e8547b4eb 100644 --- a/apps/android/app/src/main/AndroidManifest.xml +++ b/apps/android/app/src/main/AndroidManifest.xml @@ -4,6 +4,7 @@ + + android:foregroundServiceType="dataSync|microphone|mediaProjection" /> diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt index 6478f7fba..609c08bf8 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainActivity.kt @@ -25,6 +25,7 @@ import kotlinx.coroutines.launch class MainActivity : ComponentActivity() { private val viewModel: MainViewModel by viewModels() private lateinit var permissionRequester: PermissionRequester + private lateinit var screenCaptureRequester: ScreenCaptureRequester override fun onCreate(savedInstanceState: Bundle?) 
{ super.onCreate(savedInstanceState) @@ -35,8 +36,10 @@ class MainActivity : ComponentActivity() { requestNotificationPermissionIfNeeded() NodeForegroundService.start(this) permissionRequester = PermissionRequester(this) + screenCaptureRequester = ScreenCaptureRequester(this) viewModel.camera.attachLifecycleOwner(this) viewModel.camera.attachPermissionRequester(permissionRequester) + viewModel.screenRecorder.attachScreenCaptureRequester(screenCaptureRequester) lifecycleScope.launch { repeatOnLifecycle(Lifecycle.State.STARTED) { diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt index 216a37aee..245e0156c 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt @@ -6,6 +6,7 @@ import com.steipete.clawdis.node.bridge.BridgeEndpoint import com.steipete.clawdis.node.chat.OutgoingAttachment import com.steipete.clawdis.node.node.CameraCaptureManager import com.steipete.clawdis.node.node.CanvasController +import com.steipete.clawdis.node.node.ScreenRecordManager import kotlinx.coroutines.flow.StateFlow class MainViewModel(app: Application) : AndroidViewModel(app) { @@ -13,6 +14,7 @@ class MainViewModel(app: Application) : AndroidViewModel(app) { val canvas: CanvasController = runtime.canvas val camera: CameraCaptureManager = runtime.camera + val screenRecorder: ScreenRecordManager = runtime.screenRecorder val bridges: StateFlow> = runtime.bridges val discoveryStatusText: StateFlow = runtime.discoveryStatusText diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt index c1cd428b5..5f6b4ec12 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt 
@@ -17,11 +17,13 @@ import com.steipete.clawdis.node.bridge.BridgePairingClient import com.steipete.clawdis.node.bridge.BridgeSession import com.steipete.clawdis.node.node.CameraCaptureManager import com.steipete.clawdis.node.node.CanvasController +import com.steipete.clawdis.node.node.ScreenRecordManager import com.steipete.clawdis.node.protocol.ClawdisCapability import com.steipete.clawdis.node.protocol.ClawdisCameraCommand import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UIAction import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UICommand import com.steipete.clawdis.node.protocol.ClawdisCanvasCommand +import com.steipete.clawdis.node.protocol.ClawdisScreenCommand import com.steipete.clawdis.node.voice.VoiceWakeManager import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers @@ -51,6 +53,7 @@ class NodeRuntime(context: Context) { val prefs = SecurePrefs(appContext) val canvas = CanvasController() val camera = CameraCaptureManager(appContext) + val screenRecorder = ScreenRecordManager(appContext) private val json = Json { ignoreUnknownKeys = true } private val externalAudioCaptureActive = MutableStateFlow(false) @@ -287,6 +290,7 @@ class NodeRuntime(context: Context) { add(ClawdisCanvasA2UICommand.Push.rawValue) add(ClawdisCanvasA2UICommand.PushJSONL.rawValue) add(ClawdisCanvasA2UICommand.Reset.rawValue) + add(ClawdisScreenCommand.Record.rawValue) if (cameraEnabled.value) { add(ClawdisCameraCommand.Snap.rawValue) add(ClawdisCameraCommand.Clip.rawValue) @@ -294,17 +298,18 @@ class NodeRuntime(context: Context) { } val resolved = if (storedToken.isNullOrBlank()) { - _statusText.value = "Pairing…" - val caps = buildList { - add(ClawdisCapability.Canvas.rawValue) - if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue) - if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) { - add(ClawdisCapability.VoiceWake.rawValue) - } - } - BridgePairingClient().pairAndHello( - endpoint = endpoint, - hello = + 
_statusText.value = "Pairing…" + val caps = buildList { + add(ClawdisCapability.Canvas.rawValue) + add(ClawdisCapability.Screen.rawValue) + if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue) + if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) { + add(ClawdisCapability.VoiceWake.rawValue) + } + } + BridgePairingClient().pairAndHello( + endpoint = endpoint, + hello = BridgePairingClient.Hello( nodeId = instanceId.value, displayName = displayName.value, @@ -342,6 +347,7 @@ class NodeRuntime(context: Context) { caps = buildList { add(ClawdisCapability.Canvas.rawValue) + add(ClawdisCapability.Screen.rawValue) if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue) if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) { add(ClawdisCapability.VoiceWake.rawValue) @@ -534,12 +540,13 @@ class NodeRuntime(context: Context) { if ( command.startsWith(ClawdisCanvasCommand.NamespacePrefix) || command.startsWith(ClawdisCanvasA2UICommand.NamespacePrefix) || - command.startsWith(ClawdisCameraCommand.NamespacePrefix) + command.startsWith(ClawdisCameraCommand.NamespacePrefix) || + command.startsWith(ClawdisScreenCommand.NamespacePrefix) ) { if (!isForeground.value) { return BridgeSession.InvokeResult.error( code = "NODE_BACKGROUND_UNAVAILABLE", - message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground", + message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground", ) } } @@ -649,6 +656,16 @@ class NodeRuntime(context: Context) { if (includeAudio) externalAudioCaptureActive.value = false } } + ClawdisScreenCommand.Record.rawValue -> { + val res = + try { + screenRecorder.record(paramsJson) + } catch (err: Throwable) { + val (code, message) = invokeErrorFromThrowable(err) + return BridgeSession.InvokeResult.error(code = code, message = message) + } + BridgeSession.InvokeResult.ok(res.payloadJson) + } else -> BridgeSession.InvokeResult.error( code = 
/**
 * Asks the user for screen-capture consent on behalf of a background caller.
 *
 * The flow is: show an in-app rationale dialog, then launch the system
 * screen-capture consent activity obtained from
 * [MediaProjectionManager.createScreenCaptureIntent], and finally hand the
 * activity result back to the suspended caller.
 *
 * The instance registers an activity-result launcher in its initializer, so it
 * has to be constructed while [activity] is still allowed to register one
 * (the visible call site constructs it from `onCreate`).
 *
 * Requests are serialized with a [Mutex]; only one consent flow runs at a time.
 */
class ScreenCaptureRequester(private val activity: ComponentActivity) {
  /** Successful consent result: the values needed by `MediaProjectionManager.getMediaProjection`. */
  data class CaptureResult(val resultCode: Int, val data: Intent)

  private val mutex = Mutex()

  // Written from the requesting coroutine and read from the activity-result callback,
  // which may run on different threads — hence @Volatile.
  @Volatile private var pending: CompletableDeferred<CaptureResult?>? = null

  private val launcher: ActivityResultLauncher<Intent> =
    activity.registerForActivityResult(ActivityResultContracts.StartActivityForResult()) { result ->
      val waiter = pending
      pending = null
      val data = result.data
      if (result.resultCode == Activity.RESULT_OK && data != null) {
        waiter?.complete(CaptureResult(result.resultCode, data))
      } else {
        // Denied, cancelled, or the system returned no intent.
        waiter?.complete(null)
      }
    }

  /**
   * Runs the consent flow and suspends until the user answers.
   *
   * @param timeoutMs how long to wait for the system consent activity to report a
   *   result. The rationale dialog shown first is not covered by this timeout.
   * @return the consent result, or `null` when the user declined the rationale,
   *   denied the system prompt, or the prompt timed out.
   */
  suspend fun requestCapture(timeoutMs: Long = 20_000): CaptureResult? =
    mutex.withLock {
      if (!showRationaleDialog()) return@withLock null

      val deferred = CompletableDeferred<CaptureResult?>()
      try {
        withContext(Dispatchers.Main) {
          val mgr = activity.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
          // Publish the waiter before launching so the result callback can always find it.
          pending = deferred
          launcher.launch(mgr.createScreenCaptureIntent())
        }
        // withTimeoutOrNull keeps the documented "null on failure" contract; plain
        // withTimeout would surface a TimeoutCancellationException, which callers
        // can mistake for their own cancellation.
        kotlinx.coroutines.withTimeoutOrNull(timeoutMs) { deferred.await() }
      } finally {
        // Drop the waiter on timeout, cancellation, or launch failure so a late
        // activity result cannot complete a request nobody is waiting on.
        if (pending === deferred) pending = null
      }
    }

  /**
   * Shows the in-app explanation dialog on the main thread.
   *
   * @return `true` when the user chose "Continue"; `false` when they chose
   *   "Not now", dismissed the dialog, or the activity can no longer show UI.
   */
  private suspend fun showRationaleDialog(): Boolean =
    withContext(Dispatchers.Main) {
      // Showing a dialog on a finishing/destroyed activity fails; treat it as "declined".
      if (activity.isFinishing || activity.isDestroyed) return@withContext false

      suspendCancellableCoroutine<Boolean> { cont ->
        val dialog =
          AlertDialog.Builder(activity)
            .setTitle("Screen recording required")
            .setMessage("Clawdis needs to record the screen for this command.")
            .setPositiveButton("Continue") { _, _ -> if (cont.isActive) cont.resume(true) }
            .setNegativeButton("Not now") { _, _ -> if (cont.isActive) cont.resume(false) }
            .setOnCancelListener { if (cont.isActive) cont.resume(false) }
            .show()
        // If the caller is cancelled while the dialog is up, remove it instead of
        // leaving an orphaned prompt on screen.
        cont.invokeOnCancellation { dialog.dismiss() }
      }
    }
}
/**
 * Records the device screen to a temporary MP4 file and returns it base64-encoded.
 *
 * Recording uses a [MediaRecorder] fed by a virtual display created from a
 * `MediaProjection`. User consent is obtained through the attached
 * [ScreenCaptureRequester]; without one, [record] fails with
 * `SCREEN_PERMISSION_REQUIRED`.
 */
class ScreenRecordManager(private val context: Context) {
  /** Result of [record]: a JSON object string with `format`, `base64`, `durationMs`, `fps`, `screenIndex`. */
  data class Payload(val payloadJson: String)

  @Volatile private var screenCaptureRequester: ScreenCaptureRequester? = null

  /** Supplies the object used to ask the user for screen-capture consent. */
  fun attachScreenCaptureRequester(requester: ScreenCaptureRequester) {
    screenCaptureRequester = requester
  }

  /**
   * Records the screen according to [paramsJson] and returns the encoded clip.
   *
   * Recognized keys: `durationMs` (default 10000, clamped to 250..60000), `fps`
   * (default 10, clamped to 1..60), `screenIndex` (must be absent or 0) and
   * `format` (must be absent or "mp4", case-insensitive).
   *
   * @throws IllegalStateException with a `SCREEN_PERMISSION_REQUIRED:` message when no
   *   requester is attached or consent was not granted, or an `UNAVAILABLE:` message
   *   when the projection cannot be obtained or no video was produced.
   * @throws IllegalArgumentException with an `INVALID_REQUEST:` message for an
   *   unsupported `format` or `screenIndex`.
   */
  suspend fun record(paramsJson: String?): Payload =
    withContext(Dispatchers.Default) {
      val requester =
        screenCaptureRequester
          ?: throw IllegalStateException(
            "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
          )

      val durationMs = (parseDurationMs(paramsJson) ?: 10_000).coerceIn(250, 60_000)
      val fps = (parseFps(paramsJson) ?: 10.0).coerceIn(1.0, 60.0)
      val fpsInt = fps.roundToInt().coerceIn(1, 60)
      val screenIndex = parseScreenIndex(paramsJson)
      val format = parseString(paramsJson, key = "format")
      // Validate the request before bothering the user with a consent prompt.
      if (format != null && format.lowercase() != "mp4") {
        throw IllegalArgumentException("INVALID_REQUEST: screen format must be mp4")
      }
      if (screenIndex != null && screenIndex != 0) {
        throw IllegalArgumentException("INVALID_REQUEST: screenIndex must be 0 on Android")
      }

      val capture = requester.requestCapture()
        ?: throw IllegalStateException(
          "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
        )

      // NOTE(review): recent Android versions require a running foreground service of
      // type mediaProjection before getMediaProjection is called — confirm the
      // foreground service is started with that type.
      val mgr =
        context.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
      val projection = mgr.getMediaProjection(capture.resultCode, capture.data)
        ?: throw IllegalStateException("UNAVAILABLE: screen capture unavailable")

      var file: File? = null
      try {
        val out = File.createTempFile("clawdis-screen-", ".mp4", context.cacheDir)
        file = out
        // recordToFile owns the projection from here on and always stops it.
        recordToFile(projection, out, durationMs, fpsInt)

        val bytes = withContext(Dispatchers.IO) { out.readBytes() }
        if (bytes.isEmpty()) {
          throw IllegalStateException("UNAVAILABLE: screen recording produced no video")
        }
        val base64 = Base64.encodeToString(bytes, Base64.NO_WRAP)
        Payload(
          """{"format":"mp4","base64":"$base64","durationMs":$durationMs,"fps":$fpsInt,"screenIndex":0}""",
        )
      } catch (err: Throwable) {
        // Covers a failure before recordToFile took ownership (temp-file creation);
        // stopping an already-stopped projection is wrapped so it cannot mask `err`.
        runCatching { projection.stop() }
        throw err
      } finally {
        // Remove the temp file on success and on every failure path.
        file?.delete()
      }
    }

  /**
   * Mirrors the screen into [file] for [durationMs] milliseconds at [fps] frames per
   * second, then releases the recorder, the virtual display and [projection].
   */
  private suspend fun recordToFile(
    projection: android.media.projection.MediaProjection,
    file: File,
    durationMs: Int,
    fps: Int,
  ) {
    var recorder: MediaRecorder? = null
    var virtualDisplay: android.hardware.display.VirtualDisplay? = null
    var recording = false
    // Android 14+ rejects createVirtualDisplay unless a callback has been registered.
    val projectionCallback = object : android.media.projection.MediaProjection.Callback() {}
    try {
      // An explicit main-looper handler is needed: this code runs on a dispatcher
      // thread that has no Looper of its own.
      projection.registerCallback(
        projectionCallback,
        android.os.Handler(android.os.Looper.getMainLooper()),
      )

      val metrics = context.resources.displayMetrics
      // Many H.264 encoders reject odd frame dimensions, so round down to even.
      val width = evenDimension(metrics.widthPixels)
      val height = evenDimension(metrics.heightPixels)
      val densityDpi = metrics.densityDpi

      val rec = newRecorder()
      recorder = rec
      rec.setVideoSource(MediaRecorder.VideoSource.SURFACE)
      rec.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4)
      rec.setVideoEncoder(MediaRecorder.VideoEncoder.H264)
      rec.setVideoSize(width, height)
      rec.setVideoFrameRate(fps)
      rec.setVideoEncodingBitRate(estimateBitrate(width, height, fps))
      rec.setOutputFile(file.absolutePath)
      rec.prepare()

      virtualDisplay =
        projection.createVirtualDisplay(
          "clawdis-screen",
          width,
          height,
          densityDpi,
          DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
          rec.surface,
          null,
          null,
        )

      rec.start()
      recording = true
      delay(durationMs.toLong())

      recording = false
      try {
        rec.stop()
      } catch (err: RuntimeException) {
        // stop() throws when no valid frame was received; the output file is unusable,
        // so report a failure instead of returning a corrupt clip.
        throw IllegalStateException("UNAVAILABLE: screen recording produced no video", err)
      }
    } finally {
      // Each step is isolated so one failing release cannot leak the remaining resources.
      recorder?.let { rec ->
        // Only reached with recording == true when the try block was interrupted
        // (error or cancellation); the original failure is the one that matters.
        if (recording) runCatching { rec.stop() }
        runCatching { rec.reset() }
        runCatching { rec.release() }
      }
      runCatching { virtualDisplay?.release() }
      runCatching { projection.unregisterCallback(projectionCallback) }
      runCatching { projection.stop() }
    }
  }

  /** Creates a [MediaRecorder], using the context-taking constructor where it exists (API 31+). */
  private fun newRecorder(): MediaRecorder =
    if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.S) {
      MediaRecorder(context)
    } else {
      @Suppress("DEPRECATION")
      MediaRecorder()
    }

  /** Rounds [px] down to an even value, never below 2. */
  private fun evenDimension(px: Int): Int = (px - (px % 2)).coerceAtLeast(2)

  // Parsed through Double so a fractional or very large JSON number is truncated/clamped
  // by the caller instead of silently falling back to the default duration.
  private fun parseDurationMs(paramsJson: String?): Int? =
    parseNumber(paramsJson, key = "durationMs")?.toDoubleOrNull()?.toInt()

  private fun parseFps(paramsJson: String?): Double? =
    parseNumber(paramsJson, key = "fps")?.toDoubleOrNull()

  private fun parseScreenIndex(paramsJson: String?): Int? =
    parseNumber(paramsJson, key = "screenIndex")?.toIntOrNull()

  /**
   * Returns the raw numeric text that follows the first occurrence of `"key":` in
   * [paramsJson], or `null` when the key or the colon is missing. This is a plain
   * text scan, not a JSON parser; the result may be empty or malformed and is
   * validated by the callers' `to…OrNull` conversions.
   */
  private fun parseNumber(paramsJson: String?, key: String): String? {
    val raw = paramsJson ?: return null
    val needle = "\"$key\""
    val idx = raw.indexOf(needle)
    if (idx < 0) return null
    val colon = raw.indexOf(':', idx + needle.length)
    if (colon < 0) return null
    val tail = raw.substring(colon + 1).trimStart()
    return tail.takeWhile { it.isDigit() || it == '.' || it == '-' }
  }

  /**
   * Returns the text between the quotes that follow the first occurrence of `"key":`
   * in [paramsJson], or `null` when the key is missing or its value is not a quoted
   * string. Escape sequences inside the value are not interpreted.
   */
  private fun parseString(paramsJson: String?, key: String): String? {
    val raw = paramsJson ?: return null
    val needle = "\"$key\""
    val idx = raw.indexOf(needle)
    if (idx < 0) return null
    val colon = raw.indexOf(':', idx + needle.length)
    if (colon < 0) return null
    val tail = raw.substring(colon + 1).trimStart()
    if (!tail.startsWith('\"')) return null
    val rest = tail.drop(1)
    val end = rest.indexOf('\"')
    if (end < 0) return null
    return rest.substring(0, end)
  }

  /**
   * Estimates a video bitrate as two bits per pixel per frame, clamped to
   * 1..12 Mbit/s. The clamp is applied on the Long value so large inputs cannot
   * overflow when narrowed to Int.
   */
  private fun estimateBitrate(width: Int, height: Int, fps: Int): Int {
    val raw = width.toLong() * height.toLong() * fps.toLong() * 2L
    return raw.coerceIn(1_000_000L, 12_000_000L).toInt()
  }
}
+ } +} diff --git a/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt b/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt index 5ea73b4b1..05b1760ba 100644 --- a/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt +++ b/apps/android/app/src/test/java/com/steipete/clawdis/node/protocol/ClawdisProtocolConstantsTest.kt @@ -24,6 +24,12 @@ class ClawdisProtocolConstantsTest { fun capabilitiesUseStableStrings() { assertEquals("canvas", ClawdisCapability.Canvas.rawValue) assertEquals("camera", ClawdisCapability.Camera.rawValue) + assertEquals("screen", ClawdisCapability.Screen.rawValue) assertEquals("voiceWake", ClawdisCapability.VoiceWake.rawValue) } + + @Test + fun screenCommandsUseStableStrings() { + assertEquals("screen.record", ClawdisScreenCommand.Record.rawValue) + } } diff --git a/apps/ios/Sources/Bridge/BridgeConnectionController.swift b/apps/ios/Sources/Bridge/BridgeConnectionController.swift index e8421a2c1..d83780569 100644 --- a/apps/ios/Sources/Bridge/BridgeConnectionController.swift +++ b/apps/ios/Sources/Bridge/BridgeConnectionController.swift @@ -156,7 +156,7 @@ final class BridgeConnectionController { } private func currentCaps() -> [String] { - var caps = [ClawdisCapability.canvas.rawValue] + var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue] // Default-on: if the key doesn't exist yet, treat it as enabled. 
let cameraEnabled = @@ -181,6 +181,7 @@ final class BridgeConnectionController { ClawdisCanvasA2UICommand.push.rawValue, ClawdisCanvasA2UICommand.pushJSONL.rawValue, ClawdisCanvasA2UICommand.reset.rawValue, + ClawdisScreenCommand.record.rawValue, ] let caps = Set(self.currentCaps()) diff --git a/apps/ios/Sources/Model/NodeAppModel.swift b/apps/ios/Sources/Model/NodeAppModel.swift index ca94a727b..1cb380f3b 100644 --- a/apps/ios/Sources/Model/NodeAppModel.swift +++ b/apps/ios/Sources/Model/NodeAppModel.swift @@ -17,6 +17,7 @@ final class NodeAppModel { var isBackgrounded: Bool = false let screen = ScreenController() let camera = CameraController() + private let screenRecorder = ScreenRecordService() var bridgeStatusText: String = "Offline" var bridgeServerName: String? var bridgeRemoteAddress: String? @@ -364,13 +365,15 @@ final class NodeAppModel { private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse { let command = req.command - if command.hasPrefix("canvas.") || command.hasPrefix("camera."), self.isBackgrounded { + if (command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.")), + self.isBackgrounded + { return BridgeInvokeResponse( id: req.id, ok: false, error: ClawdisNodeError( code: .backgroundUnavailable, - message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground")) + message: "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground")) } if command.hasPrefix("camera."), !self.isCameraEnabled() { @@ -524,6 +527,36 @@ final class NodeAppModel { self.showCameraHUD(text: "Clip captured", kind: .success, autoHideSeconds: 1.8) return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + case ClawdisScreenCommand.record.rawValue: + let params = (try? Self.decodeParams(ClawdisScreenRecordParams.self, from: req.paramsJSON)) ?? 
+ ClawdisScreenRecordParams() + if let format = params.format, format.lowercased() != "mp4" { + throw NSError(domain: "Screen", code: 30, userInfo: [ + NSLocalizedDescriptionKey: "INVALID_REQUEST: screen format must be mp4", + ]) + } + let path = try await self.screenRecorder.record( + screenIndex: params.screenIndex, + durationMs: params.durationMs, + fps: params.fps, + outPath: nil) + defer { try? FileManager.default.removeItem(atPath: path) } + let data = try Data(contentsOf: URL(fileURLWithPath: path)) + struct Payload: Codable { + var format: String + var base64: String + var durationMs: Int? + var fps: Double? + var screenIndex: Int? + } + let payload = try Self.encodePayload(Payload( + format: "mp4", + base64: data.base64EncodedString(), + durationMs: params.durationMs, + fps: params.fps, + screenIndex: params.screenIndex)) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + default: return BridgeInvokeResponse( id: req.id, diff --git a/apps/ios/Sources/Screen/ScreenRecordService.swift b/apps/ios/Sources/Screen/ScreenRecordService.swift new file mode 100644 index 000000000..54224ec26 --- /dev/null +++ b/apps/ios/Sources/Screen/ScreenRecordService.swift @@ -0,0 +1,205 @@ +import AVFoundation +import UIKit + +@MainActor +final class ScreenRecordService { + enum ScreenRecordError: LocalizedError { + case noWindow + case invalidScreenIndex(Int) + case captureFailed(String) + case writeFailed(String) + + var errorDescription: String? { + switch self { + case .noWindow: + return "Screen capture unavailable" + case let .invalidScreenIndex(idx): + return "Invalid screen index \(idx)" + case let .captureFailed(msg): + return msg + case let .writeFailed(msg): + return msg + } + } + } + + func record( + screenIndex: Int?, + durationMs: Int?, + fps: Double?, + outPath: String?) 
async throws -> String + { + let durationMs = Self.clampDurationMs(durationMs) + let fps = Self.clampFps(fps) + let fpsInt = Int32(fps.rounded()) + let fpsValue = Double(fpsInt) + + let outURL: URL = { + if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + return URL(fileURLWithPath: outPath) + } + return FileManager.default.temporaryDirectory + .appendingPathComponent("clawdis-screen-record-\(UUID().uuidString).mp4") + }() + try? FileManager.default.removeItem(at: outURL) + + if let idx = screenIndex, idx != 0 { + throw ScreenRecordError.invalidScreenIndex(idx) + } + + guard let window = Self.resolveKeyWindow() else { + throw ScreenRecordError.noWindow + } + + let size = window.bounds.size + let scale = window.screen.scale + let widthPx = max(1, Int(size.width * scale)) + let heightPx = max(1, Int(size.height * scale)) + + let writer = try AVAssetWriter(outputURL: outURL, fileType: .mp4) + let settings: [String: Any] = [ + AVVideoCodecKey: AVVideoCodecType.h264, + AVVideoWidthKey: widthPx, + AVVideoHeightKey: heightPx, + ] + let input = AVAssetWriterInput(mediaType: .video, outputSettings: settings) + input.expectsMediaDataInRealTime = false + + let attrs: [String: Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA, + kCVPixelBufferWidthKey as String: widthPx, + kCVPixelBufferHeightKey as String: heightPx, + kCVPixelBufferCGImageCompatibilityKey as String: true, + kCVPixelBufferCGBitmapContextCompatibilityKey as String: true, + ] + let adaptor = AVAssetWriterInputPixelBufferAdaptor( + assetWriterInput: input, + sourcePixelBufferAttributes: attrs) + + guard writer.canAdd(input) else { + throw ScreenRecordError.writeFailed("Cannot add video input") + } + writer.add(input) + + guard writer.startWriting() else { + throw ScreenRecordError.writeFailed(writer.error?.localizedDescription ?? 
"Failed to start writer") + } + writer.startSession(atSourceTime: .zero) + + let frameCount = max(1, Int((Double(durationMs) / 1000.0 * fpsValue).rounded(.up))) + let frameDuration = CMTime(value: 1, timescale: fpsInt) + let frameSleepNs = UInt64(1_000_000_000.0 / fpsValue) + + for frame in 0..) in + writer.finishWriting { + if let err = writer.error { + cont.resume(throwing: ScreenRecordError.writeFailed(err.localizedDescription)) + } else if writer.status != .completed { + cont.resume(throwing: ScreenRecordError.writeFailed("Failed to finalize video")) + } else { + cont.resume() + } + } + } + + return outURL.path + } + + private nonisolated static func clampDurationMs(_ ms: Int?) -> Int { + let v = ms ?? 10_000 + return min(60_000, max(250, v)) + } + + private nonisolated static func clampFps(_ fps: Double?) -> Double { + let v = fps ?? 10 + if !v.isFinite { return 10 } + return min(30, max(1, v)) + } + + private nonisolated static func resolveKeyWindow() -> UIWindow? { + let scenes = UIApplication.shared.connectedScenes + for scene in scenes { + guard let windowScene = scene as? UIWindowScene else { continue } + if let window = windowScene.windows.first(where: { $0.isKeyWindow }) { + return window + } + if let window = windowScene.windows.first { + return window + } + } + return nil + } + + private nonisolated static func captureImage(window: UIWindow, size: CGSize) -> CGImage? { + let format = UIGraphicsImageRendererFormat() + format.scale = window.screen.scale + let renderer = UIGraphicsImageRenderer(size: size, format: format) + let image = renderer.image { _ in + window.drawHierarchy(in: CGRect(origin: .zero, size: size), afterScreenUpdates: false) + } + return image.cgImage + } + + private nonisolated static func pixelBuffer(from image: CGImage, width: Int, height: Int) -> CVPixelBuffer? { + var buffer: CVPixelBuffer? 
+ let status = CVPixelBufferCreate( + kCFAllocatorDefault, + width, + height, + kCVPixelFormatType_32BGRA, + [ + kCVPixelBufferCGImageCompatibilityKey: true, + kCVPixelBufferCGBitmapContextCompatibilityKey: true, + ] as CFDictionary, + &buffer) + guard status == kCVReturnSuccess, let buffer else { return nil } + + CVPixelBufferLockBaseAddress(buffer, []) + defer { CVPixelBufferUnlockBaseAddress(buffer, []) } + + guard let context = CGContext( + data: CVPixelBufferGetBaseAddress(buffer), + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: CVPixelBufferGetBytesPerRow(buffer), + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue + ) else { + return nil + } + + context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height)) + return buffer + } +} diff --git a/apps/ios/Sources/Settings/SettingsTab.swift b/apps/ios/Sources/Settings/SettingsTab.swift index 09251fffa..02016243f 100644 --- a/apps/ios/Sources/Settings/SettingsTab.swift +++ b/apps/ios/Sources/Settings/SettingsTab.swift @@ -283,7 +283,7 @@ struct SettingsTab: View { } private func currentCaps() -> [String] { - var caps = [ClawdisCapability.canvas.rawValue] + var caps = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue] let cameraEnabled = UserDefaults.standard.object(forKey: "camera.enabled") == nil @@ -307,6 +307,7 @@ struct SettingsTab: View { ClawdisCanvasA2UICommand.push.rawValue, ClawdisCanvasA2UICommand.pushJSONL.rawValue, ClawdisCanvasA2UICommand.reset.rawValue, + ClawdisScreenCommand.record.rawValue, ] let caps = Set(self.currentCaps()) diff --git a/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift b/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift index 41b93b5c1..1effbd89e 100644 --- a/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift +++ b/apps/macos/Sources/Clawdis/NodeMode/MacNodeModeCoordinator.swift @@ -98,7 +98,7 @@ final class MacNodeModeCoordinator { } 
private func currentCaps() -> [String] { - var caps: [String] = [ClawdisCapability.canvas.rawValue] + var caps: [String] = [ClawdisCapability.canvas.rawValue, ClawdisCapability.screen.rawValue] if UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false { caps.append(ClawdisCapability.camera.rawValue) } diff --git a/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift b/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift index 157e0a62f..9696d34f7 100644 --- a/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift +++ b/apps/macos/Sources/Clawdis/NodeMode/MacNodeRuntime.swift @@ -74,6 +74,14 @@ actor MacNodeRuntime { return try await self.handleA2UIPush(req) case ClawdisCameraCommand.snap.rawValue: + guard Self.cameraEnabled() else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdisNodeError( + code: .unavailable, + message: "CAMERA_DISABLED: enable Camera in Settings")) + } let params = (try? Self.decodeParams(ClawdisCameraSnapParams.self, from: req.paramsJSON)) ?? ClawdisCameraSnapParams() let res = try await self.cameraCapture.snap( @@ -94,6 +102,14 @@ actor MacNodeRuntime { return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) case ClawdisCameraCommand.clip.rawValue: + guard Self.cameraEnabled() else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdisNodeError( + code: .unavailable, + message: "CAMERA_DISABLED: enable Camera in Settings")) + } let params = (try? Self.decodeParams(ClawdisCameraClipParams.self, from: req.paramsJSON)) ?? ClawdisCameraClipParams() let res = try await self.cameraCapture.clip( @@ -119,6 +135,12 @@ actor MacNodeRuntime { case MacNodeScreenCommand.record.rawValue: let params = (try? Self.decodeParams(MacNodeScreenRecordParams.self, from: req.paramsJSON)) ?? 
MacNodeScreenRecordParams() + if let format = params.format?.lowercased(), !format.isEmpty, format != "mp4" { + return Self.errorResponse( + req, + code: .invalidRequest, + message: "INVALID_REQUEST: screen format must be mp4") + } let path = try await self.screenRecorder.record( screenIndex: params.screenIndex, durationMs: params.durationMs, @@ -134,7 +156,7 @@ actor MacNodeRuntime { var screenIndex: Int? } let payload = try Self.encodePayload(ScreenPayload( - format: params.format ?? "mp4", + format: "mp4", base64: data.base64EncodedString(), durationMs: params.durationMs, fps: params.fps, @@ -224,6 +246,10 @@ actor MacNodeRuntime { return json } + private nonisolated static func cameraEnabled() -> Bool { + UserDefaults.standard.object(forKey: cameraEnabledKey) as? Bool ?? false + } + private static func errorResponse( _ req: BridgeInvokeRequest, code: ClawdisNodeErrorCode, diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift index 25f98c301..2abd1a6ec 100644 --- a/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift +++ b/apps/shared/ClawdisKit/Sources/ClawdisKit/Capabilities.swift @@ -3,5 +3,6 @@ import Foundation public enum ClawdisCapability: String, Codable, Sendable { case canvas case camera + case screen case voiceWake } diff --git a/docs/nodes.md b/docs/nodes.md index 52fa095f3..d5383bc10 100644 --- a/docs/nodes.md +++ b/docs/nodes.md @@ -47,6 +47,13 @@ clawdis nodes canvas snapshot --node --format png clawdis nodes canvas snapshot --node --format jpg --max-width 1200 --quality 0.9 ``` +Simple shortcut (auto-picks a single connected node if possible): + +```bash +clawdis canvas snapshot --format png +clawdis canvas snapshot --format jpg --max-width 1200 --quality 0.9 +``` + ## Photos + videos (node camera) Photos (`jpg`): @@ -68,14 +75,19 @@ Notes: - Clip duration is clamped (currently `<= 60s`) to avoid oversized base64 payloads. 
- Android will prompt for `CAMERA`/`RECORD_AUDIO` permissions when possible; denied permissions fail with `*_PERMISSION_REQUIRED`. -## Screen recordings (mac node) +## Screen recordings (nodes) -Mac node mode exposes `screen.record` (mp4). Example: +Nodes expose `screen.record` (mp4). Example: ```bash clawdis nodes screen record --node --duration 10s --fps 10 ``` +Notes: +- `screen.record` requires the node app to be foregrounded. +- Android will show the system screen-capture prompt before recording. +- Screen recordings are clamped to `<= 60s`. + ## Mac node mode - The macOS menubar app connects to the Gateway bridge as a node (so `clawdis nodes …` works against this Mac). diff --git a/src/cli/canvas-cli.ts b/src/cli/canvas-cli.ts new file mode 100644 index 000000000..f9ceb2712 --- /dev/null +++ b/src/cli/canvas-cli.ts @@ -0,0 +1,244 @@ +import type { Command } from "commander"; +import { callGateway, randomIdempotencyKey } from "../gateway/call.js"; +import { defaultRuntime } from "../runtime.js"; +import { writeBase64ToFile } from "./nodes-camera.js"; +import { + canvasSnapshotTempPath, + parseCanvasSnapshotPayload, +} from "./nodes-canvas.js"; + +type CanvasOpts = { + url?: string; + token?: string; + timeout?: string; + json?: boolean; + node?: string; + format?: string; + maxWidth?: string; + quality?: string; +}; + +type NodeListNode = { + nodeId: string; + displayName?: string; + platform?: string; + remoteIp?: string; + caps?: string[]; + connected?: boolean; +}; + +type PendingRequest = { + requestId: string; + nodeId: string; + displayName?: string; + remoteIp?: string; +}; + +type PairedNode = { + nodeId: string; + displayName?: string; + remoteIp?: string; +}; + +type PairingList = { + pending: PendingRequest[]; + paired: PairedNode[]; +}; + +const canvasCallOpts = (cmd: Command) => + cmd + .option("--url ", "Gateway WebSocket URL", "ws://127.0.0.1:18789") + .option("--token ", "Gateway token (if required)") + .option("--timeout ", "Timeout in ms", 
"10000") + .option("--json", "Output JSON", false); + +const callGatewayCli = async ( + method: string, + opts: CanvasOpts, + params?: unknown, +) => + callGateway({ + url: opts.url, + token: opts.token, + method, + params, + timeoutMs: Number(opts.timeout ?? 10_000), + clientName: "cli", + mode: "cli", + }); + +function parseNodeList(value: unknown): NodeListNode[] { + const obj = + typeof value === "object" && value !== null + ? (value as Record) + : {}; + return Array.isArray(obj.nodes) ? (obj.nodes as NodeListNode[]) : []; +} + +function parsePairingList(value: unknown): PairingList { + const obj = + typeof value === "object" && value !== null + ? (value as Record) + : {}; + const pending = Array.isArray(obj.pending) + ? (obj.pending as PendingRequest[]) + : []; + const paired = Array.isArray(obj.paired) ? (obj.paired as PairedNode[]) : []; + return { pending, paired }; +} + +function normalizeNodeKey(value: string) { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+/, "") + .replace(/-+$/, ""); +} + +async function loadNodes(opts: CanvasOpts): Promise { + try { + const res = (await callGatewayCli("node.list", opts, {})) as unknown; + return parseNodeList(res); + } catch { + const res = (await callGatewayCli("node.pair.list", opts, {})) as unknown; + const { paired } = parsePairingList(res); + return paired.map((n) => ({ + nodeId: n.nodeId, + displayName: n.displayName, + remoteIp: n.remoteIp, + })); + } +} + +function pickDefaultNode(nodes: NodeListNode[]): NodeListNode | null { + const withCanvas = nodes.filter((n) => + Array.isArray(n.caps) ? n.caps.includes("canvas") : true, + ); + if (withCanvas.length === 0) return null; + + const connected = withCanvas.filter((n) => n.connected); + const candidates = connected.length > 0 ? 
connected : withCanvas; + if (candidates.length === 1) return candidates[0]; + + const local = candidates.filter((n) => + n.platform?.toLowerCase().startsWith("mac") && + typeof n.nodeId === "string" && + n.nodeId.startsWith("mac-"), + ); + if (local.length === 1) return local[0]; + + return null; +} + +async function resolveNodeId(opts: CanvasOpts, query?: string) { + const nodes = await loadNodes(opts); + const q = String(query ?? "").trim(); + if (!q) { + const picked = pickDefaultNode(nodes); + if (picked) return picked.nodeId; + throw new Error( + "node required (use --node or ensure only one connected node is available)", + ); + } + + const qNorm = normalizeNodeKey(q); + const matches = nodes.filter((n) => { + if (n.nodeId === q) return true; + if (typeof n.remoteIp === "string" && n.remoteIp === q) return true; + const name = typeof n.displayName === "string" ? n.displayName : ""; + if (name && normalizeNodeKey(name) === qNorm) return true; + if (q.length >= 6 && n.nodeId.startsWith(q)) return true; + return false; + }); + + if (matches.length === 1) return matches[0].nodeId; + if (matches.length === 0) { + const known = nodes + .map((n) => n.displayName || n.remoteIp || n.nodeId) + .filter(Boolean) + .join(", "); + throw new Error(`unknown node: ${q}${known ? 
` (known: ${known})` : ""}`); + } + throw new Error( + `ambiguous node: ${q} (matches: ${matches + .map((n) => n.displayName || n.remoteIp || n.nodeId) + .join(", ")})`, + ); +} + +function normalizeFormat(format: string) { + const trimmed = format.trim().toLowerCase(); + if (trimmed === "jpg") return "jpeg"; + return trimmed; +} + +export function registerCanvasCli(program: Command) { + const canvas = program + .command("canvas") + .description("Render the canvas to a snapshot via nodes"); + + canvasCallOpts( + canvas + .command("snapshot") + .description("Capture a canvas snapshot (prints MEDIA:)") + .option("--node ", "Node id, name, or IP") + .option("--format ", "Output format", "png") + .option("--max-width ", "Max width (px)") + .option("--quality <0-1>", "JPEG quality (default 0.82)") + .action(async (opts: CanvasOpts) => { + try { + const nodeId = await resolveNodeId(opts, opts.node); + const format = normalizeFormat(String(opts.format ?? "png")); + if (format !== "png" && format !== "jpeg") { + throw new Error("invalid format (use png or jpg)"); + } + const maxWidth = opts.maxWidth + ? Number.parseInt(String(opts.maxWidth), 10) + : undefined; + const quality = opts.quality + ? Number.parseFloat(String(opts.quality)) + : undefined; + + const raw = (await callGatewayCli("node.invoke", opts, { + nodeId, + command: "canvas.snapshot", + params: { + format, + maxWidth: Number.isFinite(maxWidth) ? maxWidth : undefined, + quality: Number.isFinite(quality) ? quality : undefined, + }, + idempotencyKey: randomIdempotencyKey(), + })) as unknown; + + const res = + typeof raw === "object" && raw !== null + ? (raw as { payload?: unknown }) + : {}; + const payload = parseCanvasSnapshotPayload(res.payload); + const filePath = canvasSnapshotTempPath({ + ext: payload.format === "jpeg" ? 
"jpg" : payload.format, + }); + await writeBase64ToFile(filePath, payload.base64); + + if (opts.json) { + defaultRuntime.log( + JSON.stringify( + { + file: { + path: filePath, + }, + }, + null, + 2, + ), + ); + return; + } + defaultRuntime.log(`MEDIA:${filePath}`); + } catch (err) { + defaultRuntime.error(`canvas snapshot failed: ${String(err)}`); + defaultRuntime.exit(1); + } + }), + ); +} diff --git a/src/cli/program.test.ts b/src/cli/program.test.ts index 98888f4c8..0fbbd7f80 100644 --- a/src/cli/program.test.ts +++ b/src/cli/program.test.ts @@ -607,6 +607,44 @@ describe("cli program", () => { } }); + it("runs canvas snapshot and prints MEDIA path", async () => { + callGateway + .mockResolvedValueOnce({ + ts: Date.now(), + nodes: [ + { + nodeId: "mac-1", + displayName: "Mac Node", + platform: "macos", + connected: true, + caps: ["canvas"], + }, + ], + }) + .mockResolvedValueOnce({ + ok: true, + nodeId: "mac-1", + command: "canvas.snapshot", + payload: { format: "png", base64: "aGk=" }, + }); + + const program = buildProgram(); + runtime.log.mockClear(); + await program.parseAsync(["canvas", "snapshot", "--format", "png"], { + from: "user", + }); + + const out = String(runtime.log.mock.calls[0]?.[0] ?? 
""); + const mediaPath = out.replace(/^MEDIA:/, "").trim(); + expect(mediaPath).toMatch(/clawdis-canvas-snapshot-.*\.png$/); + + try { + await expect(fs.readFile(mediaPath, "utf8")).resolves.toBe("hi"); + } finally { + await fs.unlink(mediaPath).catch(() => {}); + } + }); + it("fails nodes camera snap on invalid facing", async () => { callGateway.mockResolvedValueOnce({ ts: Date.now(), diff --git a/src/cli/program.ts b/src/cli/program.ts index df8bbab78..b3f73e933 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -26,6 +26,7 @@ import { danger, info, setVerbose } from "../globals.js"; import { loginWeb, logoutWeb } from "../provider-web.js"; import { defaultRuntime } from "../runtime.js"; import { VERSION } from "../version.js"; +import { registerCanvasCli } from "./canvas-cli.js"; import { registerCronCli } from "./cron-cli.js"; import { createDefaultDeps } from "./deps.js"; import { registerDnsCli } from "./dns-cli.js"; @@ -245,6 +246,7 @@ Examples: } }); + registerCanvasCli(program); registerGatewayCli(program); registerNodesCli(program); registerCronCli(program);