feat: add node screen recording across apps

This commit is contained in:
Peter Steinberger
2025-12-19 02:56:48 +01:00
parent b8012a2281
commit 7f3be083c1
20 changed files with 837 additions and 22 deletions

View File

@@ -4,6 +4,7 @@
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_DATA_SYNC" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION" />
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
<uses-permission
android:name="android.permission.NEARBY_WIFI_DEVICES"
@@ -26,7 +27,7 @@
<service
android:name=".NodeForegroundService"
android:exported="false"
android:foregroundServiceType="dataSync|microphone" />
android:foregroundServiceType="dataSync|microphone|mediaProjection" />
<activity
android:name=".MainActivity"
android:exported="true">

View File

@@ -25,6 +25,7 @@ import kotlinx.coroutines.launch
class MainActivity : ComponentActivity() {
private val viewModel: MainViewModel by viewModels()
private lateinit var permissionRequester: PermissionRequester
private lateinit var screenCaptureRequester: ScreenCaptureRequester
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
@@ -35,8 +36,10 @@ class MainActivity : ComponentActivity() {
requestNotificationPermissionIfNeeded()
NodeForegroundService.start(this)
permissionRequester = PermissionRequester(this)
screenCaptureRequester = ScreenCaptureRequester(this)
viewModel.camera.attachLifecycleOwner(this)
viewModel.camera.attachPermissionRequester(permissionRequester)
viewModel.screenRecorder.attachScreenCaptureRequester(screenCaptureRequester)
lifecycleScope.launch {
repeatOnLifecycle(Lifecycle.State.STARTED) {

View File

@@ -6,6 +6,7 @@ import com.steipete.clawdis.node.bridge.BridgeEndpoint
import com.steipete.clawdis.node.chat.OutgoingAttachment
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
import com.steipete.clawdis.node.node.ScreenRecordManager
import kotlinx.coroutines.flow.StateFlow
class MainViewModel(app: Application) : AndroidViewModel(app) {
@@ -13,6 +14,7 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
val canvas: CanvasController = runtime.canvas
val camera: CameraCaptureManager = runtime.camera
val screenRecorder: ScreenRecordManager = runtime.screenRecorder
val bridges: StateFlow<List<BridgeEndpoint>> = runtime.bridges
val discoveryStatusText: StateFlow<String> = runtime.discoveryStatusText

View File

@@ -17,11 +17,13 @@ import com.steipete.clawdis.node.bridge.BridgePairingClient
import com.steipete.clawdis.node.bridge.BridgeSession
import com.steipete.clawdis.node.node.CameraCaptureManager
import com.steipete.clawdis.node.node.CanvasController
import com.steipete.clawdis.node.node.ScreenRecordManager
import com.steipete.clawdis.node.protocol.ClawdisCapability
import com.steipete.clawdis.node.protocol.ClawdisCameraCommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UIAction
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UICommand
import com.steipete.clawdis.node.protocol.ClawdisCanvasCommand
import com.steipete.clawdis.node.protocol.ClawdisScreenCommand
import com.steipete.clawdis.node.voice.VoiceWakeManager
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
@@ -51,6 +53,7 @@ class NodeRuntime(context: Context) {
val prefs = SecurePrefs(appContext)
val canvas = CanvasController()
val camera = CameraCaptureManager(appContext)
val screenRecorder = ScreenRecordManager(appContext)
private val json = Json { ignoreUnknownKeys = true }
private val externalAudioCaptureActive = MutableStateFlow(false)
@@ -287,6 +290,7 @@ class NodeRuntime(context: Context) {
add(ClawdisCanvasA2UICommand.Push.rawValue)
add(ClawdisCanvasA2UICommand.PushJSONL.rawValue)
add(ClawdisCanvasA2UICommand.Reset.rawValue)
add(ClawdisScreenCommand.Record.rawValue)
if (cameraEnabled.value) {
add(ClawdisCameraCommand.Snap.rawValue)
add(ClawdisCameraCommand.Clip.rawValue)
@@ -294,17 +298,18 @@ class NodeRuntime(context: Context) {
}
val resolved =
if (storedToken.isNullOrBlank()) {
_statusText.value = "Pairing…"
val caps = buildList {
add(ClawdisCapability.Canvas.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
}
}
BridgePairingClient().pairAndHello(
endpoint = endpoint,
hello =
_statusText.value = "Pairing…"
val caps = buildList {
add(ClawdisCapability.Canvas.rawValue)
add(ClawdisCapability.Screen.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
}
}
BridgePairingClient().pairAndHello(
endpoint = endpoint,
hello =
BridgePairingClient.Hello(
nodeId = instanceId.value,
displayName = displayName.value,
@@ -342,6 +347,7 @@ class NodeRuntime(context: Context) {
caps =
buildList {
add(ClawdisCapability.Canvas.rawValue)
add(ClawdisCapability.Screen.rawValue)
if (cameraEnabled.value) add(ClawdisCapability.Camera.rawValue)
if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) {
add(ClawdisCapability.VoiceWake.rawValue)
@@ -534,12 +540,13 @@ class NodeRuntime(context: Context) {
if (
command.startsWith(ClawdisCanvasCommand.NamespacePrefix) ||
command.startsWith(ClawdisCanvasA2UICommand.NamespacePrefix) ||
command.startsWith(ClawdisCameraCommand.NamespacePrefix)
command.startsWith(ClawdisCameraCommand.NamespacePrefix) ||
command.startsWith(ClawdisScreenCommand.NamespacePrefix)
) {
if (!isForeground.value) {
return BridgeSession.InvokeResult.error(
code = "NODE_BACKGROUND_UNAVAILABLE",
message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera commands require foreground",
message = "NODE_BACKGROUND_UNAVAILABLE: canvas/camera/screen commands require foreground",
)
}
}
@@ -649,6 +656,16 @@ class NodeRuntime(context: Context) {
if (includeAudio) externalAudioCaptureActive.value = false
}
}
ClawdisScreenCommand.Record.rawValue -> {
val res =
try {
screenRecorder.record(paramsJson)
} catch (err: Throwable) {
val (code, message) = invokeErrorFromThrowable(err)
return BridgeSession.InvokeResult.error(code = code, message = message)
}
BridgeSession.InvokeResult.ok(res.payloadJson)
}
else ->
BridgeSession.InvokeResult.error(
code = "INVALID_REQUEST",

View File

@@ -2,6 +2,7 @@ package com.steipete.clawdis.node
import android.content.pm.PackageManager
import android.content.Intent
import android.Manifest
import android.net.Uri
import android.provider.Settings
import androidx.appcompat.app.AlertDialog

View File

@@ -0,0 +1,65 @@
package com.steipete.clawdis.node
import android.app.Activity
import android.content.Context
import android.content.Intent
import android.media.projection.MediaProjectionManager
import androidx.activity.ComponentActivity
import androidx.activity.result.ActivityResultLauncher
import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.app.AlertDialog
import kotlinx.coroutines.CompletableDeferred
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext
import kotlinx.coroutines.withTimeout
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlin.coroutines.resume
/**
 * Asks the user for screen-capture consent on behalf of a suspending caller.
 *
 * The request is a two-step flow: an in-app rationale dialog, followed by the system
 * screen-capture consent activity created by [MediaProjectionManager.createScreenCaptureIntent].
 * Requests are serialized with a [Mutex], so only one flow is on screen at a time.
 *
 * The activity-result launcher is registered in the property initializer, so an instance has to
 * be constructed while [activity] is still allowed to register result callbacks.
 */
class ScreenCaptureRequester(private val activity: ComponentActivity) {
    /** Result code and intent returned by the system consent activity. */
    data class CaptureResult(val resultCode: Int, val data: Intent)

    private val mutex = Mutex()

    // Written by the requesting coroutine and read by the activity-result callback on the main
    // thread, hence @Volatile.
    @Volatile
    private var pending: CompletableDeferred<CaptureResult?>? = null

    private val launcher: ActivityResultLauncher<Intent> =
        activity.registerForActivityResult(ActivityResultContracts.StartActivityForResult()) { result ->
            val waiter = pending
            pending = null
            val data = result.data
            val capture =
                if (result.resultCode == Activity.RESULT_OK && data != null) {
                    CaptureResult(result.resultCode, data)
                } else {
                    null
                }
            // A result that arrives after the requester gave up finds no waiter and is dropped.
            waiter?.complete(capture)
        }

    /**
     * Shows the rationale dialog and then the system consent activity.
     *
     * @param timeoutMs how long to wait for the system consent activity to report a result; the
     *   time spent in the rationale dialog is not counted.
     * @return the granted capture token, or `null` when the user declined either step or the
     *   consent activity did not answer within [timeoutMs].
     */
    suspend fun requestCapture(timeoutMs: Long = 20_000): CaptureResult? =
        mutex.withLock {
            if (!showRationaleDialog()) return null

            val mgr = activity.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
            val intent = mgr.createScreenCaptureIntent()
            val deferred = CompletableDeferred<CaptureResult?>()
            pending = deferred
            try {
                withContext(Dispatchers.Main) { launcher.launch(intent) }
                // withTimeoutOrNull (rather than withTimeout) so that a timeout is reported as
                // "not granted" instead of escaping as a CancellationException subtype.
                kotlinx.coroutines.withTimeoutOrNull(timeoutMs) { deferred.await() }
            } finally {
                // Do not leave a stale waiter behind after a timeout or cancellation.
                if (pending === deferred) pending = null
            }
        }

    /**
     * Shows the in-app rationale dialog on the main thread.
     *
     * @return `true` when the user chose to continue, `false` when the dialog was declined or
     *   cancelled.
     */
    private suspend fun showRationaleDialog(): Boolean =
        withContext(Dispatchers.Main) {
            suspendCancellableCoroutine { cont ->
                val dialog =
                    AlertDialog.Builder(activity)
                        .setTitle("Screen recording required")
                        .setMessage("Clawdis needs to record the screen for this command.")
                        .setPositiveButton("Continue") { _, _ -> if (cont.isActive) cont.resume(true) }
                        .setNegativeButton("Not now") { _, _ -> if (cont.isActive) cont.resume(false) }
                        .setOnCancelListener { if (cont.isActive) cont.resume(false) }
                        .show()
                // Take the dialog down if the caller stops waiting. The handler must not throw,
                // so a failed dismiss is ignored.
                cont.invokeOnCancellation { runCatching { dialog.dismiss() } }
            }
        }
}

View File

@@ -0,0 +1,147 @@
package com.steipete.clawdis.node.node
import android.content.Context
import android.hardware.display.DisplayManager
import android.media.MediaRecorder
import android.media.projection.MediaProjectionManager
import android.util.Base64
import com.steipete.clawdis.node.ScreenCaptureRequester
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
import java.io.File
import kotlin.math.roundToInt
/**
 * Records the device screen to an MP4 clip and returns it as a base64 JSON payload.
 *
 * User consent is obtained through a [ScreenCaptureRequester] that must be attached with
 * [attachScreenCaptureRequester] before [record] is called.
 */
class ScreenRecordManager(private val context: Context) {
    /** JSON document describing a finished recording. */
    data class Payload(val payloadJson: String)

    @Volatile private var screenCaptureRequester: ScreenCaptureRequester? = null

    /** Sets the requester used to obtain screen-capture consent for subsequent recordings. */
    fun attachScreenCaptureRequester(requester: ScreenCaptureRequester) {
        screenCaptureRequester = requester
    }

    /**
     * Records the screen once and returns the encoded clip.
     *
     * Recognised keys in [paramsJson] (all optional):
     * - `durationMs`: clip length, default 10000, clamped to 250..60000.
     * - `fps`: frame rate, default 10, clamped to 1..60 and rounded to an integer.
     * - `screenIndex`: must be 0 when present.
     * - `format`: must be `mp4` (case-insensitive) when present.
     *
     * @return a [Payload] whose JSON has the keys `format`, `base64`, `durationMs`, `fps` and
     *   `screenIndex`.
     * @throws IllegalStateException when no requester is attached, the user does not grant
     *   capture, or no media projection can be obtained.
     * @throws IllegalArgumentException when `format` or `screenIndex` has an unsupported value.
     */
    suspend fun record(paramsJson: String?): Payload =
        withContext(Dispatchers.Default) {
            val requester =
                screenCaptureRequester
                    ?: throw IllegalStateException(
                        "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
                    )

            val durationMs = (parseDurationMs(paramsJson) ?: 10_000).coerceIn(250, 60_000)
            val fps = (parseFps(paramsJson) ?: 10.0).coerceIn(1.0, 60.0)
            val fpsInt = fps.roundToInt().coerceIn(1, 60)
            val screenIndex = parseScreenIndex(paramsJson)
            val format = parseString(paramsJson, key = "format")
            if (format != null && format.lowercase() != "mp4") {
                throw IllegalArgumentException("INVALID_REQUEST: screen format must be mp4")
            }
            if (screenIndex != null && screenIndex != 0) {
                throw IllegalArgumentException("INVALID_REQUEST: screenIndex must be 0 on Android")
            }

            val capture =
                requester.requestCapture()
                    ?: throw IllegalStateException(
                        "SCREEN_PERMISSION_REQUIRED: grant Screen Recording permission",
                    )

            val metrics = context.resources.displayMetrics
            val width = metrics.widthPixels
            val height = metrics.heightPixels
            val densityDpi = metrics.densityDpi

            val file = File.createTempFile("clawdis-screen-", ".mp4")
            val bytes =
                try {
                    val mgr =
                        context.getSystemService(Context.MEDIA_PROJECTION_SERVICE) as MediaProjectionManager
                    val projection =
                        mgr.getMediaProjection(capture.resultCode, capture.data)
                            ?: throw IllegalStateException("UNAVAILABLE: screen capture unavailable")
                    recordToFile(projection, file, width, height, densityDpi, fpsInt, durationMs)
                    withContext(Dispatchers.IO) { file.readBytes() }
                } finally {
                    // Remove the temp file on every path, including failures and cancellation.
                    file.delete()
                }

            val base64 = Base64.encodeToString(bytes, Base64.NO_WRAP)
            Payload(
                """{"format":"mp4","base64":"$base64","durationMs":$durationMs,"fps":$fpsInt,"screenIndex":0}""",
            )
        }

    /**
     * Mirrors the screen into a [MediaRecorder] surface for [durationMs] and writes H.264/MP4 to
     * [file]. Always releases the recorder and virtual display and stops [projection], even when
     * setup fails or the coroutine is cancelled while waiting.
     */
    private suspend fun recordToFile(
        projection: android.media.projection.MediaProjection,
        file: File,
        width: Int,
        height: Int,
        densityDpi: Int,
        fps: Int,
        durationMs: Int,
    ) {
        val projectionCallback = object : android.media.projection.MediaProjection.Callback() {}
        try {
            // Android 14+ rejects createVirtualDisplay() unless a callback has been registered.
            // This code runs on a worker thread without a Looper, so deliver callbacks on the
            // main looper.
            projection.registerCallback(
                projectionCallback,
                android.os.Handler(android.os.Looper.getMainLooper()),
            )

            val recorder = MediaRecorder()
            var virtualDisplay: android.hardware.display.VirtualDisplay? = null
            try {
                recorder.setVideoSource(MediaRecorder.VideoSource.SURFACE)
                recorder.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4)
                recorder.setVideoEncoder(MediaRecorder.VideoEncoder.H264)
                recorder.setVideoSize(width, height)
                recorder.setVideoFrameRate(fps)
                recorder.setVideoEncodingBitRate(estimateBitrate(width, height, fps))
                recorder.setOutputFile(file.absolutePath)
                recorder.prepare()

                virtualDisplay =
                    projection.createVirtualDisplay(
                        "clawdis-screen",
                        width,
                        height,
                        densityDpi,
                        DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
                        recorder.surface,
                        null,
                        null,
                    )

                recorder.start()
                delay(durationMs.toLong())
            } finally {
                // Each step is best-effort so that one failing call cannot skip the rest.
                // stop() in particular throws when start() was never reached.
                bestEffort { recorder.stop() }
                bestEffort { recorder.reset() }
                bestEffort { recorder.release() }
                bestEffort { virtualDisplay?.release() }
            }
        } finally {
            bestEffort { projection.unregisterCallback(projectionCallback) }
            bestEffort { projection.stop() }
        }
    }

    /** Runs [action] and ignores any failure; used only for resource teardown. */
    private inline fun bestEffort(action: () -> Unit) {
        try {
            action()
        } catch (_: Throwable) {
            // ignore: teardown must continue with the remaining resources
        }
    }

    // Parsed as a double first so that fractional or oversized values are rounded and later
    // clamped by the caller, instead of silently falling back to the default duration.
    private fun parseDurationMs(paramsJson: String?): Int? =
        parseNumber(paramsJson, key = "durationMs")?.toDoubleOrNull()?.roundToInt()

    private fun parseFps(paramsJson: String?): Double? =
        parseNumber(paramsJson, key = "fps")?.toDoubleOrNull()

    private fun parseScreenIndex(paramsJson: String?): Int? =
        parseNumber(paramsJson, key = "screenIndex")?.toIntOrNull()

    /**
     * Extracts the raw numeric text that follows the first occurrence of `"key":` in [paramsJson].
     *
     * This is a textual scan, not a JSON parser: it takes the run of digits, `.` and `-` after
     * the colon. Returns `null` when the key or colon is missing; the result may be empty when
     * the value is not a number.
     */
    private fun parseNumber(paramsJson: String?, key: String): String? {
        val raw = paramsJson ?: return null
        val needle = "\"$key\""
        val idx = raw.indexOf(needle)
        if (idx < 0) return null
        val colon = raw.indexOf(':', idx + needle.length)
        if (colon < 0) return null
        val tail = raw.substring(colon + 1).trimStart()
        return tail.takeWhile { it.isDigit() || it == '.' || it == '-' }
    }

    /**
     * Extracts the quoted string value that follows the first occurrence of `"key":` in
     * [paramsJson], up to the next double quote (escape sequences are not interpreted).
     * Returns `null` when the key is missing or its value is not a quoted string.
     */
    private fun parseString(paramsJson: String?, key: String): String? {
        val raw = paramsJson ?: return null
        val needle = "\"$key\""
        val idx = raw.indexOf(needle)
        if (idx < 0) return null
        val colon = raw.indexOf(':', idx + needle.length)
        if (colon < 0) return null
        val tail = raw.substring(colon + 1).trimStart()
        if (!tail.startsWith('\"')) return null
        val rest = tail.drop(1)
        val end = rest.indexOf('\"')
        if (end < 0) return null
        return rest.substring(0, end)
    }

    /** Two bits per pixel per frame, clamped to 1..12 Mbit/s. */
    private fun estimateBitrate(width: Int, height: Int, fps: Int): Int {
        val raw = width.toLong() * height.toLong() * fps.toLong() * 2L
        // Clamp while still a Long; narrowing first could overflow for very large surfaces.
        return raw.coerceIn(1_000_000L, 12_000_000L).toInt()
    }
}

View File

@@ -3,6 +3,7 @@ package com.steipete.clawdis.node.protocol
/**
 * Feature areas a node can announce.
 *
 * [rawValue] is the string form placed in the capability list; a unit test pins these exact
 * strings, so treat them as stable identifiers rather than display names.
 */
enum class ClawdisCapability(val rawValue: String) {
Canvas("canvas"),
Camera("camera"),
// Screen recording (see the "screen." command namespace).
Screen("screen"),
VoiceWake("voiceWake"),
}
@@ -39,3 +40,12 @@ enum class ClawdisCameraCommand(val rawValue: String) {
const val NamespacePrefix: String = "camera."
}
}
/**
 * Commands in the screen namespace.
 *
 * [rawValue] is the full command name, i.e. [NamespacePrefix] followed by the action.
 */
enum class ClawdisScreenCommand(val rawValue: String) {
    /** Record a clip of the screen. */
    Record(rawValue = "screen.record");

    companion object {
        /** Prefix shared by every command name in this enum. */
        const val NamespacePrefix: String = "screen."
    }
}

View File

@@ -24,6 +24,12 @@ class ClawdisProtocolConstantsTest {
fun capabilitiesUseStableStrings() {
assertEquals("canvas", ClawdisCapability.Canvas.rawValue)
assertEquals("camera", ClawdisCapability.Camera.rawValue)
assertEquals("screen", ClawdisCapability.Screen.rawValue)
assertEquals("voiceWake", ClawdisCapability.VoiceWake.rawValue)
}
// Pins the wire name of the screen-record command so an accidental rename fails the build.
@Test
fun screenCommandsUseStableStrings() {
    val expected = "screen.record"
    val actual = ClawdisScreenCommand.Record.rawValue
    assertEquals(expected, actual)
}
}