From 60321352aa69285cd68f3923d647683042f3238c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 18 Dec 2025 02:08:57 +0100 Subject: [PATCH] Android: add Voice Wake (foreground/always) --- apps/android/app/src/main/AndroidManifest.xml | 3 +- .../steipete/clawdis/node/MainViewModel.kt | 7 + .../clawdis/node/NodeForegroundService.kt | 70 ++++++- .../com/steipete/clawdis/node/NodeRuntime.kt | 93 +++++++++- .../com/steipete/clawdis/node/SecurePrefs.kt | 23 +++ .../steipete/clawdis/node/VoiceWakeMode.kt | 15 ++ .../steipete/clawdis/node/ui/SettingsSheet.kt | 70 +++++++ .../node/voice/VoiceWakeCommandExtractor.kt | 40 ++++ .../clawdis/node/voice/VoiceWakeManager.kt | 173 ++++++++++++++++++ .../node/bridge/BridgePairingClientTest.kt | 2 + .../clawdis/node/bridge/BridgeSessionTest.kt | 9 + .../voice/VoiceWakeCommandExtractorTest.kt | 26 +++ apps/android/gradle.properties | 4 +- apps/android/gradlew | 2 +- apps/android/gradlew.bat | 2 +- 15 files changed, 522 insertions(+), 17 deletions(-) create mode 100644 apps/android/app/src/main/java/com/steipete/clawdis/node/VoiceWakeMode.kt create mode 100644 apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractor.kt create mode 100644 apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeManager.kt create mode 100644 apps/android/app/src/test/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractorTest.kt diff --git a/apps/android/app/src/main/AndroidManifest.xml b/apps/android/app/src/main/AndroidManifest.xml index 530e7f689..7906ca052 100644 --- a/apps/android/app/src/main/AndroidManifest.xml +++ b/apps/android/app/src/main/AndroidManifest.xml @@ -3,6 +3,7 @@ + + android:foregroundServiceType="dataSync|microphone" /> diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt index 14ce19b87..1fe4b9877 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/MainViewModel.kt @@ -27,6 +27,9 @@ class MainViewModel(app: Application) : AndroidViewModel(app) { val cameraEnabled: StateFlow = runtime.cameraEnabled val preventSleep: StateFlow = runtime.preventSleep val wakeWords: StateFlow> = runtime.wakeWords + val voiceWakeMode: StateFlow = runtime.voiceWakeMode + val voiceWakeStatusText: StateFlow = runtime.voiceWakeStatusText + val voiceWakeIsListening: StateFlow = runtime.voiceWakeIsListening val manualEnabled: StateFlow = runtime.manualEnabled val manualHost: StateFlow = runtime.manualHost val manualPort: StateFlow = runtime.manualPort @@ -78,6 +81,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) { runtime.resetWakeWordsDefaults() } + fun setVoiceWakeMode(mode: VoiceWakeMode) { + runtime.setVoiceWakeMode(mode) + } + fun connect(endpoint: BridgeEndpoint) { runtime.connect(endpoint) } diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeForegroundService.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeForegroundService.kt index 660e20e75..86c31e491 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeForegroundService.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeForegroundService.kt @@ -5,11 +5,14 @@ import android.app.NotificationChannel import android.app.NotificationManager import android.app.Service import android.app.PendingIntent +import android.Manifest import android.content.Context import android.content.Intent +import android.content.pm.PackageManager import android.content.pm.ServiceInfo import android.os.Build import androidx.core.app.NotificationCompat +import androidx.core.content.ContextCompat import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job @@ -21,26 +24,42 @@ import kotlinx.coroutines.launch class NodeForegroundService : Service() { private val scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Main) private var notificationJob: Job? = null + private var lastRequiresMic = false + private var didStartForeground = false override fun onCreate() { super.onCreate() ensureChannel() val initial = buildNotification(title = "Clawdis Node", text = "Starting…") - if (Build.VERSION.SDK_INT >= 29) { - startForeground(NOTIFICATION_ID, initial, ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC) - } else { - startForeground(NOTIFICATION_ID, initial) - } + startForegroundWithTypes(notification = initial, requiresMic = false) val runtime = (application as NodeApp).runtime notificationJob = scope.launch { - combine(runtime.statusText, runtime.serverName, runtime.isConnected) { status, server, connected -> - Triple(status, server, connected) - }.collect { (status, server, connected) -> + combine( + runtime.statusText, + runtime.serverName, + runtime.isConnected, + runtime.voiceWakeMode, + runtime.voiceWakeIsListening, + ) { status, server, connected, voiceMode, voiceListening -> + Quint(status, server, connected, voiceMode, voiceListening) + }.collect { (status, server, connected, voiceMode, voiceListening) -> val title = if (connected) "Clawdis Node · Connected" else "Clawdis Node" - val text = server?.let { "$status · $it" } ?: status - updateNotification(buildNotification(title = title, text = text)) + val voiceSuffix = + if (voiceMode == VoiceWakeMode.Always) { + if (voiceListening) " · Voice Wake: Listening" else " · Voice Wake: Paused" + } else { + "" + } + val text = (server?.let { "$status · $it" } ?: status) + voiceSuffix + + val requiresMic = + voiceMode == VoiceWakeMode.Always && hasRecordAudioPermission() + startForegroundWithTypes( + notification = buildNotification(title = title, text = text), + requiresMic = requiresMic, + ) } } } @@ -106,6 +125,35 @@ class NodeForegroundService : Service() { mgr.notify(NOTIFICATION_ID, notification) } + private fun startForegroundWithTypes(notification: Notification, requiresMic: Boolean) { + if (Build.VERSION.SDK_INT < 29) { + startForeground(NOTIFICATION_ID, notification) + return + } + + if (didStartForeground && requiresMic == lastRequiresMic) { + updateNotification(notification) + return + } + + lastRequiresMic = requiresMic + val types = + if (requiresMic) { + ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE + } else { + ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC + } + startForeground(NOTIFICATION_ID, notification, types) + didStartForeground = true + } + + private fun hasRecordAudioPermission(): Boolean { + return ( + ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) == + PackageManager.PERMISSION_GRANTED + ) + } + companion object { private const val CHANNEL_ID = "connection" private const val NOTIFICATION_ID = 1 @@ -127,3 +175,5 @@ class NodeForegroundService : Service() { } } } + +private data class Quint(val first: A, val second: B, val third: C, val fourth: D, val fifth: E) diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt index 8f43a621e..61110a578 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/NodeRuntime.kt @@ -1,7 +1,10 @@ package com.steipete.clawdis.node +import android.Manifest import android.content.Context +import android.content.pm.PackageManager import android.os.Build +import androidx.core.content.ContextCompat import com.steipete.clawdis.node.chat.ChatController import com.steipete.clawdis.node.chat.ChatMessage import com.steipete.clawdis.node.chat.ChatPendingToolCall @@ -13,6 +16,7 @@ import com.steipete.clawdis.node.bridge.BridgePairingClient import com.steipete.clawdis.node.bridge.BridgeSession import com.steipete.clawdis.node.node.CameraCaptureManager import com.steipete.clawdis.node.node.CanvasController +import com.steipete.clawdis.node.voice.VoiceWakeManager import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job @@ -21,7 +25,9 @@ import kotlinx.coroutines.delay import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.asStateFlow +import kotlinx.coroutines.flow.combine import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.flow.distinctUntilChanged import kotlinx.coroutines.launch import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonArray @@ -29,6 +35,7 @@ import kotlinx.serialization.json.JsonElement import kotlinx.serialization.json.JsonNull import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.buildJsonObject class NodeRuntime(context: Context) { private val appContext = context.applicationContext @@ -39,6 +46,33 @@ class NodeRuntime(context: Context) { val camera = CameraCaptureManager(appContext) private val json = Json { ignoreUnknownKeys = true } + private val externalAudioCaptureActive = MutableStateFlow(false) + + private val voiceWake: VoiceWakeManager by lazy { + VoiceWakeManager( + context = appContext, + scope = scope, + onCommand = { command -> + session.sendEvent( + event = "agent.request", + payloadJson = + buildJsonObject { + put("message", JsonPrimitive(command)) + put("sessionKey", JsonPrimitive("main")) + put("thinking", JsonPrimitive(chatThinkingLevel.value)) + put("deliver", JsonPrimitive(false)) + }.toString(), + ) + }, + ) + } + + val voiceWakeIsListening: StateFlow + get() = voiceWake.isListening + + val voiceWakeStatusText: StateFlow + get() = voiceWake.statusText + private val discovery = BridgeDiscovery(appContext, scope = scope) val bridges: StateFlow> = discovery.bridges val discoveryStatusText: StateFlow = discovery.statusText @@ -92,6 +126,7 @@ class NodeRuntime(context: Context) { val cameraEnabled: StateFlow = prefs.cameraEnabled val preventSleep: StateFlow = prefs.preventSleep val wakeWords: StateFlow> = prefs.wakeWords + val voiceWakeMode: StateFlow = prefs.voiceWakeMode val manualEnabled: StateFlow = prefs.manualEnabled val manualHost: StateFlow = prefs.manualHost val manualPort: StateFlow = prefs.manualPort @@ -113,6 +148,39 @@ class NodeRuntime(context: Context) { val pendingRunCount: StateFlow = chat.pendingRunCount init { + scope.launch { + combine( + voiceWakeMode, + isForeground, + externalAudioCaptureActive, + wakeWords, + ) { mode, foreground, externalAudio, words -> + Quad(mode, foreground, externalAudio, words) + }.distinctUntilChanged() + .collect { (mode, foreground, externalAudio, words) -> + voiceWake.setTriggerWords(words) + + val shouldListen = + when (mode) { + VoiceWakeMode.Off -> false + VoiceWakeMode.Foreground -> foreground + VoiceWakeMode.Always -> true + } && !externalAudio + + if (!shouldListen) { + voiceWake.stop(statusText = if (mode == VoiceWakeMode.Off) "Off" else "Paused") + return@collect + } + + if (!hasRecordAudioPermission()) { + voiceWake.stop(statusText = "Microphone permission required") + return@collect + } + + voiceWake.start() + } + } + scope.launch(Dispatchers.Default) { bridges.collect { list -> if (list.isNotEmpty()) { @@ -182,6 +250,10 @@ class NodeRuntime(context: Context) { setWakeWords(SecurePrefs.defaultWakeWords) } + fun setVoiceWakeMode(mode: VoiceWakeMode) { + prefs.setVoiceWakeMode(mode) + } + fun connect(endpoint: BridgeEndpoint) { scope.launch { _statusText.value = "Connecting…" @@ -196,6 +268,7 @@ class NodeRuntime(context: Context) { val caps = buildList { add("canvas") if (cameraEnabled.value) add("camera") + if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) add("voiceWake") } BridgePairingClient().pairAndHello( endpoint = endpoint, @@ -237,12 +310,20 @@ class NodeRuntime(context: Context) { buildList { add("canvas") if (cameraEnabled.value) add("camera") + if (voiceWakeMode.value != VoiceWakeMode.Off && hasRecordAudioPermission()) add("voiceWake") }, ), ) } } + private fun hasRecordAudioPermission(): Boolean { + return ( + ContextCompat.checkSelfPermission(appContext, Manifest.permission.RECORD_AUDIO) == + PackageManager.PERMISSION_GRANTED + ) + } + fun connectManual() { val host = manualHost.value.trim() val port = manualPort.value @@ -405,8 +486,14 @@ class NodeRuntime(context: Context) { BridgeSession.InvokeResult.ok(res.payloadJson) } "camera.clip" -> { - val res = camera.clip(paramsJson) - BridgeSession.InvokeResult.ok(res.payloadJson) + val includeAudio = paramsJson?.contains("\"includeAudio\":true") != false + if (includeAudio) externalAudioCaptureActive.value = true + try { + val res = camera.clip(paramsJson) + BridgeSession.InvokeResult.ok(res.payloadJson) + } finally { + if (includeAudio) externalAudioCaptureActive.value = false + } } else -> BridgeSession.InvokeResult.error( @@ -417,6 +504,8 @@ class NodeRuntime(context: Context) { } } +private data class Quad(val first: A, val second: B, val third: C, val fourth: D) + private fun String.toJsonString(): String { val escaped = this.replace("\\", "\\\\") diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/SecurePrefs.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/SecurePrefs.kt index eb25fa508..51545095f 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/SecurePrefs.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/SecurePrefs.kt @@ -1,3 +1,5 @@ +@file:Suppress("DEPRECATION") + package com.steipete.clawdis.node import android.content.Context @@ -15,6 +17,7 @@ class SecurePrefs(context: Context) { companion object { val defaultWakeWords: List = listOf("clawd", "claude") private const val displayNameKey = "node.displayName" + private const val voiceWakeModeKey = "voiceWake.mode" } private val json = Json { ignoreUnknownKeys = true } @@ -62,6 +65,9 @@ class SecurePrefs(context: Context) { private val _wakeWords = MutableStateFlow(loadWakeWords()) val wakeWords: StateFlow> = _wakeWords + private val _voiceWakeMode = MutableStateFlow(loadVoiceWakeMode()) + val voiceWakeMode: StateFlow = _voiceWakeMode + fun setLastDiscoveredStableId(value: String) { val trimmed = value.trim() prefs.edit().putString("bridge.lastDiscoveredStableId", trimmed).apply() @@ -137,6 +143,23 @@ class SecurePrefs(context: Context) { _wakeWords.value = sanitized } + fun setVoiceWakeMode(mode: VoiceWakeMode) { + prefs.edit().putString(voiceWakeModeKey, mode.rawValue).apply() + _voiceWakeMode.value = mode + } + + private fun loadVoiceWakeMode(): VoiceWakeMode { + val raw = prefs.getString(voiceWakeModeKey, null) + val resolved = VoiceWakeMode.fromRawValue(raw) + + // Default ON (foreground) when unset. + if (raw.isNullOrBlank()) { + prefs.edit().putString(voiceWakeModeKey, resolved.rawValue).apply() + } + + return resolved + } + private fun loadWakeWords(): List { val raw = prefs.getString("voiceWake.triggerWords", null)?.trim() if (raw.isNullOrEmpty()) return defaultWakeWords diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/VoiceWakeMode.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/VoiceWakeMode.kt new file mode 100644 index 000000000..0b97a85cb --- /dev/null +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/VoiceWakeMode.kt @@ -0,0 +1,15 @@ +package com.steipete.clawdis.node + +enum class VoiceWakeMode(val rawValue: String) { + Off("off"), + Foreground("foreground"), + Always("always"), + ; + + companion object { + fun fromRawValue(raw: String?): VoiceWakeMode { + return entries.firstOrNull { it.rawValue == raw?.trim()?.lowercase() } ?: Foreground + } + } +} + diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/SettingsSheet.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/SettingsSheet.kt index e271aa5c7..2266a2dc7 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/SettingsSheet.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/SettingsSheet.kt @@ -32,6 +32,7 @@ import androidx.compose.material3.Icon import androidx.compose.material3.ListItem import androidx.compose.material3.MaterialTheme import androidx.compose.material3.OutlinedTextField +import androidx.compose.material3.RadioButton import androidx.compose.material3.Switch import androidx.compose.material3.Text import androidx.compose.runtime.Composable @@ -47,6 +48,7 @@ import androidx.compose.ui.unit.dp import androidx.core.content.ContextCompat import com.steipete.clawdis.node.MainViewModel import com.steipete.clawdis.node.NodeForegroundService +import com.steipete.clawdis.node.VoiceWakeMode @Composable fun SettingsSheet(viewModel: MainViewModel) { @@ -56,6 +58,8 @@ fun SettingsSheet(viewModel: MainViewModel) { val cameraEnabled by viewModel.cameraEnabled.collectAsState() val preventSleep by viewModel.preventSleep.collectAsState() val wakeWords by viewModel.wakeWords.collectAsState() + val voiceWakeMode by viewModel.voiceWakeMode.collectAsState() + val voiceWakeStatusText by viewModel.voiceWakeStatusText.collectAsState() val isConnected by viewModel.isConnected.collectAsState() val manualEnabled by viewModel.manualEnabled.collectAsState() val manualHost by viewModel.manualHost.collectAsState() @@ -78,6 +82,11 @@ fun SettingsSheet(viewModel: MainViewModel) { viewModel.setCameraEnabled(cameraOk) } + val audioPermissionLauncher = + rememberLauncherForActivityResult(ActivityResultContracts.RequestPermission()) { _ -> + // Status text is handled by NodeRuntime. + } + fun setCameraEnabledChecked(checked: Boolean) { if (!checked) { viewModel.setCameraEnabled(false) @@ -242,6 +251,67 @@ fun SettingsSheet(viewModel: MainViewModel) { // Voice item { Text("Voice", style = MaterialTheme.typography.titleSmall) } + item { + val enabled = voiceWakeMode != VoiceWakeMode.Off + ListItem( + headlineContent = { Text("Voice Wake") }, + supportingContent = { Text(voiceWakeStatusText) }, + trailingContent = { + Switch( + checked = enabled, + onCheckedChange = { on -> + if (on) { + val micOk = + ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) == + PackageManager.PERMISSION_GRANTED + if (!micOk) audioPermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) + viewModel.setVoiceWakeMode(VoiceWakeMode.Foreground) + } else { + viewModel.setVoiceWakeMode(VoiceWakeMode.Off) + } + }, + ) + }, + ) + } + item { + AnimatedVisibility(visible = voiceWakeMode != VoiceWakeMode.Off) { + Column(verticalArrangement = Arrangement.spacedBy(6.dp), modifier = Modifier.fillMaxWidth()) { + ListItem( + headlineContent = { Text("Foreground Only") }, + supportingContent = { Text("Listens only while Clawdis is open.") }, + trailingContent = { + RadioButton( + selected = voiceWakeMode == VoiceWakeMode.Foreground, + onClick = { + val micOk = + ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) == + PackageManager.PERMISSION_GRANTED + if (!micOk) audioPermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) + viewModel.setVoiceWakeMode(VoiceWakeMode.Foreground) + }, + ) + }, + ) + ListItem( + headlineContent = { Text("Always") }, + supportingContent = { Text("Keeps listening in the background (shows a persistent notification).") }, + trailingContent = { + RadioButton( + selected = voiceWakeMode == VoiceWakeMode.Always, + onClick = { + val micOk = + ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) == + PackageManager.PERMISSION_GRANTED + if (!micOk) audioPermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) + viewModel.setVoiceWakeMode(VoiceWakeMode.Always) + }, + ) + }, + ) + } + } + } item { OutlinedTextField( value = wakeWordsText, diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractor.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractor.kt new file mode 100644 index 000000000..6564e0e3c --- /dev/null +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractor.kt @@ -0,0 +1,40 @@ +package com.steipete.clawdis.node.voice + +object VoiceWakeCommandExtractor { + fun extractCommand(text: String, triggerWords: List): String? { + val raw = text.trim() + if (raw.isEmpty()) return null + + val triggers = + triggerWords + .map { it.trim().lowercase() } + .filter { it.isNotEmpty() } + .distinct() + if (triggers.isEmpty()) return null + + val alternation = triggers.joinToString("|") { Regex.escape(it) } + // Match: " " + val regex = Regex("(?i)(?:^|\\s)($alternation)\\b[\\s\\p{Punct}]*([\\s\\S]+)$") + val match = regex.find(raw) ?: return null + val extracted = match.groupValues.getOrNull(2)?.trim().orEmpty() + if (extracted.isEmpty()) return null + + val cleaned = extracted.trimStart { it.isWhitespace() || it.isPunctuation() }.trim() + if (cleaned.isEmpty()) return null + return cleaned + } +} + +private fun Char.isPunctuation(): Boolean { + return when (Character.getType(this)) { + Character.CONNECTOR_PUNCTUATION.toInt(), + Character.DASH_PUNCTUATION.toInt(), + Character.START_PUNCTUATION.toInt(), + Character.END_PUNCTUATION.toInt(), + Character.INITIAL_QUOTE_PUNCTUATION.toInt(), + Character.FINAL_QUOTE_PUNCTUATION.toInt(), + Character.OTHER_PUNCTUATION.toInt(), + -> true + else -> false + } +} diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeManager.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeManager.kt new file mode 100644 index 000000000..fd9322567 --- /dev/null +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/VoiceWakeManager.kt @@ -0,0 +1,173 @@ +package com.steipete.clawdis.node.voice + +import android.content.Context +import android.content.Intent +import android.os.Bundle +import android.os.Handler +import android.os.Looper +import android.speech.RecognitionListener +import android.speech.RecognizerIntent +import android.speech.SpeechRecognizer +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Job +import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.launch + +class VoiceWakeManager( + private val context: Context, + private val scope: CoroutineScope, + private val onCommand: suspend (String) -> Unit, +) { + private val mainHandler = Handler(Looper.getMainLooper()) + + private val _isListening = MutableStateFlow(false) + val isListening: StateFlow = _isListening + + private val _statusText = MutableStateFlow("Off") + val statusText: StateFlow = _statusText + + var triggerWords: List = emptyList() + private set + + private var recognizer: SpeechRecognizer? = null + private var restartJob: Job? = null + private var lastDispatched: String? = null + private var stopRequested = false + + fun setTriggerWords(words: List) { + triggerWords = words + } + + fun start() { + mainHandler.post { + if (_isListening.value) return@post + stopRequested = false + + if (!SpeechRecognizer.isRecognitionAvailable(context)) { + _isListening.value = false + _statusText.value = "Speech recognizer unavailable" + return@post + } + + try { + recognizer?.destroy() + recognizer = SpeechRecognizer.createSpeechRecognizer(context).also { it.setRecognitionListener(listener) } + startListeningInternal() + } catch (err: Throwable) { + _isListening.value = false + _statusText.value = "Start failed: ${err.message ?: err::class.simpleName}" + } + } + } + + fun stop(statusText: String = "Off") { + stopRequested = true + restartJob?.cancel() + restartJob = null + mainHandler.post { + _isListening.value = false + _statusText.value = statusText + recognizer?.cancel() + recognizer?.destroy() + recognizer = null + } + } + + private fun startListeningInternal() { + val r = recognizer ?: return + val intent = + Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { + putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) + putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true) + putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3) + putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, context.packageName) + } + + _statusText.value = "Listening" + _isListening.value = true + r.startListening(intent) + } + + private fun scheduleRestart(delayMs: Long = 350) { + if (stopRequested) return + restartJob?.cancel() + restartJob = + scope.launch { + delay(delayMs) + mainHandler.post { + if (stopRequested) return@post + try { + recognizer?.cancel() + startListeningInternal() + } catch (_: Throwable) { + // Will be picked up by onError and retry again. + } + } + } + } + + private fun handleTranscription(text: String) { + val command = VoiceWakeCommandExtractor.extractCommand(text, triggerWords) ?: return + if (command == lastDispatched) return + lastDispatched = command + + scope.launch { onCommand(command) } + _statusText.value = "Triggered" + scheduleRestart(delayMs = 650) + } + + private val listener = + object : RecognitionListener { + override fun onReadyForSpeech(params: Bundle?) { + _statusText.value = "Listening" + } + + override fun onBeginningOfSpeech() {} + + override fun onRmsChanged(rmsdB: Float) {} + + override fun onBufferReceived(buffer: ByteArray?) {} + + override fun onEndOfSpeech() { + scheduleRestart() + } + + override fun onError(error: Int) { + if (stopRequested) return + _isListening.value = false + if (error == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) { + _statusText.value = "Microphone permission required" + return + } + + _statusText.value = + when (error) { + SpeechRecognizer.ERROR_AUDIO -> "Audio error" + SpeechRecognizer.ERROR_CLIENT -> "Client error" + SpeechRecognizer.ERROR_NETWORK -> "Network error" + SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout" + SpeechRecognizer.ERROR_NO_MATCH -> "Listening" + SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer busy" + SpeechRecognizer.ERROR_SERVER -> "Server error" + SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Listening" + else -> "Speech error ($error)" + } + scheduleRestart(delayMs = 600) + } + + override fun onResults(results: Bundle?) { + val list = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION).orEmpty() + list.firstOrNull()?.let(::handleTranscription) + scheduleRestart() + } + + override fun onPartialResults(partialResults: Bundle?) { + val list = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION).orEmpty() + list.firstOrNull()?.let(::handleTranscription) + } + + override fun onEvent(eventType: Int, params: Bundle?) {} + } +} diff --git a/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgePairingClientTest.kt b/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgePairingClientTest.kt index aae427c88..8df358a8d 100644 --- a/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgePairingClientTest.kt +++ b/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgePairingClientTest.kt @@ -48,6 +48,7 @@ class BridgePairingClientTest { version = "test", deviceFamily = "Android", modelIdentifier = "SM-X000", + caps = null, ), ) assertTrue(res.ok) @@ -95,6 +96,7 @@ class BridgePairingClientTest { version = "test", deviceFamily = "Android", modelIdentifier = "SM-X000", + caps = null, ), ) assertTrue(res.ok) diff --git a/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgeSessionTest.kt b/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgeSessionTest.kt index b7d535f1a..bf8f661bd 100644 --- a/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgeSessionTest.kt +++ b/apps/android/app/src/test/java/com/steipete/clawdis/node/bridge/BridgeSessionTest.kt @@ -67,6 +67,9 @@ class BridgeSessionTest { token = null, platform = "Android", version = "test", + deviceFamily = null, + modelIdentifier = null, + caps = null, ), ) @@ -129,6 +132,9 @@ class BridgeSessionTest { token = null, platform = "Android", version = "test", + deviceFamily = null, + modelIdentifier = null, + caps = null, ), ) connected.await() @@ -196,6 +202,9 @@ class BridgeSessionTest { token = null, platform = "Android", version = "test", + deviceFamily = null, + modelIdentifier = null, + caps = null, ), ) connected.await() diff --git a/apps/android/app/src/test/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractorTest.kt b/apps/android/app/src/test/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractorTest.kt new file mode 100644 index 000000000..b50582397 --- /dev/null +++ b/apps/android/app/src/test/java/com/steipete/clawdis/node/voice/VoiceWakeCommandExtractorTest.kt @@ -0,0 +1,26 @@ +package com.steipete.clawdis.node.voice + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertNull +import org.junit.Test + +class VoiceWakeCommandExtractorTest { + @Test + fun extractsCommandAfterTriggerWord() { + val res = VoiceWakeCommandExtractor.extractCommand("Claude take a photo", listOf("clawd", "claude")) + assertEquals("take a photo", res) + } + + @Test + fun extractsCommandWithPunctuation() { + val res = VoiceWakeCommandExtractor.extractCommand("hey clawd, what's the weather?", listOf("clawd")) + assertEquals("what's the weather?", res) + } + + @Test + fun returnsNullWhenNoCommandProvided() { + assertNull(VoiceWakeCommandExtractor.extractCommand("claude", listOf("claude"))) + assertNull(VoiceWakeCommandExtractor.extractCommand("hey claude!", listOf("claude"))) + } +} + diff --git a/apps/android/gradle.properties b/apps/android/gradle.properties index 47d0e718d..0742f09d5 100644 --- a/apps/android/gradle.properties +++ b/apps/android/gradle.properties @@ -1,4 +1,4 @@ -org.gradle.jvmargs=-Xmx3g -Dfile.encoding=UTF-8 +org.gradle.jvmargs=-Xmx3g -Dfile.encoding=UTF-8 --enable-native-access=ALL-UNNAMED +org.gradle.warning.mode=none android.useAndroidX=true android.nonTransitiveRClass=true - diff --git a/apps/android/gradlew b/apps/android/gradlew index 1aa94a426..6e5806dcc 100755 --- a/apps/android/gradlew +++ b/apps/android/gradlew @@ -200,7 +200,7 @@ fi # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m" "--enable-native-access=ALL-UNNAMED"' # Collect all arguments for the java command: # * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, diff --git a/apps/android/gradlew.bat b/apps/android/gradlew.bat index 7101f8e46..6f8e90665 100644 --- a/apps/android/gradlew.bat +++ b/apps/android/gradlew.bat @@ -34,7 +34,7 @@ set APP_HOME=%DIRNAME% for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" "--enable-native-access=ALL-UNNAMED" @rem Find java.exe if defined JAVA_HOME goto findJavaFromJavaHome