feat: add talk mode across nodes
This commit is contained in:
@@ -35,6 +35,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
|
||||
val voiceWakeMode: StateFlow<VoiceWakeMode> = runtime.voiceWakeMode
|
||||
val voiceWakeStatusText: StateFlow<String> = runtime.voiceWakeStatusText
|
||||
val voiceWakeIsListening: StateFlow<Boolean> = runtime.voiceWakeIsListening
|
||||
val talkEnabled: StateFlow<Boolean> = runtime.talkEnabled
|
||||
val talkStatusText: StateFlow<String> = runtime.talkStatusText
|
||||
val talkIsListening: StateFlow<Boolean> = runtime.talkIsListening
|
||||
val talkIsSpeaking: StateFlow<Boolean> = runtime.talkIsSpeaking
|
||||
val manualEnabled: StateFlow<Boolean> = runtime.manualEnabled
|
||||
val manualHost: StateFlow<String> = runtime.manualHost
|
||||
val manualPort: StateFlow<Int> = runtime.manualPort
|
||||
@@ -95,6 +99,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
|
||||
runtime.setVoiceWakeMode(mode)
|
||||
}
|
||||
|
||||
fun setTalkEnabled(enabled: Boolean) {
|
||||
runtime.setTalkEnabled(enabled)
|
||||
}
|
||||
|
||||
fun connect(endpoint: BridgeEndpoint) {
|
||||
runtime.connect(endpoint)
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UIAction
|
||||
import com.steipete.clawdis.node.protocol.ClawdisCanvasA2UICommand
|
||||
import com.steipete.clawdis.node.protocol.ClawdisCanvasCommand
|
||||
import com.steipete.clawdis.node.protocol.ClawdisScreenCommand
|
||||
import com.steipete.clawdis.node.voice.TalkModeManager
|
||||
import com.steipete.clawdis.node.voice.VoiceWakeManager
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
@@ -84,6 +85,15 @@ class NodeRuntime(context: Context) {
|
||||
val voiceWakeStatusText: StateFlow<String>
|
||||
get() = voiceWake.statusText
|
||||
|
||||
val talkStatusText: StateFlow<String>
|
||||
get() = talkMode.statusText
|
||||
|
||||
val talkIsListening: StateFlow<Boolean>
|
||||
get() = talkMode.isListening
|
||||
|
||||
val talkIsSpeaking: StateFlow<Boolean>
|
||||
get() = talkMode.isSpeaking
|
||||
|
||||
private val discovery = BridgeDiscovery(appContext, scope = scope)
|
||||
val bridges: StateFlow<List<BridgeEndpoint>> = discovery.bridges
|
||||
val discoveryStatusText: StateFlow<String> = discovery.statusText
|
||||
@@ -133,6 +143,9 @@ class NodeRuntime(context: Context) {
|
||||
)
|
||||
|
||||
private val chat = ChatController(scope = scope, session = session, json = json)
|
||||
private val talkMode: TalkModeManager by lazy {
|
||||
TalkModeManager(context = appContext, scope = scope).also { it.attachSession(session) }
|
||||
}
|
||||
|
||||
private fun handleSessionDisconnected(message: String) {
|
||||
_statusText.value = message
|
||||
@@ -163,6 +176,7 @@ class NodeRuntime(context: Context) {
|
||||
val preventSleep: StateFlow<Boolean> = prefs.preventSleep
|
||||
val wakeWords: StateFlow<List<String>> = prefs.wakeWords
|
||||
val voiceWakeMode: StateFlow<VoiceWakeMode> = prefs.voiceWakeMode
|
||||
val talkEnabled: StateFlow<Boolean> = prefs.talkEnabled
|
||||
val manualEnabled: StateFlow<Boolean> = prefs.manualEnabled
|
||||
val manualHost: StateFlow<String> = prefs.manualHost
|
||||
val manualPort: StateFlow<Int> = prefs.manualPort
|
||||
@@ -218,6 +232,13 @@ class NodeRuntime(context: Context) {
|
||||
}
|
||||
}
|
||||
|
||||
scope.launch {
|
||||
talkEnabled.collect { enabled ->
|
||||
talkMode.setEnabled(enabled)
|
||||
externalAudioCaptureActive.value = enabled
|
||||
}
|
||||
}
|
||||
|
||||
scope.launch(Dispatchers.Default) {
|
||||
bridges.collect { list ->
|
||||
if (list.isNotEmpty()) {
|
||||
@@ -311,6 +332,10 @@ class NodeRuntime(context: Context) {
|
||||
prefs.setVoiceWakeMode(mode)
|
||||
}
|
||||
|
||||
fun setTalkEnabled(value: Boolean) {
|
||||
prefs.setTalkEnabled(value)
|
||||
}
|
||||
|
||||
fun connect(endpoint: BridgeEndpoint) {
|
||||
scope.launch {
|
||||
_statusText.value = "Connecting…"
|
||||
@@ -548,6 +573,7 @@ class NodeRuntime(context: Context) {
|
||||
return
|
||||
}
|
||||
|
||||
talkMode.handleBridgeEvent(event, payloadJson)
|
||||
chat.handleBridgeEvent(event, payloadJson)
|
||||
}
|
||||
|
||||
|
||||
@@ -73,6 +73,9 @@ class SecurePrefs(context: Context) {
|
||||
private val _voiceWakeMode = MutableStateFlow(loadVoiceWakeMode())
|
||||
val voiceWakeMode: StateFlow<VoiceWakeMode> = _voiceWakeMode
|
||||
|
||||
private val _talkEnabled = MutableStateFlow(prefs.getBoolean("talk.enabled", false))
|
||||
val talkEnabled: StateFlow<Boolean> = _talkEnabled
|
||||
|
||||
fun setLastDiscoveredStableId(value: String) {
|
||||
val trimmed = value.trim()
|
||||
prefs.edit { putString("bridge.lastDiscoveredStableId", trimmed) }
|
||||
@@ -158,6 +161,11 @@ class SecurePrefs(context: Context) {
|
||||
_voiceWakeMode.value = mode
|
||||
}
|
||||
|
||||
fun setTalkEnabled(value: Boolean) {
|
||||
prefs.edit { putBoolean("talk.enabled", value) }
|
||||
_talkEnabled.value = value
|
||||
}
|
||||
|
||||
private fun loadVoiceWakeMode(): VoiceWakeMode {
|
||||
val raw = prefs.getString(voiceWakeModeKey, null)
|
||||
val resolved = VoiceWakeMode.fromRawValue(raw)
|
||||
|
||||
@@ -62,6 +62,8 @@ fun SettingsSheet(viewModel: MainViewModel) {
|
||||
val wakeWords by viewModel.wakeWords.collectAsState()
|
||||
val voiceWakeMode by viewModel.voiceWakeMode.collectAsState()
|
||||
val voiceWakeStatusText by viewModel.voiceWakeStatusText.collectAsState()
|
||||
val talkEnabled by viewModel.talkEnabled.collectAsState()
|
||||
val talkStatusText by viewModel.talkStatusText.collectAsState()
|
||||
val isConnected by viewModel.isConnected.collectAsState()
|
||||
val manualEnabled by viewModel.manualEnabled.collectAsState()
|
||||
val manualHost by viewModel.manualHost.collectAsState()
|
||||
@@ -307,6 +309,28 @@ fun SettingsSheet(viewModel: MainViewModel) {
|
||||
|
||||
// Voice
|
||||
item { Text("Voice", style = MaterialTheme.typography.titleSmall) }
|
||||
item {
|
||||
ListItem(
|
||||
headlineContent = { Text("Talk Mode") },
|
||||
supportingContent = { Text(talkStatusText) },
|
||||
trailingContent = {
|
||||
Switch(
|
||||
checked = talkEnabled,
|
||||
onCheckedChange = { on ->
|
||||
if (on) {
|
||||
val micOk =
|
||||
ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) ==
|
||||
PackageManager.PERMISSION_GRANTED
|
||||
if (!micOk) audioPermissionLauncher.launch(Manifest.permission.RECORD_AUDIO)
|
||||
viewModel.setTalkEnabled(true)
|
||||
} else {
|
||||
viewModel.setTalkEnabled(false)
|
||||
}
|
||||
},
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
item {
|
||||
val enabled = voiceWakeMode != VoiceWakeMode.Off
|
||||
ListItem(
|
||||
|
||||
@@ -0,0 +1,194 @@
|
||||
package com.steipete.clawdis.node.voice
|
||||
|
||||
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonElement
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.JsonPrimitive
import kotlinx.serialization.json.booleanOrNull
import kotlinx.serialization.json.contentOrNull
import kotlinx.serialization.json.doubleOrNull
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.longOrNull
|
||||
|
||||
private val directiveJson = Json { ignoreUnknownKeys = true }
|
||||
|
||||
/**
 * Optional per-reply TTS settings parsed from the leading JSON directive line
 * of an assistant reply (see [TalkDirectiveParser.parse]).
 *
 * Every field is nullable; null means "not specified — keep the current or
 * default value". [once] marks the directive as applying to this reply only
 * instead of becoming a sticky override.
 */
data class TalkDirective(
    val voiceId: String? = null,
    val modelId: String? = null,
    val speed: Double? = null,
    // Alternative speaking-rate spelling, in words per minute.
    val rateWpm: Int? = null,
    val stability: Double? = null,
    val similarity: Double? = null,
    val style: Double? = null,
    // True/false from speaker_boost; a no_speaker_boost key is negated.
    val speakerBoost: Boolean? = null,
    val seed: Long? = null,
    val normalize: String? = null,
    val language: String? = null,
    val outputFormat: String? = null,
    val latencyTier: Int? = null,
    // When true, the voice/model in this directive do not persist.
    val once: Boolean? = null,
)
|
||||
|
||||
/**
 * Outcome of [TalkDirectiveParser.parse].
 *
 * @property directive the parsed directive, or null when none was found.
 * @property stripped the reply text with the directive line (and one trailing
 *   blank line) removed; equals the original text when [directive] is null.
 * @property unknownKeys directive keys present but not recognized, sorted;
 *   empty when no directive was found.
 */
data class TalkDirectiveParseResult(
    val directive: TalkDirective?,
    val stripped: String,
    val unknownKeys: List<String>,
)
|
||||
|
||||
object TalkDirectiveParser {
    /**
     * Splits an optional one-line JSON directive off the top of an assistant
     * reply.
     *
     * The first non-empty line is treated as a directive only when it parses
     * as a JSON object AND contains at least one recognized key. On success
     * the directive line (plus one immediately following blank line) is
     * removed from [TalkDirectiveParseResult.stripped]; in every other case
     * the original text is returned untouched with a null directive.
     */
    fun parse(text: String): TalkDirectiveParseResult {
        val normalized = text.replace("\r\n", "\n")
        val lines = normalized.split("\n").toMutableList()
        if (lines.isEmpty()) return TalkDirectiveParseResult(null, text, emptyList())

        val firstNonEmpty = lines.indexOfFirst { it.trim().isNotEmpty() }
        if (firstNonEmpty == -1) return TalkDirectiveParseResult(null, text, emptyList())

        // Cheap shape check before attempting a real JSON parse.
        val head = lines[firstNonEmpty].trim()
        if (!head.startsWith("{") || !head.endsWith("}")) {
            return TalkDirectiveParseResult(null, text, emptyList())
        }

        val obj = parseJsonObject(head) ?: return TalkDirectiveParseResult(null, text, emptyList())

        // speaker_boost may also be expressed negated via no_speaker_boost.
        val speakerBoost =
            boolValue(obj, listOf("speaker_boost", "speakerBoost"))
                ?: boolValue(obj, listOf("no_speaker_boost", "noSpeakerBoost"))?.not()

        // Each field accepts several aliases (snake_case, camelCase, short forms);
        // the first alias with a usable value wins.
        val directive = TalkDirective(
            voiceId = stringValue(obj, listOf("voice", "voice_id", "voiceId")),
            modelId = stringValue(obj, listOf("model", "model_id", "modelId")),
            speed = doubleValue(obj, listOf("speed")),
            rateWpm = intValue(obj, listOf("rate", "wpm")),
            stability = doubleValue(obj, listOf("stability")),
            similarity = doubleValue(obj, listOf("similarity", "similarity_boost", "similarityBoost")),
            style = doubleValue(obj, listOf("style")),
            speakerBoost = speakerBoost,
            seed = longValue(obj, listOf("seed")),
            normalize = stringValue(obj, listOf("normalize", "apply_text_normalization")),
            language = stringValue(obj, listOf("lang", "language_code", "language")),
            outputFormat = stringValue(obj, listOf("output_format", "format")),
            latencyTier = intValue(obj, listOf("latency", "latency_tier", "latencyTier")),
            once = boolValue(obj, listOf("once")),
        )

        // A JSON object carrying none of the recognized keys is treated as
        // ordinary message text, not a directive.
        val hasDirective = listOf(
            directive.voiceId,
            directive.modelId,
            directive.speed,
            directive.rateWpm,
            directive.stability,
            directive.similarity,
            directive.style,
            directive.speakerBoost,
            directive.seed,
            directive.normalize,
            directive.language,
            directive.outputFormat,
            directive.latencyTier,
            directive.once,
        ).any { it != null }

        if (!hasDirective) return TalkDirectiveParseResult(null, text, emptyList())

        // All entries lowercase: incoming keys are lowercased before lookup,
        // which makes the unknown-key check case-insensitive.
        val knownKeys = setOf(
            "voice", "voice_id", "voiceid",
            "model", "model_id", "modelid",
            "speed", "rate", "wpm",
            "stability", "similarity", "similarity_boost", "similarityboost",
            "style",
            "speaker_boost", "speakerboost",
            "no_speaker_boost", "nospeakerboost",
            "seed",
            "normalize", "apply_text_normalization",
            "lang", "language_code", "language",
            "output_format", "format",
            "latency", "latency_tier", "latencytier",
            "once",
        )
        val unknownKeys = obj.keys.filter { !knownKeys.contains(it.lowercase()) }.sorted()

        // Drop the directive line, plus one blank separator line if present.
        lines.removeAt(firstNonEmpty)
        if (firstNonEmpty < lines.size) {
            if (lines[firstNonEmpty].trim().isEmpty()) {
                lines.removeAt(firstNonEmpty)
            }
        }

        return TalkDirectiveParseResult(directive, lines.joinToString("\n"), unknownKeys)
    }

    // Lenient parse of a candidate directive line; null on any failure or when
    // the top-level element is not an object.
    private fun parseJsonObject(line: String): JsonObject? {
        return try {
            directiveJson.parseToJsonElement(line) as? JsonObject
        } catch (_: Throwable) {
            null
        }
    }

    // First alias whose value is a non-empty (trimmed) string wins.
    private fun stringValue(obj: JsonObject, keys: List<String>): String? {
        for (key in keys) {
            val value = obj[key].asStringOrNull()?.trim()
            if (!value.isNullOrEmpty()) return value
        }
        return null
    }

    // First alias whose value coerces to a Double wins.
    private fun doubleValue(obj: JsonObject, keys: List<String>): Double? {
        for (key in keys) {
            val value = obj[key].asDoubleOrNull()
            if (value != null) return value
        }
        return null
    }

    // First alias whose value coerces to an Int wins.
    private fun intValue(obj: JsonObject, keys: List<String>): Int? {
        for (key in keys) {
            val value = obj[key].asIntOrNull()
            if (value != null) return value
        }
        return null
    }

    // First alias whose value coerces to a Long wins.
    private fun longValue(obj: JsonObject, keys: List<String>): Long? {
        for (key in keys) {
            val value = obj[key].asLongOrNull()
            if (value != null) return value
        }
        return null
    }

    // First alias whose value coerces to a Boolean (incl. "yes"/"no"/"1"/"0") wins.
    private fun boolValue(obj: JsonObject, keys: List<String>): Boolean? {
        for (key in keys) {
            val value = obj[key].asBooleanOrNull()
            if (value != null) return value
        }
        return null
    }
}
|
||||
|
||||
// Lenient accessors for nullable JSON elements: each unwraps a JsonPrimitive
// and coerces its value, returning null for missing/structured/uncoercible
// elements instead of throwing.

private fun JsonElement?.asStringOrNull(): String? {
    val primitive = this as? JsonPrimitive ?: return null
    return primitive.contentOrNull
}

private fun JsonElement?.asDoubleOrNull(): Double? {
    val primitive = this as? JsonPrimitive ?: return null
    // Quoted numbers ("1.5") are accepted alongside bare numeric literals.
    return if (primitive.isString) primitive.content.toDoubleOrNull() else primitive.doubleOrNull
}

private fun JsonElement?.asIntOrNull(): Int? {
    val primitive = this as? JsonPrimitive ?: return null
    return if (primitive.isString) primitive.content.toIntOrNull() else primitive.intOrNull
}

private fun JsonElement?.asLongOrNull(): Long? {
    val primitive = this as? JsonPrimitive ?: return null
    return if (primitive.isString) primitive.content.toLongOrNull() else primitive.longOrNull
}

private fun JsonElement?.asBooleanOrNull(): Boolean? {
    val primitive = this as? JsonPrimitive ?: return null
    // Native JSON booleans first, then human-friendly string spellings.
    primitive.booleanOrNull?.let { return it }
    return when (primitive.contentOrNull?.trim()?.lowercase()) {
        "true", "yes", "1" -> true
        "false", "no", "0" -> false
        else -> null
    }
}
|
||||
@@ -0,0 +1,713 @@
|
||||
package com.steipete.clawdis.node.voice
|
||||
|
||||
import android.Manifest
|
||||
import android.content.Context
|
||||
import android.content.Intent
|
||||
import android.content.pm.PackageManager
|
||||
import android.media.AudioAttributes
|
||||
import android.media.MediaPlayer
|
||||
import android.os.Bundle
|
||||
import android.os.Handler
|
||||
import android.os.Looper
|
||||
import android.os.SystemClock
|
||||
import android.speech.RecognitionListener
|
||||
import android.speech.RecognizerIntent
|
||||
import android.speech.SpeechRecognizer
|
||||
import android.util.Log
|
||||
import androidx.core.content.ContextCompat
|
||||
import com.steipete.clawdis.node.bridge.BridgeSession
|
||||
import java.io.File
|
||||
import java.net.HttpURLConnection
|
||||
import java.net.URL
|
||||
import java.util.UUID
|
||||
import kotlinx.coroutines.CompletableDeferred
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.Job
|
||||
import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.flow.StateFlow
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withContext
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonElement
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
import kotlinx.serialization.json.buildJsonObject
|
||||
|
||||
class TalkModeManager(
|
||||
private val context: Context,
|
||||
private val scope: CoroutineScope,
|
||||
) {
|
||||
companion object {
|
||||
private const val tag = "TalkMode"
|
||||
}
|
||||
|
||||
private val mainHandler = Handler(Looper.getMainLooper())
|
||||
private val json = Json { ignoreUnknownKeys = true }
|
||||
|
||||
private val _isEnabled = MutableStateFlow(false)
|
||||
val isEnabled: StateFlow<Boolean> = _isEnabled
|
||||
|
||||
private val _isListening = MutableStateFlow(false)
|
||||
val isListening: StateFlow<Boolean> = _isListening
|
||||
|
||||
private val _isSpeaking = MutableStateFlow(false)
|
||||
val isSpeaking: StateFlow<Boolean> = _isSpeaking
|
||||
|
||||
private val _statusText = MutableStateFlow("Off")
|
||||
val statusText: StateFlow<String> = _statusText
|
||||
|
||||
private var recognizer: SpeechRecognizer? = null
|
||||
private var restartJob: Job? = null
|
||||
private var stopRequested = false
|
||||
private var listeningMode = false
|
||||
|
||||
private var silenceJob: Job? = null
|
||||
private val silenceWindowMs = 700L
|
||||
private var lastTranscript: String = ""
|
||||
private var lastHeardAtMs: Long? = null
|
||||
private var lastSpokenText: String? = null
|
||||
private var lastInterruptedAtSeconds: Double? = null
|
||||
|
||||
private var defaultVoiceId: String? = null
|
||||
private var currentVoiceId: String? = null
|
||||
private var defaultModelId: String? = null
|
||||
private var currentModelId: String? = null
|
||||
private var defaultOutputFormat: String? = null
|
||||
private var interruptOnSpeech: Boolean = true
|
||||
private var voiceOverrideActive = false
|
||||
private var modelOverrideActive = false
|
||||
|
||||
private var session: BridgeSession? = null
|
||||
private var pendingRunId: String? = null
|
||||
private var pendingFinal: CompletableDeferred<Boolean>? = null
|
||||
|
||||
private var player: MediaPlayer? = null
|
||||
private var currentAudioFile: File? = null
|
||||
|
||||
fun attachSession(session: BridgeSession) {
|
||||
this.session = session
|
||||
}
|
||||
|
||||
/** Turns talk mode on or off; re-applying the current state is a no-op. */
fun setEnabled(enabled: Boolean) {
    if (enabled == _isEnabled.value) return
    _isEnabled.value = enabled
    when {
        enabled -> start()
        else -> stop()
    }
}
|
||||
|
||||
/**
 * Watches bridge "chat" events for the final-state marker of the run we are
 * currently waiting on, and completes the pending waiter when it arrives.
 */
fun handleBridgeEvent(event: String, payloadJson: String?) {
    if (event != "chat" || payloadJson.isNullOrBlank()) return
    val awaitedRunId = pendingRunId ?: return

    val payload =
        runCatching { json.parseToJsonElement(payloadJson).asObjectOrNull() }
            .getOrNull() ?: return

    // Ignore events for other runs or without a run identifier.
    if (payload["runId"].asStringOrNull() != awaitedRunId) return

    if (payload["state"].asStringOrNull() == "final") {
        // Wake waitForChatFinal() and disarm the waiter.
        pendingFinal?.complete(true)
        pendingFinal = null
        pendingRunId = null
    }
}
|
||||
|
||||
// Begins a listening session. All SpeechRecognizer work is posted to the main
// looper because the recognizer API must be driven from the main thread.
private fun start() {
    mainHandler.post {
        // Already capturing — nothing to do.
        if (_isListening.value) return@post
        stopRequested = false
        listeningMode = true

        if (!SpeechRecognizer.isRecognitionAvailable(context)) {
            _statusText.value = "Speech recognizer unavailable"
            return@post
        }

        val micOk =
            ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) ==
                PackageManager.PERMISSION_GRANTED
        if (!micOk) {
            _statusText.value = "Microphone permission required"
            return@post
        }

        try {
            // Always build a fresh recognizer; any previous instance is
            // destroyed first rather than reused.
            recognizer?.destroy()
            recognizer = SpeechRecognizer.createSpeechRecognizer(context).also { it.setRecognitionListener(listener) }
            startListeningInternal(markListening = true)
            startSilenceMonitor()
        } catch (err: Throwable) {
            _statusText.value = "Start failed: ${err.message ?: err::class.simpleName}"
        }
    }
}
|
||||
|
||||
// Tears down listening, the silence monitor, and any in-flight playback.
// Safe to call when already stopped.
private fun stop() {
    stopRequested = true
    listeningMode = false
    restartJob?.cancel()
    restartJob = null
    silenceJob?.cancel()
    silenceJob = null
    // Drop any half-captured utterance.
    lastTranscript = ""
    lastHeardAtMs = null
    _isListening.value = false
    _statusText.value = "Off"
    stopSpeaking()

    // Recognizer teardown must run on the main thread.
    mainHandler.post {
        recognizer?.cancel()
        recognizer?.destroy()
        recognizer = null
    }
}
|
||||
|
||||
/**
 * Kicks off one recognition pass on the current recognizer (no-op when none
 * exists). When [markListening] is set, the public listening state and status
 * text are updated first.
 */
private fun startListeningInternal(markListening: Boolean) {
    val active = recognizer ?: return

    // Free-form model with partial results so speech can be tracked live.
    val recognitionIntent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
    recognitionIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
    recognitionIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
    recognitionIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3)
    recognitionIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, context.packageName)

    if (markListening) {
        _statusText.value = "Listening"
        _isListening.value = true
    }
    active.startListening(recognitionIntent)
}
|
||||
|
||||
// Re-arms the recognizer after an error or result, debounced by [delayMs].
// Only the most recent restart request survives; earlier ones are cancelled.
private fun scheduleRestart(delayMs: Long = 350) {
    if (stopRequested) return
    restartJob?.cancel()
    restartJob =
        scope.launch {
            delay(delayMs)
            // Back onto the main thread for the recognizer calls.
            mainHandler.post {
                if (stopRequested) return@post
                try {
                    recognizer?.cancel()
                    val shouldListen = listeningMode
                    // Keep the mic hot while TTS plays so the user can barge in.
                    val shouldInterrupt = _isSpeaking.value && interruptOnSpeech
                    if (!shouldListen && !shouldInterrupt) return@post
                    startListeningInternal(markListening = shouldListen)
                } catch (_: Throwable) {
                    // handled by onError
                }
            }
        }
}
|
||||
|
||||
/**
 * Routes a recognizer transcript: while TTS is playing it is only checked as
 * a potential barge-in; while listening it updates the pending utterance and
 * the last-heard timestamp used by the silence monitor.
 */
private fun handleTranscript(text: String, isFinal: Boolean) {
    val heard = text.trim()

    // Speech during playback can only interrupt — never extend a transcript.
    if (_isSpeaking.value && interruptOnSpeech) {
        if (shouldInterrupt(heard)) stopSpeaking()
        return
    }

    if (!_isListening.value) return

    if (heard.isNotEmpty()) {
        lastTranscript = heard
        lastHeardAtMs = SystemClock.elapsedRealtime()
    }
    // A final result replaces the transcript even when it is empty.
    if (isFinal) lastTranscript = heard
}
|
||||
|
||||
/** Polls ~5×/s while talk mode is enabled, finalizing utterances on silence. */
private fun startSilenceMonitor() {
    silenceJob?.cancel()
    silenceJob = scope.launch {
        while (_isEnabled.value) {
            delay(200)
            checkSilence()
        }
    }
}
|
||||
|
||||
// One tick of the silence monitor: when the current utterance has gone quiet
// for at least silenceWindowMs, hand it off to finalizeTranscript().
private fun checkSilence() {
    if (!_isListening.value) return
    val transcript = lastTranscript.trim()
    if (transcript.isEmpty()) return
    val lastHeard = lastHeardAtMs ?: return
    val elapsed = SystemClock.elapsedRealtime() - lastHeard
    if (elapsed < silenceWindowMs) return
    // Clear the pending utterance *before* launching: the monitor ticks every
    // 200 ms and the launched coroutine may not have flipped _isListening yet,
    // so without this reset the same transcript could be finalized — and sent
    // to the bridge — twice. (finalizeTranscript clears these again; harmless.)
    lastTranscript = ""
    lastHeardAtMs = null
    scope.launch { finalizeTranscript(transcript) }
}
|
||||
|
||||
// Processes one completed utterance end-to-end: send to the bridge, wait for
// the final chat event, fetch the assistant text, speak it, then resume
// listening (unless talk mode was disabled meanwhile).
private suspend fun finalizeTranscript(transcript: String) {
    // Leave listening state while this utterance is being handled.
    listeningMode = false
    _isListening.value = false
    _statusText.value = "Thinking…"
    lastTranscript = ""
    lastHeardAtMs = null

    // Refresh voice/model defaults from the bridge before speaking.
    reloadConfig()
    val prompt = buildPrompt(transcript)
    val bridge = session
    if (bridge == null) {
        _statusText.value = "Bridge not connected"
        // Resume listening; the utterance is dropped.
        start()
        return
    }

    try {
        val runId = sendChat(prompt, bridge)
        val ok = waitForChatFinal(runId)
        if (!ok) {
            _statusText.value = "No reply"
            start()
            return
        }
        // The final event carries no text; pull it from chat history.
        val assistant = fetchLatestAssistantText(bridge)
        if (assistant.isNullOrBlank()) {
            _statusText.value = "No reply"
            start()
            return
        }
        playAssistant(assistant)
    } catch (err: Throwable) {
        _statusText.value = "Talk failed: ${err.message ?: err::class.simpleName}"
    }

    // Loop back to listening after success or failure.
    if (_isEnabled.value) {
        start()
    }
}
|
||||
|
||||
/**
 * Wraps the user's transcript in talk-mode framing for the assistant. If the
 * last TTS playback was interrupted, a one-shot note with the cut-off time is
 * included (and the marker is consumed).
 */
private fun buildPrompt(transcript: String): String = buildString {
    appendLine("Talk Mode active. Reply in a concise, spoken tone.")
    appendLine("You may optionally prefix the response with JSON (first line) to set ElevenLabs voice, e.g. {\"voice\":\"<id>\",\"once\":true}.")
    lastInterruptedAtSeconds?.let { seconds ->
        appendLine("Assistant speech interrupted at ${"%.1f".format(seconds)}s.")
        lastInterruptedAtSeconds = null
    }
    appendLine()
    append(transcript)
}
|
||||
|
||||
// Sends one chat message to the bridge and returns the run identifier —
// the server-assigned runId when present, otherwise our idempotency key.
private suspend fun sendChat(message: String, bridge: BridgeSession): String {
    val runId = UUID.randomUUID().toString()
    val params =
        buildJsonObject {
            put("sessionKey", JsonPrimitive("main"))
            put("message", JsonPrimitive(message))
            put("thinking", JsonPrimitive("low"))
            put("timeoutMs", JsonPrimitive(30_000))
            put("idempotencyKey", JsonPrimitive(runId))
        }
    val res = bridge.request("chat.send", params.toString())
    val parsed = parseRunId(res) ?: runId
    if (parsed != runId) {
        // NOTE(review): pendingRunId is also (re)assigned in waitForChatFinal();
        // "final" events arriving between this call and that one are dropped —
        // confirm the bridge can never report final that quickly.
        pendingRunId = parsed
    }
    return parsed
}
|
||||
|
||||
// Suspends until handleBridgeEvent() reports the "final" state for [runId],
// or 120 s elapse. Returns false on timeout/failure; rethrows cooperative
// cancellation so structured concurrency keeps working.
private suspend fun waitForChatFinal(runId: String): Boolean {
    // Arm the completion signal; any previously armed waiter is abandoned.
    pendingFinal?.cancel()
    val deferred = CompletableDeferred<Boolean>()
    pendingRunId = runId
    pendingFinal = deferred

    // await() suspends without blocking a thread, so no Dispatchers.IO hop
    // is needed here (the original withContext(IO) parked an IO thread).
    val result =
        try {
            kotlinx.coroutines.withTimeout(120_000) { deferred.await() }
        } catch (_: kotlinx.coroutines.TimeoutCancellationException) {
            false // bridge never reported a final state in time
        } catch (cancel: kotlinx.coroutines.CancellationException) {
            // Scope teardown / talk mode disabled: clean up and propagate —
            // swallowing CancellationException breaks structured concurrency.
            pendingFinal = null
            pendingRunId = null
            throw cancel
        } catch (_: Throwable) {
            false
        }

    if (!result) {
        pendingFinal = null
        pendingRunId = null
    }
    return result
}
|
||||
|
||||
// Walks chat history newest-first and returns the most recent assistant
// message's non-empty text parts joined with newlines, or null when none.
private suspend fun fetchLatestAssistantText(bridge: BridgeSession): String? {
    val res = bridge.request("chat.history", "{\"sessionKey\":\"main\"}")
    val root = json.parseToJsonElement(res).asObjectOrNull() ?: return null
    val messages = root["messages"] as? JsonArray ?: return null
    for (item in messages.reversed()) {
        val obj = item.asObjectOrNull() ?: continue
        if (obj["role"].asStringOrNull() != "assistant") continue
        // Content is a list of parts; collect every non-empty "text" field.
        val content = obj["content"] as? JsonArray ?: continue
        val text =
            content.mapNotNull { entry ->
                entry.asObjectOrNull()?.get("text")?.asStringOrNull()?.trim()
            }.filter { it.isNotEmpty() }
        if (text.isNotEmpty()) return text.joinToString("\n")
    }
    return null
}
|
||||
|
||||
// Speaks an assistant reply: strips the optional leading JSON directive,
// resolves voice/model overrides, synthesizes via ElevenLabs and plays the
// audio. Sets/clears the isSpeaking state around playback.
private suspend fun playAssistant(text: String) {
    val parsed = TalkDirectiveParser.parse(text)
    if (parsed.unknownKeys.isNotEmpty()) {
        Log.w(tag, "Unknown talk directive keys: ${parsed.unknownKeys}")
    }
    val directive = parsed.directive
    val cleaned = parsed.stripped.trim()
    // Nothing speakable (e.g. the reply was only a directive line).
    if (cleaned.isEmpty()) return

    // Non-"once" directives become sticky overrides that survive until the
    // next config reload replaces them.
    if (directive?.voiceId != null) {
        if (directive.once != true) {
            currentVoiceId = directive.voiceId
            voiceOverrideActive = true
        }
    }
    if (directive?.modelId != null) {
        if (directive.once != true) {
            currentModelId = directive.modelId
            modelOverrideActive = true
        }
    }

    // Resolution order: per-reply directive → sticky override → default.
    val voiceId = directive?.voiceId ?: currentVoiceId ?: defaultVoiceId
    if (voiceId.isNullOrBlank()) {
        _statusText.value = "Missing voice ID"
        return
    }

    // NOTE(review): System.getenv is unusual on Android — confirm this key is
    // actually present in the process environment.
    val apiKey = System.getenv("ELEVENLABS_API_KEY")?.trim()
    if (apiKey.isNullOrEmpty()) {
        _statusText.value = "Missing ELEVENLABS_API_KEY"
        return
    }

    _statusText.value = "Speaking…"
    _isSpeaking.value = true
    // Remembered so barge-in detection can filter echoes of our own speech.
    lastSpokenText = cleaned
    ensureInterruptListener()

    try {
        val request =
            ElevenLabsRequest(
                text = cleaned,
                modelId = directive?.modelId ?: currentModelId ?: defaultModelId,
                outputFormat = directive?.outputFormat ?: defaultOutputFormat,
                speed = TalkModeRuntime.resolveSpeed(directive?.speed, directive?.rateWpm),
                stability = TalkModeRuntime.validatedUnit(directive?.stability),
                similarity = TalkModeRuntime.validatedUnit(directive?.similarity),
                style = TalkModeRuntime.validatedUnit(directive?.style),
                speakerBoost = directive?.speakerBoost,
                seed = TalkModeRuntime.validatedSeed(directive?.seed),
                normalize = TalkModeRuntime.validatedNormalize(directive?.normalize),
                language = TalkModeRuntime.validatedLanguage(directive?.language),
            )
        val audio = synthesize(voiceId = voiceId, apiKey = apiKey, request = request)
        playAudio(audio)
    } catch (err: Throwable) {
        _statusText.value = "Speak failed: ${err.message ?: err::class.simpleName}"
    }

    _isSpeaking.value = false
}
|
||||
|
||||
// Plays one synthesized MP3 through MediaPlayer and suspends until playback
// completes or errors. The bytes are staged in a cache temp file because
// MediaPlayer needs a file/URI data source.
private suspend fun playAudio(data: ByteArray) {
    // Drop any in-flight playback without recording an interruption point.
    stopSpeaking(resetInterrupt = false)
    val file = File.createTempFile("talk-", ".mp3", context.cacheDir)
    file.writeBytes(data)
    currentAudioFile = file

    val player = MediaPlayer()
    this.player = player

    // Resolved by the completion listener, failed by the error listener.
    val finished = CompletableDeferred<Unit>()
    player.setAudioAttributes(
        AudioAttributes.Builder()
            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .setUsage(AudioAttributes.USAGE_ASSISTANT)
            .build(),
    )
    player.setOnCompletionListener {
        finished.complete(Unit)
    }
    player.setOnErrorListener { _, _, _ ->
        finished.completeExceptionally(IllegalStateException("MediaPlayer error"))
        true // error consumed; completion listener will not also fire
    }

    player.setDataSource(file.absolutePath)
    // Prepare/start from the main thread; playback itself is asynchronous.
    withContext(Dispatchers.Main) {
        player.setOnPreparedListener { it.start() }
        player.prepareAsync()
    }

    try {
        finished.await()
    } finally {
        // Releases the player and deletes the temp file in every exit path.
        cleanupPlayer()
    }
}
|
||||
|
||||
// Halts any current TTS playback. When [resetInterrupt] is true the playback
// position is recorded so the next prompt can tell the assistant where its
// speech was cut off.
private fun stopSpeaking(resetInterrupt: Boolean = true) {
    if (!_isSpeaking.value) {
        // Not speaking, but make sure any stale player/temp file is released.
        cleanupPlayer()
        return
    }
    if (resetInterrupt) {
        val currentMs = player?.currentPosition?.toDouble() ?: 0.0
        lastInterruptedAtSeconds = currentMs / 1000.0
    }
    cleanupPlayer()
    _isSpeaking.value = false
}
|
||||
|
||||
// Releases the MediaPlayer and deletes the staged audio file. Must never
// throw — it runs from finally blocks and teardown paths.
private fun cleanupPlayer() {
    // MediaPlayer.stop() throws IllegalStateException when the player was
    // never prepared/started (e.g. prepareAsync failed before onPrepared);
    // guard it so cleanup itself cannot crash the teardown path.
    try {
        player?.stop()
    } catch (_: IllegalStateException) {
        // fall through — release() below is valid in any state
    }
    player?.release()
    player = null
    currentAudioFile?.delete()
    currentAudioFile = null
}
|
||||
|
||||
/**
 * Decides whether recognized speech heard during playback counts as a user
 * barge-in. Very short fragments are ignored as recognizer noise, and so are
 * substrings of the text we are currently speaking (microphone echo).
 */
private fun shouldInterrupt(transcript: String): Boolean {
    val candidate = transcript.trim()
    if (candidate.length < 3) return false
    val echoSource = lastSpokenText?.lowercase() ?: return true
    return !echoSource.contains(candidate.lowercase())
}
|
||||
|
||||
// Refreshes the default voice/model/format from the bridge's config.get,
// falling back to environment variables when the bridge has no voice set or
// is unreachable. Sticky directive overrides are preserved.
private suspend fun reloadConfig() {
    val bridge = session ?: return
    val envVoice = System.getenv("ELEVENLABS_VOICE_ID")?.trim()
    val sagVoice = System.getenv("SAG_VOICE_ID")?.trim()
    try {
        val res = bridge.request("config.get", "{}")
        val root = json.parseToJsonElement(res).asObjectOrNull()
        val config = root?.get("config").asObjectOrNull()
        val talk = config?.get("talk").asObjectOrNull()
        // Blank strings are treated the same as absent values.
        val voice = talk?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
        val model = talk?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
        val outputFormat = talk?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
        val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()

        // Defaults refresh on every reload; per-session overrides set via
        // talk directives survive until explicitly replaced.
        defaultVoiceId = voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
        if (!voiceOverrideActive) currentVoiceId = defaultVoiceId
        defaultModelId = model
        if (!modelOverrideActive) currentModelId = defaultModelId
        defaultOutputFormat = outputFormat
        if (interrupt != null) interruptOnSpeech = interrupt
    } catch (_: Throwable) {
        // Bridge unreachable — best effort from the environment only.
        defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
    }
}
|
||||
|
||||
// Extracts "runId" from a bridge response. A malformed reply should degrade
// to null (caller falls back to its idempotency key) rather than throw —
// json.parseToJsonElement raises on invalid input, so it must be guarded,
// matching the lenient style of the other JSON helpers in this file.
private fun parseRunId(jsonString: String): String? {
    val obj =
        try {
            json.parseToJsonElement(jsonString).asObjectOrNull()
        } catch (_: Throwable) {
            null
        } ?: return null
    return obj["runId"].asStringOrNull()
}
|
||||
|
||||
/**
 * Calls the ElevenLabs text-to-speech endpoint for [voiceId] and returns the
 * raw audio bytes (audio/mpeg).
 *
 * Runs on [Dispatchers.IO] because HttpURLConnection blocks.
 *
 * Fixes over the previous version: the connection is always disconnected so
 * the underlying socket is released, the response stream is closed via `use`,
 * and a null `errorStream` (possible for error responses with no body) no
 * longer causes an NPE — it is treated as an empty body.
 *
 * @throws IllegalStateException when the API responds with HTTP >= 400,
 *         including the status code and response body in the message.
 */
private suspend fun synthesize(voiceId: String, apiKey: String, request: ElevenLabsRequest): ByteArray {
    return withContext(Dispatchers.IO) {
        val url = URL("https://api.elevenlabs.io/v1/text-to-speech/$voiceId")
        val conn = url.openConnection() as HttpURLConnection
        try {
            conn.requestMethod = "POST"
            conn.setRequestProperty("Content-Type", "application/json")
            conn.setRequestProperty("Accept", "audio/mpeg")
            conn.setRequestProperty("xi-api-key", apiKey)
            conn.doOutput = true

            val payload = buildRequestPayload(request)
            conn.outputStream.use { it.write(payload.toByteArray()) }

            val code = conn.responseCode
            // errorStream may be null when the server sent no error body.
            val stream = if (code >= 400) conn.errorStream else conn.inputStream
            val data = stream?.use { it.readBytes() } ?: ByteArray(0)
            if (code >= 400) {
                val message = String(data)
                throw IllegalStateException("ElevenLabs failed: $code $message")
            }
            data
        } finally {
            // Release the connection's socket/resources even on failure.
            conn.disconnect()
        }
    }
}
|
||||
|
||||
/**
 * Serializes an [ElevenLabsRequest] into the JSON body expected by the
 * ElevenLabs text-to-speech endpoint. Optional fields are omitted when null
 * (model/format additionally when blank); voice settings are only attached
 * when at least one tuning value is present.
 *
 * NOTE(review): output_format is sent in the request body here; the ElevenLabs
 * API also accepts it as a query parameter — confirm the body form is honored.
 */
private fun buildRequestPayload(request: ElevenLabsRequest): String {
    // Voice tuning sub-object; empty when no tuning values were supplied.
    val tuning = buildJsonObject {
        request.speed?.let { put("speed", JsonPrimitive(it)) }
        request.stability?.let { put("stability", JsonPrimitive(it)) }
        request.similarity?.let { put("similarity_boost", JsonPrimitive(it)) }
        request.style?.let { put("style", JsonPrimitive(it)) }
        request.speakerBoost?.let { put("use_speaker_boost", JsonPrimitive(it)) }
    }

    val body = buildJsonObject {
        put("text", JsonPrimitive(request.text))
        request.modelId?.takeIf(String::isNotEmpty)?.let { put("model_id", JsonPrimitive(it)) }
        request.outputFormat?.takeIf(String::isNotEmpty)?.let { put("output_format", JsonPrimitive(it)) }
        request.seed?.let { put("seed", JsonPrimitive(it)) }
        request.normalize?.let { put("apply_text_normalization", JsonPrimitive(it)) }
        request.language?.let { put("language_code", JsonPrimitive(it)) }
        if (tuning.isNotEmpty()) put("voice_settings", tuning)
    }

    return body.toString()
}
|
||||
|
||||
// Parameters for a single ElevenLabs text-to-speech request.
// Nullable fields are optional and omitted from the JSON payload when null
// (see buildRequestPayload for the wire names).
private data class ElevenLabsRequest(
    // Text to synthesize (JSON "text").
    val text: String,
    // TTS model identifier (JSON "model_id"); blank treated as absent.
    val modelId: String?,
    // Audio output format (JSON "output_format"); blank treated as absent.
    val outputFormat: String?,
    // Playback speed multiplier (JSON voice_settings "speed").
    val speed: Double?,
    // Voice stability, 0..1 (JSON voice_settings "stability").
    val stability: Double?,
    // Similarity boost, 0..1 (JSON voice_settings "similarity_boost").
    val similarity: Double?,
    // Style exaggeration, 0..1 (JSON voice_settings "style").
    val style: Double?,
    // Speaker-boost toggle (JSON voice_settings "use_speaker_boost").
    val speakerBoost: Boolean?,
    // Deterministic-sampling seed, 0..4294967295 (JSON "seed").
    val seed: Long?,
    // Text normalization mode "auto"/"on"/"off" (JSON "apply_text_normalization").
    val normalize: String?,
    // Two-letter ISO 639-1 language code (JSON "language_code").
    val language: String?,
)
|
||||
|
||||
// Pure validation/conversion helpers for talk-mode directive values.
// All functions return null for missing or out-of-range input.
private object TalkModeRuntime {
    /**
     * Resolves the playback-speed multiplier. An explicit words-per-minute
     * rate takes precedence over a raw speed value; 175 wpm maps to 1.0x
     * (assumed "normal speech" baseline — TODO confirm). Results outside the
     * open interval (0.5, 2.0) are rejected as null, with no fallback from an
     * out-of-range rate to the raw speed.
     */
    fun resolveSpeed(speed: Double?, rateWpm: Int?): Double? {
        if (rateWpm != null && rateWpm > 0) {
            return (rateWpm / 175.0).takeIf { it > 0.5 && it < 2.0 }
        }
        return speed?.takeIf { it > 0.5 && it < 2.0 }
    }

    /** Accepts only values in the closed unit interval [0, 1]. */
    fun validatedUnit(value: Double?): Double? = value?.takeIf { it in 0.0..1.0 }

    /** Accepts only unsigned-32-bit seeds: [0, 4294967295]. */
    fun validatedSeed(value: Long?): Long? = value?.takeIf { it in 0..4294967295L }

    /** Normalization mode must be "auto", "on", or "off" (case-insensitive, trimmed). */
    fun validatedNormalize(value: String?): String? {
        val mode = value?.trim()?.lowercase()
        return if (mode == "auto" || mode == "on" || mode == "off") mode else null
    }

    /** Language must look like a two-letter lowercase ISO 639-1 code (e.g. "en"). */
    fun validatedLanguage(value: String?): String? =
        value?.trim()?.lowercase()?.takeIf { code ->
            code.length == 2 && code.all { it in 'a'..'z' }
        }
}
|
||||
|
||||
// Lazily (re)arms the speech recognizer used to interrupt TTS playback when
// the user starts talking. No-ops when interruption is disabled, talk mode is
// off, a stop was requested, or speech recognition is unavailable.
private fun ensureInterruptListener() {
    if (!interruptOnSpeech || !_isEnabled.value) return
    // SpeechRecognizer must be created and driven on the main thread.
    mainHandler.post {
        if (stopRequested) return@post
        if (!SpeechRecognizer.isRecognitionAvailable(context)) return@post
        try {
            // Create the recognizer once and reuse it across restarts.
            if (recognizer == null) {
                recognizer = SpeechRecognizer.createSpeechRecognizer(context).also { it.setRecognitionListener(listener) }
            }
            // Cancel any in-flight session first so the recognizer isn't busy,
            // then restart without flipping the user-visible listening flag.
            recognizer?.cancel()
            startListeningInternal(markListening = false)
        } catch (_: Throwable) {
            // ignore — best-effort arming; recognizer failures must not break playback
        }
    }
}
|
||||
|
||||
// RecognitionListener driving continuous listening: it keeps recognition
// alive by rescheduling after end-of-speech/results/errors and forwards
// transcripts (partial and final) to handleTranscript().
private val listener =
    object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            if (_isEnabled.value) {
                // Only show "Listening" when already in the visible listening state.
                _statusText.value = if (_isListening.value) "Listening" else _statusText.value
            }
        }

        override fun onBeginningOfSpeech() {}

        override fun onRmsChanged(rmsdB: Float) {}

        override fun onBufferReceived(buffer: ByteArray?) {}

        override fun onEndOfSpeech() {
            // Recognition sessions are one-shot; restart to keep listening.
            scheduleRestart()
        }

        override fun onError(error: Int) {
            if (stopRequested) return
            _isListening.value = false
            // Missing mic permission is terminal: surface it and do not restart.
            if (error == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
                _statusText.value = "Microphone permission required"
                return
            }

            _statusText.value =
                when (error) {
                    SpeechRecognizer.ERROR_AUDIO -> "Audio error"
                    SpeechRecognizer.ERROR_CLIENT -> "Client error"
                    SpeechRecognizer.ERROR_NETWORK -> "Network error"
                    SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
                    // NO_MATCH / SPEECH_TIMEOUT are routine while idle; keep showing "Listening".
                    SpeechRecognizer.ERROR_NO_MATCH -> "Listening"
                    SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer busy"
                    SpeechRecognizer.ERROR_SERVER -> "Server error"
                    SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Listening"
                    else -> "Speech error ($error)"
                }
            // Back off slightly before restarting after an error.
            scheduleRestart(delayMs = 600)
        }

        override fun onResults(results: Bundle?) {
            val list = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION).orEmpty()
            // Forward only the top hypothesis as the final transcript.
            list.firstOrNull()?.let { handleTranscript(it, isFinal = true) }
            scheduleRestart()
        }

        override fun onPartialResults(partialResults: Bundle?) {
            val list = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION).orEmpty()
            list.firstOrNull()?.let { handleTranscript(it, isFinal = false) }
        }

        override fun onEvent(eventType: Int, params: Bundle?) {}
    }
|
||||
}
|
||||
|
||||
/** Safe-casts this element to [JsonObject]; null for non-objects or null receivers. */
private fun JsonElement?.asObjectOrNull(): JsonObject? = this as? JsonObject
|
||||
|
||||
/** Returns the primitive's string content; null for non-primitives or null receivers. */
private fun JsonElement?.asStringOrNull(): String? = (this as? JsonPrimitive)?.contentOrNull
|
||||
|
||||
/**
 * Lenient boolean coercion for JSON primitives: accepts real booleans plus the
 * string forms "true"/"yes"/"1" and "false"/"no"/"0" (case-insensitive,
 * trimmed). Anything else — including non-primitives — yields null.
 */
private fun JsonElement?.asBooleanOrNull(): Boolean? {
    val primitive = this as? JsonPrimitive ?: return null
    primitive.booleanOrNull?.let { return it }
    return when (primitive.contentOrNull?.trim()?.lowercase()) {
        "true", "yes", "1" -> true
        "false", "no", "0" -> false
        else -> null
    }
}
|
||||
@@ -0,0 +1,55 @@
|
||||
package com.steipete.clawdis.node.voice
|
||||
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertNull
|
||||
import org.junit.Assert.assertTrue
|
||||
import org.junit.Test
|
||||
|
||||
// Unit tests for TalkDirectiveParser: a directive is a one-line JSON header
// preceding the spoken text, which parse() extracts and strips.
class TalkDirectiveParserTest {
    @Test
    fun parsesDirectiveAndStripsHeader() {
        val input = """
            {"voice":"voice-123","once":true}
            Hello from talk mode.
        """.trimIndent()

        val parsed = TalkDirectiveParser.parse(input)
        val directive = parsed.directive

        assertEquals("voice-123", directive?.voiceId)
        assertEquals(true, directive?.once)
        assertEquals("Hello from talk mode.", parsed.stripped.trim())
    }

    @Test
    fun ignoresUnknownKeysButReportsThem() {
        val input = """
            {"voice":"abc","foo":1,"bar":"baz"}
            Hi there.
        """.trimIndent()

        val parsed = TalkDirectiveParser.parse(input)

        // Known keys still parse; unrecognized ones are surfaced for diagnostics.
        assertEquals("abc", parsed.directive?.voiceId)
        assertTrue(parsed.unknownKeys.containsAll(listOf("bar", "foo")))
    }

    @Test
    fun parsesAlternateKeys() {
        val input = """
            {"model_id":"eleven_v3","similarity_boost":0.4,"no_speaker_boost":true,"rate":200}
            Speak.
        """.trimIndent()

        val directive = TalkDirectiveParser.parse(input).directive

        assertEquals("eleven_v3", directive?.modelId)
        assertEquals(0.4, directive?.similarity)
        // "no_speaker_boost": true means speaker boost is disabled.
        assertEquals(false, directive?.speakerBoost)
        assertEquals(200, directive?.rateWpm)
    }

    @Test
    fun returnsNullWhenNoDirectivePresent() {
        val input = """
            {}
            Hello.
        """.trimIndent()

        val parsed = TalkDirectiveParser.parse(input)

        // An empty header is not a directive; the input passes through untouched.
        assertNull(parsed.directive)
        assertEquals(input, parsed.stripped)
    }
}
|
||||
Reference in New Issue
Block a user