feat: add talk voice alias map

2025-12-30 11:35:29 +01:00
parent ab27586674
commit 2814815312
7 changed files with 237 additions and 35 deletions
--- a/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/TalkModeManager.kt
+++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/voice/TalkModeManager.kt
@@ -61,6 +61,12 @@ class TalkModeManager(
  private val _statusText = MutableStateFlow("Off")
  val statusText: StateFlow<String> = _statusText

+  private val _lastAssistantText = MutableStateFlow<String?>(null)
+  val lastAssistantText: StateFlow<String?> = _lastAssistantText
+
+  private val _usingFallbackTts = MutableStateFlow(false)
+  val usingFallbackTts: StateFlow<Boolean> = _usingFallbackTts
+
  private var recognizer: SpeechRecognizer? = null
  private var restartJob: Job? = null
  private var stopRequested = false
@@ -79,6 +85,7 @@ class TalkModeManager(
  private var currentModelId: String? = null
  private var defaultOutputFormat: String? = null
  private var apiKey: String? = null
+  private var voiceAliases: Map<String, String> = emptyMap()
  private var interruptOnSpeech: Boolean = true
  private var voiceOverrideActive = false
  private var modelOverrideActive = false
@@ -179,6 +186,7 @@ class TalkModeManager(
    _isListening.value = false
    _statusText.value = "Off"
    stopSpeaking()
+    _usingFallbackTts.value = false
    chatSubscribedSessionKey = null

    mainHandler.post {
@@ -334,7 +342,7 @@ class TalkModeManager(
  private fun buildPrompt(transcript: String): String {
    val lines = mutableListOf(
      "Talk Mode active. Reply in a concise, spoken tone.",
-      "You may optionally prefix the response with JSON (first line) to set ElevenLabs voice, e.g. {\"voice\":\"<id>\",\"once\":true}.",
+      "You may optionally prefix the response with JSON (first line) to set ElevenLabs voice (id or alias), e.g. {\"voice\":\"<id>\",\"once\":true}.",
    )
    lastInterruptedAtSeconds?.let {
      lines.add("Assistant speech interrupted at ${"%.1f".format(it)}s.")
@@ -432,10 +440,17 @@ class TalkModeManager(
    val directive = parsed.directive
    val cleaned = parsed.stripped.trim()
    if (cleaned.isEmpty()) return
+    _lastAssistantText.value = cleaned
+
+    val requestedVoice = directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() }
+    val resolvedVoice = resolveVoiceAlias(requestedVoice)
+    if (requestedVoice != null && resolvedVoice == null) {
+      Log.w(tag, "unknown voice alias: $requestedVoice")
+    }

    if (directive?.voiceId != null) {
      if (directive.once != true) {
-        currentVoiceId = directive.voiceId
+        currentVoiceId = resolvedVoice
        voiceOverrideActive = true
      }
    }
@@ -449,7 +464,7 @@ class TalkModeManager(
    val apiKey =
      apiKey?.trim()?.takeIf { it.isNotEmpty() }
        ?: System.getenv("ELEVENLABS_API_KEY")?.trim()
-    val voiceId = directive?.voiceId ?: currentVoiceId ?: defaultVoiceId
+    val voiceId = resolvedVoice ?: currentVoiceId ?: defaultVoiceId

    _statusText.value = "Speaking…"
    _isSpeaking.value = true
@@ -465,9 +480,11 @@ class TalkModeManager(
        if (apiKey.isNullOrEmpty()) {
          Log.w(tag, "missing ELEVENLABS_API_KEY; falling back to system voice")
        }
+        _usingFallbackTts.value = true
        _statusText.value = "Speaking (System)…"
        speakWithSystemTts(cleaned)
      } else {
+        _usingFallbackTts.value = false
        val ttsStarted = SystemClock.elapsedRealtime()
        val request =
          ElevenLabsRequest(
@@ -491,6 +508,7 @@ class TalkModeManager(
    } catch (err: Throwable) {
      Log.w(tag, "speak failed: ${err.message ?: err::class.simpleName}; falling back to system voice")
      try {
+        _usingFallbackTts.value = true
        _statusText.value = "Speaking (System)…"
        speakWithSystemTts(cleaned)
      } catch (fallbackErr: Throwable) {
@@ -681,6 +699,11 @@ class TalkModeManager(
      val sessionCfg = config?.get("session").asObjectOrNull()
      val mainKey = sessionCfg?.get("mainKey").asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: "main"
      val voice = talk?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
+      val aliases =
+        talk?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) ->
+          val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null
+          normalizeAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id }
+        }?.toMap().orEmpty()
      val model = talk?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
      val outputFormat = talk?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
      val key = talk?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
@@ -688,6 +711,7 @@ class TalkModeManager(

      mainSessionKey = mainKey
      defaultVoiceId = voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
+      voiceAliases = aliases
      if (!voiceOverrideActive) currentVoiceId = defaultVoiceId
      defaultModelId = model
      if (!modelOverrideActive) currentModelId = defaultModelId
@@ -697,6 +721,7 @@ class TalkModeManager(
    } catch (_: Throwable) {
      defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
      apiKey = envKey?.takeIf { it.isNotEmpty() }
+      voiceAliases = emptyMap()
    }
  }

@@ -842,6 +867,23 @@ class TalkModeManager(
    }
  }

+  /**
+   * Resolves a voice requested by a talk directive to the voice id that should be used.
+   *
+   * Lookup order:
+   * 1. an alias name present in [voiceAliases] (keys are compared via [normalizeAliasKey]),
+   * 2. a voice id that is already configured as an alias target,
+   * 3. any other string accepted by [isLikelyVoiceId], returned unchanged.
+   *
+   * @param value raw alias or voice id; may be null or blank.
+   * @return the voice id to use, or null when [value] is blank or not recognized.
+   */
+  private fun resolveVoiceAlias(value: String?): String? {
+    val trimmed = value?.trim().orEmpty()
+    if (trimmed.isEmpty()) return null
+    voiceAliases[normalizeAliasKey(trimmed)]?.let { return it }
+    val configuredIds = voiceAliases.values
+    if (trimmed in configuredIds) return trimmed
+    // Configured ids are matched case-insensitively, so return the configured spelling instead of
+    // the casing the directive happened to use.
+    configuredIds.firstOrNull { it.equals(trimmed, ignoreCase = true) }?.let { return it }
+    return if (isLikelyVoiceId(trimmed)) trimmed else null
+  }
+
+  /**
+   * Heuristic check for strings that look like a raw voice id: at least 10 characters,
+   * each of which is a letter, a digit, '-' or '_'.
+   */
+  private fun isLikelyVoiceId(value: String): Boolean =
+    value.length >= 10 &&
+      value.all { ch ->
+        when (ch) {
+          '-', '_' -> true
+          else -> ch.isLetterOrDigit()
+        }
+      }
+
+  /** Canonical form of an alias key: surrounding whitespace removed, then lowercased. */
+  private fun normalizeAliasKey(value: String): String {
+    val withoutPadding = value.trim()
+    return withoutPadding.lowercase()
+  }
+
  private val listener =
    object : RecognitionListener {
      override fun onReadyForSpeech(params: Bundle?) {
--- a/apps/ios/Sources/Voice/TalkModeManager.swift
+++ b/apps/ios/Sources/Voice/TalkModeManager.swift
@@ -33,6 +33,7 @@ final class TalkModeManager: NSObject {
    private var modelOverrideActive = false
    private var defaultOutputFormat: String?
    private var apiKey: String?
+    private var voiceAliases: [String: String] = [:]
    private var interruptOnSpeech: Bool = true
    private var mainSessionKey: String = "main"

@@ -419,7 +420,12 @@ final class TalkModeManager: NSObject {
        let cleaned = parsed.stripped.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !cleaned.isEmpty else { return }

-        if let voice = directive?.voiceId {
+        let requestedVoice = directive?.voiceId?.trimmingCharacters(in: .whitespacesAndNewlines)
+        let resolvedVoice = self.resolveVoiceAlias(requestedVoice)
+        if requestedVoice?.isEmpty == false, resolvedVoice == nil {
+            self.logger.warning("unknown voice alias \(requestedVoice ?? "?", privacy: .public)")
+        }
+        if let voice = resolvedVoice {
            if directive?.once != true {
                self.currentVoiceId = voice
                self.voiceOverrideActive = true
@@ -440,8 +446,7 @@ final class TalkModeManager: NSObject {
            let started = Date()
            let language = ElevenLabsTTSClient.validatedLanguage(directive?.language)

-            let voiceId = (directive?.voiceId ?? self.currentVoiceId ?? self.defaultVoiceId)?
-                .trimmingCharacters(in: .whitespacesAndNewlines)
+            let voiceId = resolvedVoice ?? self.currentVoiceId ?? self.defaultVoiceId
            let resolvedKey =
                (self.apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? self.apiKey : nil) ??
                ProcessInfo.processInfo.environment["ELEVENLABS_API_KEY"]
@@ -565,6 +570,22 @@ final class TalkModeManager: NSObject {
        return true
    }

+    /// Resolves a voice requested by a talk directive to the voice id that should be used.
+    ///
+    /// Lookup order: an alias name in `voiceAliases` (lowercased key), then a voice id that is
+    /// already configured as an alias target, then any string accepted by `isLikelyVoiceId`.
+    ///
+    /// - Parameter value: Raw alias or voice id; may be nil or blank.
+    /// - Returns: The voice id to use, or nil when `value` is blank or not recognized.
+    private func resolveVoiceAlias(_ value: String?) -> String? {
+        let trimmed = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
+        guard !trimmed.isEmpty else { return nil }
+        if let mapped = self.voiceAliases[trimmed.lowercased()] { return mapped }
+        let configuredIds = self.voiceAliases.values
+        if configuredIds.contains(trimmed) { return trimmed }
+        // Configured ids are matched case-insensitively, so return the configured spelling
+        // instead of the casing the directive happened to use.
+        if let configured = configuredIds.first(where: { $0.caseInsensitiveCompare(trimmed) == .orderedSame }) {
+            return configured
+        }
+        return Self.isLikelyVoiceId(trimmed) ? trimmed : nil
+    }
+
+    /// Heuristic check for strings that look like a raw voice id: at least 10 characters,
+    /// each of which is a letter, a number, "-" or "_".
+    private static func isLikelyVoiceId(_ value: String) -> Bool {
+        if value.count < 10 { return false }
+        for character in value {
+            let allowed = character.isLetter || character.isNumber || character == "-" || character == "_"
+            if !allowed { return false }
+        }
+        return true
+    }
+
    private func reloadConfig() async {
        guard let bridge else { return }
        do {
@@ -576,6 +597,19 @@ final class TalkModeManager: NSObject {
            let rawMainKey = (session?["mainKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
            self.mainSessionKey = rawMainKey.isEmpty ? "main" : rawMainKey
            self.defaultVoiceId = (talk?["voiceId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
+            if let aliases = talk?["voiceAliases"] as? [String: Any] {
+                self.voiceAliases =
+                    aliases.compactMap { key, value in
+                        guard let id = value as? String else { return nil }
+                        let normalizedKey = key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
+                        let trimmedId = id.trimmingCharacters(in: .whitespacesAndNewlines)
+                        guard !normalizedKey.isEmpty, !trimmedId.isEmpty else { return nil }
+                        return (normalizedKey, trimmedId)
+                    }
+                    .reduce(into: [:]) { $0[$1.0] = $1.1 }
+            } else {
+                self.voiceAliases = [:]
+            }
            if !self.voiceOverrideActive {
                self.currentVoiceId = self.defaultVoiceId
            }
--- a/apps/macos/Sources/Clawdis/TalkModeRuntime.swift
+++ b/apps/macos/Sources/Clawdis/TalkModeRuntime.swift
@@ -41,6 +41,7 @@ actor TalkModeRuntime {
    private var silenceTask: Task<Void, Never>?
    private var phase: TalkModePhase = .idle
    private var isEnabled = false
+    private var isPaused = false
    private var lifecycleGeneration: Int = 0

    private var lastHeard: Date?
@@ -57,6 +58,7 @@ actor TalkModeRuntime {
    private var defaultOutputFormat: String?
    private var interruptOnSpeech: Bool = true
    private var lastInterruptedAtSeconds: Double?
+    private var voiceAliases: [String: String] = [:]
    private var lastSpokenText: String?
    private var apiKey: String?
    private var fallbackVoiceId: String?
@@ -78,6 +80,29 @@ actor TalkModeRuntime {
        }
    }

+    /// Pauses or resumes talk-mode listening. Does nothing when `paused` already matches the
+    /// current state.
+    ///
+    /// The level reported to `TalkModeController` is reset to 0 on every state change. While the
+    /// runtime is not enabled, only the flag is recorded. Pausing clears the pending transcript
+    /// bookkeeping and calls `stopRecognition()`. Resuming restarts recognition and the silence
+    /// monitor, but only when the phase is `.idle` or `.listening`.
+    ///
+    /// - Parameter paused: `true` to pause, `false` to resume.
+    func setPaused(_ paused: Bool) async {
+        guard paused != self.isPaused else { return }
+        self.isPaused = paused
+        await MainActor.run { TalkModeController.shared.updateLevel(0) }
+
+        // Nothing to stop or restart while talk mode is disabled; the flag alone is kept.
+        guard self.isEnabled else { return }
+
+        if paused {
+            // Drop any partially captured utterance before stopping recognition.
+            self.lastTranscript = ""
+            self.lastHeard = nil
+            self.lastSpeechEnergyAt = nil
+            await self.stopRecognition()
+            return
+        }
+
+        // Resume only from idle/listening; other phases are left untouched here.
+        if self.phase == .idle || self.phase == .listening {
+            await self.startRecognition()
+            self.phase = .listening
+            await MainActor.run { TalkModeController.shared.updatePhase(.listening) }
+            self.startSilenceMonitor()
+        }
+    }
+
    private func isCurrent(_ generation: Int) -> Bool {
        generation == self.lifecycleGeneration && self.isEnabled
    }
@@ -91,6 +116,14 @@ actor TalkModeRuntime {
        }
        await self.reloadConfig()
        guard self.isCurrent(gen) else { return }
+        if self.isPaused {
+            self.phase = .idle
+            await MainActor.run {
+                TalkModeController.shared.updateLevel(0)
+                TalkModeController.shared.updatePhase(.idle)
+            }
+            return
+        }
        await self.startRecognition()
        guard self.isCurrent(gen) else { return }
        self.phase = .listening
@@ -211,6 +244,7 @@ actor TalkModeRuntime {

    private func handleRecognition(_ update: RecognitionUpdate) async {
        guard update.generation == self.recognitionGeneration else { return }
+        guard !self.isPaused else { return }
        if let errorDescription = update.errorDescription {
            self.logger.debug("talk recognition error: \(errorDescription, privacy: .public)")
        }
@@ -256,6 +290,7 @@ actor TalkModeRuntime {
    }

    private func checkSilence() async {
+        guard !self.isPaused else { return }
        guard self.phase == .listening else { return }
        let transcript = self.lastTranscript.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !transcript.isEmpty else { return }
@@ -292,11 +327,10 @@ actor TalkModeRuntime {
        guard self.isCurrent(gen) else { return }
        let prompt = self.buildPrompt(transcript: transcript)
        let activeSessionKey = await MainActor.run { WebChatManager.shared.activeSessionKey }
-        let sessionKey: String
-        if let activeSessionKey {
-            sessionKey = activeSessionKey
+        let sessionKey: String = if let activeSessionKey {
+            activeSessionKey
        } else {
-            sessionKey = await GatewayConnection.shared.mainSessionKey()
+            await GatewayConnection.shared.mainSessionKey()
        }
        let runId = UUID().uuidString
        let startedAt = Date().timeIntervalSince1970
@@ -329,17 +363,29 @@ actor TalkModeRuntime {
            self.logger.info("talk assistant text len=\(assistantText.count, privacy: .public)")
            await self.playAssistant(text: assistantText)
            guard self.isCurrent(gen) else { return }
-            await self.startListening()
-            await self.startRecognition()
+            await self.resumeListeningIfNeeded()
            return
        } catch {
            self.logger.error("talk chat.send failed: \(error.localizedDescription, privacy: .public)")
-            await self.startListening()
-            await self.startRecognition()
+            await self.resumeListeningIfNeeded()
            return
        }
    }

+    /// Returns to listening after a turn, unless talk mode is paused.
+    ///
+    /// While paused, only the transcript bookkeeping is cleared and the level reported to
+    /// `TalkModeController` is reset to 0; listening and recognition are not restarted.
+    private func resumeListeningIfNeeded() async {
+        guard self.isPaused else {
+            await self.startListening()
+            await self.startRecognition()
+            return
+        }
+        self.lastTranscript = ""
+        self.lastHeard = nil
+        self.lastSpeechEnergyAt = nil
+        await MainActor.run { TalkModeController.shared.updateLevel(0) }
+    }
+
    private func buildPrompt(transcript: String) -> String {
        let interrupted = self.lastInterruptedAtSeconds
        self.lastInterruptedAtSeconds = nil
@@ -376,7 +422,7 @@ actor TalkModeRuntime {
                return TalkHistoryTimestamp.isAfter(timestamp, sinceSeconds: since)
            }
            guard let assistant else { return nil }
-            let text = assistant.content.compactMap { $0.text }.joined(separator: "\n")
+            let text = assistant.content.compactMap(\.text).joined(separator: "\n")
            let trimmed = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
            return trimmed.isEmpty ? nil : trimmed
        } catch {
@@ -394,10 +440,16 @@ actor TalkModeRuntime {
        guard self.isCurrent(gen) else { return }

        if !parse.unknownKeys.isEmpty {
-            self.logger.warning("talk directive ignored keys: \(parse.unknownKeys.joined(separator: ","), privacy: .public)")
+            self.logger
+                .warning("talk directive ignored keys: \(parse.unknownKeys.joined(separator: ","), privacy: .public)")
        }

-        if let voice = directive?.voiceId {
+        let requestedVoice = directive?.voiceId?.trimmingCharacters(in: .whitespacesAndNewlines)
+        let resolvedVoice = self.resolveVoiceAlias(requestedVoice)
+        if let requestedVoice, !requestedVoice.isEmpty, resolvedVoice == nil {
+            self.logger.warning("talk unknown voice alias \(requestedVoice, privacy: .public)")
+        }
+        if let voice = resolvedVoice {
            if directive?.once == true {
                self.logger.info("talk voice override (once) voiceId=\(voice, privacy: .public)")
            } else {
@@ -417,18 +469,17 @@ actor TalkModeRuntime {
        }

        let apiKey = self.apiKey?.trimmingCharacters(in: .whitespacesAndNewlines)
-        let requestedVoice =
-            directive?.voiceId ??
+        let preferredVoice =
+            resolvedVoice ??
            self.currentVoiceId ??
            self.defaultVoiceId

        let language = ElevenLabsTTSClient.validatedLanguage(directive?.language)

-        let voiceId: String?
-        if let apiKey, !apiKey.isEmpty {
-            voiceId = await self.resolveVoiceId(preferred: requestedVoice, apiKey: apiKey)
+        let voiceId: String? = if let apiKey, !apiKey.isEmpty {
+            await self.resolveVoiceId(preferred: preferredVoice, apiKey: apiKey)
        } else {
-            voiceId = nil
+            nil
        }

        if apiKey?.isEmpty != false {
@@ -436,7 +487,8 @@ actor TalkModeRuntime {
        } else if voiceId == nil {
            self.ttsLogger.warning("talk missing voiceId; falling back to system voice")
        } else if let voiceId {
-            self.ttsLogger.info("talk TTS request voiceId=\(voiceId, privacy: .public) chars=\(cleaned.count, privacy: .public)")
+            self.ttsLogger
+                .info("talk TTS request voiceId=\(voiceId, privacy: .public) chars=\(cleaned.count, privacy: .public)")
        }
        self.lastSpokenText = cleaned

@@ -447,7 +499,9 @@ actor TalkModeRuntime {
                let desiredOutputFormat = directive?.outputFormat ?? self.defaultOutputFormat
                let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(desiredOutputFormat)
                if outputFormat == nil, let desiredOutputFormat, !desiredOutputFormat.isEmpty {
-                    self.logger.warning("talk output_format unsupported for local playback: \(desiredOutputFormat, privacy: .public)")
+                    self.logger
+                        .warning(
+                            "talk output_format unsupported for local playback: \(desiredOutputFormat, privacy: .public)")
                }

                let request = ElevenLabsTTSRequest(
@@ -481,7 +535,9 @@ actor TalkModeRuntime {
                self.phase = .speaking

                let result = await TalkAudioPlayer.shared.play(data: audio)
-                self.ttsLogger.info("talk audio result finished=\(result.finished, privacy: .public) interruptedAt=\(String(describing: result.interruptedAt), privacy: .public)")
+                self.ttsLogger
+                    .info(
+                        "talk audio result finished=\(result.finished, privacy: .public) interruptedAt=\(String(describing: result.interruptedAt), privacy: .public)")
                if !result.finished, result.interruptedAt == nil {
                    throw NSError(domain: "TalkAudioPlayer", code: 1, userInfo: [
                        NSLocalizedDescriptionKey: "audio playback failed",
@@ -505,7 +561,8 @@ actor TalkModeRuntime {
                self.ttsLogger.info("talk system voice done")
            }
        } catch {
-            self.ttsLogger.error("talk TTS failed: \(error.localizedDescription, privacy: .public); falling back to system voice")
+            self.ttsLogger
+                .error("talk TTS failed: \(error.localizedDescription, privacy: .public); falling back to system voice")
            do {
                if self.interruptOnSpeech {
                    await self.startRecognition()
@@ -528,7 +585,10 @@ actor TalkModeRuntime {

    private func resolveVoiceId(preferred: String?, apiKey: String) async -> String? {
        let trimmed = preferred?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
-        if !trimmed.isEmpty { return trimmed }
+        if !trimmed.isEmpty {
+            if let resolved = self.resolveVoiceAlias(trimmed) { return resolved }
+            self.ttsLogger.warning("talk unknown voice alias \(trimmed, privacy: .public)")
+        }
        if let fallbackVoiceId { return fallbackVoiceId }

        do {
@@ -545,7 +605,8 @@ actor TalkModeRuntime {
                self.currentVoiceId = first.voiceId
            }
            let name = first.name ?? "unknown"
-            self.ttsLogger.info("talk default voice selected \(name, privacy: .public) (\(first.voiceId, privacy: .public))")
+            self.ttsLogger
+                .info("talk default voice selected \(name, privacy: .public) (\(first.voiceId, privacy: .public))")
            return first.voiceId
        } catch {
            self.ttsLogger.error("elevenlabs list voices failed: \(error.localizedDescription, privacy: .public)")
@@ -553,6 +614,22 @@ actor TalkModeRuntime {
        }
    }

+    /// Resolves a requested voice (alias name or voice id) to the voice id that should be used.
+    ///
+    /// Lookup order: an alias name in `voiceAliases` (lowercased key), then a voice id that is
+    /// already configured as an alias target, then any string accepted by `isLikelyVoiceId`.
+    ///
+    /// - Parameter value: Raw alias or voice id; may be nil or blank.
+    /// - Returns: The voice id to use, or nil when `value` is blank or not recognized.
+    private func resolveVoiceAlias(_ value: String?) -> String? {
+        let trimmed = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
+        guard !trimmed.isEmpty else { return nil }
+        if let mapped = self.voiceAliases[trimmed.lowercased()] { return mapped }
+        let configuredIds = self.voiceAliases.values
+        if configuredIds.contains(trimmed) { return trimmed }
+        // Configured ids are matched case-insensitively, so return the configured spelling
+        // instead of the casing the caller happened to use.
+        if let configured = configuredIds.first(where: { $0.caseInsensitiveCompare(trimmed) == .orderedSame }) {
+            return configured
+        }
+        return Self.isLikelyVoiceId(trimmed) ? trimmed : nil
+    }
+
+    /// Heuristic check for strings that look like a raw voice id: at least 10 characters,
+    /// each of which is a letter, a number, "-" or "_".
+    private static func isLikelyVoiceId(_ value: String) -> Bool {
+        if value.count < 10 { return false }
+        for character in value {
+            let allowed = character.isLetter || character.isNumber || character == "-" || character == "_"
+            if !allowed { return false }
+        }
+        return true
+    }
+
    func stopSpeaking(reason: TalkStopReason) async {
        let interruptedAt = await MainActor.run { TalkAudioPlayer.shared.stop() }
        await TalkSystemSpeechSynthesizer.shared.stop()
@@ -576,6 +653,7 @@ actor TalkModeRuntime {
    private func reloadConfig() async {
        let cfg = await self.fetchTalkConfig()
        self.defaultVoiceId = cfg.voiceId
+        self.voiceAliases = cfg.voiceAliases
        if !self.voiceOverrideActive {
            self.currentVoiceId = cfg.voiceId
        }
@@ -589,11 +667,14 @@ actor TalkModeRuntime {
        let hasApiKey = (cfg.apiKey?.isEmpty == false)
        let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
        let modelLabel = (cfg.modelId?.isEmpty == false) ? cfg.modelId! : "none"
-        self.logger.info("talk config voiceId=\(voiceLabel, privacy: .public) modelId=\(modelLabel, privacy: .public) apiKey=\(hasApiKey, privacy: .public) interrupt=\(cfg.interruptOnSpeech, privacy: .public)")
+        self.logger
+            .info(
+                "talk config voiceId=\(voiceLabel, privacy: .public) modelId=\(modelLabel, privacy: .public) apiKey=\(hasApiKey, privacy: .public) interrupt=\(cfg.interruptOnSpeech, privacy: .public)")
    }

    private struct TalkRuntimeConfig {
        let voiceId: String?
+        let voiceAliases: [String: String]
        let modelId: String?
        let outputFormat: String?
        let interruptOnSpeech: Bool
@@ -618,6 +699,14 @@ actor TalkModeRuntime {
                AppStateStore.shared.seamColorHex = rawSeam.isEmpty ? nil : rawSeam
            }
            let voice = talk?["voiceId"]?.stringValue
+            let rawAliases = talk?["voiceAliases"]?.dictionaryValue
+            let resolvedAliases: [String: String] =
+                rawAliases?.reduce(into: [:]) { acc, entry in
+                    let key = entry.key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
+                    let value = entry.value.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+                    guard !key.isEmpty, !value.isEmpty else { return }
+                    acc[key] = value
+                } ?? [:]
            let model = talk?["modelId"]?.stringValue
            let outputFormat = talk?["outputFormat"]?.stringValue
            let interrupt = talk?["interruptOnSpeech"]?.boolValue
@@ -631,6 +720,7 @@ actor TalkModeRuntime {
                (apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? apiKey : nil)
            return TalkRuntimeConfig(
                voiceId: resolvedVoice,
+                voiceAliases: resolvedAliases,
                modelId: model,
                outputFormat: outputFormat,
                interruptOnSpeech: interrupt ?? true,
@@ -642,6 +732,7 @@ actor TalkModeRuntime {
            let resolvedApiKey = envApiKey?.isEmpty == false ? envApiKey : nil
            return TalkRuntimeConfig(
                voiceId: resolvedVoice,
+                voiceAliases: [:],
                modelId: nil,
                outputFormat: nil,
                interruptOnSpeech: true,
@@ -652,7 +743,7 @@ actor TalkModeRuntime {
    // MARK: - Audio level handling

    private func noteAudioLevel(rms: Double) async {
-        if self.phase != .listening && self.phase != .speaking { return }
+        if self.phase != .listening, self.phase != .speaking { return }
        let alpha: Double = rms < self.noiseFloorRMS ? 0.08 : 0.01
        self.noiseFloorRMS = max(1e-7, self.noiseFloorRMS + (rms - self.noiseFloorRMS) * alpha)

@@ -731,7 +822,7 @@ actor TalkModeRuntime {

    private static func validatedSeed(_ value: Int?, logger: Logger) -> UInt32? {
        guard let value else { return nil }
-        if value < 0 || value > 4294967295 {
+        if value < 0 || value > 4_294_967_295 {
            logger.warning("talk seed out of range: \(value, privacy: .public)")
            return nil
        }
@@ -747,5 +838,4 @@ actor TalkModeRuntime {
        }
        return normalized
    }
-
 }
--- a/apps/shared/ClawdisKit/Sources/ClawdisKit/TalkPromptBuilder.swift
+++ b/apps/shared/ClawdisKit/Sources/ClawdisKit/TalkPromptBuilder.swift
@@ -2,7 +2,7 @@ public enum TalkPromptBuilder: Sendable {
    public static func build(transcript: String, interruptedAtSeconds: Double?) -> String {
        var lines: [String] = [
            "Talk Mode active. Reply in a concise, spoken tone.",
-            "You may optionally prefix the response with JSON (first line) to set ElevenLabs voice, e.g. {\"voice\":\"<id>\",\"once\":true}.",
+            "You may optionally prefix the response with JSON (first line) to set ElevenLabs voice (id or alias), e.g. {\"voice\":\"<id>\",\"once\":true}.",
        ]

        if let interruptedAtSeconds {
@@ -15,4 +15,3 @@ public enum TalkPromptBuilder: Sendable {
        return lines.joined(separator: "\n")
    }
 }
-
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -199,11 +199,16 @@ Controls inbound/outbound prefixes and timestamps.

 Defaults for Talk mode (macOS/iOS/Android). Voice IDs fall back to `ELEVENLABS_VOICE_ID` or `SAG_VOICE_ID` when unset.
 `apiKey` falls back to `ELEVENLABS_API_KEY` (or the gateway’s shell profile) when unset.
+`voiceAliases` maps friendly names to ElevenLabs voice IDs so Talk directives can use them (e.g. `"voice":"Clawd"`); alias names are matched case-insensitively.

 ```json5
 {
  talk: {
    voiceId: "elevenlabs_voice_id",
+    voiceAliases: {
+      Clawd: "EXAVITQu4vr4xnSDxMaL",
+      Roger: "CwhRBWXzGAHq8TQ4Fs17"
+    },
    modelId: "eleven_v3",
    outputFormat: "mp3_44100_128",
    apiKey: "elevenlabs_api_key",
--- a/src/config/config.test.ts
+++ b/src/config/config.test.ts
@@ -221,3 +221,32 @@ describe("talk api key fallback", () => {
    });
  });
 });
+
+// Validation coverage for `talk.voiceAliases`: string values pass, non-string values fail.
+describe("talk.voiceAliases", () => {
+  it("accepts a string map of voice aliases", async () => {
+    // Reset the module registry so ./config.js is imported fresh for this test.
+    vi.resetModules();
+    const { validateConfigObject } = await import("./config.js");
+    const res = validateConfigObject({
+      talk: {
+        voiceAliases: {
+          Clawd: "EXAVITQu4vr4xnSDxMaL",
+          Roger: "CwhRBWXzGAHq8TQ4Fs17",
+        },
+      },
+    });
+    expect(res.ok).toBe(true);
+  });
+
+  it("rejects non-string voice alias values", async () => {
+    vi.resetModules();
+    const { validateConfigObject } = await import("./config.js");
+    const res = validateConfigObject({
+      talk: {
+        voiceAliases: {
+          // A number is not a valid voice id value.
+          Clawd: 123,
+        },
+      },
+    });
+    expect(res.ok).toBe(false);
+  });
+});
--- a/src/config/config.ts
+++ b/src/config/config.ts
@@ -222,6 +222,8 @@ export type CanvasHostConfig = {
 export type TalkConfig = {
  /** Default ElevenLabs voice ID for Talk mode. */
  voiceId?: string;
+  /** Optional voice name -> ElevenLabs voice ID map. */
+  voiceAliases?: Record<string, string>;
  /** Default ElevenLabs model ID for Talk mode. */
  modelId?: string;
  /** Default ElevenLabs output format (e.g. mp3_44100_128). */
@@ -815,6 +817,7 @@ const ClawdisSchema = z.object({
  talk: z
    .object({
      voiceId: z.string().optional(),
+      voiceAliases: z.record(z.string(), z.string()).optional(),
      modelId: z.string().optional(),
      outputFormat: z.string().optional(),
      apiKey: z.string().optional(),