Mac: stabilize voice wake test flow

Why: voice wake tests often received partial/final transcripts without reliable word timings. As a result, trigger matching failed, timeouts overwrote successful detections, and test runs and mic capture kept running after UI changes. What: add a text-only/prefix fallback and silence-based detection to the test flow; stop and clean up any prior test before starting a new one; cancel the timeout on detection or stop; and tear down the meter and test when the Voice Wake tab is inactive. Runtime detection now falls back to a text-only match on final transcripts when word timing is missing, provided the transcript starts with a trigger and a non-empty command follows it. UI state now reflects the finalizing phase and prevents hanging tests.
2026-01-07 14:48:37 -07:00
parent 2140caaf67
commit 0f1a262ae1
5 changed files with 393 additions and 18 deletions
--- a/apps/macos/Sources/Clawdbot/VoiceWakeRuntime.swift
+++ b/apps/macos/Sources/Clawdbot/VoiceWakeRuntime.swift
@@ -128,6 +128,7 @@ actor VoiceWakeRuntime {

            self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
            self.recognitionRequest?.shouldReportPartialResults = true
+            self.recognitionRequest?.taskHint = .dictation
            guard let request = self.recognitionRequest else { return }

            // Lazily create the engine here so app launch doesn't grab audio resources / trigger Bluetooth HFP.
@@ -217,6 +218,7 @@ actor VoiceWakeRuntime {
    private func configureSession(localeID: String?) {
        let locale = localeID.flatMap { Locale(identifier: $0) } ?? Locale(identifier: Locale.current.identifier)
        self.recognizer = SFSpeechRecognizer(locale: locale)
+        self.recognizer?.defaultTaskHint = .dictation
    }

    private func handleRecognition(_ update: RecognitionUpdate, config: RuntimeConfig) async {
@@ -271,10 +273,21 @@ actor VoiceWakeRuntime {
                return
            }
            await self.beginCapture(command: match.command, triggerEndTime: match.triggerEndTime, config: config)
+        } else if update.isFinal {
+            let trimmed = Self.trimmedAfterTrigger(transcript, triggers: config.triggers)
+            if WakeWordGate.matchesTextOnly(text: transcript, triggers: config.triggers),
+               Self.startsWithTrigger(transcript: transcript, triggers: config.triggers),
+               !trimmed.isEmpty
+            {
+                if let cooldown = cooldownUntil, now < cooldown {
+                    return
+                }
+                await self.beginCapture(command: trimmed, triggerEndTime: nil, config: config)
+            }
        }
    }

-    private func beginCapture(command: String, triggerEndTime: TimeInterval, config: RuntimeConfig) async {
+    private func beginCapture(command: String, triggerEndTime: TimeInterval?, config: RuntimeConfig) async {
        self.listeningState = .voiceWake
        self.isCapturing = true
        DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "beginCapture")
@@ -472,6 +485,34 @@ actor VoiceWakeRuntime {
        return text
    }

+    private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
+        let tokens = transcript
+            .split(whereSeparator: { $0.isWhitespace })
+            .map { normalizeToken(String($0)) }
+            .filter { !$0.isEmpty }
+        guard !tokens.isEmpty else { return false }
+        for trigger in triggers {
+            let triggerTokens = trigger
+                .split(whereSeparator: { $0.isWhitespace })
+                .map { normalizeToken(String($0)) }
+                .filter { !$0.isEmpty }
+            guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue }
+            if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) {
+                return true
+            }
+        }
+        return false
+    }
+
+    private static func normalizeToken(_ token: String) -> String {
+        token
+            .trimmingCharacters(in: Self.whitespaceAndPunctuation)
+            .lowercased()
+    }
+
+    private static let whitespaceAndPunctuation = CharacterSet.whitespacesAndNewlines
+        .union(.punctuationCharacters)
+
    private static func commandAfterTrigger(
        transcript: String,
        segments: [WakeWordSegment],