Mac: stabilize voice wake test flow
Why: voice wake tests often delivered partial/final transcripts without reliable word timings, so trigger matching failed, timeouts overwrote detections, and test runs/mic capture kept running after UI changes. What: add text-only/prefix fallback and silence-based detection in the test flow, stop/clean up any prior test, cancel timeout on detection/stop, and tear down meter/test when the Voice Wake tab is inactive. Runtime detection now falls back on final text-only matches when timing is missing. UI state now reflects finalizing and prevents hanging tests.
This commit is contained in:
committed by
Peter Steinberger
parent
2140caaf67
commit
0f1a262ae1
@@ -31,7 +31,7 @@ struct SettingsRootView: View {
|
||||
.tabItem { Label("Connections", systemImage: "link") }
|
||||
.tag(SettingsTab.connections)
|
||||
|
||||
VoiceWakeSettings(state: self.state)
|
||||
VoiceWakeSettings(state: self.state, isActive: self.selectedTab == .voiceWake)
|
||||
.tabItem { Label("Voice Wake", systemImage: "waveform.circle") }
|
||||
.tag(SettingsTab.voiceWake)
|
||||
|
||||
|
||||
@@ -128,6 +128,7 @@ actor VoiceWakeRuntime {
|
||||
|
||||
self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
|
||||
self.recognitionRequest?.shouldReportPartialResults = true
|
||||
self.recognitionRequest?.taskHint = .dictation
|
||||
guard let request = self.recognitionRequest else { return }
|
||||
|
||||
// Lazily create the engine here so app launch doesn't grab audio resources / trigger Bluetooth HFP.
|
||||
@@ -217,6 +218,7 @@ actor VoiceWakeRuntime {
|
||||
/// Creates the speech recognizer for the requested locale and sets its default task hint.
///
/// - Parameter localeID: Locale identifier to recognize in; when `nil`, the identifier of
///   `Locale.current` is used instead.
private func configureSession(localeID: String?) {
    let identifier = localeID ?? Locale.current.identifier
    self.recognizer = SFSpeechRecognizer(locale: Locale(identifier: identifier))
    // The recognizer is optional, so the hint is only applied when creation succeeded.
    self.recognizer?.defaultTaskHint = .dictation
}
|
||||
|
||||
private func handleRecognition(_ update: RecognitionUpdate, config: RuntimeConfig) async {
|
||||
@@ -271,10 +273,21 @@ actor VoiceWakeRuntime {
|
||||
return
|
||||
}
|
||||
await self.beginCapture(command: match.command, triggerEndTime: match.triggerEndTime, config: config)
|
||||
} else if update.isFinal {
|
||||
let trimmed = Self.trimmedAfterTrigger(transcript, triggers: config.triggers)
|
||||
if WakeWordGate.matchesTextOnly(text: transcript, triggers: config.triggers),
|
||||
Self.startsWithTrigger(transcript: transcript, triggers: config.triggers),
|
||||
!trimmed.isEmpty
|
||||
{
|
||||
if let cooldown = cooldownUntil, now < cooldown {
|
||||
return
|
||||
}
|
||||
await self.beginCapture(command: trimmed, triggerEndTime: nil, config: config)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private func beginCapture(command: String, triggerEndTime: TimeInterval, config: RuntimeConfig) async {
|
||||
private func beginCapture(command: String, triggerEndTime: TimeInterval?, config: RuntimeConfig) async {
|
||||
self.listeningState = .voiceWake
|
||||
self.isCapturing = true
|
||||
DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "beginCapture")
|
||||
@@ -472,6 +485,34 @@ actor VoiceWakeRuntime {
|
||||
return text
|
||||
}
|
||||
|
||||
/// Reports whether `transcript` begins with the tokens of any trigger phrase.
///
/// Both sides are split on whitespace and passed through `normalizeToken`; tokens that
/// normalize to the empty string are dropped before comparing.
///
/// - Returns: `true` when at least one non-empty trigger is a token-wise prefix of the transcript.
private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
    func normalizedTokens(of phrase: String) -> [String] {
        phrase
            .split(whereSeparator: \.isWhitespace)
            .map { normalizeToken(String($0)) }
            .filter { !$0.isEmpty }
    }

    let spoken = normalizedTokens(of: transcript)
    guard !spoken.isEmpty else { return false }

    return triggers.contains { trigger in
        let expected = normalizedTokens(of: trigger)
        // An empty trigger never matches; `starts(with:)` already fails when `spoken` is shorter.
        return !expected.isEmpty && spoken.starts(with: expected)
    }
}
|
||||
|
||||
/// Lowercases `token` after trimming the characters in `whitespaceAndPunctuation` from both ends.
private static func normalizeToken(_ token: String) -> String {
    let stripped = token.trimmingCharacters(in: Self.whitespaceAndPunctuation)
    return stripped.lowercased()
}
|
||||
|
||||
/// Union of whitespace, newline, and punctuation characters.
private static let whitespaceAndPunctuation: CharacterSet = {
    var characters = CharacterSet.whitespacesAndNewlines
    characters.formUnion(.punctuationCharacters)
    return characters
}()
|
||||
|
||||
private static func commandAfterTrigger(
|
||||
transcript: String,
|
||||
segments: [WakeWordSegment],
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
import AppKit
|
||||
import AVFoundation
|
||||
import Observation
|
||||
import SwabbleKit
|
||||
import Speech
|
||||
import SwiftUI
|
||||
import UniformTypeIdentifiers
|
||||
|
||||
struct VoiceWakeSettings: View {
|
||||
@Bindable var state: AppState
|
||||
let isActive: Bool
|
||||
@State private var testState: VoiceWakeTestState = .idle
|
||||
@State private var tester = VoiceWakeTester()
|
||||
@State private var isTesting = false
|
||||
@State private var testTimeoutTask: Task<Void, Never>?
|
||||
@State private var availableMics: [AudioInputDevice] = []
|
||||
@State private var loadingMics = false
|
||||
@State private var meterLevel: Double = 0
|
||||
@@ -101,8 +104,22 @@ struct VoiceWakeSettings: View {
|
||||
guard !self.isPreview else { return }
|
||||
Task { await self.restartMeter() }
|
||||
}
|
||||
.onChange(of: self.isActive) { _, active in
|
||||
guard !self.isPreview else { return }
|
||||
if !active {
|
||||
self.tester.stop()
|
||||
self.isTesting = false
|
||||
self.testState = .idle
|
||||
self.testTimeoutTask?.cancel()
|
||||
Task { await self.meter.stop() }
|
||||
}
|
||||
}
|
||||
.onDisappear {
|
||||
guard !self.isPreview else { return }
|
||||
self.tester.stop()
|
||||
self.isTesting = false
|
||||
self.testState = .idle
|
||||
self.testTimeoutTask?.cancel()
|
||||
Task { await self.meter.stop() }
|
||||
}
|
||||
}
|
||||
@@ -205,13 +222,23 @@ struct VoiceWakeSettings: View {
|
||||
return
|
||||
}
|
||||
if self.isTesting {
|
||||
self.tester.stop()
|
||||
self.tester.finalize()
|
||||
self.isTesting = false
|
||||
self.testState = .idle
|
||||
self.testState = .finalizing
|
||||
Task { @MainActor in
|
||||
try? await Task.sleep(nanoseconds: 2_000_000_000)
|
||||
if self.testState == .finalizing {
|
||||
self.tester.stop()
|
||||
self.testState = .failed("Stopped")
|
||||
}
|
||||
}
|
||||
self.testTimeoutTask?.cancel()
|
||||
return
|
||||
}
|
||||
|
||||
let triggers = self.sanitizedTriggers()
|
||||
self.tester.stop()
|
||||
self.testTimeoutTask?.cancel()
|
||||
self.isTesting = true
|
||||
self.testState = .requesting
|
||||
Task { @MainActor in
|
||||
@@ -225,18 +252,31 @@ struct VoiceWakeSettings: View {
|
||||
self.testState = newState
|
||||
if case .detected = newState { self.isTesting = false }
|
||||
if case .failed = newState { self.isTesting = false }
|
||||
if case .detected = newState { self.testTimeoutTask?.cancel() }
|
||||
if case .failed = newState { self.testTimeoutTask?.cancel() }
|
||||
}
|
||||
})
|
||||
try await Task.sleep(nanoseconds: 10 * 1_000_000_000)
|
||||
if self.isTesting {
|
||||
self.tester.stop()
|
||||
self.testState = .failed("Timeout: no trigger heard")
|
||||
self.isTesting = false
|
||||
self.testTimeoutTask?.cancel()
|
||||
self.testTimeoutTask = Task { @MainActor in
|
||||
try? await Task.sleep(nanoseconds: 10 * 1_000_000_000)
|
||||
guard !Task.isCancelled else { return }
|
||||
if self.isTesting {
|
||||
self.tester.stop()
|
||||
if case let .hearing(text) = self.testState,
|
||||
let command = Self.textOnlyCommand(from: text, triggers: triggers)
|
||||
{
|
||||
self.testState = .detected(command)
|
||||
} else {
|
||||
self.testState = .failed("Timeout: no trigger heard")
|
||||
}
|
||||
self.isTesting = false
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
self.tester.stop()
|
||||
self.testState = .failed(error.localizedDescription)
|
||||
self.isTesting = false
|
||||
self.testTimeoutTask?.cancel()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -314,6 +354,44 @@ struct VoiceWakeSettings: View {
|
||||
sanitizeVoiceWakeTriggers(self.state.swabbleTriggerWords)
|
||||
}
|
||||
|
||||
/// Extracts the command that follows a leading trigger phrase using only the transcript text.
///
/// - Parameters:
///   - transcript: Text heard so far.
///   - triggers: Trigger phrases to look for at the start of the transcript.
/// - Returns: The result of `WakeWordGate.stripWake`, or `nil` when the transcript is empty
///   (before or after normalization), does not start with a trigger, fails
///   `WakeWordGate.matchesTextOnly`, or leaves nothing once the wake phrase is stripped.
private static func textOnlyCommand(from transcript: String, triggers: [String]) -> String? {
    guard
        !transcript.isEmpty,
        !normalizeToken(transcript).isEmpty,
        startsWithTrigger(transcript: transcript, triggers: triggers),
        WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers)
    else { return nil }

    let command = WakeWordGate.stripWake(text: transcript, triggers: triggers)
    if command.isEmpty { return nil }
    return command
}
|
||||
|
||||
/// Reports whether `transcript` begins with the tokens of any trigger phrase.
///
/// Both sides are split on whitespace and passed through `normalizeToken`; tokens that
/// normalize to the empty string are dropped before comparing.
///
/// - Returns: `true` when at least one non-empty trigger is a token-wise prefix of the transcript.
private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
    func normalizedTokens(of phrase: String) -> [String] {
        phrase
            .split(whereSeparator: \.isWhitespace)
            .map { normalizeToken(String($0)) }
            .filter { !$0.isEmpty }
    }

    let spoken = normalizedTokens(of: transcript)
    guard !spoken.isEmpty else { return false }

    return triggers.contains { trigger in
        let expected = normalizedTokens(of: trigger)
        // An empty trigger never matches; `starts(with:)` already fails when `spoken` is shorter.
        return !expected.isEmpty && spoken.starts(with: expected)
    }
}
|
||||
|
||||
/// Lowercases `token` after trimming the characters in `whitespaceAndPunctuation` from both ends.
private static func normalizeToken(_ token: String) -> String {
    let stripped = token.trimmingCharacters(in: Self.whitespaceAndPunctuation)
    return stripped.lowercased()
}
|
||||
|
||||
/// Union of whitespace, newline, and punctuation characters.
private static let whitespaceAndPunctuation: CharacterSet = {
    var characters = CharacterSet.whitespacesAndNewlines
    characters.formUnion(.punctuationCharacters)
    return characters
}()
|
||||
|
||||
private var micPicker: some View {
|
||||
VStack(alignment: .leading, spacing: 6) {
|
||||
HStack(alignment: .firstTextBaseline, spacing: 10) {
|
||||
@@ -506,7 +584,7 @@ struct VoiceWakeSettings: View {
|
||||
#if DEBUG
|
||||
struct VoiceWakeSettings_Previews: PreviewProvider {
|
||||
static var previews: some View {
|
||||
VoiceWakeSettings(state: .preview)
|
||||
VoiceWakeSettings(state: .preview, isActive: true)
|
||||
.frame(width: SettingsTab.windowWidth, height: SettingsTab.windowHeight)
|
||||
}
|
||||
}
|
||||
@@ -519,7 +597,7 @@ extension VoiceWakeSettings {
|
||||
state.voicePushToTalkEnabled = true
|
||||
state.swabbleTriggerWords = ["Claude", "Hey"]
|
||||
|
||||
let view = VoiceWakeSettings(state: state)
|
||||
let view = VoiceWakeSettings(state: state, isActive: true)
|
||||
view.availableMics = [AudioInputDevice(uid: "mic-1", name: "Built-in")]
|
||||
view.availableLocales = [Locale(identifier: "en_US")]
|
||||
view.meterLevel = 0.42
|
||||
|
||||
@@ -57,6 +57,9 @@ struct VoiceWakeTestCard: View {
|
||||
.symbolEffect(.pulse)
|
||||
.foregroundStyle(Color.accentColor))
|
||||
|
||||
case .finalizing:
|
||||
AnyView(ProgressView().controlSize(.small))
|
||||
|
||||
case .detected:
|
||||
AnyView(Image(systemName: "checkmark.circle.fill").foregroundStyle(.green))
|
||||
|
||||
@@ -79,6 +82,9 @@ struct VoiceWakeTestCard: View {
|
||||
case let .hearing(text):
|
||||
"Heard: \(text)"
|
||||
|
||||
case .finalizing:
|
||||
"Finalizing…"
|
||||
|
||||
case .detected:
|
||||
"Voice wake detected!"
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ enum VoiceWakeTestState: Equatable {
|
||||
case requesting
|
||||
case listening
|
||||
case hearing(String)
|
||||
case finalizing
|
||||
case detected(String)
|
||||
case failed(String)
|
||||
}
|
||||
@@ -18,8 +19,15 @@ final class VoiceWakeTester {
|
||||
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
|
||||
private var recognitionTask: SFSpeechRecognitionTask?
|
||||
private var isStopping = false
|
||||
private var isFinalizing = false
|
||||
private var detectionStart: Date?
|
||||
private var lastHeard: Date?
|
||||
private var lastLoggedText: String?
|
||||
private var lastLoggedAt: Date?
|
||||
private var lastTranscript: String?
|
||||
private var lastTranscriptAt: Date?
|
||||
private var silenceTask: Task<Void, Never>?
|
||||
private var currentTriggers: [String] = []
|
||||
private var holdingAfterDetect = false
|
||||
private var detectedText: String?
|
||||
private let logger = Logger(subsystem: "com.clawdbot", category: "voicewake")
|
||||
@@ -37,6 +45,17 @@ final class VoiceWakeTester {
|
||||
{
|
||||
guard self.recognitionTask == nil else { return }
|
||||
self.isStopping = false
|
||||
self.isFinalizing = false
|
||||
self.holdingAfterDetect = false
|
||||
self.detectedText = nil
|
||||
self.lastHeard = nil
|
||||
self.lastLoggedText = nil
|
||||
self.lastLoggedAt = nil
|
||||
self.lastTranscript = nil
|
||||
self.lastTranscriptAt = nil
|
||||
self.silenceTask?.cancel()
|
||||
self.silenceTask = nil
|
||||
self.currentTriggers = triggers
|
||||
let chosenLocale = localeID.flatMap { Locale(identifier: $0) } ?? Locale.current
|
||||
let recognizer = SFSpeechRecognizer(locale: chosenLocale)
|
||||
guard let recognizer, recognizer.isAvailable else {
|
||||
@@ -45,6 +64,7 @@ final class VoiceWakeTester {
|
||||
code: 1,
|
||||
userInfo: [NSLocalizedDescriptionKey: "Speech recognition unavailable"])
|
||||
}
|
||||
recognizer.defaultTaskHint = .dictation
|
||||
|
||||
guard Self.hasPrivacyStrings else {
|
||||
throw NSError(
|
||||
@@ -70,6 +90,7 @@ final class VoiceWakeTester {
|
||||
|
||||
self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
|
||||
self.recognitionRequest?.shouldReportPartialResults = true
|
||||
self.recognitionRequest?.taskHint = .dictation
|
||||
let request = self.recognitionRequest
|
||||
|
||||
let inputNode = self.audioEngine.inputNode
|
||||
@@ -96,9 +117,21 @@ final class VoiceWakeTester {
|
||||
let segments = result.map { WakeWordSpeechSegments.from(
|
||||
transcription: $0.bestTranscription,
|
||||
transcript: text) } ?? []
|
||||
let gateConfig = WakeWordGateConfig(triggers: triggers)
|
||||
let match = WakeWordGate.match(transcript: text, segments: segments, config: gateConfig)
|
||||
let isFinal = result?.isFinal ?? false
|
||||
let gateConfig = WakeWordGateConfig(triggers: triggers)
|
||||
var match = WakeWordGate.match(transcript: text, segments: segments, config: gateConfig)
|
||||
if match == nil, isFinal {
|
||||
match = self.textOnlyFallbackMatch(
|
||||
transcript: text,
|
||||
triggers: triggers,
|
||||
config: gateConfig)
|
||||
}
|
||||
self.maybeLogDebug(
|
||||
transcript: text,
|
||||
segments: segments,
|
||||
triggers: triggers,
|
||||
match: match,
|
||||
isFinal: isFinal)
|
||||
let errorMessage = error?.localizedDescription
|
||||
|
||||
Task { [weak self] in
|
||||
@@ -114,13 +147,47 @@ final class VoiceWakeTester {
|
||||
}
|
||||
|
||||
func stop() {
|
||||
self.isStopping = true
|
||||
self.stop(force: true)
|
||||
}
|
||||
|
||||
/// Stops feeding audio to the recognizer and force-stops the session if it has not ended
/// on its own after `timeout` seconds.
///
/// When no recognition task is active this simply performs a forced stop.
///
/// - Parameter timeout: Seconds to wait before forcing a stop. Negative or non-finite
///   values are treated as zero.
func finalize(timeout: TimeInterval = 1.5) {
    guard let finalizingTask = self.recognitionTask else {
        self.stop(force: true)
        return
    }
    self.isFinalizing = true
    self.audioEngine.inputNode.removeTap(onBus: 0)
    self.recognitionRequest?.endAudio()
    self.audioEngine.stop()

    // Remember which session is being finalized. `isStopping` is cleared again when a new
    // test starts, so checking it alone would let this delayed stop tear down a newer session.
    let finalizingID = ObjectIdentifier(finalizingTask)
    // The UInt64 conversion below traps on negative or non-finite input.
    let delay = timeout.isFinite ? max(0, timeout) : 0
    Task { [weak self] in
        try? await Task.sleep(nanoseconds: UInt64(delay * 1_000_000_000))
        // Re-acquire `self` only after sleeping so the pending timeout does not keep it alive.
        guard let self else { return }
        guard !self.isStopping, self.isFinalizing else { return }
        guard self.recognitionTask.map(ObjectIdentifier.init) == finalizingID else { return }
        self.stop(force: true)
    }
}
|
||||
|
||||
/// Tears down the audio engine and recognition session and clears all per-run tracking state.
///
/// - Parameter force: When `true`, also sets `isStopping`.
private func stop(force: Bool) {
    if force { self.isStopping = true }
    self.isFinalizing = false

    // Audio and recognition teardown, in the same order as before.
    self.audioEngine.stop()
    if let request = self.recognitionRequest { request.endAudio() }
    if let task = self.recognitionTask { task.cancel() }
    self.recognitionTask = nil
    self.recognitionRequest = nil
    self.audioEngine.inputNode.removeTap(onBus: 0)

    // Drop any pending silence check.
    if let pending = self.silenceTask { pending.cancel() }
    self.silenceTask = nil

    // Reset detection, transcript, and logging bookkeeping.
    self.holdingAfterDetect = false
    self.detectedText = nil
    self.detectionStart = nil
    self.lastHeard = nil
    self.lastTranscript = nil
    self.lastTranscriptAt = nil
    self.lastLoggedText = nil
    self.lastLoggedAt = nil
    self.currentTriggers = []
}
|
||||
|
||||
private func handleResult(
|
||||
@@ -132,6 +199,11 @@ final class VoiceWakeTester {
|
||||
{
|
||||
if !text.isEmpty {
|
||||
self.lastHeard = Date()
|
||||
self.lastTranscript = text
|
||||
self.lastTranscriptAt = Date()
|
||||
}
|
||||
if self.holdingAfterDetect {
|
||||
return
|
||||
}
|
||||
if let match, !match.command.isEmpty {
|
||||
self.holdingAfterDetect = true
|
||||
@@ -141,17 +213,28 @@ final class VoiceWakeTester {
|
||||
Task.detached {
|
||||
await VoiceWakeForwarder.forward(transcript: match.command)
|
||||
}
|
||||
Task { @MainActor in onUpdate(.detected(match.command)) }
|
||||
self.holdUntilSilence(onUpdate: onUpdate)
|
||||
self.stop()
|
||||
await MainActor.run {
|
||||
AppStateStore.shared.stopVoiceEars()
|
||||
onUpdate(.detected(match.command))
|
||||
}
|
||||
return
|
||||
}
|
||||
if !isFinal, !text.isEmpty {
|
||||
self.scheduleSilenceCheck(
|
||||
triggers: self.currentTriggers,
|
||||
onUpdate: onUpdate)
|
||||
}
|
||||
if self.isFinalizing {
|
||||
Task { @MainActor in onUpdate(.finalizing) }
|
||||
}
|
||||
if let errorMessage {
|
||||
self.stop()
|
||||
self.stop(force: true)
|
||||
Task { @MainActor in onUpdate(.failed(errorMessage)) }
|
||||
return
|
||||
}
|
||||
if isFinal {
|
||||
self.stop()
|
||||
self.stop(force: true)
|
||||
let state: VoiceWakeTestState = text.isEmpty
|
||||
? .failed("No speech detected")
|
||||
: .failed("No trigger heard: “\(text)”")
|
||||
@@ -162,6 +245,139 @@ final class VoiceWakeTester {
|
||||
}
|
||||
}
|
||||
|
||||
/// Logs a diagnostic line describing the current transcript and how it matched.
///
/// Empty transcripts are never logged. A non-final transcript identical to the last logged
/// one is skipped when it arrives within 0.25 seconds of that log entry.
///
/// - Parameters:
///   - transcript: Text recognized so far.
///   - segments: Segments derived from the transcription.
///   - triggers: Trigger phrases used for the debug summaries.
///   - match: Gate match for this update, if any.
///   - isFinal: Whether this is the recognizer's final result.
private func maybeLogDebug(
    transcript: String,
    segments: [WakeWordSegment],
    triggers: [String],
    match: WakeWordGateMatch?,
    isFinal: Bool)
{
    guard !transcript.isEmpty else { return }

    // Throttle repeated partial results carrying the same text.
    if !isFinal,
       transcript == self.lastLoggedText,
       let previous = self.lastLoggedAt,
       Date().timeIntervalSince(previous) < 0.25
    {
        return
    }
    self.lastLoggedText = transcript
    self.lastLoggedAt = Date()

    let textOnly = WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers)
    let gaps = Self.debugCandidateGaps(triggers: triggers, segments: segments)
    let segmentSummary = Self.debugSegments(segments)
    // Count segments that carry any timing information.
    let timingCount = segments.reduce(into: 0) { count, segment in
        if segment.start > 0 || segment.duration > 0 { count += 1 }
    }
    let matchSummary: String
    if let match {
        matchSummary = "match=true gap=\(String(format: "%.2f", match.postGap))s cmdLen=\(match.command.count)"
    } else {
        matchSummary = "match=false"
    }

    self.logger.info(
        "voicewake test transcript='\(transcript, privacy: .public)' textOnly=\(textOnly) " +
        "isFinal=\(isFinal) timing=\(timingCount)/\(segments.count) " +
        "\(matchSummary) gaps=[\(gaps, privacy: .public)] segments=[\(segmentSummary, privacy: .public)]")
}
|
||||
|
||||
/// Formats segments as a comma-separated list of `text@start-end`, with times to two decimals.
private static func debugSegments(_ segments: [WakeWordSegment]) -> String {
    var parts: [String] = []
    parts.reserveCapacity(segments.count)
    for segment in segments {
        let from = String(format: "%.2f", segment.start)
        let to = String(format: "%.2f", segment.end)
        parts.append("\(segment.text)@\(from)-\(to)")
    }
    return parts.joined(separator: ", ")
}
|
||||
|
||||
/// Lists, for every place a trigger phrase occurs in the segments and is followed by another
/// token, the time between the end of the trigger and the start of that next token.
///
/// - Returns: Comma-separated entries of the form `trigger words:0.00s`; empty when no
///   segment normalizes to a non-empty token or no trigger occurrence is found.
private static func debugCandidateGaps(triggers: [String], segments: [WakeWordSegment]) -> String {
    let spoken = normalizeSegments(segments)
    if spoken.isEmpty { return "" }

    var report: [String] = []
    for trigger in normalizeTriggers(triggers) {
        let phrase = trigger.tokens
        // A gap needs at least one token after the trigger phrase.
        guard !phrase.isEmpty, spoken.count > phrase.count else { continue }
        for offset in 0..<(spoken.count - phrase.count) {
            let window = spoken[offset..<(offset + phrase.count)]
            guard zip(window, phrase).allSatisfy({ $0.normalized == $1 }) else { continue }
            let triggerEnd = spoken[offset + phrase.count - 1].end
            let gap = spoken[offset + phrase.count].start - triggerEnd
            report.append("\(phrase.joined(separator: " ")):\(String(format: "%.2f", gap))s")
        }
    }
    return report.joined(separator: ", ")
}
|
||||
|
||||
/// A normalized segment token together with the segment's start and end times.
private struct DebugToken {
    /// Segment text after `normalizeToken`.
    let normalized: String
    /// Start and end of the originating segment.
    let start, end: TimeInterval
}
|
||||
|
||||
/// Normalized, non-empty tokens of a single trigger phrase, used by the debug gap summary.
private struct DebugTriggerTokens {
|
||||
/// Trigger words in spoken order.
let tokens: [String]
|
||||
}
|
||||
|
||||
/// Splits each trigger phrase on whitespace into normalized tokens, dropping triggers that
/// have no non-empty token left.
private static func normalizeTriggers(_ triggers: [String]) -> [DebugTriggerTokens] {
    triggers.compactMap { trigger -> DebugTriggerTokens? in
        let words = trigger
            .split(whereSeparator: \.isWhitespace)
            .map { normalizeToken(String($0)) }
            .filter { !$0.isEmpty }
        return words.isEmpty ? nil : DebugTriggerTokens(tokens: words)
    }
}
|
||||
|
||||
/// Converts segments into debug tokens, skipping segments whose text normalizes to empty.
private static func normalizeSegments(_ segments: [WakeWordSegment]) -> [DebugToken] {
    var tokens: [DebugToken] = []
    for segment in segments {
        let normalized = normalizeToken(segment.text)
        if normalized.isEmpty { continue }
        tokens.append(DebugToken(normalized: normalized, start: segment.start, end: segment.end))
    }
    return tokens
}
|
||||
|
||||
/// Lowercases `token` after trimming the characters in `whitespaceAndPunctuation` from both ends.
private static func normalizeToken(_ token: String) -> String {
    let stripped = token.trimmingCharacters(in: Self.whitespaceAndPunctuation)
    return stripped.lowercased()
}
|
||||
|
||||
/// Union of whitespace, newline, and punctuation characters.
private static let whitespaceAndPunctuation: CharacterSet = {
    var characters = CharacterSet.whitespacesAndNewlines
    characters.formUnion(.punctuationCharacters)
    return characters
}()
|
||||
|
||||
/// Builds a match from the transcript text alone, for use when no timing-based match exists.
///
/// - Parameters:
///   - transcript: Text to inspect.
///   - triggers: Trigger phrases that must appear at the start of the transcript.
///   - config: Supplies `minCommandLength`, the minimum length of the stripped command.
/// - Returns: A match with `triggerEndTime` and `postGap` set to zero, or `nil` when the
///   transcript fails `WakeWordGate.matchesTextOnly`, does not start with a trigger, or the
///   remaining command is shorter than `config.minCommandLength`.
private func textOnlyFallbackMatch(
    transcript: String,
    triggers: [String],
    config: WakeWordGateConfig
) -> WakeWordGateMatch? {
    guard
        WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers),
        Self.startsWithTrigger(transcript: transcript, triggers: triggers)
    else { return nil }

    let command = WakeWordGate.stripWake(text: transcript, triggers: triggers)
    if command.count < config.minCommandLength { return nil }
    return WakeWordGateMatch(triggerEndTime: 0, postGap: 0, command: command)
}
|
||||
|
||||
/// Reports whether `transcript` begins with the tokens of any trigger phrase.
///
/// Both sides are split on whitespace and passed through `normalizeToken`; tokens that
/// normalize to the empty string are dropped before comparing.
///
/// - Returns: `true` when at least one non-empty trigger is a token-wise prefix of the transcript.
private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
    func normalizedTokens(of phrase: String) -> [String] {
        phrase
            .split(whereSeparator: \.isWhitespace)
            .map { normalizeToken(String($0)) }
            .filter { !$0.isEmpty }
    }

    let spoken = normalizedTokens(of: transcript)
    guard !spoken.isEmpty else { return false }

    return triggers.contains { trigger in
        let expected = normalizedTokens(of: trigger)
        // An empty trigger never matches; `starts(with:)` already fails when `spoken` is shorter.
        return !expected.isEmpty && spoken.starts(with: expected)
    }
}
|
||||
|
||||
private func holdUntilSilence(onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void) {
|
||||
Task { [weak self] in
|
||||
guard let self else { return }
|
||||
@@ -187,6 +403,40 @@ final class VoiceWakeTester {
|
||||
}
|
||||
}
|
||||
|
||||
/// Schedules a check that fires after `silenceWindow` seconds and, if the transcript has not
/// changed in the meantime and matches by text alone, reports a detection.
///
/// Any previously scheduled check is cancelled first.
///
/// - Parameters:
///   - triggers: Trigger phrases used for the text-only match.
///   - onUpdate: Receives `.detected` with the matched command on the main actor.
private func scheduleSilenceCheck(
    triggers: [String],
    onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void
) {
    // Only the most recently scheduled check may fire.
    self.silenceTask?.cancel()
    let snapshotAt = self.lastTranscriptAt
    let snapshotText = self.lastTranscript
    self.silenceTask = Task { [weak self] in
        guard let self else { return }
        let windowNanos = UInt64(self.silenceWindow * 1_000_000_000)
        try? await Task.sleep(nanoseconds: windowNanos)
        guard
            !Task.isCancelled,
            !self.isStopping,
            !self.holdingAfterDetect,
            let snapshotAt,
            let snapshotText,
            // Bail out if a newer transcript arrived while sleeping.
            self.lastTranscriptAt == snapshotAt,
            self.lastTranscript == snapshotText,
            let match = self.textOnlyFallbackMatch(
                transcript: snapshotText,
                triggers: triggers,
                config: WakeWordGateConfig(triggers: triggers))
        else { return }

        self.holdingAfterDetect = true
        self.detectedText = match.command
        self.logger.info("voice wake detected (silence); forwarding (len=\(match.command.count))")
        await MainActor.run { AppStateStore.shared.triggerVoiceEars(ttl: nil) }
        Task.detached {
            await VoiceWakeForwarder.forward(transcript: match.command)
        }
        self.stop()
        await MainActor.run {
            AppStateStore.shared.stopVoiceEars()
            onUpdate(.detected(match.command))
        }
    }
}
|
||||
|
||||
/// Placeholder for microphone selection; currently does nothing with its argument.
///
/// - Parameter preferredMicID: Identifier of the preferred microphone, if any. Unused here.
private func configureSession(preferredMicID: String?) {
|
||||
// Explicitly discard the parameter so it is not reported as unused.
_ = preferredMicID
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user