feat: share wake gate via SwabbleKit

This commit is contained in:
Peter Steinberger
2025-12-23 01:30:40 +01:00
parent cf48d297dd
commit ef35868bef
2945 changed files with 27887 additions and 122 deletions

View File

@@ -2,6 +2,7 @@ import AVFoundation
import Foundation
import OSLog
import Speech
import SwabbleKit
#if canImport(AppKit)
import AppKit
#endif
@@ -35,6 +36,7 @@ actor VoiceWakeRuntime {
private var currentConfig: RuntimeConfig?
private var listeningState: ListeningState = .idle
private var overlayToken: UUID?
private var activeTriggerEndTime: TimeInterval?
// Tunables
// Silence threshold once we've captured user speech (post-trigger).
@@ -147,9 +149,13 @@ actor VoiceWakeRuntime {
self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self, generation] result, error in
guard let self else { return }
let transcript = result?.bestTranscription.formattedString
let segments = result.flatMap { result in
transcript.map { WakeWordSpeechSegments.from(transcription: result.bestTranscription, transcript: $0) }
} ?? []
let isFinal = result?.isFinal ?? false
Task { await self.handleRecognition(
transcript: transcript,
segments: segments,
isFinal: isFinal,
error: error,
config: config,
@@ -184,6 +190,7 @@ actor VoiceWakeRuntime {
self.audioEngine = nil
self.currentConfig = nil
self.listeningState = .idle
self.activeTriggerEndTime = nil
self.logger.debug("voicewake runtime stopped")
DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "stopped")
@@ -206,6 +213,7 @@ actor VoiceWakeRuntime {
private func handleRecognition(
transcript: String?,
segments: [WakeWordSegment],
isFinal: Bool,
error: Error?,
config: RuntimeConfig,
@@ -224,7 +232,11 @@ actor VoiceWakeRuntime {
if !transcript.isEmpty {
self.lastHeard = now
if self.isCapturing {
let trimmed = Self.trimmedAfterTrigger(transcript, triggers: config.triggers)
let trimmed = Self.commandAfterTrigger(
transcript: transcript,
segments: segments,
triggerEndTime: self.activeTriggerEndTime,
triggers: config.triggers)
self.capturedTranscript = trimmed
self.updateHeardBeyondTrigger(withTrimmed: trimmed)
if isFinal {
@@ -252,37 +264,27 @@ actor VoiceWakeRuntime {
if self.isCapturing { return }
if Self.matches(text: transcript, triggers: config.triggers) {
let gateConfig = WakeWordGateConfig(triggers: config.triggers)
if let match = WakeWordGate.match(transcript: transcript, segments: segments, config: gateConfig) {
if let cooldown = cooldownUntil, now < cooldown {
return
}
await self.beginCapture(transcript: transcript, config: config)
await self.beginCapture(command: match.command, triggerEndTime: match.triggerEndTime, config: config)
}
}
private static func matches(text: String, triggers: [String]) -> Bool {
guard !text.isEmpty else { return false }
let normalized = text.lowercased()
for trigger in triggers {
let t = trigger.lowercased().trimmingCharacters(in: .whitespacesAndNewlines)
if t.isEmpty { continue }
if normalized.contains(t) { return true }
}
return false
}
private func beginCapture(transcript: String, config: RuntimeConfig) async {
private func beginCapture(command: String, triggerEndTime: TimeInterval, config: RuntimeConfig) async {
self.listeningState = .voiceWake
self.isCapturing = true
DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "beginCapture")
let trimmed = Self.trimmedAfterTrigger(transcript, triggers: config.triggers)
self.capturedTranscript = trimmed
self.capturedTranscript = command
self.committedTranscript = ""
self.volatileTranscript = trimmed
self.volatileTranscript = command
self.captureStartedAt = Date()
self.cooldownUntil = nil
self.heardBeyondTrigger = !trimmed.isEmpty
self.heardBeyondTrigger = !command.isEmpty
self.triggerChimePlayed = false
self.activeTriggerEndTime = triggerEndTime
if config.triggerChime != .none, !self.triggerChimePlayed {
self.triggerChimePlayed = true
@@ -354,6 +356,7 @@ actor VoiceWakeRuntime {
self.lastHeard = nil
self.heardBeyondTrigger = false
self.triggerChimePlayed = false
self.activeTriggerEndTime = nil
await MainActor.run { AppStateStore.shared.stopVoiceEars() }
if let token = self.overlayToken {
@@ -467,6 +470,22 @@ actor VoiceWakeRuntime {
return text
}
private static func commandAfterTrigger(
transcript: String,
segments: [WakeWordSegment],
triggerEndTime: TimeInterval?,
triggers: [String]) -> String
{
guard let triggerEndTime else {
return trimmedAfterTrigger(transcript, triggers: triggers)
}
let trimmed = WakeWordGate.commandText(
transcript: transcript,
segments: segments,
triggerEndTime: triggerEndTime)
return trimmed.isEmpty ? trimmedAfterTrigger(transcript, triggers: triggers) : trimmed
}
#if DEBUG
static func _testTrimmedAfterTrigger(_ text: String, triggers: [String]) -> String {
self.trimmedAfterTrigger(text, triggers: triggers)
@@ -481,9 +500,6 @@ actor VoiceWakeRuntime {
.attribute(.foregroundColor, at: 0, effectiveRange: nil) as? NSColor ?? .clear
}
static func _testMatches(text: String, triggers: [String]) -> Bool {
self.matches(text: text, triggers: triggers)
}
#endif
private static func delta(after committed: String, current: String) -> String {

View File

@@ -2,6 +2,7 @@ import AVFoundation
import Foundation
import OSLog
import Speech
import SwabbleKit
enum VoiceWakeTestState: Equatable {
case idle
@@ -93,14 +94,16 @@ final class VoiceWakeTester {
self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
guard let self, !self.isStopping else { return }
let text = result?.bestTranscription.formattedString ?? ""
let matched = Self.matches(text: text, triggers: triggers)
let segments = result.map { WakeWordSpeechSegments.from(transcription: $0.bestTranscription, transcript: text) } ?? []
let gateConfig = WakeWordGateConfig(triggers: triggers)
let match = WakeWordGate.match(transcript: text, segments: segments, config: gateConfig)
let isFinal = result?.isFinal ?? false
let errorMessage = error?.localizedDescription
Task { [weak self] in
guard let self, !self.isStopping else { return }
await self.handleResult(
matched: matched,
match: match,
text: text,
isFinal: isFinal,
errorMessage: errorMessage,
@@ -120,7 +123,7 @@ final class VoiceWakeTester {
}
private func handleResult(
matched: Bool,
match: WakeWordGateMatch?,
text: String,
isFinal: Bool,
errorMessage: String?,
@@ -129,15 +132,15 @@ final class VoiceWakeTester {
if !text.isEmpty {
self.lastHeard = Date()
}
if matched, !text.isEmpty {
if let match, !match.command.isEmpty {
self.holdingAfterDetect = true
self.detectedText = text
self.logger.info("voice wake detected; forwarding (len=\(text.count))")
self.detectedText = match.command
self.logger.info("voice wake detected; forwarding (len=\(match.command.count))")
await MainActor.run { AppStateStore.shared.triggerVoiceEars(ttl: nil) }
Task.detached {
await VoiceWakeForwarder.forward(transcript: text)
await VoiceWakeForwarder.forward(transcript: match.command)
}
Task { @MainActor in onUpdate(.detected(text)) }
Task { @MainActor in onUpdate(.detected(match.command)) }
self.holdUntilSilence(onUpdate: onUpdate)
return
}
@@ -187,15 +190,6 @@ final class VoiceWakeTester {
_ = preferredMicID
}
private static func matches(text: String, triggers: [String]) -> Bool {
let lowered = text.lowercased()
return triggers.contains { lowered.contains($0.lowercased()) }
}
static func _testMatches(text: String, triggers: [String]) -> Bool {
self.matches(text: text, triggers: triggers)
}
private nonisolated static func ensurePermissions() async throws -> Bool {
let speechStatus = SFSpeechRecognizer.authorizationStatus()
if speechStatus == .notDetermined {