feat(macos): run live voice wake listener and animate ears

This commit is contained in:
Peter Steinberger
2025-12-07 04:52:27 +01:00
parent ca4e76b34f
commit 38abb044d0
2 changed files with 170 additions and 3 deletions

View File

@@ -26,7 +26,10 @@ final class AppState: ObservableObject {
}
@Published var swabbleEnabled: Bool {
didSet { UserDefaults.standard.set(self.swabbleEnabled, forKey: swabbleEnabledKey) }
didSet {
UserDefaults.standard.set(self.swabbleEnabled, forKey: swabbleEnabledKey)
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
}
}
@Published var swabbleTriggerWords: [String] {
@@ -36,6 +39,10 @@ final class AppState: ObservableObject {
UserDefaults.standard.set(cleaned, forKey: swabbleTriggersKey)
if cleaned.count != self.swabbleTriggerWords.count {
self.swabbleTriggerWords = cleaned
return
}
if self.swabbleEnabled {
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
}
}
}
@@ -52,11 +59,21 @@ final class AppState: ObservableObject {
}
@Published var voiceWakeMicID: String {
didSet { UserDefaults.standard.set(self.voiceWakeMicID, forKey: voiceWakeMicKey) }
didSet {
UserDefaults.standard.set(self.voiceWakeMicID, forKey: voiceWakeMicKey)
if self.swabbleEnabled {
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
}
}
}
@Published var voiceWakeLocaleID: String {
didSet { UserDefaults.standard.set(self.voiceWakeLocaleID, forKey: voiceWakeLocaleKey) }
didSet {
UserDefaults.standard.set(self.voiceWakeLocaleID, forKey: voiceWakeLocaleKey)
if self.swabbleEnabled {
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
}
}
}
@Published var voiceWakeAdditionalLocaleIDs: [String] {
@@ -116,6 +133,8 @@ final class AppState: ObservableObject {
if self.swabbleEnabled && !PermissionManager.voiceWakePermissionsGranted() {
self.swabbleEnabled = false
}
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
}
func triggerVoiceEars(ttl: TimeInterval = 5) {
@@ -135,16 +154,19 @@ final class AppState: ObservableObject {
if !enabled {
self.swabbleEnabled = false
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
return
}
if PermissionManager.voiceWakePermissionsGranted() {
self.swabbleEnabled = true
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
return
}
let granted = await PermissionManager.ensureVoiceWakePermissions(interactive: true)
self.swabbleEnabled = granted
Task { await VoiceWakeRuntime.shared.refresh(state: self) }
}
func setWorking(_ working: Bool) {

View File

@@ -0,0 +1,145 @@
import AVFoundation
import Foundation
import OSLog
import Speech
/// Background listener that keeps the voice-wake pipeline alive outside the settings test view.
actor VoiceWakeRuntime {
static let shared = VoiceWakeRuntime()
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "voicewake.runtime")
private var recognizer: SFSpeechRecognizer?
private var audioEngine = AVAudioEngine()
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var lastHeard: Date?
private var cooldownUntil: Date?
private var currentConfig: RuntimeConfig?
struct RuntimeConfig: Equatable {
let triggers: [String]
let micID: String?
let localeID: String?
}
func refresh(state: AppState) async {
let snapshot = await MainActor.run { () -> (Bool, RuntimeConfig) in
let enabled = state.swabbleEnabled
let config = RuntimeConfig(
triggers: state.swabbleTriggerWords,
micID: state.voiceWakeMicID.isEmpty ? nil : state.voiceWakeMicID,
localeID: state.voiceWakeLocaleID.isEmpty ? nil : state.voiceWakeLocaleID)
return (enabled, config)
}
guard voiceWakeSupported, snapshot.0 else {
stop()
return
}
guard PermissionManager.voiceWakePermissionsGranted() else {
logger.debug("voicewake runtime not starting: permissions missing")
stop()
return
}
let config = snapshot.1
if config == currentConfig, recognitionTask != nil {
return
}
stop()
await start(with: config)
}
private func start(with config: RuntimeConfig) async {
do {
configureSession(localeID: config.localeID)
self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
self.recognitionRequest?.shouldReportPartialResults = true
guard let request = self.recognitionRequest else { return }
let input = audioEngine.inputNode
let format = input.outputFormat(forBus: 0)
input.removeTap(onBus: 0)
input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in
request?.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
currentConfig = config
lastHeard = Date()
cooldownUntil = nil
self.recognitionTask = recognizer?.recognitionTask(with: request) { [weak self] result, error in
guard let self else { return }
let transcript = result?.bestTranscription.formattedString
Task { await self.handleRecognition(transcript: transcript, error: error, config: config) }
}
logger.info("voicewake runtime started")
} catch {
logger.error("voicewake runtime failed to start: \(error.localizedDescription, privacy: .public)")
stop()
}
}
private func stop() {
recognitionTask?.cancel()
recognitionTask = nil
recognitionRequest?.endAudio()
recognitionRequest = nil
audioEngine.inputNode.removeTap(onBus: 0)
audioEngine.stop()
currentConfig = nil
logger.debug("voicewake runtime stopped")
}
private func configureSession(localeID: String?) {
let locale = localeID.flatMap { Locale(identifier: $0) } ?? Locale(identifier: Locale.current.identifier)
self.recognizer = SFSpeechRecognizer(locale: locale)
}
private func handleRecognition(
transcript: String?,
error: Error?,
config: RuntimeConfig) async {
if let error {
logger.debug("voicewake recognition error: \(error.localizedDescription, privacy: .public)")
}
guard let transcript else { return }
if !transcript.isEmpty { lastHeard = Date() }
if Self.matches(text: transcript, triggers: config.triggers) {
let now = Date()
if let cooldown = cooldownUntil, now < cooldown {
return
}
cooldownUntil = now.addingTimeInterval(2.5)
await MainActor.run { AppStateStore.shared.triggerVoiceEars() }
let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig }
if forwardConfig.enabled {
Task.detached {
await VoiceWakeForwarder.forward(transcript: transcript, config: forwardConfig)
}
}
}
}
private static func matches(text: String, triggers: [String]) -> Bool {
guard !text.isEmpty else { return false }
let normalized = text.lowercased()
for trigger in triggers {
let t = trigger.lowercased().trimmingCharacters(in: .whitespacesAndNewlines)
if t.isEmpty { continue }
if normalized.contains(t) { return true }
}
return false
}
}