From e906b8745006acf4fabf0c52b0a9317269058318 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sun, 7 Dec 2025 02:18:37 +0100
Subject: [PATCH] VoiceWake: keep listening until silence, gate enable on permissions

---
Reviewer notes (this region is ignored by `git am`; it is not part of the commit):

* This copy was recovered from a whitespace-collapsed paste. Line breaks were
  rebuilt from the hunk header counts and the diffstat, which all reconcile.
  Indentation is a best-effort reconstruction (4 spaces assumed), so apply with
  `git apply --ignore-whitespace` -- TODO confirm against the original commit.
* The From: header carries no address in this copy; presumably it was stripped
  together with other angle-bracket spans. Both `voiceWakeBinding` declarations
  had lost their generic argument and are restored as `Binding<Bool>`.
  Restore the author address before running `git am`.
* NOTE(review): holdUntilSilence() caps the hold at detectionStart + 10 s, and
  detectionStart is assigned right after `onUpdate(.listening)`, i.e. when
  listening begins rather than when the wake phrase matches. A match arriving
  later than 10 s after listening started ends the hold on the first loop pass.
  Confirm this is intended.
* NOTE(review): holdingAfterDetect is assigned but never read in these hunks,
  and the matched branch no longer calls stop() immediately. If the recognizer
  delivers further matching results during the hold, each one presumably
  re-forwards the transcript and starts another hold task -- verify.

 apps/macos/Sources/Clawdis/AppState.swift     | 24 ++++++++++
 apps/macos/Sources/Clawdis/MenuBar.swift      | 10 +++-
 .../Sources/Clawdis/PermissionManager.swift   | 11 +++++
 .../Sources/Clawdis/VoiceWakeSettings.swift   | 46 ++++++++++++++++++-
 4 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/apps/macos/Sources/Clawdis/AppState.swift b/apps/macos/Sources/Clawdis/AppState.swift
index bdd70c622..b6eaee426 100644
--- a/apps/macos/Sources/Clawdis/AppState.swift
+++ b/apps/macos/Sources/Clawdis/AppState.swift
@@ -102,6 +102,10 @@ final class AppState: ObservableObject {
         self.voiceWakeForwardIdentity = UserDefaults.standard.string(forKey: voiceWakeForwardIdentityKey) ?? ""
         self.voiceWakeForwardCommand = UserDefaults.standard
             .string(forKey: voiceWakeForwardCommandKey) ?? defaultVoiceWakeForwardCommand
+
+        if self.swabbleEnabled && !PermissionManager.voiceWakePermissionsGranted() {
+            self.swabbleEnabled = false
+        }
     }
 
     func triggerVoiceEars(ttl: TimeInterval = 5) {
@@ -113,6 +117,26 @@ final class AppState: ObservableObject {
         }
     }
 
+    func setVoiceWakeEnabled(_ enabled: Bool) async {
+        guard voiceWakeSupported else {
+            self.swabbleEnabled = false
+            return
+        }
+
+        if !enabled {
+            self.swabbleEnabled = false
+            return
+        }
+
+        if PermissionManager.voiceWakePermissionsGranted() {
+            self.swabbleEnabled = true
+            return
+        }
+
+        let granted = await PermissionManager.ensureVoiceWakePermissions(interactive: true)
+        self.swabbleEnabled = granted
+    }
+
     func setWorking(_ working: Bool) {
         self.isWorking = working
     }
diff --git a/apps/macos/Sources/Clawdis/MenuBar.swift b/apps/macos/Sources/Clawdis/MenuBar.swift
index 7ff44045d..a4f95e370 100644
--- a/apps/macos/Sources/Clawdis/MenuBar.swift
+++ b/apps/macos/Sources/Clawdis/MenuBar.swift
@@ -55,7 +55,7 @@ private struct MenuContent: View {
         VStack(alignment: .leading, spacing: 8) {
             Toggle(isOn: self.activeBinding) { Text("Clawdis Active") }
             self.relayStatusRow
-            Toggle(isOn: self.$state.swabbleEnabled) { Text("Voice Wake") }
+            Toggle(isOn: self.voiceWakeBinding) { Text("Voice Wake") }
                 .disabled(!voiceWakeSupported)
                 .opacity(voiceWakeSupported ? 1 : 0.5)
             Button("Open Chat") { WebChatManager.shared.show(sessionKey: self.primarySessionKey()) }
@@ -101,6 +101,14 @@ private struct MenuContent: View {
         Binding(get: { !self.state.isPaused }, set: { self.state.isPaused = !$0 })
     }
 
+    private var voiceWakeBinding: Binding<Bool> {
+        Binding(
+            get: { self.state.swabbleEnabled },
+            set: { newValue in
+                Task { await self.state.setVoiceWakeEnabled(newValue) }
+            })
+    }
+
     private func primarySessionKey() -> String {
         // Prefer canonical main session; fall back to most recent.
         let storePath = SessionLoader.defaultStorePath
diff --git a/apps/macos/Sources/Clawdis/PermissionManager.swift b/apps/macos/Sources/Clawdis/PermissionManager.swift
index 8e8eab974..d675e2aa7 100644
--- a/apps/macos/Sources/Clawdis/PermissionManager.swift
+++ b/apps/macos/Sources/Clawdis/PermissionManager.swift
@@ -82,6 +82,17 @@ enum PermissionManager {
         return results
     }
 
+    static func voiceWakePermissionsGranted() -> Bool {
+        let mic = AVCaptureDevice.authorizationStatus(for: .audio) == .authorized
+        let speech = SFSpeechRecognizer.authorizationStatus() == .authorized
+        return mic && speech
+    }
+
+    static func ensureVoiceWakePermissions(interactive: Bool) async -> Bool {
+        let results = await self.ensure([.microphone, .speechRecognition], interactive: interactive)
+        return results[.microphone] == true && results[.speechRecognition] == true
+    }
+
     static func status(_ caps: [Capability] = Capability.allCases) async -> [Capability: Bool] {
         var results: [Capability: Bool] = [:]
         for cap in caps {
diff --git a/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift b/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift
index 3090f6c94..8b2e4d4d4 100644
--- a/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift
+++ b/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift
@@ -82,6 +82,10 @@ final class VoiceWakeTester {
     private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
     private var recognitionTask: SFSpeechRecognitionTask?
     private var isStopping = false
+    private var detectionStart: Date?
+    private var lastHeard: Date?
+    private var holdingAfterDetect = false
+    private var detectedText: String?
 
     init(locale: Locale = .current) {
         self.recognizer = SFSpeechRecognizer(locale: locale)
@@ -143,6 +147,9 @@ final class VoiceWakeTester {
             onUpdate(.listening)
         }
 
+        self.detectionStart = Date()
+        self.lastHeard = self.detectionStart
+
         guard let request = recognitionRequest else { return }
 
         self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
@@ -180,14 +187,19 @@ final class VoiceWakeTester {
         errorMessage: String?,
         onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void)
     {
+        if !text.isEmpty {
+            self.lastHeard = Date()
+        }
         if matched, !text.isEmpty {
-            self.stop()
+            self.holdingAfterDetect = true
+            self.detectedText = text
             AppStateStore.shared.triggerVoiceEars()
             let config = AppStateStore.shared.voiceWakeForwardConfig
             Task.detached {
                 await VoiceWakeForwarder.forward(transcript: text, config: config)
             }
             onUpdate(.detected(text))
+            self.holdUntilSilence(onUpdate: onUpdate)
             return
         }
         if let errorMessage {
@@ -203,6 +215,28 @@ final class VoiceWakeTester {
         }
     }
 
+    private func holdUntilSilence(onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void) {
+        Task { @MainActor [weak self] in
+            guard let self else { return }
+            let start = self.detectionStart ?? Date()
+            let deadline = start.addingTimeInterval(10)
+            while !self.isStopping {
+                let now = Date()
+                if now >= deadline { break }
+                if let last = self.lastHeard, now.timeIntervalSince(last) >= 1 {
+                    break
+                }
+                try? await Task.sleep(nanoseconds: 250_000_000)
+            }
+            if !self.isStopping {
+                self.stop()
+                if let detectedText {
+                    onUpdate(.detected(detectedText))
+                }
+            }
+        }
+    }
+
     private func configureSession(preferredMicID: String?) {
         _ = preferredMicID
     }
@@ -262,6 +296,14 @@ struct VoiceWakeSettings: View {
     @State private var showForwardAdvanced = false
     @State private var forwardStatus: ForwardStatus = .idle
 
+    private var voiceWakeBinding: Binding<Bool> {
+        Binding(
+            get: { self.state.swabbleEnabled },
+            set: { newValue in
+                Task { await self.state.setVoiceWakeEnabled(newValue) }
+            })
+    }
+
     private struct IndexedWord: Identifiable {
         let id: Int
         let value: String
@@ -274,7 +316,7 @@ struct VoiceWakeSettings: View {
                 title: "Enable Voice Wake",
                 subtitle: "Listen for a wake phrase (e.g. \"Claude\") before running voice commands. " +
                     "Voice recognition runs fully on-device.",
-                binding: self.$state.swabbleEnabled)
+                binding: self.voiceWakeBinding)
                 .disabled(!voiceWakeSupported)
 
                 if !voiceWakeSupported {