import AVFAudio
import Foundation
import Speech

/// Builds the audio-tap closure that feeds microphone buffers into a speech request.
///
/// The factory lives outside the `@MainActor`-isolated `VoiceWakeManager`, so the
/// returned closure carries no main-actor isolation of its own.
enum SpeechAudioTapFactory {
    /// Returns a tap block that appends every delivered buffer to `requestBox`.
    ///
    /// - Parameter requestBox: Wrapper around the recognition request receiving audio.
    /// - Returns: A `@Sendable` closure suitable for `AVAudioNode.installTap`; the
    ///   `AVAudioTime` argument is ignored.
    static func makeAppendTap(requestBox: SpeechRequestBox) -> @Sendable (AVAudioPCMBuffer, AVAudioTime) -> Void {
        { buffer, _ in
            requestBox.append(buffer)
        }
    }
}

/// Reference wrapper that lets an `SFSpeechAudioBufferRecognitionRequest` be captured
/// by a `@Sendable` closure.
///
/// Marked `@unchecked Sendable`: the compiler does not verify thread-safety here; the
/// box only forwards `append(_:)` to the wrapped request.
final class SpeechRequestBox: @unchecked Sendable {
    let request: SFSpeechAudioBufferRecognitionRequest

    init(request: SFSpeechAudioBufferRecognitionRequest) {
        self.request = request
    }

    /// Forwards `buffer` to the wrapped recognition request.
    func append(_ buffer: AVAudioPCMBuffer) {
        self.request.append(buffer)
    }
}

/// Errors raised by `VoiceWakeManager` while bringing up audio capture.
private enum VoiceWakeError: LocalizedError {
    /// The input node reported a format with no sample rate or no channels.
    case invalidInputFormat

    var errorDescription: String? {
        switch self {
        case .invalidInputFormat:
            "Audio input is unavailable (invalid input format)"
        }
    }
}

/// Listens on the microphone for the wake word "clawdis" and forwards whatever is
/// spoken after it to the handler registered via `configure(onCommand:)`.
@MainActor
final class VoiceWakeManager: NSObject, ObservableObject {
    @Published var isEnabled: Bool = false
    @Published var isListening: Bool = false
    @Published var statusText: String = "Off"

    private let audioEngine = AVAudioEngine()
    private var speechRecognizer: SFSpeechRecognizer?
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    /// Last command handed to `onCommand`; used to suppress identical repeats.
    private var lastDispatched: String?
    /// Tracks whether this manager currently has a tap on the input node, so teardown
    /// can remove it even when the engine is not running.
    private var isTapInstalled = false

    private var onCommand: (@Sendable (String) async -> Void)?

    /// Registers the handler invoked with each recognized command.
    func configure(onCommand: @escaping @Sendable (String) async -> Void) {
        self.onCommand = onCommand
    }

    /// Turns voice wake on or off. Enabling starts listening asynchronously;
    /// disabling stops synchronously.
    func setEnabled(_ enabled: Bool) {
        self.isEnabled = enabled
        if enabled {
            Task { await self.start() }
        } else {
            self.stop()
        }
    }

    /// Requests permissions, configures the audio session, and starts recognition.
    ///
    /// Does nothing when the manager is disabled or already listening. Failures are
    /// reported through `statusText`; nothing is thrown to the caller.
    func start() async {
        guard self.canBeginListening else { return }

        self.statusText = "Requesting permissions…"

        let micOk = await Self.requestMicrophonePermission()
        // State may have changed while suspended (e.g. `stop()` ran, or another
        // `start()` finished); do not override whatever that call published.
        guard self.canBeginListening else { return }
        guard micOk else {
            self.statusText = "Microphone permission denied"
            self.isListening = false
            return
        }

        let speechOk = await Self.requestSpeechPermission()
        guard self.canBeginListening else { return }
        guard speechOk else {
            self.statusText = "Speech recognition permission denied"
            self.isListening = false
            return
        }

        self.speechRecognizer = SFSpeechRecognizer()
        guard self.speechRecognizer != nil else {
            self.statusText = "Speech recognizer unavailable"
            self.isListening = false
            return
        }

        do {
            try Self.configureAudioSession()
            try self.startRecognition()
            self.isListening = true
            self.statusText = "Listening"
        } catch {
            // Release whatever was set up before the failure (tap, request, active
            // audio session) so a failed start does not keep the session active.
            self.tearDownRecognition()
            self.isListening = false
            self.statusText = "Start failed: \(error.localizedDescription)"
        }
    }

    /// Disables voice wake and releases the recognition task, audio tap and session.
    func stop() {
        self.isEnabled = false
        self.isListening = false
        self.statusText = "Off"
        self.tearDownRecognition()
    }

    /// True while a start attempt should proceed: enabled and not yet listening.
    private var canBeginListening: Bool {
        self.isEnabled && !self.isListening
    }

    /// Cancels recognition, stops the engine, removes the tap and deactivates the
    /// audio session. Does not touch the published state.
    private func tearDownRecognition() {
        self.recognitionTask?.cancel()
        self.recognitionTask = nil
        self.recognitionRequest = nil

        if self.audioEngine.isRunning {
            self.audioEngine.stop()
        }
        if self.isTapInstalled {
            self.audioEngine.inputNode.removeTap(onBus: 0)
            self.isTapInstalled = false
        }

        // Best effort: deactivation failure is not actionable here.
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }

    /// Creates a fresh recognition request, taps the input node and starts the engine.
    ///
    /// - Throws: `VoiceWakeError.invalidInputFormat` when the input node reports an
    ///   unusable format, or any error from `AVAudioEngine.start()`.
    private func startRecognition() throws {
        self.recognitionTask?.cancel()
        self.recognitionTask = nil

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true
        self.recognitionRequest = request

        let inputNode = self.audioEngine.inputNode
        inputNode.removeTap(onBus: 0)
        self.isTapInstalled = false

        let recordingFormat = inputNode.outputFormat(forBus: 0)
        // Installing a tap with a 0 Hz / 0-channel format raises an Objective-C
        // exception that Swift cannot catch, so surface it as a thrown error instead.
        guard recordingFormat.sampleRate > 0, recordingFormat.channelCount > 0 else {
            throw VoiceWakeError.invalidInputFormat
        }

        let requestBox = SpeechRequestBox(request: request)
        let tap = SpeechAudioTapFactory.makeAppendTap(requestBox: requestBox)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat, block: tap)
        self.isTapInstalled = true

        self.audioEngine.prepare()
        try self.audioEngine.start()

        self.recognitionTask = self.speechRecognizer?
            .recognitionTask(with: request) { [weak manager = self] result, error in
                // Hop to the main actor before touching manager state.
                Task { @MainActor in
                    manager?.handleRecognitionCallback(result: result, error: error)
                }
            }
    }

    /// Handles one recognizer callback: schedules a retry on error, otherwise
    /// dispatches the command that follows the wake word.
    private func handleRecognitionCallback(result: SFSpeechRecognitionResult?, error: Error?) {
        // Callbacks arrive through an unstructured task, so one may land after
        // `stop()`. It must not overwrite the "Off" state or dispatch a command.
        guard self.isEnabled else { return }

        if let error {
            self.statusText = "Recognizer error: \(error.localizedDescription)"
            self.isListening = false
            Task { [weak self] in
                // Short back-off before retrying; `start()` re-checks `isEnabled`.
                try? await Task.sleep(nanoseconds: 700_000_000)
                await self?.start()
            }
            return
        }

        guard let result else { return }
        let transcript = result.bestTranscription.formattedString
        guard let cmd = self.extractCommand(from: transcript) else { return }

        // Partial results repeat; skip a command identical to the last one sent.
        if cmd == self.lastDispatched { return }
        self.lastDispatched = cmd
        self.statusText = "Triggered"

        Task { [weak self] in
            guard let self else { return }
            await self.onCommand?(cmd)
            // NOTE(review): `start()` returns early while `isListening` is still
            // true, so this is a no-op unless listening stopped in the meantime.
            await self.startIfEnabled()
        }
    }

    /// Calls `start()` when the manager is still enabled.
    private func startIfEnabled() async {
        let shouldRestart = self.isEnabled
        if shouldRestart {
            await self.start()
        }
    }

    /// Returns the lowercased, trimmed text following the last occurrence of
    /// "clawdis" in `transcript`, or `nil` when the wake word is absent or nothing
    /// follows it.
    private func extractCommand(from transcript: String) -> String? {
        let lower = transcript.lowercased()
        guard let range = lower.range(of: "clawdis", options: .backwards) else { return nil }
        let after = lower[range.upperBound...]
        let trimmed = after.trimmingCharacters(in: .whitespacesAndNewlines)
        if trimmed.isEmpty { return nil }
        return trimmed
    }

    /// Sets the shared audio session to play-and-record / measurement mode and
    /// activates it.
    private static func configureAudioSession() throws {
        let session = AVAudioSession.sharedInstance()
        try session.setCategory(.playAndRecord, mode: .measurement, options: [
            .duckOthers,
            .mixWithOthers,
            .allowBluetoothHFP,
            .defaultToSpeaker,
        ])
        try session.setActive(true, options: [])
    }

    /// Bridges the callback-based record-permission request to async/await.
    private nonisolated static func requestMicrophonePermission() async -> Bool {
        await withCheckedContinuation(isolation: nil) { cont in
            AVAudioApplication.requestRecordPermission { ok in
                cont.resume(returning: ok)
            }
        }
    }

    /// Bridges the speech-recognition authorization request to async/await;
    /// only `.authorized` counts as success.
    private nonisolated static func requestSpeechPermission() async -> Bool {
        await withCheckedContinuation(isolation: nil) { cont in
            SFSpeechRecognizer.requestAuthorization { status in
                cont.resume(returning: status == .authorized)
            }
        }
    }
}