From 804177b1f540352fc8e1defb4ff4fc157bc04966 Mon Sep 17 00:00:00 2001 From: Xaden Ryan Date: Wed, 7 Jan 2026 17:08:28 -0700 Subject: [PATCH] Voice Wake: preserve mic selection across disconnects - Keep the chosen mic label visible when it disconnects and show a disconnected hint while falling back to system default. - Avoid clearing the preferred mic on device changes so it auto-restores when available. - Add audio input change and default-input logs in voice wake runtime/tester/meter to debug routing. --- apps/macos/Sources/Clawdbot/AppState.swift | 6 + .../Clawdbot/AudioInputDeviceObserver.swift | 216 ++++++++++++++++++ apps/macos/Sources/Clawdbot/Constants.swift | 1 + .../Sources/Clawdbot/MenuContentView.swift | 67 +++++- .../Sources/Clawdbot/MicLevelMonitor.swift | 28 ++- .../Sources/Clawdbot/VoiceWakeRuntime.swift | 10 + .../Sources/Clawdbot/VoiceWakeSettings.swift | 77 ++++++- .../Sources/Clawdbot/VoiceWakeTester.swift | 39 +++- 8 files changed, 426 insertions(+), 18 deletions(-) create mode 100644 apps/macos/Sources/Clawdbot/AudioInputDeviceObserver.swift diff --git a/apps/macos/Sources/Clawdbot/AppState.swift b/apps/macos/Sources/Clawdbot/AppState.swift index 8811ef58f..371961070 100644 --- a/apps/macos/Sources/Clawdbot/AppState.swift +++ b/apps/macos/Sources/Clawdbot/AppState.swift @@ -100,6 +100,10 @@ final class AppState { } } + var voiceWakeMicName: String { + didSet { self.ifNotPreview { UserDefaults.standard.set(self.voiceWakeMicName, forKey: voiceWakeMicNameKey) } } + } + var voiceWakeLocaleID: String { didSet { self.ifNotPreview { @@ -229,6 +233,7 @@ final class AppState { } self.showDockIcon = UserDefaults.standard.bool(forKey: showDockIconKey) self.voiceWakeMicID = UserDefaults.standard.string(forKey: voiceWakeMicKey) ?? "" + self.voiceWakeMicName = UserDefaults.standard.string(forKey: voiceWakeMicNameKey) ?? "" self.voiceWakeLocaleID = UserDefaults.standard.string(forKey: voiceWakeLocaleKey) ?? Locale.current.identifier self.voiceWakeAdditionalLocaleIDs = UserDefaults.standard .stringArray(forKey: voiceWakeAdditionalLocalesKey) ?? [] @@ -583,6 +588,7 @@ extension AppState { state.iconAnimationsEnabled = true state.showDockIcon = true state.voiceWakeMicID = "BuiltInMic" + state.voiceWakeMicName = "Built-in Microphone" state.voiceWakeLocaleID = Locale.current.identifier state.voiceWakeAdditionalLocaleIDs = ["en-US", "de-DE"] state.voicePushToTalkEnabled = false diff --git a/apps/macos/Sources/Clawdbot/AudioInputDeviceObserver.swift b/apps/macos/Sources/Clawdbot/AudioInputDeviceObserver.swift new file mode 100644 index 000000000..bc296972c --- /dev/null +++ b/apps/macos/Sources/Clawdbot/AudioInputDeviceObserver.swift @@ -0,0 +1,216 @@ +import CoreAudio +import Foundation +import OSLog + +final class AudioInputDeviceObserver { + private let logger = Logger(subsystem: "com.clawdbot", category: "audio.devices") + private var isActive = false + private var devicesListener: AudioObjectPropertyListenerBlock? + private var defaultInputListener: AudioObjectPropertyListenerBlock? + + static func defaultInputDeviceUID() -> String? { + let systemObject = AudioObjectID(kAudioObjectSystemObject) + var address = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDefaultInputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + var deviceID = AudioObjectID(0) + var size = UInt32(MemoryLayout.size) + let status = AudioObjectGetPropertyData( + systemObject, + &address, + 0, + nil, + &size, + &deviceID) + guard status == noErr, deviceID != 0 else { return nil } + return self.deviceUID(for: deviceID) + } + + static func aliveInputDeviceUIDs() -> Set { + let systemObject = AudioObjectID(kAudioObjectSystemObject) + var address = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDevices, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + var size: UInt32 = 0 + var status = AudioObjectGetPropertyDataSize(systemObject, &address, 0, nil, &size) + guard status == noErr, size > 0 else { return [] } + + let count = Int(size) / MemoryLayout.size + var deviceIDs = [AudioObjectID](repeating: 0, count: count) + status = AudioObjectGetPropertyData(systemObject, &address, 0, nil, &size, &deviceIDs) + guard status == noErr else { return [] } + + var output = Set() + for deviceID in deviceIDs { + guard self.deviceIsAlive(deviceID) else { continue } + guard self.deviceHasInput(deviceID) else { continue } + if let uid = self.deviceUID(for: deviceID) { + output.insert(uid) + } + } + return output + } + + static func defaultInputDeviceSummary() -> String { + let systemObject = AudioObjectID(kAudioObjectSystemObject) + var address = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDefaultInputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + var deviceID = AudioObjectID(0) + var size = UInt32(MemoryLayout.size) + let status = AudioObjectGetPropertyData( + systemObject, + &address, + 0, + nil, + &size, + &deviceID) + guard status == noErr, deviceID != 0 else { + return "defaultInput=unknown" + } + let uid = self.deviceUID(for: deviceID) ?? "unknown" + let name = self.deviceName(for: deviceID) ?? "unknown" + return "defaultInput=\(name) (\(uid))" + } + + func start(onChange: @escaping @Sendable () -> Void) { + guard !self.isActive else { return } + self.isActive = true + + let systemObject = AudioObjectID(kAudioObjectSystemObject) + let queue = DispatchQueue.main + + var devicesAddress = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDevices, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + let devicesListener: AudioObjectPropertyListenerBlock = { _, _ in + self.logDefaultInputChange(reason: "devices") + onChange() + } + let devicesStatus = AudioObjectAddPropertyListenerBlock( + systemObject, + &devicesAddress, + queue, + devicesListener) + + var defaultInputAddress = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDefaultInputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + let defaultInputListener: AudioObjectPropertyListenerBlock = { _, _ in + self.logDefaultInputChange(reason: "default") + onChange() + } + let defaultStatus = AudioObjectAddPropertyListenerBlock( + systemObject, + &defaultInputAddress, + queue, + defaultInputListener) + + if devicesStatus != noErr || defaultStatus != noErr { + self.logger.error("audio device observer install failed devices=\(devicesStatus) default=\(defaultStatus)") + } + + self.logger.info("audio device observer started (\(Self.defaultInputDeviceSummary(), privacy: .public))") + + self.devicesListener = devicesListener + self.defaultInputListener = defaultInputListener + } + + func stop() { + guard self.isActive else { return } + self.isActive = false + let systemObject = AudioObjectID(kAudioObjectSystemObject) + + if let devicesListener { + var devicesAddress = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDevices, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + _ = AudioObjectRemovePropertyListenerBlock( + systemObject, + &devicesAddress, + DispatchQueue.main, + devicesListener) + } + + if let defaultInputListener { + var defaultInputAddress = AudioObjectPropertyAddress( + mSelector: kAudioHardwarePropertyDefaultInputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + _ = AudioObjectRemovePropertyListenerBlock( + systemObject, + &defaultInputAddress, + DispatchQueue.main, + defaultInputListener) + } + + self.devicesListener = nil + self.defaultInputListener = nil + } + + private static func deviceUID(for deviceID: AudioObjectID) -> String? { + var address = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyDeviceUID, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + var uid: Unmanaged? + var size = UInt32(MemoryLayout?>.size) + let status = AudioObjectGetPropertyData(deviceID, &address, 0, nil, &size, &uid) + guard status == noErr, let uid else { return nil } + return uid.takeUnretainedValue() as String + } + + private static func deviceName(for deviceID: AudioObjectID) -> String? { + var address = AudioObjectPropertyAddress( + mSelector: kAudioObjectPropertyName, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + var name: Unmanaged? + var size = UInt32(MemoryLayout?>.size) + let status = AudioObjectGetPropertyData(deviceID, &address, 0, nil, &size, &name) + guard status == noErr, let name else { return nil } + return name.takeUnretainedValue() as String + } + + private static func deviceIsAlive(_ deviceID: AudioObjectID) -> Bool { + var address = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyDeviceIsAlive, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMain) + var alive: UInt32 = 0 + var size = UInt32(MemoryLayout.size) + let status = AudioObjectGetPropertyData(deviceID, &address, 0, nil, &size, &alive) + return status == noErr && alive != 0 + } + + private static func deviceHasInput(_ deviceID: AudioObjectID) -> Bool { + var address = AudioObjectPropertyAddress( + mSelector: kAudioDevicePropertyStreamConfiguration, + mScope: kAudioDevicePropertyScopeInput, + mElement: kAudioObjectPropertyElementMain) + var size: UInt32 = 0 + var status = AudioObjectGetPropertyDataSize(deviceID, &address, 0, nil, &size) + guard status == noErr, size > 0 else { return false } + + let raw = UnsafeMutableRawPointer.allocate( + byteCount: Int(size), + alignment: MemoryLayout.alignment) + defer { raw.deallocate() } + let bufferList = raw.bindMemory(to: AudioBufferList.self, capacity: 1) + status = AudioObjectGetPropertyData(deviceID, &address, 0, nil, &size, bufferList) + guard status == noErr else { return false } + + let buffers = UnsafeMutableAudioBufferListPointer(bufferList) + return buffers.contains(where: { $0.mNumberChannels > 0 }) + } + + private func logDefaultInputChange(reason: StaticString) { + self.logger.info("audio input changed (\(reason)) (\(Self.defaultInputDeviceSummary(), privacy: .public))") + } +} diff --git a/apps/macos/Sources/Clawdbot/Constants.swift b/apps/macos/Sources/Clawdbot/Constants.swift index 5e4b9b1e0..95391deac 100644 --- a/apps/macos/Sources/Clawdbot/Constants.swift +++ b/apps/macos/Sources/Clawdbot/Constants.swift @@ -13,6 +13,7 @@ let voiceWakeSendChimeKey = "clawdbot.voiceWakeSendChime" let showDockIconKey = "clawdbot.showDockIcon" let defaultVoiceWakeTriggers = ["clawd", "claude"] let voiceWakeMicKey = "clawdbot.voiceWakeMicID" +let voiceWakeMicNameKey = "clawdbot.voiceWakeMicName" let voiceWakeLocaleKey = "clawdbot.voiceWakeLocaleID" let voiceWakeAdditionalLocalesKey = "clawdbot.voiceWakeAdditionalLocaleIDs" let voicePushToTalkEnabledKey = "clawdbot.voicePushToTalkEnabled" diff --git a/apps/macos/Sources/Clawdbot/MenuContentView.swift b/apps/macos/Sources/Clawdbot/MenuContentView.swift index 83cd5b5b7..1e5be8683 100644 --- a/apps/macos/Sources/Clawdbot/MenuContentView.swift +++ b/apps/macos/Sources/Clawdbot/MenuContentView.swift @@ -18,6 +18,8 @@ struct MenuContent: View { @Environment(\.openSettings) private var openSettings @State private var availableMics: [AudioInputDevice] = [] @State private var loadingMics = false + @State private var micObserver = AudioInputDeviceObserver() + @State private var micRefreshTask: Task? @State private var browserControlEnabled = true @AppStorage(cameraEnabledKey) private var cameraEnabled: Bool = false @AppStorage(appLogLevelKey) private var appLogLevelRaw: String = AppLogLevel.default.rawValue @@ -143,6 +145,14 @@ struct MenuContent: View { .task(id: self.state.connectionMode) { await self.loadBrowserControlEnabled() } + .onAppear { + self.startMicObserver() + } + .onDisappear { + self.micRefreshTask?.cancel() + self.micRefreshTask = nil + self.micObserver.stop() + } } private var connectionLabel: String { @@ -440,13 +450,22 @@ struct MenuContent: View { if let match = self.availableMics.first(where: { $0.uid == self.state.voiceWakeMicID }) { return match.name } + if !self.state.voiceWakeMicName.isEmpty { return self.state.voiceWakeMicName } return "Unavailable" } private var microphoneMenuItems: some View { Group { + if self.isSelectedMicUnavailable { + Label("Disconnected (using System default)", systemImage: "exclamationmark.triangle") + .labelStyle(.titleAndIcon) + .foregroundStyle(.secondary) + .disabled(true) + Divider() + } Button { self.state.voiceWakeMicID = "" + self.state.voiceWakeMicName = "" } label: { Label(self.defaultMicLabel, systemImage: self.state.voiceWakeMicID.isEmpty ? "checkmark" : "") .labelStyle(.titleAndIcon) @@ -456,6 +475,7 @@ struct MenuContent: View { ForEach(self.availableMics) { mic in Button { self.state.voiceWakeMicID = mic.uid + self.state.voiceWakeMicName = mic.name } label: { Label(mic.name, systemImage: self.state.voiceWakeMicID == mic.uid ? "checkmark" : "") .labelStyle(.titleAndIcon) @@ -465,6 +485,12 @@ struct MenuContent: View { } } + private var isSelectedMicUnavailable: Bool { + let selected = self.state.voiceWakeMicID + guard !selected.isEmpty else { return false } + return !self.availableMics.contains(where: { $0.uid == selected }) + } + private var defaultMicLabel: String { if let host = Host.current().localizedName, !host.isEmpty { return "Auto-detect (\(host))" @@ -500,14 +526,53 @@ struct MenuContent: View { deviceTypes: [.external, .microphone], mediaType: .audio, position: .unspecified) - self.availableMics = discovery.devices + let connectedDevices = discovery.devices.filter { $0.isConnected } + self.availableMics = connectedDevices .sorted { lhs, rhs in lhs.localizedName.localizedCaseInsensitiveCompare(rhs.localizedName) == .orderedAscending } .map { AudioInputDevice(uid: $0.uniqueID, name: $0.localizedName) } + self.availableMics = self.filterAliveInputs(self.availableMics) + self.updateSelectedMicName() self.loadingMics = false } + private func startMicObserver() { + self.micObserver.start { + Task { @MainActor in + self.scheduleMicRefresh() + } + } + } + + @MainActor + private func scheduleMicRefresh() { + self.micRefreshTask?.cancel() + self.micRefreshTask = Task { @MainActor in + try? await Task.sleep(nanoseconds: 300_000_000) + guard !Task.isCancelled else { return } + await self.loadMicrophones(force: true) + } + } + + private func filterAliveInputs(_ inputs: [AudioInputDevice]) -> [AudioInputDevice] { + let aliveUIDs = AudioInputDeviceObserver.aliveInputDeviceUIDs() + guard !aliveUIDs.isEmpty else { return inputs } + return inputs.filter { aliveUIDs.contains($0.uid) } + } + + @MainActor + private func updateSelectedMicName() { + let selected = self.state.voiceWakeMicID + if selected.isEmpty { + self.state.voiceWakeMicName = "" + return + } + if let match = self.availableMics.first(where: { $0.uid == selected }) { + self.state.voiceWakeMicName = match.name + } + } + private struct AudioInputDevice: Identifiable, Equatable { let uid: String let name: String diff --git a/apps/macos/Sources/Clawdbot/MicLevelMonitor.swift b/apps/macos/Sources/Clawdbot/MicLevelMonitor.swift index 1cda23708..2b2b4c99b 100644 --- a/apps/macos/Sources/Clawdbot/MicLevelMonitor.swift +++ b/apps/macos/Sources/Clawdbot/MicLevelMonitor.swift @@ -1,8 +1,10 @@ import AVFoundation +import OSLog import SwiftUI actor MicLevelMonitor { - private let engine = AVAudioEngine() + private let logger = Logger(subsystem: "com.clawdbot", category: "voicewake.meter") + private var engine: AVAudioEngine? private var update: (@Sendable (Double) -> Void)? private var running = false private var smoothedLevel: Double = 0 @@ -10,23 +12,37 @@ actor MicLevelMonitor { func start(onLevel: @Sendable @escaping (Double) -> Void) async throws { self.update = onLevel if self.running { return } - let input = self.engine.inputNode + self.logger.info( + "mic level monitor start (\(AudioInputDeviceObserver.defaultInputDeviceSummary(), privacy: .public))") + let engine = AVAudioEngine() + self.engine = engine + let input = engine.inputNode let format = input.outputFormat(forBus: 0) + guard format.channelCount > 0, format.sampleRate > 0 else { + self.engine = nil + throw NSError( + domain: "MicLevelMonitor", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "No audio input available"]) + } input.removeTap(onBus: 0) input.installTap(onBus: 0, bufferSize: 512, format: format) { [weak self] buffer, _ in guard let self else { return } let level = Self.normalizedLevel(from: buffer) Task { await self.push(level: level) } } - self.engine.prepare() - try self.engine.start() + engine.prepare() + try engine.start() self.running = true } func stop() { guard self.running else { return } - self.engine.inputNode.removeTap(onBus: 0) - self.engine.stop() + if let engine { + engine.inputNode.removeTap(onBus: 0) + engine.stop() + } + self.engine = nil self.running = false } diff --git a/apps/macos/Sources/Clawdbot/VoiceWakeRuntime.swift b/apps/macos/Sources/Clawdbot/VoiceWakeRuntime.swift index a0f661051..e025f9ccc 100644 --- a/apps/macos/Sources/Clawdbot/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/Clawdbot/VoiceWakeRuntime.swift @@ -139,6 +139,12 @@ actor VoiceWakeRuntime { let input = audioEngine.inputNode let format = input.outputFormat(forBus: 0) + guard format.channelCount > 0, format.sampleRate > 0 else { + throw NSError( + domain: "VoiceWakeRuntime", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "No audio input available"]) + } input.removeTap(onBus: 0) input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak self, weak request] buffer, _ in request?.append(buffer) @@ -173,6 +179,10 @@ actor VoiceWakeRuntime { Task { await self.handleRecognition(update, config: config) } } + let preferred = config.micID?.isEmpty == false ? config.micID! : "system-default" + self.logger.info( + "voicewake runtime input preferred=\(preferred, privacy: .public) " + + "\(AudioInputDeviceObserver.defaultInputDeviceSummary(), privacy: .public)") self.logger.info("voicewake runtime started") DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "started", fields: [ "locale": config.localeID ?? "", diff --git a/apps/macos/Sources/Clawdbot/VoiceWakeSettings.swift b/apps/macos/Sources/Clawdbot/VoiceWakeSettings.swift index 4f486b653..9449eeb5d 100644 --- a/apps/macos/Sources/Clawdbot/VoiceWakeSettings.swift +++ b/apps/macos/Sources/Clawdbot/VoiceWakeSettings.swift @@ -18,6 +18,8 @@ struct VoiceWakeSettings: View { @State private var meterLevel: Double = 0 @State private var meterError: String? private let meter = MicLevelMonitor() + @State private var micObserver = AudioInputDeviceObserver() + @State private var micRefreshTask: Task? @State private var availableLocales: [Locale] = [] private let fieldLabelWidth: CGFloat = 140 private let controlWidth: CGFloat = 240 @@ -100,8 +102,13 @@ struct VoiceWakeSettings: View { guard !self.isPreview else { return } await self.restartMeter() } + .onAppear { + guard !self.isPreview else { return } + self.startMicObserver() + } .onChange(of: self.state.voiceWakeMicID) { _, _ in guard !self.isPreview else { return } + self.updateSelectedMicName() Task { await self.restartMeter() } } .onChange(of: self.isActive) { _, active in @@ -111,7 +118,12 @@ struct VoiceWakeSettings: View { self.isTesting = false self.testState = .idle self.testTimeoutTask?.cancel() + self.micRefreshTask?.cancel() + self.micRefreshTask = nil Task { await self.meter.stop() } + self.micObserver.stop() + } else { + self.startMicObserver() } } .onDisappear { @@ -120,6 +132,9 @@ struct VoiceWakeSettings: View { self.isTesting = false self.testState = .idle self.testTimeoutTask?.cancel() + self.micRefreshTask?.cancel() + self.micRefreshTask = nil + self.micObserver.stop() Task { await self.meter.stop() } } } @@ -400,6 +415,10 @@ struct VoiceWakeSettings: View { .frame(width: self.fieldLabelWidth, alignment: .leading) Picker("Microphone", selection: self.$state.voiceWakeMicID) { Text("System default").tag("") + if self.isSelectedMicUnavailable { + Text(self.state.voiceWakeMicName.isEmpty ? "Unavailable" : self.state.voiceWakeMicName) + .tag(self.state.voiceWakeMicID) + } ForEach(self.availableMics) { mic in Text(mic.name).tag(mic.uid) } @@ -407,6 +426,15 @@ struct VoiceWakeSettings: View { .labelsHidden() .frame(width: self.controlWidth) } + if self.isSelectedMicUnavailable { + HStack(spacing: 10) { + Color.clear.frame(width: self.fieldLabelWidth, height: 1) + Text("Disconnected (using System default)") + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(1) + } + } if self.loadingMics { ProgressView().controlSize(.small) } @@ -499,17 +527,60 @@ struct VoiceWakeSettings: View { } @MainActor - private func loadMicsIfNeeded() async { - guard self.availableMics.isEmpty, !self.loadingMics else { return } + private func loadMicsIfNeeded(force: Bool = false) async { + guard (force || self.availableMics.isEmpty), !self.loadingMics else { return } self.loadingMics = true let discovery = AVCaptureDevice.DiscoverySession( deviceTypes: [.external, .microphone], mediaType: .audio, position: .unspecified) - self.availableMics = discovery.devices.map { AudioInputDevice(uid: $0.uniqueID, name: $0.localizedName) } + let aliveUIDs = AudioInputDeviceObserver.aliveInputDeviceUIDs() + let connectedDevices = discovery.devices.filter { $0.isConnected } + let devices = aliveUIDs.isEmpty + ? connectedDevices + : connectedDevices.filter { aliveUIDs.contains($0.uniqueID) } + self.availableMics = devices.map { AudioInputDevice(uid: $0.uniqueID, name: $0.localizedName) } + self.updateSelectedMicName() self.loadingMics = false } + private var isSelectedMicUnavailable: Bool { + let selected = self.state.voiceWakeMicID + guard !selected.isEmpty else { return false } + return !self.availableMics.contains(where: { $0.uid == selected }) + } + + @MainActor + private func updateSelectedMicName() { + let selected = self.state.voiceWakeMicID + if selected.isEmpty { + self.state.voiceWakeMicName = "" + return + } + if let match = self.availableMics.first(where: { $0.uid == selected }) { + self.state.voiceWakeMicName = match.name + } + } + + private func startMicObserver() { + self.micObserver.start { + Task { @MainActor in + self.scheduleMicRefresh() + } + } + } + + @MainActor + private func scheduleMicRefresh() { + self.micRefreshTask?.cancel() + self.micRefreshTask = Task { @MainActor in + try? await Task.sleep(nanoseconds: 300_000_000) + guard !Task.isCancelled else { return } + await self.loadMicsIfNeeded(force: true) + await self.restartMeter() + } + } + @MainActor private func loadLocalesIfNeeded() async { guard self.availableLocales.isEmpty else { return } diff --git a/apps/macos/Sources/Clawdbot/VoiceWakeTester.swift b/apps/macos/Sources/Clawdbot/VoiceWakeTester.swift index 05bb98342..64c51590c 100644 --- a/apps/macos/Sources/Clawdbot/VoiceWakeTester.swift +++ b/apps/macos/Sources/Clawdbot/VoiceWakeTester.swift @@ -15,7 +15,7 @@ enum VoiceWakeTestState: Equatable { final class VoiceWakeTester { private let recognizer: SFSpeechRecognizer? - private let audioEngine = AVAudioEngine() + private var audioEngine: AVAudioEngine? private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? private var recognitionTask: SFSpeechRecognitionTask? private var isStopping = false @@ -86,22 +86,33 @@ final class VoiceWakeTester { userInfo: [NSLocalizedDescriptionKey: "Microphone or speech permission denied"]) } + self.logInputSelection(preferredMicID: micID) self.configureSession(preferredMicID: micID) + let engine = AVAudioEngine() + self.audioEngine = engine + self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest() self.recognitionRequest?.shouldReportPartialResults = true self.recognitionRequest?.taskHint = .dictation let request = self.recognitionRequest - let inputNode = self.audioEngine.inputNode + let inputNode = engine.inputNode let format = inputNode.outputFormat(forBus: 0) + guard format.channelCount > 0, format.sampleRate > 0 else { + self.audioEngine = nil + throw NSError( + domain: "VoiceWakeTester", + code: 4, + userInfo: [NSLocalizedDescriptionKey: "No audio input available"]) + } inputNode.removeTap(onBus: 0) inputNode.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in request?.append(buffer) } - self.audioEngine.prepare() - try self.audioEngine.start() + engine.prepare() + try engine.start() DispatchQueue.main.async { onUpdate(.listening) } @@ -156,9 +167,11 @@ final class VoiceWakeTester { return } self.isFinalizing = true - self.audioEngine.inputNode.removeTap(onBus: 0) self.recognitionRequest?.endAudio() - self.audioEngine.stop() + if let engine = self.audioEngine { + engine.inputNode.removeTap(onBus: 0) + engine.stop() + } Task { [weak self] in guard let self else { return } try? await Task.sleep(nanoseconds: UInt64(timeout * 1_000_000_000)) @@ -171,12 +184,15 @@ final class VoiceWakeTester { private func stop(force: Bool) { if force { self.isStopping = true } self.isFinalizing = false - self.audioEngine.stop() self.recognitionRequest?.endAudio() self.recognitionTask?.cancel() self.recognitionTask = nil self.recognitionRequest = nil - self.audioEngine.inputNode.removeTap(onBus: 0) + if let engine = self.audioEngine { + engine.inputNode.removeTap(onBus: 0) + engine.stop() + } + self.audioEngine = nil self.holdingAfterDetect = false self.detectedText = nil self.lastHeard = nil @@ -435,6 +451,13 @@ final class VoiceWakeTester { _ = preferredMicID } + private func logInputSelection(preferredMicID: String?) { + let preferred = (preferredMicID?.isEmpty == false) ? preferredMicID! : "system-default" + self.logger.info( + "voicewake test input preferred=\(preferred, privacy: .public) " + + "\(AudioInputDeviceObserver.defaultInputDeviceSummary(), privacy: .public)") + } + private nonisolated static func ensurePermissions() async throws -> Bool { let speechStatus = SFSpeechRecognizer.authorizationStatus() if speechStatus == .notDetermined {