Files
clawdbot/apps/macos/Sources/Clawdbot/VoiceWakeSettings.swift
Xaden Ryan 804177b1f5 Voice Wake: preserve mic selection across disconnects
- Keep the chosen mic label visible when it disconnects and show a disconnected hint while falling back to system default.
- Avoid clearing the preferred mic on device changes so it auto-restores when available.
- Add audio input change and default-input logs in voice wake runtime/tester/meter to debug routing.
2026-01-08 01:36:15 +00:00

692 lines
26 KiB
Swift

import AppKit
import AVFoundation
import Observation
import SwabbleKit
import Speech
import SwiftUI
import UniformTypeIdentifiers
struct VoiceWakeSettings: View {
/// App state that the controls in this pane read and write through bindings.
@Bindable var state: AppState
/// Whether this pane is currently active; when it flips to `false` the view stops the tester,
/// the level meter, and the mic observer.
let isActive: Bool
/// Current phase/result of the wake-word test shown in the test card.
@State private var testState: VoiceWakeTestState = .idle
/// Runs the wake-word test session started and stopped from `toggleTest()`.
@State private var tester = VoiceWakeTester()
/// `true` while a test session is in progress.
@State private var isTesting = false
/// Pending timeout for the running test; cancelled whenever the test ends or restarts.
@State private var testTimeoutTask: Task<Void, Never>?
/// Input devices offered in the microphone picker.
@State private var availableMics: [AudioInputDevice] = []
/// Guards against overlapping device scans and drives the small progress indicator.
@State private var loadingMics = false
/// Most recent level reported by `meter`; rendered by the live level bar.
@State private var meterLevel: Double = 0
/// Message shown under the level bar when the meter fails to start.
@State private var meterError: String?
// NOTE(review): unlike its neighbours this is a plain `let`, not `@State` — confirm the same
// monitor instance is the one being started and stopped across view re-initialisations.
private let meter = MicLevelMonitor()
/// Notifies the view when audio input devices change so the mic list can be refreshed.
@State private var micObserver = AudioInputDeviceObserver()
/// Debounced refresh scheduled by `scheduleMicRefresh()`.
@State private var micRefreshTask: Task<Void, Never>?
/// Locales offered in the recognition-language pickers.
@State private var availableLocales: [Locale] = []
/// Width of the leading label column shared by the form rows.
private let fieldLabelWidth: CGFloat = 140
/// Width of the control column shared by the form rows.
private let controlWidth: CGFloat = 240
/// When `true`, all lifecycle hooks in `body` return early.
private let isPreview = ProcessInfo.processInfo.isPreview
/// A selectable audio input, keyed by the device's unique ID.
private struct AudioInputDevice: Identifiable, Equatable {
    /// Unique device identifier; also used as the picker tag.
    let uid: String
    /// Human-readable device name shown in the picker.
    let name: String

    /// `Identifiable` conformance: the UID doubles as the identity.
    var id: String { uid }
}
/// A trigger word paired with its position in `swabbleTriggerWords`, used as a table row.
private struct IndexedWord: Identifiable {
/// Index of the word in the trigger-word array; also the row identity.
let id: Int
/// The trigger word text at that index.
let value: String
}
/// Binding behind the "Enable Voice Wake" toggle.
///
/// Reads `state.swabbleEnabled`; writes are forwarded to `state.setVoiceWakeEnabled(_:)`
/// from a newly spawned task instead of mutating the flag directly.
private var voiceWakeBinding: Binding<Bool> {
    Binding<Bool>(
        get: { self.state.swabbleEnabled },
        set: { isEnabled in
            Task {
                await self.state.setVoiceWakeEnabled(isEnabled)
            }
        })
}
/// The scrollable Voice Wake settings pane plus the lifecycle hooks that manage its audio resources.
///
/// On first appearance the view loads microphones and locales, starts the level meter, and begins
/// observing input-device changes. When `isActive` turns `false` or the view disappears, the tester,
/// meter, and observer are stopped. When `isActive` turns `true` again the observer is restarted and
/// a mic refresh is scheduled so the device list and the level meter come back as well.
/// Every hook is skipped when running in a preview.
var body: some View {
    ScrollView(.vertical) {
        VStack(alignment: .leading, spacing: 14) {
            SettingsToggleRow(
                title: "Enable Voice Wake",
                subtitle: "Listen for a wake phrase (e.g. \"Claude\") before running voice commands. "
                    + "Voice recognition runs fully on-device.",
                binding: self.voiceWakeBinding)
                .disabled(!voiceWakeSupported)
            SettingsToggleRow(
                title: "Hold Right Option to talk",
                subtitle: """
                Push-to-talk mode that starts listening while you hold the key
                and shows the preview overlay.
                """,
                binding: self.$state.voicePushToTalkEnabled)
                .disabled(!voiceWakeSupported)
            if !voiceWakeSupported {
                Label("Voice Wake requires macOS 26 or newer.", systemImage: "exclamationmark.triangle.fill")
                    .font(.callout)
                    .foregroundStyle(.yellow)
                    .padding(8)
                    .background(Color.secondary.opacity(0.15))
                    .clipShape(RoundedRectangle(cornerRadius: 8))
            }
            self.localePicker
            self.micPicker
            self.levelMeter
            VoiceWakeTestCard(
                testState: self.$testState,
                isTesting: self.$isTesting,
                onToggle: self.toggleTest)
            self.chimeSection
            self.triggerTable
            Spacer(minLength: 8)
        }
        .frame(maxWidth: .infinity, alignment: .leading)
        .padding(.horizontal, 12)
    }
    .task {
        guard !self.isPreview else { return }
        await self.loadMicsIfNeeded()
    }
    .task {
        guard !self.isPreview else { return }
        await self.loadLocalesIfNeeded()
    }
    .task {
        guard !self.isPreview else { return }
        await self.restartMeter()
    }
    .onAppear {
        guard !self.isPreview else { return }
        self.startMicObserver()
    }
    .onChange(of: self.state.voiceWakeMicID) { _, _ in
        guard !self.isPreview else { return }
        self.updateSelectedMicName()
        Task { await self.restartMeter() }
    }
    .onChange(of: self.isActive) { _, active in
        guard !self.isPreview else { return }
        if active {
            self.startMicObserver()
            // Deactivation stopped both the meter and the observer. Devices may have changed while
            // nothing was watching, so reload the list and bring the level meter back.
            self.scheduleMicRefresh()
        } else {
            self.stopAudioActivity()
        }
    }
    .onDisappear {
        guard !self.isPreview else { return }
        self.stopAudioActivity()
    }
}

/// Stops the running test, pending timers, the level meter, and the device observer.
/// Shared by the deactivation and disappearance paths so the two cannot drift apart.
@MainActor
private func stopAudioActivity() {
    self.tester.stop()
    self.isTesting = false
    self.testState = .idle
    self.testTimeoutTask?.cancel()
    self.micRefreshTask?.cancel()
    self.micRefreshTask = nil
    Task { await self.meter.stop() }
    self.micObserver.stop()
}
/// Trigger words paired with their array index, in array order, for display in the table.
private var indexedWords: [IndexedWord] {
    let words = self.state.swabbleTriggerWords
    return words.indices.map { position in
        IndexedWord(id: position, value: words[position])
    }
}
/// Editable table of trigger words with "Add word" and "Reset defaults" actions.
private var triggerTable: some View {
    // "Add word" stays disabled while any existing entry is blank.
    let hasBlankEntry = self.state.swabbleTriggerWords.contains { word in
        word.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
    }
    let outline = RoundedRectangle(cornerRadius: 6)

    return VStack(alignment: .leading, spacing: 8) {
        HStack {
            Text("Trigger words")
                .font(.callout.weight(.semibold))
            Spacer()
            Button(action: { self.addWord() }) {
                Label("Add word", systemImage: "plus")
            }
            .disabled(hasBlankEntry)
            Button("Reset defaults") {
                self.state.swabbleTriggerWords = defaultVoiceWakeTriggers
            }
        }
        Table(self.indexedWords) {
            TableColumn("Word") { entry in
                TextField("Wake word", text: self.binding(for: entry.id))
                    .textFieldStyle(.roundedBorder)
            }
            TableColumn("") { entry in
                Button(action: { self.removeWord(at: entry.id) }) {
                    Image(systemName: "trash")
                }
                .buttonStyle(.borderless)
                .help("Remove trigger word")
            }
            .width(36)
        }
        .frame(minHeight: 180)
        .clipShape(outline)
        .overlay(
            outline.stroke(Color.secondary.opacity(0.25), lineWidth: 1))
        Text(
            "Clawdbot reacts when any trigger appears in a transcription. "
                + "Keep them short to avoid false positives.")
            .font(.footnote)
            .foregroundStyle(.secondary)
            .fixedSize(horizontal: false, vertical: true)
    }
}
/// "Sounds" section: one chime row for the trigger sound and one for the send sound.
private var chimeSection: some View {
    let triggerChime = self.$state.voiceWakeTriggerChime
    let sendChime = self.$state.voiceWakeSendChime

    return VStack(alignment: .leading, spacing: 10) {
        HStack(alignment: .firstTextBaseline, spacing: 10) {
            Text("Sounds")
                .font(.callout.weight(.semibold))
            Spacer()
        }
        self.chimeRow(title: "Trigger sound", selection: triggerChime)
        self.chimeRow(title: "Send sound", selection: sendChime)
    }
    .padding(.top, 4)
}
/// Appends a blank trigger word so the user can type a new one into the table.
private func addWord() {
    self.state.swabbleTriggerWords += [""]
}
/// Removes the trigger word at `index`; does nothing when the index is out of range.
/// - Parameter index: Position in `swabbleTriggerWords` to remove.
private func removeWord(at index: Int) {
    if self.state.swabbleTriggerWords.indices.contains(index) {
        self.state.swabbleTriggerWords.remove(at: index)
    }
}
/// Bounds-checked binding to the trigger word at `index`.
///
/// Reads return an empty string and writes are dropped when the index is no longer valid.
/// - Parameter index: Position in `swabbleTriggerWords`.
/// - Returns: A binding suitable for the table's text field.
private func binding(for index: Int) -> Binding<String> {
    Binding(
        get: {
            let words = self.state.swabbleTriggerWords
            return words.indices.contains(index) ? words[index] : ""
        },
        set: { updated in
            if self.state.swabbleTriggerWords.indices.contains(index) {
                self.state.swabbleTriggerWords[index] = updated
            }
        })
}
/// Starts a wake-word test, or finalizes the one currently running.
///
/// - When Voice Wake is unsupported, the test state is set to a failure message and nothing starts.
/// - When a test is running, the tester is asked to finalize. If the state is still `.finalizing`
///   two seconds later, the tester is stopped and the test is marked as "Stopped".
/// - Otherwise a new session starts with the sanitized triggers, the selected mic (or the system
///   default when none is selected), and the selected locale. A 10-second timeout ends a session
///   that never reaches `.detected` or `.failed`.
///
/// Both the finalize watchdog and the run timeout are stored in `testTimeoutTask`, so starting a
/// new test, a terminal update, or view teardown cancels whichever one is pending.
private func toggleTest() {
    guard voiceWakeSupported else {
        self.testState = .failed("Voice Wake requires macOS 26 or newer.")
        return
    }
    if self.isTesting {
        self.tester.finalize()
        self.isTesting = false
        self.testState = .finalizing
        // Replace the run timeout with the finalize watchdog. Tracking it here means a test that is
        // restarted within the two-second window cancels it instead of being failed by a stale timer.
        self.testTimeoutTask?.cancel()
        self.testTimeoutTask = Task { @MainActor in
            try? await Task.sleep(nanoseconds: 2_000_000_000)
            guard !Task.isCancelled else { return }
            if self.testState == .finalizing {
                self.tester.stop()
                self.testState = .failed("Stopped")
            }
        }
        return
    }
    let triggers = self.sanitizedTriggers()
    self.tester.stop()
    self.testTimeoutTask?.cancel()
    self.isTesting = true
    self.testState = .requesting
    Task { @MainActor in
        do {
            try await self.tester.start(
                triggers: triggers,
                micID: self.state.voiceWakeMicID.isEmpty ? nil : self.state.voiceWakeMicID,
                localeID: self.state.voiceWakeLocaleID,
                onUpdate: { newState in
                    DispatchQueue.main.async { [self] in
                        self.testState = newState
                        // `.detected` and `.failed` end the session: clear the flag and the timer.
                        var sessionEnded = false
                        if case .detected = newState { sessionEnded = true }
                        if case .failed = newState { sessionEnded = true }
                        if sessionEnded {
                            self.isTesting = false
                            self.testTimeoutTask?.cancel()
                        }
                    }
                })
            self.testTimeoutTask?.cancel()
            self.testTimeoutTask = Task { @MainActor in
                try? await Task.sleep(nanoseconds: 10 * 1_000_000_000)
                guard !Task.isCancelled else { return }
                if self.isTesting {
                    self.tester.stop()
                    // If speech was heard and it yields a command after the trigger, count the
                    // timeout as a detection rather than a failure.
                    if case let .hearing(text) = self.testState,
                       let command = Self.textOnlyCommand(from: text, triggers: triggers)
                    {
                        self.testState = .detected(command)
                    } else {
                        self.testState = .failed("Timeout: no trigger heard")
                    }
                    self.isTesting = false
                }
            }
        } catch {
            self.tester.stop()
            self.testState = .failed(error.localizedDescription)
            self.isTesting = false
            self.testTimeoutTask?.cancel()
        }
    }
}
/// One row of the "Sounds" section: a label, a menu for choosing the chime, and a "Play" button.
/// - Parameters:
///   - title: Label shown in the leading column.
///   - selection: Binding to the chime this row edits.
/// - Returns: The assembled row view.
private func chimeRow(title: String, selection: Binding<VoiceWakeChime>) -> some View {
    let outline = RoundedRectangle(cornerRadius: 6)

    return HStack(alignment: .center, spacing: 10) {
        Text(title)
            .font(.callout.weight(.semibold))
            .frame(width: self.fieldLabelWidth, alignment: .leading)
        Menu {
            Button("No Sound") {
                self.selectChime(.none, binding: selection)
            }
            Divider()
            ForEach(VoiceWakeChimeCatalog.systemOptions, id: \.self) { soundName in
                Button(VoiceWakeChimeCatalog.displayName(for: soundName)) {
                    self.selectChime(.system(name: soundName), binding: selection)
                }
            }
            Divider()
            Button("Choose file…") {
                self.chooseCustomChime(for: selection)
            }
        } label: {
            HStack(spacing: 6) {
                Text(selection.wrappedValue.displayLabel)
                    .lineLimit(1)
                    .truncationMode(.middle)
                Spacer()
                Image(systemName: "chevron.down")
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }
            .padding(6)
            .frame(minWidth: self.controlWidth, maxWidth: .infinity, alignment: .leading)
            .background(Color(nsColor: .windowBackgroundColor))
            .overlay(
                outline.stroke(Color.secondary.opacity(0.25), lineWidth: 1))
            .clipShape(outline)
        }
        Button("Play") {
            VoiceWakeChimePlayer.play(selection.wrappedValue)
        }
        .keyboardShortcut(.space, modifiers: [.command])
    }
}
/// Presents an open panel for a single audio file and, on confirmation, stores it as a custom chime.
///
/// The file is persisted as a security-scoped bookmark and played once as feedback.
/// If the bookmark cannot be created the selection is left unchanged.
/// - Parameter selection: Binding that receives the chosen chime.
private func chooseCustomChime(for selection: Binding<VoiceWakeChime>) {
    let openPanel = NSOpenPanel()
    openPanel.allowedContentTypes = [.audio]
    openPanel.allowsMultipleSelection = false
    openPanel.canChooseDirectories = false
    openPanel.resolvesAliases = true
    openPanel.begin { result in
        guard result == .OK, let fileURL = openPanel.url else { return }
        // Ignore failures; user can retry.
        guard let bookmark = try? fileURL.bookmarkData(
            options: [.withSecurityScope],
            includingResourceValuesForKeys: nil,
            relativeTo: nil)
        else { return }
        let customChime = VoiceWakeChime.custom(
            displayName: fileURL.lastPathComponent,
            bookmark: bookmark)
        selection.wrappedValue = customChime
        VoiceWakeChimePlayer.play(customChime)
    }
}
/// Stores `chime` into `binding` and immediately plays it so the user hears the choice.
/// - Parameters:
///   - chime: The chime that was picked from the menu.
///   - binding: Destination binding for the selection.
private func selectChime(_ chime: VoiceWakeChime, binding: Binding<VoiceWakeChime>) {
binding.wrappedValue = chime
VoiceWakeChimePlayer.play(chime)
}
/// Returns the current trigger words after passing them through `sanitizeVoiceWakeTriggers`.
private func sanitizedTriggers() -> [String] {
    let rawWords = self.state.swabbleTriggerWords
    return sanitizeVoiceWakeTriggers(rawWords)
}
/// Extracts the command that follows a leading trigger in `transcript`, if there is one.
///
/// Returns `nil` when the transcript is empty (before or after normalization), does not begin with
/// any trigger, is rejected by `WakeWordGate.matchesTextOnly`, or has nothing left once the wake
/// phrase is stripped.
/// - Parameters:
///   - transcript: Recognized text to inspect.
///   - triggers: Trigger phrases to match against.
/// - Returns: The text remaining after the wake phrase, or `nil`.
private static func textOnlyCommand(from transcript: String, triggers: [String]) -> String? {
    if transcript.isEmpty || normalizeToken(transcript).isEmpty {
        return nil
    }
    guard startsWithTrigger(transcript: transcript, triggers: triggers),
          WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers)
    else {
        return nil
    }
    let command = WakeWordGate.stripWake(text: transcript, triggers: triggers)
    if command.isEmpty {
        return nil
    }
    return command
}
/// Reports whether `transcript` begins with any of `triggers`, compared token by token.
///
/// Both sides are split on whitespace and each token is normalized with `normalizeToken`; tokens
/// that normalize to an empty string are discarded. Triggers with no usable tokens never match.
/// - Parameters:
///   - transcript: Recognized text to inspect.
///   - triggers: Trigger phrases, each possibly several words long.
/// - Returns: `true` when some trigger's tokens form a prefix of the transcript's tokens.
private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
    func normalizedTokens(of text: String) -> [String] {
        text
            .split(whereSeparator: { $0.isWhitespace })
            .map { normalizeToken(String($0)) }
            .filter { !$0.isEmpty }
    }

    let spokenTokens = normalizedTokens(of: transcript)
    if spokenTokens.isEmpty {
        return false
    }
    return triggers.contains { trigger in
        let wantedTokens = normalizedTokens(of: trigger)
        // `starts(with:)` is already false when the transcript is shorter than the trigger.
        return !wantedTokens.isEmpty && spokenTokens.starts(with: wantedTokens)
    }
}
/// Trims leading and trailing whitespace and punctuation from `token` and lowercases the rest.
/// - Parameter token: A word or phrase to normalize.
/// - Returns: The trimmed, lowercased text (possibly empty).
private static func normalizeToken(_ token: String) -> String {
    let trimmed = token.trimmingCharacters(in: Self.whitespaceAndPunctuation)
    return trimmed.lowercased()
}
/// Characters trimmed from token edges: whitespace, newlines, and punctuation.
private static let whitespaceAndPunctuation: CharacterSet = {
    var characters = CharacterSet.whitespacesAndNewlines
    characters.formUnion(.punctuationCharacters)
    return characters
}()
/// Microphone picker row.
///
/// Always offers "System default" plus every available device. When the saved selection is not
/// among the available devices, it is kept in the list under its remembered name (or
/// "Unavailable") and a "Disconnected" hint is shown below the picker.
private var micPicker: some View {
    let selectionIsMissing = self.isSelectedMicUnavailable
    let rememberedName = self.state.voiceWakeMicName
    let missingLabel = rememberedName.isEmpty ? "Unavailable" : rememberedName

    return VStack(alignment: .leading, spacing: 6) {
        HStack(alignment: .firstTextBaseline, spacing: 10) {
            Text("Microphone")
                .font(.callout.weight(.semibold))
                .frame(width: self.fieldLabelWidth, alignment: .leading)
            Picker("Microphone", selection: self.$state.voiceWakeMicID) {
                Text("System default").tag("")
                if selectionIsMissing {
                    // Keep a row for the saved device so the picker still has a matching tag.
                    Text(missingLabel)
                        .tag(self.state.voiceWakeMicID)
                }
                ForEach(self.availableMics) { device in
                    Text(device.name).tag(device.uid)
                }
            }
            .labelsHidden()
            .frame(width: self.controlWidth)
        }
        if selectionIsMissing {
            HStack(spacing: 10) {
                // Invisible spacer that lines the hint up with the control column.
                Color.clear.frame(width: self.fieldLabelWidth, height: 1)
                Text("Disconnected (using System default)")
                    .font(.caption)
                    .foregroundStyle(.secondary)
                    .lineLimit(1)
            }
        }
        if self.loadingMics {
            ProgressView().controlSize(.small)
        }
    }
}
/// Recognition-language controls: the primary locale picker, an optional list of additional
/// locales (each with its own picker and remove button), and an "add" button.
private var localePicker: some View {
    let systemID = Locale.current.identifier
    let localeIDs = self.availableLocales.map(\.identifier)

    return VStack(alignment: .leading, spacing: 6) {
        HStack(alignment: .firstTextBaseline, spacing: 10) {
            Text("Recognition language")
                .font(.callout.weight(.semibold))
                .frame(width: self.fieldLabelWidth, alignment: .leading)
            Picker("Language", selection: self.$state.voiceWakeLocaleID) {
                // The system locale is always listed first and skipped in the loop below.
                let systemLocale = Locale(identifier: systemID)
                Text("\(self.friendlyName(for: systemLocale)) (System)").tag(systemID)
                ForEach(localeIDs, id: \.self) { identifier in
                    if identifier != systemID {
                        Text(self.friendlyName(for: Locale(identifier: identifier))).tag(identifier)
                    }
                }
            }
            .labelsHidden()
            .frame(width: self.controlWidth)
        }
        if !self.state.voiceWakeAdditionalLocaleIDs.isEmpty {
            VStack(alignment: .leading, spacing: 8) {
                Text("Additional languages")
                    .font(.footnote.weight(.semibold))
                ForEach(
                    Array(self.state.voiceWakeAdditionalLocaleIDs.enumerated()),
                    id: \.offset)
                { position, currentID in
                    HStack(spacing: 8) {
                        Picker("Extra \(position + 1)", selection: Binding(
                            get: { currentID },
                            set: { chosenID in
                                // The list may have shrunk since this row was built.
                                if self.state.voiceWakeAdditionalLocaleIDs.indices.contains(position) {
                                    self.state.voiceWakeAdditionalLocaleIDs[position] = chosenID
                                }
                            })) {
                            ForEach(localeIDs, id: \.self) { identifier in
                                Text(self.friendlyName(for: Locale(identifier: identifier))).tag(identifier)
                            }
                        }
                        .labelsHidden()
                        .frame(width: 220)
                        Button {
                            if self.state.voiceWakeAdditionalLocaleIDs.indices.contains(position) {
                                self.state.voiceWakeAdditionalLocaleIDs.remove(at: position)
                            }
                        } label: {
                            Image(systemName: "trash")
                        }
                        .buttonStyle(.borderless)
                        .help("Remove language")
                    }
                }
                Button {
                    if let firstLocale = self.availableLocales.first {
                        self.state.voiceWakeAdditionalLocaleIDs.append(firstLocale.identifier)
                    }
                } label: {
                    Label("Add language", systemImage: "plus")
                }
                .disabled(self.availableLocales.isEmpty)
            }
            .padding(.top, 4)
        } else {
            Button {
                if let firstLocale = self.availableLocales.first {
                    self.state.voiceWakeAdditionalLocaleIDs.append(firstLocale.identifier)
                }
            } label: {
                Label("Add additional language", systemImage: "plus")
            }
            .buttonStyle(.link)
            .disabled(self.availableLocales.isEmpty)
            .padding(.top, 4)
        }
        Text("Languages are tried in order. Models may need a first-use download on macOS 26.")
            .font(.caption)
            .foregroundStyle(.secondary)
    }
}
/// Populates `availableMics` from the connected audio capture devices.
///
/// Skips the scan when a list is already loaded (unless `force` is set) or when a scan is in
/// progress. When `AudioInputDeviceObserver.aliveInputDeviceUIDs()` returns any UIDs, only
/// devices in that set are kept; otherwise all connected devices are listed.
/// - Parameter force: Reload even when a device list is already present.
@MainActor
private func loadMicsIfNeeded(force: Bool = false) async {
    let needsLoad = force || self.availableMics.isEmpty
    guard needsLoad, !self.loadingMics else { return }

    self.loadingMics = true
    defer { self.loadingMics = false }

    let session = AVCaptureDevice.DiscoverySession(
        deviceTypes: [.external, .microphone],
        mediaType: .audio,
        position: .unspecified)
    let aliveUIDs = AudioInputDeviceObserver.aliveInputDeviceUIDs()
    let connected = session.devices.filter { $0.isConnected }
    let usable = aliveUIDs.isEmpty
        ? connected
        : connected.filter { aliveUIDs.contains($0.uniqueID) }

    self.availableMics = usable.map { device in
        AudioInputDevice(uid: device.uniqueID, name: device.localizedName)
    }
    self.updateSelectedMicName()
}
/// `true` when a specific microphone is selected but it is not in `availableMics`.
/// An empty selection (system default) is never considered unavailable.
private var isSelectedMicUnavailable: Bool {
    let selectedID = self.state.voiceWakeMicID
    if selectedID.isEmpty {
        return false
    }
    return self.availableMics.allSatisfy { $0.uid != selectedID }
}
/// Keeps `state.voiceWakeMicName` in sync with the selected microphone.
///
/// Clears the name when no mic is selected and updates it when the selected mic is available.
/// When the selected mic is missing, the stored name is left as it is.
@MainActor
private func updateSelectedMicName() {
    let selectedID = self.state.voiceWakeMicID
    guard !selectedID.isEmpty else {
        self.state.voiceWakeMicName = ""
        return
    }
    guard let device = self.availableMics.first(where: { $0.uid == selectedID }) else {
        return
    }
    self.state.voiceWakeMicName = device.name
}
/// Starts the device observer; each change notification schedules a debounced mic refresh
/// on the main actor.
private func startMicObserver() {
    self.micObserver.start {
        Task { @MainActor in self.scheduleMicRefresh() }
    }
}
/// Debounces device-change handling: cancels any pending refresh, waits 300 ms, then reloads
/// the mic list and restarts the level meter unless the task was cancelled in the meantime.
@MainActor
private func scheduleMicRefresh() {
    self.micRefreshTask?.cancel()
    let refresh = Task { @MainActor in
        try? await Task.sleep(nanoseconds: 300_000_000)
        if Task.isCancelled { return }
        await self.loadMicsIfNeeded(force: true)
        await self.restartMeter()
    }
    self.micRefreshTask = refresh
}
/// Loads the locales supported by `SFSpeechRecognizer`, sorted by display name
/// (case-insensitive, localized). Does nothing when a list is already loaded.
///
/// Each display name is computed once up front rather than inside the sort comparator, which
/// would otherwise rebuild it for both operands of every comparison.
@MainActor
private func loadLocalesIfNeeded() async {
    guard self.availableLocales.isEmpty else { return }
    let named = SFSpeechRecognizer.supportedLocales().map { locale in
        (locale: locale, name: self.friendlyName(for: locale))
    }
    self.availableLocales = named
        .sorted { lhs, rhs in
            lhs.name.localizedCaseInsensitiveCompare(rhs.name) == .orderedAscending
        }
        .map { $0.locale }
}
/// Builds a display name for `locale` from its normalized identifier.
///
/// Prefers "Language (Region)", falls back to the language name alone, and finally to the
/// localized identifier or the raw normalized identifier.
/// - Parameter locale: The locale to describe.
/// - Returns: A human-readable label for pickers.
private func friendlyName(for locale: Locale) -> String {
    let identifier = normalizeLocaleIdentifier(locale.identifier)
    let display = Locale(identifier: identifier)

    guard let languageCode = display.language.languageCode?.identifier,
          let languageName = display.localizedString(forLanguageCode: languageCode)
    else {
        return display.localizedString(forIdentifier: identifier) ?? identifier
    }
    guard let regionCode = display.region?.identifier,
          let regionName = display.localizedString(forRegionCode: regionCode)
    else {
        return languageName
    }
    return "\(languageName) (\(regionName))"
}
/// "Live level" row: the level bar, a numeric dB readout, and any meter error underneath.
private var levelMeter: some View {
    VStack(alignment: .leading, spacing: 6) {
        HStack(alignment: .center, spacing: 10) {
            Text("Live level")
                .font(.callout.weight(.semibold))
                .frame(width: self.fieldLabelWidth, alignment: .leading)
            MicLevelBar(level: self.meterLevel)
                .frame(width: self.controlWidth, alignment: .leading)
            Text(self.levelLabel)
                .font(.callout.monospacedDigit())
                .foregroundStyle(.secondary)
                .frame(width: 60, alignment: .trailing)
        }
        if let message = self.meterError {
            Text(message)
                .font(.footnote)
                .foregroundStyle(.secondary)
        }
    }
}
/// Text for the level readout: `meterLevel` mapped linearly so 0 reads as -50 dB and 1 as 0 dB,
/// rounded to a whole number.
private var levelLabel: String {
    let decibels = (self.meterLevel * 50) - 50
    return String(format: "%.0f dB", decibels)
}
/// Stops the level meter and starts it again, clearing any previous error first.
///
/// Each level reported by the meter is applied to `meterLevel` on the main actor. If starting
/// throws, the error's description is stored in `meterError` for display.
@MainActor
private func restartMeter() async {
self.meterError = nil
await self.meter.stop()
do {
// `state` is captured weakly; samples that arrive after it has been released are dropped.
try await self.meter.start { [weak state] level in
Task { @MainActor in
guard state != nil else { return }
self.meterLevel = level
}
}
} catch {
self.meterError = error.localizedDescription
}
}
}
#if DEBUG
/// Xcode preview of the pane, backed by the preview app state and sized like the settings window.
struct VoiceWakeSettings_Previews: PreviewProvider {
    static var previews: some View {
        VoiceWakeSettings(state: AppState.preview, isActive: true)
            .frame(
                width: SettingsTab.windowWidth,
                height: SettingsTab.windowHeight)
    }
}
@MainActor
extension VoiceWakeSettings {
    /// Debug-only smoke test: builds the pane on a preview state, assigns sample values, evaluates
    /// `body` and each section, and runs the add / read / remove path for trigger words.
    static func exerciseForTesting() {
        // Sample app state with voice features on and two trigger words.
        let sampleState = AppState(preview: true)
        sampleState.swabbleEnabled = true
        sampleState.voicePushToTalkEnabled = true
        sampleState.swabbleTriggerWords = ["Claude", "Hey"]

        let settings = VoiceWakeSettings(state: sampleState, isActive: true)

        // Sample view-local values.
        settings.availableMics = [AudioInputDevice(uid: "mic-1", name: "Built-in")]
        settings.availableLocales = [Locale(identifier: "en_US")]
        settings.meterLevel = 0.42
        settings.meterError = "No input"
        settings.testState = .detected("ok")
        settings.isTesting = true

        // Evaluate the whole pane, then each section on its own.
        _ = settings.body
        _ = settings.localePicker
        _ = settings.micPicker
        _ = settings.levelMeter
        _ = settings.triggerTable
        _ = settings.chimeSection

        // Trigger-word editing helpers.
        settings.addWord()
        _ = settings.binding(for: 0).wrappedValue
        settings.removeWord(at: 0)
    }
}
#endif