From df9f72134b88964e7e0ad6a175e1b854dda6cf61 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sun, 7 Dec 2025 17:54:53 +0100
Subject: [PATCH] refactor(mac): split voice wake settings

---
 .../Sources/Clawdis/MicLevelMonitor.swift     |  81 +++
 .../Clawdis/VoiceWakeForwardSection.swift     | 108 ++++
 .../Sources/Clawdis/VoiceWakeSettings.swift   | 601 ++----------------
 .../Sources/Clawdis/VoiceWakeTestCard.swift   |  89 +++
 .../Sources/Clawdis/VoiceWakeTester.swift     | 229 +++++++
 5 files changed, 574 insertions(+), 534 deletions(-)
 create mode 100644 apps/macos/Sources/Clawdis/MicLevelMonitor.swift
 create mode 100644 apps/macos/Sources/Clawdis/VoiceWakeForwardSection.swift
 create mode 100644 apps/macos/Sources/Clawdis/VoiceWakeTestCard.swift
 create mode 100644 apps/macos/Sources/Clawdis/VoiceWakeTester.swift

diff --git a/apps/macos/Sources/Clawdis/MicLevelMonitor.swift b/apps/macos/Sources/Clawdis/MicLevelMonitor.swift
new file mode 100644
index 000000000..1cda23708
--- /dev/null
+++ b/apps/macos/Sources/Clawdis/MicLevelMonitor.swift
@@ -0,0 +1,81 @@
+import AVFoundation
+import SwiftUI
+
+actor MicLevelMonitor {
+    private let engine = AVAudioEngine()
+    private var update: (@Sendable (Double) -> Void)?
+    private var running = false
+    private var smoothedLevel: Double = 0
+
+    func start(onLevel: @Sendable @escaping (Double) -> Void) async throws {
+        self.update = onLevel
+        if self.running { return }
+        let input = self.engine.inputNode
+        let format = input.outputFormat(forBus: 0)
+        input.removeTap(onBus: 0)
+        input.installTap(onBus: 0, bufferSize: 512, format: format) { [weak self] buffer, _ in
+            guard let self else { return }
+            let level = Self.normalizedLevel(from: buffer)
+            Task { await self.push(level: level) }
+        }
+        self.engine.prepare()
+        try self.engine.start()
+        self.running = true
+    }
+
+    func stop() {
+        guard self.running else { return }
+        self.engine.inputNode.removeTap(onBus: 0)
+        self.engine.stop()
+        self.running = false
+    }
+
+    private func push(level: Double) {
+        self.smoothedLevel = (self.smoothedLevel * 0.45) + (level * 0.55)
+        guard let update else { return }
+        let value = self.smoothedLevel
+        Task { @MainActor in update(value) }
+    }
+
+    private static func normalizedLevel(from buffer: AVAudioPCMBuffer) -> Double {
+        guard let channel = buffer.floatChannelData?[0] else { return 0 }
+        let frameCount = Int(buffer.frameLength)
+        guard frameCount > 0 else { return 0 }
+        var sum: Float = 0
+        for i in 0..<frameCount {
+            sum += channel[i] * channel[i]
+        }
+        let rms = sqrt(sum / Float(frameCount))
+        let db = Double(20 * log10(max(rms, 0.000_001)))
+        return min(max((db + 50) / 50, 0), 1)
+    }
+}
+
+struct MicLevelBar: View {
+    let level: Double
+    let segments: Int = 12
+
+    var body: some View {
+        HStack(spacing: 3) {
+            ForEach(0..<self.segments, id: \.self) { idx in
+                let fill = self.level * Double(self.segments) > Double(idx)
+                RoundedRectangle(cornerRadius: 2)
+                    .fill(fill ? self.segmentColor(for: idx) : Color.gray.opacity(0.35))
+                    .frame(width: 14, height: 10)
+            }
+        }
+        .padding(4)
+        .background(
+            RoundedRectangle(cornerRadius: 6)
+                .stroke(Color.gray.opacity(0.25), lineWidth: 1))
+    }
+
+    private func segmentColor(for idx: Int) -> Color {
+        let fraction = Double(idx + 1) / Double(self.segments)
+        if fraction < 0.65 { return .green }
+        if fraction < 0.85 { return .yellow }
+        return .red
+    }
+}
diff --git a/apps/macos/Sources/Clawdis/VoiceWakeForwardSection.swift b/apps/macos/Sources/Clawdis/VoiceWakeForwardSection.swift
new file mode 100644
index 000000000..e08aaf32c
--- /dev/null
+++ b/apps/macos/Sources/Clawdis/VoiceWakeForwardSection.swift
@@ -0,0 +1,108 @@
+import SwiftUI
+
+enum VoiceWakeForwardStatus: Equatable {
+    case idle
+    case checking
+    case ok
+    case failed(String)
+}
+
+struct VoiceWakeForwardSection: View {
+    @Binding var enabled: Bool
+    @Binding var target: String
+    @Binding var identity: String
+    @Binding var command: String
+    @Binding var showAdvanced: Bool
+    @Binding var status: VoiceWakeForwardStatus
+    let onTest: () -> Void
+    let onChange: () -> Void
+
+    var body: some View {
+        VStack(alignment: .leading, spacing: 8) {
+            Toggle(isOn: self.$enabled) {
+                Text("Forward wake to host (SSH)")
+            }
+
+            if self.enabled {
+                VStack(alignment: .leading, spacing: 8) {
+                    HStack(spacing: 10) {
+                        Text("SSH")
+                            .font(.callout.weight(.semibold))
+                            .frame(width: 40, alignment: .leading)
+                        TextField("steipete@peters-mac-studio-1", text: self.$target)
+                            .textFieldStyle(.roundedBorder)
+                            .frame(maxWidth: .infinity)
+                            .onChange(of: self.target) { _, _ in
+                                self.onChange()
+                            }
+                        self.statusIcon
+                            .frame(width: 16, height: 16, alignment: .center)
+                        Button("Test") { self.onTest() }
+                            .disabled(self.target.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty)
+                    }
+
+                    if case let .failed(message) = self.status {
+                        Text(message)
+                            .font(.caption)
+                            .foregroundStyle(.secondary)
+                            .lineLimit(5)
+                    }
+
+                    DisclosureGroup(isExpanded: self.$showAdvanced) {
+                        VStack(alignment: .leading, spacing: 10) {
+                            LabeledContent("Identity file") {
+                                TextField(
+                                    "/Users/you/.ssh/voicewake_ed25519",
+                                    text: self.$identity)
+                                    .textFieldStyle(.roundedBorder)
+                                    .frame(width: 320)
+                                    .onChange(of: self.identity) { _, _ in
+                                        self.onChange()
+                                    }
+                            }
+
+                            VStack(alignment: .leading, spacing: 4) {
+                                Text("Remote command template")
+                                    .font(.callout.weight(.semibold))
+                                TextField(
+                                    "clawdis-mac agent --message \"${text}\" --thinking low",
+                                    text: self.$command,
+                                    axis: .vertical)
+                                    .textFieldStyle(.roundedBorder)
+                                    .onChange(of: self.command) { _, _ in
+                                        self.onChange()
+                                    }
+                                Text(
+                                    "${text} is replaced with the transcript."
+ + "\nIt is also piped to stdin if you prefer $(cat).") + .font(.footnote) + .foregroundStyle(.secondary) + .fixedSize(horizontal: false, vertical: true) + } + } + .padding(.top, 4) + } label: { + Text("Advanced") + .font(.callout.weight(.semibold)) + } + } + .transition(.opacity.combined(with: .move(edge: .top))) + } + } + } + + private var statusIcon: some View { + Group { + switch self.status { + case .idle: + Image(systemName: "circle.dashed").foregroundStyle(.secondary) + case .checking: + ProgressView().controlSize(.mini) + case .ok: + Image(systemName: "checkmark.circle.fill").foregroundStyle(.green) + case .failed: + Image(systemName: "exclamationmark.triangle.fill").foregroundStyle(.yellow) + } + } + } +} diff --git a/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift b/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift index 0bf110b53..8cec87424 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeSettings.swift @@ -1,296 +1,7 @@ import AVFoundation -import OSLog import Speech import SwiftUI -enum VoiceWakeTestState: Equatable { - case idle - case requesting - case listening - case hearing(String) - case detected(String) - case failed(String) -} - -private enum ForwardStatus: Equatable { - case idle - case checking - case ok - case failed(String) -} - -private struct AudioInputDevice: Identifiable, Equatable { - let uid: String - let name: String - var id: String { self.uid } -} - -actor MicLevelMonitor { - private let engine = AVAudioEngine() - private var update: (@Sendable (Double) -> Void)? - private var running = false - private var smoothedLevel: Double = 0 - - func start(onLevel: @Sendable @escaping (Double) -> Void) async throws { - self.update = onLevel - if self.running { return } - let input = self.engine.inputNode - let format = input.outputFormat(forBus: 0) - input.removeTap(onBus: 0) - input.installTap(onBus: 0, bufferSize: 512, format: format) { [weak self] buffer, _ in - guard let self else { return } - let level = Self.normalizedLevel(from: buffer) - Task { await self.push(level: level) } - } - self.engine.prepare() - try self.engine.start() - self.running = true - } - - func stop() { - guard self.running else { return } - self.engine.inputNode.removeTap(onBus: 0) - self.engine.stop() - self.running = false - } - - private func push(level: Double) { - self.smoothedLevel = (self.smoothedLevel * 0.45) + (level * 0.55) - guard let update else { return } - let value = self.smoothedLevel - Task { @MainActor in update(value) } - } - - private static func normalizedLevel(from buffer: AVAudioPCMBuffer) -> Double { - guard let channel = buffer.floatChannelData?[0] else { return 0 } - let frameCount = Int(buffer.frameLength) - guard frameCount > 0 else { return 0 } - var sum: Float = 0 - for i in 0.. Void) async throws - { - guard self.recognitionTask == nil else { return } - self.isStopping = false - let chosenLocale = localeID.flatMap { Locale(identifier: $0) } ?? Locale.current - let recognizer = SFSpeechRecognizer(locale: chosenLocale) - guard let recognizer, recognizer.isAvailable else { - throw NSError( - domain: "VoiceWakeTester", - code: 1, - userInfo: [NSLocalizedDescriptionKey: "Speech recognition unavailable"]) - } - - guard Self.hasPrivacyStrings else { - throw NSError( - domain: "VoiceWakeTester", - code: 3, - userInfo: [ - NSLocalizedDescriptionKey: """ - Missing mic/speech privacy strings. Rebuild the mac app (scripts/restart-mac.sh) \ - to include usage descriptions. 
- """, - ]) - } - - let granted = try await Self.ensurePermissions() - guard granted else { - throw NSError( - domain: "VoiceWakeTester", - code: 2, - userInfo: [NSLocalizedDescriptionKey: "Microphone or speech permission denied"]) - } - - self.configureSession(preferredMicID: micID) - - self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest() - self.recognitionRequest?.shouldReportPartialResults = true - let request = self.recognitionRequest - - let inputNode = self.audioEngine.inputNode - let format = inputNode.outputFormat(forBus: 0) - inputNode.removeTap(onBus: 0) - inputNode.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in - request?.append(buffer) - } - - self.audioEngine.prepare() - try self.audioEngine.start() - DispatchQueue.main.async { - onUpdate(.listening) - } - - self.detectionStart = Date() - self.lastHeard = self.detectionStart - - guard let request = recognitionRequest else { return } - - self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in - guard let self, !self.isStopping else { return } - let text = result?.bestTranscription.formattedString ?? "" - let matched = Self.matches(text: text, triggers: triggers) - let isFinal = result?.isFinal ?? false - let errorMessage = error?.localizedDescription - - Task { [weak self] in - guard let self, !self.isStopping else { return } - await self.handleResult( - matched: matched, - text: text, - isFinal: isFinal, - errorMessage: errorMessage, - onUpdate: onUpdate) - } - } - } - - func stop() { - self.isStopping = true - self.audioEngine.stop() - self.recognitionRequest?.endAudio() - self.recognitionTask?.cancel() - self.recognitionTask = nil - self.recognitionRequest = nil - self.audioEngine.inputNode.removeTap(onBus: 0) - } - - private func handleResult( - matched: Bool, - text: String, - isFinal: Bool, - errorMessage: String?, - onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void) async - { - if !text.isEmpty { - self.lastHeard = Date() - } - if matched, !text.isEmpty { - self.holdingAfterDetect = true - self.detectedText = text - self.logger.info("voice wake detected; forwarding (len=\(text.count))") - await MainActor.run { AppStateStore.shared.triggerVoiceEars() } - let config = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig } - Task.detached { - await VoiceWakeForwarder.forward(transcript: text, config: config) - } - Task { @MainActor in onUpdate(.detected(text)) } - self.holdUntilSilence(onUpdate: onUpdate) - return - } - if let errorMessage { - self.stop() - Task { @MainActor in onUpdate(.failed(errorMessage)) } - return - } - if isFinal { - self.stop() - let state: VoiceWakeTestState = text.isEmpty - ? .failed("No speech detected") - : .failed("No trigger heard: “\(text)”") - Task { @MainActor in onUpdate(state) } - } else { - let state: VoiceWakeTestState = text.isEmpty ? .listening : .hearing(text) - Task { @MainActor in onUpdate(state) } - } - } - - private func holdUntilSilence(onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void) { - Task { [weak self] in - guard let self else { return } - let start = self.detectionStart ?? Date() - let deadline = start.addingTimeInterval(10) - while !self.isStopping { - let now = Date() - if now >= deadline { break } - if let last = self.lastHeard, now.timeIntervalSince(last) >= 1 { - break - } - try? 
-            }
-            if !self.isStopping {
-                self.stop()
-                if let detectedText {
-                    self.logger.info("voice wake hold finished; len=\(detectedText.count)")
-                    Task { @MainActor in onUpdate(.detected(detectedText)) }
-                }
-            }
-        }
-    }
-
-    private func configureSession(preferredMicID: String?) {
-        _ = preferredMicID
-    }
-
-    private static func matches(text: String, triggers: [String]) -> Bool {
-        let lowered = text.lowercased()
-        return triggers.contains { lowered.contains($0.lowercased()) }
-    }
-
-    private nonisolated static func ensurePermissions() async throws -> Bool {
-        let speechStatus = SFSpeechRecognizer.authorizationStatus()
-        if speechStatus == .notDetermined {
-            let granted = await withCheckedContinuation { continuation in
-                SFSpeechRecognizer.requestAuthorization { status in
-                    continuation.resume(returning: status == .authorized)
-                }
-            }
-            guard granted else { return false }
-        } else if speechStatus != .authorized {
-            return false
-        }
-
-        let micStatus = AVCaptureDevice.authorizationStatus(for: .audio)
-        switch micStatus {
-        case .authorized: return true
-
-        case .notDetermined:
-            return await withCheckedContinuation { continuation in
-                AVCaptureDevice.requestAccess(for: .audio) { granted in
-                    continuation.resume(returning: granted)
-                }
-            }
-
-        default:
-            return false
-        }
-    }
-
-    private static var hasPrivacyStrings: Bool {
-        let speech = Bundle.main.object(forInfoDictionaryKey: "NSSpeechRecognitionUsageDescription") as? String
-        let mic = Bundle.main.object(forInfoDictionaryKey: "NSMicrophoneUsageDescription") as? String
-        return speech?.isEmpty == false && mic?.isEmpty == false
-    }
-}
-
 struct VoiceWakeSettings: View {
     @ObservedObject var state: AppState
     @State private var testState: VoiceWakeTestState = .idle
@@ -303,10 +14,21 @@ struct VoiceWakeSettings: View {
     private let meter = MicLevelMonitor()
     @State private var availableLocales: [Locale] = []
     @State private var showForwardAdvanced = false
-    @State private var forwardStatus: ForwardStatus = .idle
+    @State private var forwardStatus: VoiceWakeForwardStatus = .idle
     private let fieldLabelWidth: CGFloat = 120
     private let controlWidth: CGFloat = 240
 
+    private struct AudioInputDevice: Identifiable, Equatable {
+        let uid: String
+        let name: String
+        var id: String { self.uid }
+    }
+
+    private struct IndexedWord: Identifiable {
+        let id: Int
+        let value: String
+    }
+
     private var voiceWakeBinding: Binding<Bool> {
         Binding(
             get: { self.state.swabbleEnabled },
@@ -315,11 +37,6 @@ struct VoiceWakeSettings: View {
         })
     }
 
-    private struct IndexedWord: Identifiable {
-        let id: Int
-        let value: String
-    }
-
     var body: some View {
         ScrollView(.vertical) {
             VStack(alignment: .leading, spacing: 14) {
@@ -343,55 +60,22 @@ struct VoiceWakeSettings: View {
                 self.micPicker
                 self.levelMeter
 
-                self.forwardSection
+                VoiceWakeForwardSection(
+                    enabled: self.$state.voiceWakeForwardEnabled,
+                    target: self.$state.voiceWakeForwardTarget,
+                    identity: self.$state.voiceWakeForwardIdentity,
+                    command: self.$state.voiceWakeForwardCommand,
+                    showAdvanced: self.$showForwardAdvanced,
+                    status: self.$forwardStatus,
+                    onTest: { Task { await self.checkForwardConnection() } },
+                    onChange: self.forwardConfigChanged)
 
-                self.testCard
+                VoiceWakeTestCard(
+                    testState: self.$testState,
+                    isTesting: self.$isTesting,
+                    onToggle: self.toggleTest)
 
-                VStack(alignment: .leading, spacing: 8) {
-                    HStack {
-                        Text("Trigger words")
-                            .font(.callout.weight(.semibold))
-                        Spacer()
-                        Button {
-                            self.addWord()
-                        } label: {
-                            Label("Add word", systemImage: "plus")
-                        }
-                        .disabled(self.state.swabbleTriggerWords
-                            .contains(where: { $0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }))
-
-                        Button("Reset defaults") { self.state.swabbleTriggerWords = defaultVoiceWakeTriggers }
-                    }
-
-                    Table(self.indexedWords) {
-                        TableColumn("Word") { row in
-                            TextField("Wake word", text: self.binding(for: row.id))
-                                .textFieldStyle(.roundedBorder)
-                        }
-                        TableColumn("") { row in
-                            Button {
-                                self.removeWord(at: row.id)
-                            } label: {
-                                Image(systemName: "trash")
-                            }
-                            .buttonStyle(.borderless)
-                            .help("Remove trigger word")
-                        }
-                        .width(36)
-                    }
-                    .frame(minHeight: 180)
-                    .clipShape(RoundedRectangle(cornerRadius: 6))
-                    .overlay(
-                        RoundedRectangle(cornerRadius: 6)
-                            .stroke(Color.secondary.opacity(0.25), lineWidth: 1))
-
-                    Text(
-                        "Clawdis reacts when any trigger appears in a transcription. "
-                            + "Keep them short to avoid false positives.")
-                        .font(.footnote)
-                        .foregroundStyle(.secondary)
-                        .fixedSize(horizontal: false, vertical: true)
-                }
+                self.triggerTable
 
                 Spacer(minLength: 8)
             }
@@ -413,85 +97,51 @@ struct VoiceWakeSettings: View {
         self.state.swabbleTriggerWords.enumerated().map { IndexedWord(id: $0.offset, value: $0.element) }
     }
 
-    private var testCard: some View {
-        VStack(alignment: .leading, spacing: 10) {
+    private var triggerTable: some View {
+        VStack(alignment: .leading, spacing: 8) {
             HStack {
-                Text("Test Voice Wake")
+                Text("Trigger words")
                     .font(.callout.weight(.semibold))
                 Spacer()
-                Button(action: self.toggleTest) {
-                    Label(
-                        self.isTesting ? "Stop" : "Start test",
-                        systemImage: self.isTesting ? "stop.circle.fill" : "play.circle")
+                Button {
+                    self.addWord()
+                } label: {
+                    Label("Add word", systemImage: "plus")
                 }
-                .buttonStyle(.borderedProminent)
-                .tint(self.isTesting ? .red : .accentColor)
+                .disabled(self.state.swabbleTriggerWords
+                    .contains(where: { $0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }))
+
+                Button("Reset defaults") { self.state.swabbleTriggerWords = defaultVoiceWakeTriggers }
             }
 
-            HStack(spacing: 8) {
-                self.statusIcon
-                VStack(alignment: .leading, spacing: 4) {
-                    Text(self.statusText)
-                        .font(.subheadline)
-                        .frame(maxHeight: 22, alignment: .center)
-                    if case let .detected(text) = testState {
-                        Text("Heard: \(text)")
-                            .font(.footnote)
-                            .foregroundStyle(.secondary)
-                            .lineLimit(2)
+            Table(self.indexedWords) {
+                TableColumn("Word") { row in
+                    TextField("Wake word", text: self.binding(for: row.id))
+                        .textFieldStyle(.roundedBorder)
+                }
+                TableColumn("") { row in
+                    Button {
+                        self.removeWord(at: row.id)
+                    } label: {
+                        Image(systemName: "trash")
                     }
+                    .buttonStyle(.borderless)
+                    .help("Remove trigger word")
                 }
-                Spacer()
+                .width(36)
             }
-            .padding(10)
-            .background(.quaternary.opacity(0.2))
-            .clipShape(RoundedRectangle(cornerRadius: 8))
-            .frame(minHeight: 54)
-        }
-        .padding(.vertical, 2)
-    }
+            .frame(minHeight: 180)
+            .clipShape(RoundedRectangle(cornerRadius: 6))
+            .overlay(
+                RoundedRectangle(cornerRadius: 6)
+                    .stroke(Color.secondary.opacity(0.25), lineWidth: 1))
 
-    private var statusIcon: some View {
-        switch self.testState {
-        case .idle:
-            AnyView(Image(systemName: "waveform").foregroundStyle(.secondary))
-
-        case .requesting:
-            AnyView(ProgressView().controlSize(.small))
-
-        case .listening, .hearing:
-            AnyView(
-                Image(systemName: "ear.and.waveform")
-                    .symbolEffect(.pulse)
-                    .foregroundStyle(Color.accentColor))
-
-        case .detected:
-            AnyView(Image(systemName: "checkmark.circle.fill").foregroundStyle(.green))
-
-        case .failed:
-            AnyView(Image(systemName: "exclamationmark.triangle.fill").foregroundStyle(.yellow))
-        }
-    }
-
-    private var statusText: String {
-        switch self.testState {
-        case .idle:
-            "Press start, say a trigger word, and wait for detection."
-
-        case .requesting:
-            "Requesting mic & speech permission…"
-
-        case .listening:
-            "Listening… say your trigger word."
-
-        case let .hearing(text):
-            "Heard: \(text)"
-
-        case .detected:
-            "Voice wake detected!"
-
-        case let .failed(reason):
-            reason
+            Text(
+                "Clawdis reacts when any trigger appears in a transcription. "
+                    + "Keep them short to avoid false positives.")
+                .font(.footnote)
+                .foregroundStyle(.secondary)
+                .fixedSize(horizontal: false, vertical: true)
         }
     }
@@ -629,7 +279,7 @@ struct VoiceWakeSettings: View {
             }
         }
         .labelsHidden()
-        .frame(width: 220)
+        .frame(width: 220)
 
         Button {
             guard self.state.voiceWakeAdditionalLocaleIDs.indices.contains(idx) else { return }
@@ -746,99 +396,6 @@ struct VoiceWakeSettings: View {
         }
     }
 
-    private var forwardSection: some View {
-        VStack(alignment: .leading, spacing: 8) {
-            Toggle(isOn: self.$state.voiceWakeForwardEnabled) {
-                Text("Forward wake to host (SSH)")
-            }
-            if self.state.voiceWakeForwardEnabled {
-                VStack(alignment: .leading, spacing: 8) {
-                    HStack(spacing: 10) {
-                        Text("SSH")
-                            .font(.callout.weight(.semibold))
-                            .frame(width: 40, alignment: .leading)
-                        TextField("steipete@peters-mac-studio-1", text: self.$state.voiceWakeForwardTarget)
-                            .textFieldStyle(.roundedBorder)
-                            .frame(maxWidth: .infinity)
-                            .onChange(of: self.state.voiceWakeForwardTarget) { _, _ in
-                                self.forwardStatus = .idle
-                                VoiceWakeForwarder.clearCliCache()
-                            }
-                        self.forwardStatusIcon
-                            .frame(width: 16, height: 16, alignment: .center)
-                        Button("Test") {
-                            Task { await self.checkForwardConnection() }
-                        }
-                        .disabled(
-                            self.state.voiceWakeForwardTarget
-                                .trimmingCharacters(in: .whitespacesAndNewlines).isEmpty)
-                    }
-
-                    if case let .failed(message) = self.forwardStatus {
-                        Text(message)
-                            .font(.caption)
-                            .foregroundStyle(.secondary)
-                            .lineLimit(5)
-                    }
-
-                    DisclosureGroup(isExpanded: self.$showForwardAdvanced) {
-                        VStack(alignment: .leading, spacing: 10) {
-                            LabeledContent("Identity file") {
-                                TextField(
-                                    "/Users/you/.ssh/voicewake_ed25519",
-                                    text: self.$state.voiceWakeForwardIdentity)
-                                    .textFieldStyle(.roundedBorder)
-                                    .frame(width: 320)
-                                    .onChange(of: self.state.voiceWakeForwardIdentity) { _, _ in
-                                        self.forwardStatus = .idle
-                                    }
-                            }
-
-                            VStack(alignment: .leading, spacing: 4) {
-                                Text("Remote command template")
-                                    .font(.callout.weight(.semibold))
-                                TextField(
-                                    "clawdis-mac agent --message \"${text}\" --thinking low",
-                                    text: self.$state.voiceWakeForwardCommand,
-                                    axis: .vertical)
-                                    .textFieldStyle(.roundedBorder)
-                                    .onChange(of: self.state.voiceWakeForwardCommand) { _, _ in
-                                        self.forwardStatus = .idle
-                                    }
-                                Text(
-                                    "${text} is replaced with the transcript."
- + "\nIt is also piped to stdin if you prefer $(cat).") - .font(.footnote) - .foregroundStyle(.secondary) - .fixedSize(horizontal: false, vertical: true) - } - } - .padding(.top, 4) - } label: { - Text("Advanced") - .font(.callout.weight(.semibold)) - } - } - .transition(.opacity.combined(with: .move(edge: .top))) - } - } - } - - private var forwardStatusIcon: some View { - Group { - switch self.forwardStatus { - case .idle: - Image(systemName: "circle.dashed").foregroundStyle(.secondary) - case .checking: - ProgressView().controlSize(.mini) - case .ok: - Image(systemName: "checkmark.circle.fill").foregroundStyle(.green) - case .failed: - Image(systemName: "exclamationmark.triangle.fill").foregroundStyle(.yellow) - } - } - } - private var levelLabel: String { let db = (meterLevel * 50) - 50 return String(format: "%.0f dB", db) @@ -859,6 +416,11 @@ struct VoiceWakeSettings: View { } } + private func forwardConfigChanged() { + self.forwardStatus = .idle + VoiceWakeForwarder.clearCliCache() + } + @MainActor private func restartMeter() async { self.meterError = nil @@ -875,32 +437,3 @@ struct VoiceWakeSettings: View { } } } - -struct MicLevelBar: View { - let level: Double - let segments: Int = 12 - - var body: some View { - HStack(spacing: 3) { - ForEach(0.. Double(idx) - RoundedRectangle(cornerRadius: 2) - .fill(fill ? self.segmentColor(for: idx) : Color.gray.opacity(0.35)) - .frame(width: 14, height: 10) - } - } - .padding(4) - .background( - RoundedRectangle(cornerRadius: 6) - .stroke(Color.gray.opacity(0.25), lineWidth: 1)) - } - - private func segmentColor(for idx: Int) -> Color { - let fraction = Double(idx + 1) / Double(self.segments) - if fraction < 0.65 { return .green } - if fraction < 0.85 { return .yellow } - return .red - } -} - -extension VoiceWakeTester: @unchecked Sendable {} diff --git a/apps/macos/Sources/Clawdis/VoiceWakeTestCard.swift b/apps/macos/Sources/Clawdis/VoiceWakeTestCard.swift new file mode 100644 index 000000000..e13018fd0 --- /dev/null +++ b/apps/macos/Sources/Clawdis/VoiceWakeTestCard.swift @@ -0,0 +1,89 @@ +import SwiftUI + +struct VoiceWakeTestCard: View { + @Binding var testState: VoiceWakeTestState + @Binding var isTesting: Bool + let onToggle: () -> Void + + var body: some View { + VStack(alignment: .leading, spacing: 10) { + HStack { + Text("Test Voice Wake") + .font(.callout.weight(.semibold)) + Spacer() + Button(action: self.onToggle) { + Label( + self.isTesting ? "Stop" : "Start test", + systemImage: self.isTesting ? "stop.circle.fill" : "play.circle") + } + .buttonStyle(.borderedProminent) + .tint(self.isTesting ? 
+            }
+
+            HStack(spacing: 8) {
+                self.statusIcon
+                VStack(alignment: .leading, spacing: 4) {
+                    Text(self.statusText)
+                        .font(.subheadline)
+                        .frame(maxHeight: 22, alignment: .center)
+                    if case let .detected(text) = testState {
+                        Text("Heard: \(text)")
+                            .font(.footnote)
+                            .foregroundStyle(.secondary)
+                            .lineLimit(2)
+                    }
+                }
+                Spacer()
+            }
+            .padding(10)
+            .background(.quaternary.opacity(0.2))
+            .clipShape(RoundedRectangle(cornerRadius: 8))
+            .frame(minHeight: 54)
+        }
+        .padding(.vertical, 2)
+    }
+
+    private var statusIcon: some View {
+        switch self.testState {
+        case .idle:
+            AnyView(Image(systemName: "waveform").foregroundStyle(.secondary))
+
+        case .requesting:
+            AnyView(ProgressView().controlSize(.small))
+
+        case .listening, .hearing:
+            AnyView(
+                Image(systemName: "ear.and.waveform")
+                    .symbolEffect(.pulse)
+                    .foregroundStyle(Color.accentColor))
+
+        case .detected:
+            AnyView(Image(systemName: "checkmark.circle.fill").foregroundStyle(.green))
+
+        case .failed:
+            AnyView(Image(systemName: "exclamationmark.triangle.fill").foregroundStyle(.yellow))
+        }
+    }
+
+    private var statusText: String {
+        switch self.testState {
+        case .idle:
+            "Press start, say a trigger word, and wait for detection."
+
+        case .requesting:
+            "Requesting mic & speech permission…"
+
+        case .listening:
+            "Listening… say your trigger word."
+
+        case let .hearing(text):
+            "Heard: \(text)"
+
+        case .detected:
+            "Voice wake detected!"
+
+        case let .failed(reason):
+            reason
+        }
+    }
+}
diff --git a/apps/macos/Sources/Clawdis/VoiceWakeTester.swift b/apps/macos/Sources/Clawdis/VoiceWakeTester.swift
new file mode 100644
index 000000000..11cdbd9f7
--- /dev/null
+++ b/apps/macos/Sources/Clawdis/VoiceWakeTester.swift
@@ -0,0 +1,229 @@
+import AVFoundation
+import Foundation
+import OSLog
+import Speech
+
+enum VoiceWakeTestState: Equatable {
+    case idle
+    case requesting
+    case listening
+    case hearing(String)
+    case detected(String)
+    case failed(String)
+}
+
+final class VoiceWakeTester {
+    private let recognizer: SFSpeechRecognizer?
+    private let audioEngine = AVAudioEngine()
+    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
+    private var recognitionTask: SFSpeechRecognitionTask?
+    private var isStopping = false
+    private var detectionStart: Date?
+    private var lastHeard: Date?
+    private var holdingAfterDetect = false
+    private var detectedText: String?
+    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "voicewake")
+
+    init(locale: Locale = .current) {
+        self.recognizer = SFSpeechRecognizer(locale: locale)
+    }
+
+    func start(
+        triggers: [String],
+        micID: String?,
+        localeID: String?,
+        onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void) async throws
+    {
+        guard self.recognitionTask == nil else { return }
+        self.isStopping = false
+        let chosenLocale = localeID.flatMap { Locale(identifier: $0) } ?? Locale.current
+        let recognizer = SFSpeechRecognizer(locale: chosenLocale)
+        guard let recognizer, recognizer.isAvailable else {
+            throw NSError(
+                domain: "VoiceWakeTester",
+                code: 1,
+                userInfo: [NSLocalizedDescriptionKey: "Speech recognition unavailable"])
+        }
+
+        guard Self.hasPrivacyStrings else {
+            throw NSError(
+                domain: "VoiceWakeTester",
+                code: 3,
+                userInfo: [
+                    NSLocalizedDescriptionKey: """
+                    Missing mic/speech privacy strings. Rebuild the mac app (scripts/restart-mac.sh) \
+                    to include usage descriptions.
+ """, + ]) + } + + let granted = try await Self.ensurePermissions() + guard granted else { + throw NSError( + domain: "VoiceWakeTester", + code: 2, + userInfo: [NSLocalizedDescriptionKey: "Microphone or speech permission denied"]) + } + + self.configureSession(preferredMicID: micID) + + self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest() + self.recognitionRequest?.shouldReportPartialResults = true + let request = self.recognitionRequest + + let inputNode = self.audioEngine.inputNode + let format = inputNode.outputFormat(forBus: 0) + inputNode.removeTap(onBus: 0) + inputNode.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in + request?.append(buffer) + } + + self.audioEngine.prepare() + try self.audioEngine.start() + DispatchQueue.main.async { + onUpdate(.listening) + } + + self.detectionStart = Date() + self.lastHeard = self.detectionStart + + guard let request = recognitionRequest else { return } + + self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in + guard let self, !self.isStopping else { return } + let text = result?.bestTranscription.formattedString ?? "" + let matched = Self.matches(text: text, triggers: triggers) + let isFinal = result?.isFinal ?? false + let errorMessage = error?.localizedDescription + + Task { [weak self] in + guard let self, !self.isStopping else { return } + await self.handleResult( + matched: matched, + text: text, + isFinal: isFinal, + errorMessage: errorMessage, + onUpdate: onUpdate) + } + } + } + + func stop() { + self.isStopping = true + self.audioEngine.stop() + self.recognitionRequest?.endAudio() + self.recognitionTask?.cancel() + self.recognitionTask = nil + self.recognitionRequest = nil + self.audioEngine.inputNode.removeTap(onBus: 0) + } + + private func handleResult( + matched: Bool, + text: String, + isFinal: Bool, + errorMessage: String?, + onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void) async + { + if !text.isEmpty { + self.lastHeard = Date() + } + if matched, !text.isEmpty { + self.holdingAfterDetect = true + self.detectedText = text + self.logger.info("voice wake detected; forwarding (len=\(text.count))") + await MainActor.run { AppStateStore.shared.triggerVoiceEars() } + let config = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig } + Task.detached { + await VoiceWakeForwarder.forward(transcript: text, config: config) + } + Task { @MainActor in onUpdate(.detected(text)) } + self.holdUntilSilence(onUpdate: onUpdate) + return + } + if let errorMessage { + self.stop() + Task { @MainActor in onUpdate(.failed(errorMessage)) } + return + } + if isFinal { + self.stop() + let state: VoiceWakeTestState = text.isEmpty + ? .failed("No speech detected") + : .failed("No trigger heard: “\(text)”") + Task { @MainActor in onUpdate(state) } + } else { + let state: VoiceWakeTestState = text.isEmpty ? .listening : .hearing(text) + Task { @MainActor in onUpdate(state) } + } + } + + private func holdUntilSilence(onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void) { + Task { [weak self] in + guard let self else { return } + let start = self.detectionStart ?? Date() + let deadline = start.addingTimeInterval(10) + while !self.isStopping { + let now = Date() + if now >= deadline { break } + if let last = self.lastHeard, now.timeIntervalSince(last) >= 1 { + break + } + try? 
+            }
+            if !self.isStopping {
+                self.stop()
+                if let detectedText {
+                    self.logger.info("voice wake hold finished; len=\(detectedText.count)")
+                    Task { @MainActor in onUpdate(.detected(detectedText)) }
+                }
+            }
+        }
+    }
+
+    private func configureSession(preferredMicID: String?) {
+        _ = preferredMicID
+    }
+
+    private static func matches(text: String, triggers: [String]) -> Bool {
+        let lowered = text.lowercased()
+        return triggers.contains { lowered.contains($0.lowercased()) }
+    }
+
+    private nonisolated static func ensurePermissions() async throws -> Bool {
+        let speechStatus = SFSpeechRecognizer.authorizationStatus()
+        if speechStatus == .notDetermined {
+            let granted = await withCheckedContinuation { continuation in
+                SFSpeechRecognizer.requestAuthorization { status in
+                    continuation.resume(returning: status == .authorized)
+                }
+            }
+            guard granted else { return false }
+        } else if speechStatus != .authorized {
+            return false
+        }
+
+        let micStatus = AVCaptureDevice.authorizationStatus(for: .audio)
+        switch micStatus {
+        case .authorized: return true
+
+        case .notDetermined:
+            return await withCheckedContinuation { continuation in
+                AVCaptureDevice.requestAccess(for: .audio) { granted in
+                    continuation.resume(returning: granted)
+                }
+            }
+
+        default:
+            return false
+        }
+    }
+
+    private static var hasPrivacyStrings: Bool {
+        let speech = Bundle.main.object(forInfoDictionaryKey: "NSSpeechRecognitionUsageDescription") as? String
+        let mic = Bundle.main.object(forInfoDictionaryKey: "NSMicrophoneUsageDescription") as? String
+        return speech?.isEmpty == false && mic?.isEmpty == false
+    }
+}
+
+extension VoiceWakeTester: @unchecked Sendable {}