Mac: format voice wake files

This commit is contained in:
Xaden Ryan
2026-01-07 18:35:52 -07:00
committed by Peter Steinberger
parent da972b119d
commit a15943c809
4 changed files with 41 additions and 41 deletions

View File

@@ -526,7 +526,7 @@ struct MenuContent: View {
deviceTypes: [.external, .microphone], deviceTypes: [.external, .microphone],
mediaType: .audio, mediaType: .audio,
position: .unspecified) position: .unspecified)
let connectedDevices = discovery.devices.filter { $0.isConnected } let connectedDevices = discovery.devices.filter(\.isConnected)
self.availableMics = connectedDevices self.availableMics = connectedDevices
.sorted { lhs, rhs in .sorted { lhs, rhs in
lhs.localizedName.localizedCaseInsensitiveCompare(rhs.localizedName) == .orderedAscending lhs.localizedName.localizedCaseInsensitiveCompare(rhs.localizedName) == .orderedAscending

View File

@@ -212,7 +212,7 @@ actor VoiceWakeRuntime {
let preferred = config.micID?.isEmpty == false ? config.micID! : "system-default" let preferred = config.micID?.isEmpty == false ? config.micID! : "system-default"
self.logger.info( self.logger.info(
"voicewake runtime input preferred=\(preferred, privacy: .public) " + "voicewake runtime input preferred=\(preferred, privacy: .public) " +
"\(AudioInputDeviceObserver.defaultInputDeviceSummary(), privacy: .public)") "\(AudioInputDeviceObserver.defaultInputDeviceSummary(), privacy: .public)")
self.logger.info("voicewake runtime started") self.logger.info("voicewake runtime started")
DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "started", fields: [ DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "started", fields: [
"locale": config.localeID ?? "", "locale": config.localeID ?? "",
@@ -377,8 +377,8 @@ actor VoiceWakeRuntime {
isFinal: Bool, isFinal: Bool,
match: WakeWordGateMatch?, match: WakeWordGateMatch?,
usedFallback: Bool, usedFallback: Bool,
capturing: Bool capturing: Bool)
) { {
guard !transcript.isEmpty else { return } guard !transcript.isEmpty else { return }
if transcript == self.lastLoggedText, !isFinal { if transcript == self.lastLoggedText, !isFinal {
if let last = self.lastLoggedAt, Date().timeIntervalSince(last) < 0.25 { if let last = self.lastLoggedAt, Date().timeIntervalSince(last) < 0.25 {
@@ -389,7 +389,7 @@ actor VoiceWakeRuntime {
self.lastLoggedAt = Date() self.lastLoggedAt = Date()
let textOnly = WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) let textOnly = WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers)
let timingCount = segments.filter { $0.start > 0 || $0.duration > 0 }.count let timingCount = segments.count(where: { $0.start > 0 || $0.duration > 0 })
let matchSummary = match.map { let matchSummary = match.map {
"match=true gap=\(String(format: "%.2f", $0.postGap))s cmdLen=\($0.command.count)" "match=true gap=\(String(format: "%.2f", $0.postGap))s cmdLen=\($0.command.count)"
} ?? "match=false" } ?? "match=false"
@@ -401,9 +401,9 @@ actor VoiceWakeRuntime {
self.logger.info( self.logger.info(
"voicewake runtime transcript='\(transcript, privacy: .public)' textOnly=\(textOnly) " + "voicewake runtime transcript='\(transcript, privacy: .public)' textOnly=\(textOnly) " +
"isFinal=\(isFinal) timing=\(timingCount)/\(segments.count) " + "isFinal=\(isFinal) timing=\(timingCount)/\(segments.count) " +
"capturing=\(capturing) fallback=\(usedFallback) " + "capturing=\(capturing) fallback=\(usedFallback) " +
"\(matchSummary) segments=[\(segmentSummary, privacy: .public)]") "\(matchSummary) segments=[\(segmentSummary, privacy: .public)]")
} }
private func noteAudioTap(rms: Double) { private func noteAudioTap(rms: Double) {
@@ -471,8 +471,8 @@ actor VoiceWakeRuntime {
lastSeenAt: Date?, lastSeenAt: Date?,
lastText: String?, lastText: String?,
triggers: [String], triggers: [String],
config: RuntimeConfig config: RuntimeConfig) async
) async { {
guard !Task.isCancelled else { return } guard !Task.isCancelled else { return }
guard !self.isCapturing else { return } guard !self.isCapturing else { return }
guard let lastSeenAt, let lastText else { return } guard let lastSeenAt, let lastText else { return }
@@ -488,8 +488,8 @@ actor VoiceWakeRuntime {
private func textOnlyFallbackMatch( private func textOnlyFallbackMatch(
transcript: String, transcript: String,
triggers: [String], triggers: [String],
config: WakeWordGateConfig config: WakeWordGateConfig) -> WakeWordGateMatch?
) -> WakeWordGateMatch? { {
guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return nil } guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return nil }
guard Self.startsWithTrigger(transcript: transcript, triggers: triggers) else { return nil } guard Self.startsWithTrigger(transcript: transcript, triggers: triggers) else { return nil }
let trimmed = Self.trimmedAfterTrigger(transcript, triggers: triggers) let trimmed = Self.trimmedAfterTrigger(transcript, triggers: triggers)
@@ -745,13 +745,13 @@ actor VoiceWakeRuntime {
private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool { private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
let tokens = transcript let tokens = transcript
.split(whereSeparator: { $0.isWhitespace }) .split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) } .map { self.normalizeToken(String($0)) }
.filter { !$0.isEmpty } .filter { !$0.isEmpty }
guard !tokens.isEmpty else { return false } guard !tokens.isEmpty else { return false }
for trigger in triggers { for trigger in triggers {
let triggerTokens = trigger let triggerTokens = trigger
.split(whereSeparator: { $0.isWhitespace }) .split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) } .map { self.normalizeToken(String($0)) }
.filter { !$0.isEmpty } .filter { !$0.isEmpty }
guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue } guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue }
if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) { if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) {
@@ -763,7 +763,7 @@ actor VoiceWakeRuntime {
private static func normalizeToken(_ token: String) -> String { private static func normalizeToken(_ token: String) -> String {
token token
.trimmingCharacters(in: Self.whitespaceAndPunctuation) .trimmingCharacters(in: self.whitespaceAndPunctuation)
.lowercased() .lowercased()
} }

View File

@@ -1,8 +1,8 @@
import AppKit import AppKit
import AVFoundation import AVFoundation
import Observation import Observation
import SwabbleKit
import Speech import Speech
import SwabbleKit
import SwiftUI import SwiftUI
import UniformTypeIdentifiers import UniformTypeIdentifiers
@@ -371,9 +371,9 @@ struct VoiceWakeSettings: View {
private static func textOnlyCommand(from transcript: String, triggers: [String]) -> String? { private static func textOnlyCommand(from transcript: String, triggers: [String]) -> String? {
guard !transcript.isEmpty else { return nil } guard !transcript.isEmpty else { return nil }
let normalized = normalizeToken(transcript) let normalized = self.normalizeToken(transcript)
guard !normalized.isEmpty else { return nil } guard !normalized.isEmpty else { return nil }
guard startsWithTrigger(transcript: transcript, triggers: triggers) else { return nil } guard self.startsWithTrigger(transcript: transcript, triggers: triggers) else { return nil }
guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return nil } guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return nil }
let trimmed = WakeWordGate.stripWake(text: transcript, triggers: triggers) let trimmed = WakeWordGate.stripWake(text: transcript, triggers: triggers)
return trimmed.isEmpty ? nil : trimmed return trimmed.isEmpty ? nil : trimmed
@@ -382,13 +382,13 @@ struct VoiceWakeSettings: View {
private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool { private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
let tokens = transcript let tokens = transcript
.split(whereSeparator: { $0.isWhitespace }) .split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) } .map { self.normalizeToken(String($0)) }
.filter { !$0.isEmpty } .filter { !$0.isEmpty }
guard !tokens.isEmpty else { return false } guard !tokens.isEmpty else { return false }
for trigger in triggers { for trigger in triggers {
let triggerTokens = trigger let triggerTokens = trigger
.split(whereSeparator: { $0.isWhitespace }) .split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) } .map { self.normalizeToken(String($0)) }
.filter { !$0.isEmpty } .filter { !$0.isEmpty }
guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue } guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue }
if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) { if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) {
@@ -400,7 +400,7 @@ struct VoiceWakeSettings: View {
private static func normalizeToken(_ token: String) -> String { private static func normalizeToken(_ token: String) -> String {
token token
.trimmingCharacters(in: Self.whitespaceAndPunctuation) .trimmingCharacters(in: self.whitespaceAndPunctuation)
.lowercased() .lowercased()
} }
@@ -528,14 +528,14 @@ struct VoiceWakeSettings: View {
@MainActor @MainActor
private func loadMicsIfNeeded(force: Bool = false) async { private func loadMicsIfNeeded(force: Bool = false) async {
guard (force || self.availableMics.isEmpty), !self.loadingMics else { return } guard force || self.availableMics.isEmpty, !self.loadingMics else { return }
self.loadingMics = true self.loadingMics = true
let discovery = AVCaptureDevice.DiscoverySession( let discovery = AVCaptureDevice.DiscoverySession(
deviceTypes: [.external, .microphone], deviceTypes: [.external, .microphone],
mediaType: .audio, mediaType: .audio,
position: .unspecified) position: .unspecified)
let aliveUIDs = AudioInputDeviceObserver.aliveInputDeviceUIDs() let aliveUIDs = AudioInputDeviceObserver.aliveInputDeviceUIDs()
let connectedDevices = discovery.devices.filter { $0.isConnected } let connectedDevices = discovery.devices.filter(\.isConnected)
let devices = aliveUIDs.isEmpty let devices = aliveUIDs.isEmpty
? connectedDevices ? connectedDevices
: connectedDevices.filter { aliveUIDs.contains($0.uniqueID) } : connectedDevices.filter { aliveUIDs.contains($0.uniqueID) }

View File

@@ -263,7 +263,8 @@ final class VoiceWakeTester {
segments: [WakeWordSegment], segments: [WakeWordSegment],
triggers: [String], triggers: [String],
match: WakeWordGateMatch?, match: WakeWordGateMatch?,
isFinal: Bool) { isFinal: Bool)
{
guard !transcript.isEmpty else { return } guard !transcript.isEmpty else { return }
if transcript == self.lastLoggedText, !isFinal { if transcript == self.lastLoggedText, !isFinal {
if let last = self.lastLoggedAt, Date().timeIntervalSince(last) < 0.25 { if let last = self.lastLoggedAt, Date().timeIntervalSince(last) < 0.25 {
@@ -276,15 +277,15 @@ final class VoiceWakeTester {
let textOnly = WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) let textOnly = WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers)
let gaps = Self.debugCandidateGaps(triggers: triggers, segments: segments) let gaps = Self.debugCandidateGaps(triggers: triggers, segments: segments)
let segmentSummary = Self.debugSegments(segments) let segmentSummary = Self.debugSegments(segments)
let timingCount = segments.filter { $0.start > 0 || $0.duration > 0 }.count let timingCount = segments.count(where: { $0.start > 0 || $0.duration > 0 })
let matchSummary = match.map { let matchSummary = match.map {
"match=true gap=\(String(format: "%.2f", $0.postGap))s cmdLen=\($0.command.count)" "match=true gap=\(String(format: "%.2f", $0.postGap))s cmdLen=\($0.command.count)"
} ?? "match=false" } ?? "match=false"
self.logger.info( self.logger.info(
"voicewake test transcript='\(transcript, privacy: .public)' textOnly=\(textOnly) " + "voicewake test transcript='\(transcript, privacy: .public)' textOnly=\(textOnly) " +
"isFinal=\(isFinal) timing=\(timingCount)/\(segments.count) " + "isFinal=\(isFinal) timing=\(timingCount)/\(segments.count) " +
"\(matchSummary) gaps=[\(gaps, privacy: .public)] segments=[\(segmentSummary, privacy: .public)]") "\(matchSummary) gaps=[\(gaps, privacy: .public)] segments=[\(segmentSummary, privacy: .public)]")
} }
private static func debugSegments(_ segments: [WakeWordSegment]) -> String { private static func debugSegments(_ segments: [WakeWordSegment]) -> String {
@@ -296,9 +297,9 @@ final class VoiceWakeTester {
} }
private static func debugCandidateGaps(triggers: [String], segments: [WakeWordSegment]) -> String { private static func debugCandidateGaps(triggers: [String], segments: [WakeWordSegment]) -> String {
let tokens = normalizeSegments(segments) let tokens = self.normalizeSegments(segments)
guard !tokens.isEmpty else { return "" } guard !tokens.isEmpty else { return "" }
let triggerTokens = normalizeTriggers(triggers) let triggerTokens = self.normalizeTriggers(triggers)
var gaps: [String] = [] var gaps: [String] = []
for trigger in triggerTokens { for trigger in triggerTokens {
@@ -332,7 +333,7 @@ final class VoiceWakeTester {
for trigger in triggers { for trigger in triggers {
let tokens = trigger let tokens = trigger
.split(whereSeparator: { $0.isWhitespace }) .split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) } .map { self.normalizeToken(String($0)) }
.filter { !$0.isEmpty } .filter { !$0.isEmpty }
if tokens.isEmpty { continue } if tokens.isEmpty { continue }
output.append(DebugTriggerTokens(tokens: tokens)) output.append(DebugTriggerTokens(tokens: tokens))
@@ -342,7 +343,7 @@ final class VoiceWakeTester {
private static func normalizeSegments(_ segments: [WakeWordSegment]) -> [DebugToken] { private static func normalizeSegments(_ segments: [WakeWordSegment]) -> [DebugToken] {
segments.compactMap { segment in segments.compactMap { segment in
let normalized = normalizeToken(segment.text) let normalized = self.normalizeToken(segment.text)
guard !normalized.isEmpty else { return nil } guard !normalized.isEmpty else { return nil }
return DebugToken( return DebugToken(
normalized: normalized, normalized: normalized,
@@ -353,7 +354,7 @@ final class VoiceWakeTester {
private static func normalizeToken(_ token: String) -> String { private static func normalizeToken(_ token: String) -> String {
token token
.trimmingCharacters(in: Self.whitespaceAndPunctuation) .trimmingCharacters(in: self.whitespaceAndPunctuation)
.lowercased() .lowercased()
} }
@@ -363,8 +364,8 @@ final class VoiceWakeTester {
private func textOnlyFallbackMatch( private func textOnlyFallbackMatch(
transcript: String, transcript: String,
triggers: [String], triggers: [String],
config: WakeWordGateConfig config: WakeWordGateConfig) -> WakeWordGateMatch?
) -> WakeWordGateMatch? { {
guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return nil } guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return nil }
guard Self.startsWithTrigger(transcript: transcript, triggers: triggers) else { return nil } guard Self.startsWithTrigger(transcript: transcript, triggers: triggers) else { return nil }
let trimmed = WakeWordGate.stripWake(text: transcript, triggers: triggers) let trimmed = WakeWordGate.stripWake(text: transcript, triggers: triggers)
@@ -375,13 +376,13 @@ final class VoiceWakeTester {
private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool { private static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool {
let tokens = transcript let tokens = transcript
.split(whereSeparator: { $0.isWhitespace }) .split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) } .map { self.normalizeToken(String($0)) }
.filter { !$0.isEmpty } .filter { !$0.isEmpty }
guard !tokens.isEmpty else { return false } guard !tokens.isEmpty else { return false }
for trigger in triggers { for trigger in triggers {
let triggerTokens = trigger let triggerTokens = trigger
.split(whereSeparator: { $0.isWhitespace }) .split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) } .map { self.normalizeToken(String($0)) }
.filter { !$0.isEmpty } .filter { !$0.isEmpty }
guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue } guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue }
if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) { if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) {
@@ -418,8 +419,8 @@ final class VoiceWakeTester {
private func scheduleSilenceCheck( private func scheduleSilenceCheck(
triggers: [String], triggers: [String],
onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void onUpdate: @escaping @Sendable (VoiceWakeTestState) -> Void)
) { {
self.silenceTask?.cancel() self.silenceTask?.cancel()
let lastSeenAt = self.lastTranscriptAt let lastSeenAt = self.lastTranscriptAt
let lastText = self.lastTranscript let lastText = self.lastTranscript
@@ -433,8 +434,7 @@ final class VoiceWakeTester {
guard let match = self.textOnlyFallbackMatch( guard let match = self.textOnlyFallbackMatch(
transcript: lastText, transcript: lastText,
triggers: triggers, triggers: triggers,
config: WakeWordGateConfig(triggers: triggers) config: WakeWordGateConfig(triggers: triggers)) else { return }
) else { return }
self.holdingAfterDetect = true self.holdingAfterDetect = true
self.detectedText = match.command self.detectedText = match.command
self.logger.info("voice wake detected (test, silence) (len=\(match.command.count))") self.logger.info("voice wake detected (test, silence) (len=\(match.command.count))")
@@ -455,7 +455,7 @@ final class VoiceWakeTester {
let preferred = (preferredMicID?.isEmpty == false) ? preferredMicID! : "system-default" let preferred = (preferredMicID?.isEmpty == false) ? preferredMicID! : "system-default"
self.logger.info( self.logger.info(
"voicewake test input preferred=\(preferred, privacy: .public) " + "voicewake test input preferred=\(preferred, privacy: .public) " +
"\(AudioInputDeviceObserver.defaultInputDeviceSummary(), privacy: .public)") "\(AudioInputDeviceObserver.defaultInputDeviceSummary(), privacy: .public)")
} }
private nonisolated static func ensurePermissions() async throws -> Bool { private nonisolated static func ensurePermissions() async throws -> Bool {