feat: share wake gate via SwabbleKit

This commit is contained in:
Peter Steinberger
2025-12-23 01:30:40 +01:00
parent cf48d297dd
commit ef35868bef
2945 changed files with 27887 additions and 122 deletions

View File

@@ -2,11 +2,13 @@ import AVFoundation
import Foundation
import Speech
@available(macOS 26.0, iOS 26.0, *)
public struct SpeechSegment: Sendable {
public let text: String
public let isFinal: Bool
}
@available(macOS 26.0, iOS 26.0, *)
public enum SpeechPipelineError: Error {
case authorizationDenied
case analyzerFormatUnavailable
@@ -14,6 +16,7 @@ public enum SpeechPipelineError: Error {
}
/// Live microphone SpeechAnalyzer SpeechTranscriber pipeline.
@available(macOS 26.0, iOS 26.0, *)
public actor SpeechPipeline {
private struct UnsafeBuffer: @unchecked Sendable { let buffer: AVAudioPCMBuffer }

View File

@@ -0,0 +1,202 @@
import Foundation
public struct WakeWordSegment: Sendable, Equatable {
public let text: String
public let start: TimeInterval
public let duration: TimeInterval
public let range: Range<String.Index>?
public init(text: String, start: TimeInterval, duration: TimeInterval, range: Range<String.Index>? = nil) {
self.text = text
self.start = start
self.duration = duration
self.range = range
}
public var end: TimeInterval { start + duration }
}
public struct WakeWordGateConfig: Sendable, Equatable {
public var triggers: [String]
public var minPostTriggerGap: TimeInterval
public var minCommandLength: Int
public init(
triggers: [String],
minPostTriggerGap: TimeInterval = 0.45,
minCommandLength: Int = 1)
{
self.triggers = triggers
self.minPostTriggerGap = minPostTriggerGap
self.minCommandLength = minCommandLength
}
}
public struct WakeWordGateMatch: Sendable, Equatable {
public let triggerEndTime: TimeInterval
public let postGap: TimeInterval
public let command: String
public init(triggerEndTime: TimeInterval, postGap: TimeInterval, command: String) {
self.triggerEndTime = triggerEndTime
self.postGap = postGap
self.command = command
}
}
public enum WakeWordGate {
private struct Token {
let normalized: String
let start: TimeInterval
let end: TimeInterval
let range: Range<String.Index>?
let text: String
}
private struct TriggerTokens {
let tokens: [String]
}
public static func match(
transcript: String,
segments: [WakeWordSegment],
config: WakeWordGateConfig)
-> WakeWordGateMatch? {
let triggerTokens = normalizeTriggers(config.triggers)
guard !triggerTokens.isEmpty else { return nil }
let tokens = normalizeSegments(segments)
guard !tokens.isEmpty else { return nil }
var bestIndex: Int?
var bestTriggerEnd: TimeInterval = 0
var bestGap: TimeInterval = 0
for trigger in triggerTokens {
let count = trigger.tokens.count
guard count > 0, tokens.count > count else { continue }
for i in 0...(tokens.count - count - 1) {
var matched = true
for t in 0..<count {
if tokens[i + t].normalized != trigger.tokens[t] {
matched = false
break
}
}
if !matched { continue }
let triggerEnd = tokens[i + count - 1].end
let nextToken = tokens[i + count]
let gap = nextToken.start - triggerEnd
if gap < config.minPostTriggerGap { continue }
if let bestIndex, i <= bestIndex { continue }
bestIndex = i
bestTriggerEnd = triggerEnd
bestGap = gap
}
}
guard let bestIndex else { return nil }
let command = commandText(transcript: transcript, segments: segments, triggerEndTime: bestTriggerEnd)
.trimmingCharacters(in: Self.whitespaceAndPunctuation)
guard command.count >= config.minCommandLength else { return nil }
return WakeWordGateMatch(triggerEndTime: bestTriggerEnd, postGap: bestGap, command: command)
}
public static func commandText(
transcript: String,
segments: [WakeWordSegment],
triggerEndTime: TimeInterval)
-> String {
let threshold = triggerEndTime + 0.001
for segment in segments where segment.start >= threshold {
if normalizeToken(segment.text).isEmpty { continue }
if let range = segment.range {
let slice = transcript[range.lowerBound...]
return String(slice).trimmingCharacters(in: Self.whitespaceAndPunctuation)
}
break
}
let text = segments
.filter { $0.start >= threshold && !normalizeToken($0.text).isEmpty }
.map { $0.text }
.joined(separator: " ")
return text.trimmingCharacters(in: Self.whitespaceAndPunctuation)
}
public static func matchesTextOnly(text: String, triggers: [String]) -> Bool {
guard !text.isEmpty else { return false }
let normalized = text.lowercased()
for trigger in triggers {
let token = trigger.trimmingCharacters(in: whitespaceAndPunctuation).lowercased()
if token.isEmpty { continue }
if normalized.contains(token) { return true }
}
return false
}
public static func stripWake(text: String, triggers: [String]) -> String {
var out = text
for trigger in triggers {
let token = trigger.trimmingCharacters(in: whitespaceAndPunctuation)
guard !token.isEmpty else { continue }
out = out.replacingOccurrences(of: token, with: "", options: [.caseInsensitive])
}
return out.trimmingCharacters(in: whitespaceAndPunctuation)
}
private static func normalizeTriggers(_ triggers: [String]) -> [TriggerTokens] {
var output: [TriggerTokens] = []
for trigger in triggers {
let tokens = trigger
.split(whereSeparator: { $0.isWhitespace })
.map { normalizeToken(String($0)) }
.filter { !$0.isEmpty }
if tokens.isEmpty { continue }
output.append(TriggerTokens(tokens: tokens))
}
return output
}
private static func normalizeSegments(_ segments: [WakeWordSegment]) -> [Token] {
segments.compactMap { segment in
let normalized = normalizeToken(segment.text)
guard !normalized.isEmpty else { return nil }
return Token(
normalized: normalized,
start: segment.start,
end: segment.end,
range: segment.range,
text: segment.text)
}
}
private static func normalizeToken(_ token: String) -> String {
token
.trimmingCharacters(in: whitespaceAndPunctuation)
.lowercased()
}
private static let whitespaceAndPunctuation = CharacterSet.whitespacesAndNewlines
.union(.punctuationCharacters)
}
#if canImport(Speech)
import Speech
public enum WakeWordSpeechSegments {
public static func from(transcription: SFTranscription, transcript: String) -> [WakeWordSegment] {
transcription.segments.map { segment in
let range = Range(segment.substringRange, in: transcript)
return WakeWordSegment(
text: segment.substring,
start: segment.timestamp,
duration: segment.duration,
range: range)
}
}
}
#endif

View File

@@ -1,6 +1,7 @@
import Commander
import Foundation
@available(macOS 26.0, *)
@MainActor
enum CLIRegistry {
static var descriptors: [CommandDescriptor] {

View File

@@ -1,7 +1,9 @@
import Commander
import Foundation
import Swabble
import SwabbleKit
@available(macOS 26.0, *)
@MainActor
struct ServeCommand: ParsableCommand {
@Option(name: .long("config"), help: "Path to config JSON") var configPath: String?
@@ -68,17 +70,12 @@ struct ServeCommand: ParsableCommand {
}
private static func matchesWake(text: String, cfg: SwabbleConfig) -> Bool {
let lowered = text.lowercased()
if lowered.contains(cfg.wake.word.lowercased()) { return true }
return cfg.wake.aliases.contains(where: { lowered.contains($0.lowercased()) })
let triggers = [cfg.wake.word] + cfg.wake.aliases
return WakeWordGate.matchesTextOnly(text: text, triggers: triggers)
}
private static func stripWake(text: String, cfg: SwabbleConfig) -> String {
var out = text
out = out.replacingOccurrences(of: cfg.wake.word, with: "", options: [.caseInsensitive])
for alias in cfg.wake.aliases {
out = out.replacingOccurrences(of: alias, with: "", options: [.caseInsensitive])
}
return out.trimmingCharacters(in: .whitespacesAndNewlines)
let triggers = [cfg.wake.word] + cfg.wake.aliases
return WakeWordGate.stripWake(text: text, triggers: triggers)
}
}

View File

@@ -1,6 +1,7 @@
import Commander
import Foundation
@available(macOS 26.0, *)
@MainActor
private func runCLI() async -> Int32 {
do {
@@ -15,6 +16,7 @@ private func runCLI() async -> Int32 {
}
}
@available(macOS 26.0, *)
@MainActor
private func dispatch(invocation: CommandInvocation) async throws {
let parsed = invocation.parsedValues
@@ -95,5 +97,10 @@ private func dispatch(invocation: CommandInvocation) async throws {
}
}
let exitCode = await runCLI()
exit(exitCode)
if #available(macOS 26.0, *) {
let exitCode = await runCLI()
exit(exitCode)
} else {
fputs("error: swabble requires macOS 26 or newer\n", stderr)
exit(1)
}