chore: vendor swabble and add speech usage strings
This commit is contained in:
77
Swabble/Sources/SwabbleCore/Config/Config.swift
Normal file
77
Swabble/Sources/SwabbleCore/Config/Config.swift
Normal file
@@ -0,0 +1,77 @@
|
||||
import Foundation
|
||||
|
||||
/// User-facing configuration for Swabble, persisted as JSON.
///
/// Every field has a default value. Decoding is tolerant: a key that is absent
/// from the JSON keeps its default instead of failing the whole decode, so
/// partial or older configuration files keep loading.
public struct SwabbleConfig: Codable, Sendable {
    /// Audio input settings.
    public struct Audio: Codable, Sendable {
        public var deviceName: String = ""
        public var deviceIndex: Int = -1
        public var sampleRate: Double = 16000
        public var channels: Int = 1

        private enum CodingKeys: String, CodingKey {
            case deviceName, deviceIndex, sampleRate, channels
        }
    }

    /// Wake-word settings.
    public struct Wake: Codable, Sendable {
        public var enabled: Bool = true
        public var word: String = "clawd"
        public var aliases: [String] = ["claude"]

        private enum CodingKeys: String, CodingKey {
            case enabled, word, aliases
        }
    }

    /// Settings for the external hook command.
    public struct Hook: Codable, Sendable {
        public var command: String = ""
        public var args: [String] = []
        /// Text placed in front of the transcript; `${hostname}` is a placeholder.
        public var prefix: String = "Voice swabble from ${hostname}: "
        public var cooldownSeconds: Double = 1
        public var minCharacters: Int = 24
        public var timeoutSeconds: Double = 5
        public var env: [String: String] = [:]

        private enum CodingKeys: String, CodingKey {
            case command, args, prefix, cooldownSeconds, minCharacters, timeoutSeconds, env
        }
    }

    /// Logging settings.
    public struct Logging: Codable, Sendable {
        public var level: String = "info"
        public var format: String = "text" // text|json placeholder

        private enum CodingKeys: String, CodingKey {
            case level, format
        }
    }

    /// Transcript history settings.
    public struct Transcripts: Codable, Sendable {
        public var enabled: Bool = true
        public var maxEntries: Int = 50

        private enum CodingKeys: String, CodingKey {
            case enabled, maxEntries
        }
    }

    /// Speech recognition settings.
    public struct Speech: Codable, Sendable {
        public var localeIdentifier: String = Locale.current.identifier
        public var etiquetteReplacements: Bool = false

        private enum CodingKeys: String, CodingKey {
            case localeIdentifier, etiquetteReplacements
        }
    }

    public var audio = Audio()
    public var wake = Wake()
    public var hook = Hook()
    public var logging = Logging()
    public var transcripts = Transcripts()
    public var speech = Speech()

    /// Default on-disk location: `~/.config/swabble/config.json`.
    public static let defaultPath = FileManager.default
        .homeDirectoryForCurrentUser
        .appendingPathComponent(".config/swabble/config.json")

    /// Creates a configuration populated entirely with defaults.
    public init() {}

    private enum CodingKeys: String, CodingKey {
        case audio, wake, hook, logging, transcripts, speech
    }
}

// MARK: - Tolerant decoding
//
// The decoders live in extensions so the nested structs keep their implicit
// default and memberwise initializers. Each one starts from the defaults and
// overwrites only the keys present in the JSON.

extension SwabbleConfig {
    public init(from decoder: Decoder) throws {
        self.init()
        let values = try decoder.container(keyedBy: CodingKeys.self)
        audio = try values.decodeIfPresent(Audio.self, forKey: .audio) ?? audio
        wake = try values.decodeIfPresent(Wake.self, forKey: .wake) ?? wake
        hook = try values.decodeIfPresent(Hook.self, forKey: .hook) ?? hook
        logging = try values.decodeIfPresent(Logging.self, forKey: .logging) ?? logging
        transcripts = try values.decodeIfPresent(Transcripts.self, forKey: .transcripts) ?? transcripts
        speech = try values.decodeIfPresent(Speech.self, forKey: .speech) ?? speech
    }
}

extension SwabbleConfig.Audio {
    public init(from decoder: Decoder) throws {
        self.init()
        let values = try decoder.container(keyedBy: CodingKeys.self)
        deviceName = try values.decodeIfPresent(String.self, forKey: .deviceName) ?? deviceName
        deviceIndex = try values.decodeIfPresent(Int.self, forKey: .deviceIndex) ?? deviceIndex
        sampleRate = try values.decodeIfPresent(Double.self, forKey: .sampleRate) ?? sampleRate
        channels = try values.decodeIfPresent(Int.self, forKey: .channels) ?? channels
    }
}

extension SwabbleConfig.Wake {
    public init(from decoder: Decoder) throws {
        self.init()
        let values = try decoder.container(keyedBy: CodingKeys.self)
        enabled = try values.decodeIfPresent(Bool.self, forKey: .enabled) ?? enabled
        word = try values.decodeIfPresent(String.self, forKey: .word) ?? word
        aliases = try values.decodeIfPresent([String].self, forKey: .aliases) ?? aliases
    }
}

extension SwabbleConfig.Hook {
    public init(from decoder: Decoder) throws {
        self.init()
        let values = try decoder.container(keyedBy: CodingKeys.self)
        command = try values.decodeIfPresent(String.self, forKey: .command) ?? command
        args = try values.decodeIfPresent([String].self, forKey: .args) ?? args
        prefix = try values.decodeIfPresent(String.self, forKey: .prefix) ?? prefix
        cooldownSeconds = try values.decodeIfPresent(Double.self, forKey: .cooldownSeconds) ?? cooldownSeconds
        minCharacters = try values.decodeIfPresent(Int.self, forKey: .minCharacters) ?? minCharacters
        timeoutSeconds = try values.decodeIfPresent(Double.self, forKey: .timeoutSeconds) ?? timeoutSeconds
        env = try values.decodeIfPresent([String: String].self, forKey: .env) ?? env
    }
}

extension SwabbleConfig.Logging {
    public init(from decoder: Decoder) throws {
        self.init()
        let values = try decoder.container(keyedBy: CodingKeys.self)
        level = try values.decodeIfPresent(String.self, forKey: .level) ?? level
        format = try values.decodeIfPresent(String.self, forKey: .format) ?? format
    }
}

extension SwabbleConfig.Transcripts {
    public init(from decoder: Decoder) throws {
        self.init()
        let values = try decoder.container(keyedBy: CodingKeys.self)
        enabled = try values.decodeIfPresent(Bool.self, forKey: .enabled) ?? enabled
        maxEntries = try values.decodeIfPresent(Int.self, forKey: .maxEntries) ?? maxEntries
    }
}

extension SwabbleConfig.Speech {
    public init(from decoder: Decoder) throws {
        self.init()
        let values = try decoder.container(keyedBy: CodingKeys.self)
        localeIdentifier = try values.decodeIfPresent(String.self, forKey: .localeIdentifier) ?? localeIdentifier
        etiquetteReplacements = try values.decodeIfPresent(Bool.self, forKey: .etiquetteReplacements)
            ?? etiquetteReplacements
    }
}
|
||||
|
||||
/// Errors raised while locating the configuration file.
public enum ConfigError: Swift.Error {
    /// No file exists at the path the configuration was requested from.
    case missingConfig
}
|
||||
|
||||
/// Reads and writes `SwabbleConfig` as JSON on disk.
public enum ConfigLoader {
    /// Loads the configuration stored at `path`.
    ///
    /// - Parameter path: File to read; `SwabbleConfig.defaultPath` is used when `nil`.
    /// - Returns: The decoded configuration.
    /// - Throws: `ConfigError.missingConfig` when no file exists at the resolved path;
    ///   otherwise any error raised while reading or decoding the file.
    public static func load(at path: URL?) throws -> SwabbleConfig {
        let url = path ?? SwabbleConfig.defaultPath
        guard FileManager.default.fileExists(atPath: url.path) else {
            throw ConfigError.missingConfig
        }
        let data = try Data(contentsOf: url)
        return try JSONDecoder().decode(SwabbleConfig.self, from: data)
    }

    /// Writes `config` as JSON to `path`, creating parent directories as needed.
    ///
    /// - Parameters:
    ///   - config: Configuration to persist.
    ///   - path: Destination file; `SwabbleConfig.defaultPath` is used when `nil`.
    /// - Throws: Any error raised while creating the directory, encoding, or writing.
    public static func save(_ config: SwabbleConfig, at path: URL?) throws {
        let url = path ?? SwabbleConfig.defaultPath
        try FileManager.default.createDirectory(
            at: url.deletingLastPathComponent(),
            withIntermediateDirectories: true)

        // The file is meant to be edited by hand, so keep it readable and diff-stable.
        let encoder = JSONEncoder()
        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
        let data = try encoder.encode(config)

        // Atomic write: an interrupted save must not leave a truncated config behind.
        try data.write(to: url, options: .atomic)
    }
}
|
||||
75
Swabble/Sources/SwabbleCore/Hooks/HookRunner.swift
Normal file
75
Swabble/Sources/SwabbleCore/Hooks/HookRunner.swift
Normal file
@@ -0,0 +1,75 @@
|
||||
import Foundation
|
||||
|
||||
/// Input handed to `HookRunner.run(job:)`: a piece of text and a caller-supplied timestamp.
public struct HookJob: Sendable {
    /// Text forwarded to the hook.
    public let text: String
    /// Timestamp supplied by whoever created the job.
    public let timestamp: Date

    /// Creates a job from its two components.
    public init(text: String, timestamp: Date) {
        self.timestamp = timestamp
        self.text = text
    }
}
|
||||
|
||||
/// Runs the configured hook command for a `HookJob`, honouring a cooldown and a timeout.
public actor HookRunner {
    private let config: SwabbleConfig
    /// Time the hook was last launched or finished; `nil` until the first launch.
    private var lastRun: Date?
    private let hostname: String

    /// - Parameter config: Configuration whose `hook` section drives this runner.
    public init(config: SwabbleConfig) {
        self.config = config
        self.hostname = Host.current().localizedName ?? "host"
    }

    /// Whether the cooldown allows a run right now.
    ///
    /// Always `true` when the cooldown is not positive or the hook has never run.
    public func shouldRun() -> Bool {
        let cooldown = config.hook.cooldownSeconds
        guard cooldown > 0, let lastRun else { return true }
        return Date().timeIntervalSince(lastRun) >= cooldown
    }

    /// Launches the hook command with the job's text and waits for it to exit or time out.
    ///
    /// The command receives `hook.args` followed by `prefix + job.text` as its arguments,
    /// and `SWABBLE_TEXT` / `SWABBLE_PREFIX` plus `hook.env` in its environment.
    /// Returns without doing anything while the cooldown is active.
    ///
    /// - Parameter job: The text to forward.
    /// - Throws: An `NSError` (domain `"Hook"`, code 1) when no command is configured,
    ///   or any error raised while launching the process.
    public func run(job: HookJob) async throws {
        guard shouldRun() else { return }
        guard !config.hook.command.isEmpty else {
            throw NSError(
                domain: "Hook",
                code: 1,
                userInfo: [NSLocalizedDescriptionKey: "hook command not set"])
        }

        let prefix = config.hook.prefix.replacingOccurrences(of: "${hostname}", with: hostname)
        let payload = prefix + job.text

        let process = Process()
        process.executableURL = URL(fileURLWithPath: config.hook.command)
        process.arguments = config.hook.args + [payload]

        // Configured entries win over both the inherited environment and the SWABBLE_* values.
        var env = ProcessInfo.processInfo.environment
        env["SWABBLE_TEXT"] = job.text
        env["SWABBLE_PREFIX"] = prefix
        for (key, value) in config.hook.env {
            env[key] = value
        }
        process.environment = env

        // The hook's output is discarded. Sending it to the null device instead of an
        // unread pipe keeps a chatty hook from blocking once the pipe buffer fills up.
        process.standardOutput = FileHandle.nullDevice
        process.standardError = FileHandle.nullDevice

        try process.run()

        // Actors are re-entrant across `await`: start the cooldown at launch so a second
        // `run` arriving while this hook is still executing is rejected by `shouldRun()`.
        lastRun = Date()

        let timeoutNanos = UInt64(max(config.hook.timeoutSeconds, 0.1) * 1_000_000_000)
        try await withThrowingTaskGroup(of: Void.self) { group in
            group.addTask {
                process.waitUntilExit()
            }
            group.addTask {
                try await Task.sleep(nanoseconds: timeoutNanos)
                if process.isRunning {
                    process.terminate()
                }
            }
            // Whichever finishes first (exit or timeout) ends the wait.
            try await group.next()
            group.cancelAll()
        }

        // Measure the cooldown from completion, as before.
        lastRun = Date()
    }
}
|
||||
50
Swabble/Sources/SwabbleCore/Speech/BufferConverter.swift
Normal file
50
Swabble/Sources/SwabbleCore/Speech/BufferConverter.swift
Normal file
@@ -0,0 +1,50 @@
|
||||
@preconcurrency import AVFoundation
|
||||
import Foundation
|
||||
|
||||
/// Converts PCM buffers to a target audio format, reusing one `AVAudioConverter` while
/// the input/output format pair stays the same.
final class BufferConverter {
    /// Mutable flag shared with the converter's input block.
    private final class Box<T>: @unchecked Sendable { var value: T; init(_ value: T) { self.value = value } }

    enum ConverterError: Swift.Error {
        case failedToCreateConverter
        case failedToCreateConversionBuffer
        case conversionFailed(NSError?)
    }

    private var converter: AVAudioConverter?

    /// Returns `buffer` converted to `format`.
    ///
    /// - Parameters:
    ///   - buffer: Source audio; returned unchanged when it is already in `format`.
    ///   - format: Desired output format.
    /// - Throws: `ConverterError` when the converter or output buffer cannot be created,
    ///   or when the conversion reports an error.
    func convert(_ buffer: AVAudioPCMBuffer, to format: AVAudioFormat) throws -> AVAudioPCMBuffer {
        let inputFormat = buffer.format
        guard inputFormat != format else { return buffer }

        // Rebuild the cached converter when EITHER side of the conversion changed. Checking
        // only the output format would reuse a stale converter after the input format changes.
        if converter?.inputFormat != inputFormat || converter?.outputFormat != format {
            converter = AVAudioConverter(from: inputFormat, to: format)
            converter?.primeMethod = .none
        }
        guard let converter else { throw ConverterError.failedToCreateConverter }

        // Size the output for the resampled frame count, rounded up.
        let sampleRateRatio = converter.outputFormat.sampleRate / converter.inputFormat.sampleRate
        let scaledInputFrameLength = Double(buffer.frameLength) * sampleRateRatio
        let frameCapacity = AVAudioFrameCount(scaledInputFrameLength.rounded(.up))
        guard let conversionBuffer = AVAudioPCMBuffer(
            pcmFormat: converter.outputFormat,
            frameCapacity: frameCapacity)
        else {
            throw ConverterError.failedToCreateConversionBuffer
        }

        var nsError: NSError?
        let consumed = Box(false)
        let inputBuffer = buffer
        // Hand the single input buffer over exactly once, then report that no more data is ready.
        let status = converter.convert(to: conversionBuffer, error: &nsError) { _, statusPtr in
            if consumed.value {
                statusPtr.pointee = .noDataNow
                return nil
            }
            consumed.value = true
            statusPtr.pointee = .haveData
            return inputBuffer
        }
        if status == .error {
            throw ConverterError.conversionFailed(nsError)
        }
        return conversionBuffer
    }
}
|
||||
111
Swabble/Sources/SwabbleCore/Speech/SpeechPipeline.swift
Normal file
111
Swabble/Sources/SwabbleCore/Speech/SpeechPipeline.swift
Normal file
@@ -0,0 +1,111 @@
|
||||
import AVFoundation
|
||||
import Foundation
|
||||
import Speech
|
||||
|
||||
/// One transcription result emitted by `SpeechPipeline`.
public struct SpeechSegment: Sendable {
    /// Transcribed text of the result.
    public let text: String
    /// The `isFinal` flag copied from the transcriber result this segment was built from.
    public let isFinal: Bool

    /// Module-internal initializer, equivalent to the implicit memberwise one.
    init(text: String, isFinal: Bool) {
        self.text = text
        self.isFinal = isFinal
    }
}
|
||||
|
||||
/// Reasons `SpeechPipeline.start` can fail before any audio is transcribed.
public enum SpeechPipelineError: Swift.Error {
    /// Speech recognition authorization is anything other than authorized.
    case authorizationDenied

    /// No audio format compatible with the transcriber could be obtained.
    case analyzerFormatUnavailable

    /// The transcriber was not available when the result stream was set up.
    case transcriberUnavailable
}
|
||||
|
||||
/// Live microphone → SpeechAnalyzer → SpeechTranscriber pipeline.
///
/// `start` taps the audio engine's input node, converts each captured buffer to the
/// analyzer's format, feeds it to the analyzer, and returns a stream of transcription
/// segments. `stop` tears the capture down again.
public actor SpeechPipeline {
    /// Carries a tap buffer into the task that forwards it to the actor.
    private struct UnsafeBuffer: @unchecked Sendable { let buffer: AVAudioPCMBuffer }

    private var engine = AVAudioEngine()
    private var transcriber: SpeechTranscriber?
    private var analyzer: SpeechAnalyzer?
    private var inputContinuation: AsyncStream<AnalyzerInput>.Continuation?
    private var resultTask: Task<Void, Never>?
    private let converter = BufferConverter()

    public init() {}

    /// Starts capturing and transcribing.
    ///
    /// - Parameters:
    ///   - localeIdentifier: Identifier of the locale to transcribe in.
    ///   - etiquette: Whether to enable the transcriber's etiquette replacements option.
    /// - Returns: A stream of segments; it finishes when the transcriber's results end
    ///   or fail. Terminating the stream stops the pipeline.
    /// - Throws: `SpeechPipelineError`, or any error raised while starting the audio
    ///   engine or the analyzer. On such a failure the capture set up so far is torn down.
    public func start(localeIdentifier: String, etiquette: Bool) async throws -> AsyncStream<SpeechSegment> {
        let auth = await requestAuthorizationIfNeeded()
        guard auth == .authorized else { throw SpeechPipelineError.authorizationDenied }

        let transcriberModule = SpeechTranscriber(
            locale: Locale(identifier: localeIdentifier),
            transcriptionOptions: etiquette ? [.etiquetteReplacements] : [],
            reportingOptions: [.volatileResults],
            attributeOptions: [])
        self.transcriber = transcriberModule

        guard let analyzerFormat = await SpeechAnalyzer.bestAvailableAudioFormat(compatibleWith: [transcriberModule])
        else {
            throw SpeechPipelineError.analyzerFormatUnavailable
        }

        self.analyzer = SpeechAnalyzer(modules: [transcriberModule])
        let (stream, audioContinuation) = AsyncStream<AnalyzerInput>.makeStream()
        self.inputContinuation = audioContinuation

        let inputNode = self.engine.inputNode
        let inputFormat = inputNode.outputFormat(forBus: 0)
        // Remove any tap left over from a previous start before installing a new one.
        inputNode.removeTap(onBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 2048, format: inputFormat) { [weak self] buffer, _ in
            guard let self else { return }
            let boxed = UnsafeBuffer(buffer: buffer)
            Task { await self.handleBuffer(boxed.buffer, targetFormat: analyzerFormat) }
        }

        do {
            self.engine.prepare()
            try self.engine.start()
            try await self.analyzer?.start(inputSequence: stream)
        } catch {
            // Do not leave a live tap, a running engine, or an open input stream behind
            // when start-up fails part-way through.
            self.engine.inputNode.removeTap(onBus: 0)
            self.engine.stop()
            audioContinuation.finish()
            self.inputContinuation = nil
            self.analyzer = nil
            self.transcriber = nil
            throw error
        }

        guard let transcriberForStream = self.transcriber else {
            throw SpeechPipelineError.transcriberUnavailable
        }

        return AsyncStream { continuation in
            self.resultTask = Task {
                do {
                    for try await result in transcriberForStream.results {
                        let seg = SpeechSegment(text: String(result.text.characters), isFinal: result.isFinal)
                        continuation.yield(seg)
                    }
                } catch {
                    // swallow errors and finish
                }
                continuation.finish()
            }
            continuation.onTermination = { _ in
                Task { await self.stop() }
            }
        }
    }

    /// Stops capture: cancels result forwarding, closes the analyzer input, removes the
    /// tap, stops the engine, and asks the analyzer to finalize.
    public func stop() async {
        self.resultTask?.cancel()
        self.inputContinuation?.finish()
        self.engine.inputNode.removeTap(onBus: 0)
        self.engine.stop()
        try? await self.analyzer?.finalizeAndFinishThroughEndOfInput()
    }

    /// Converts one captured buffer to `targetFormat` and feeds it to the analyzer input.
    private func handleBuffer(_ buffer: AVAudioPCMBuffer, targetFormat: AVAudioFormat) async {
        do {
            let converted = try converter.convert(buffer, to: targetFormat)
            let input = AnalyzerInput(buffer: converted)
            self.inputContinuation?.yield(input)
        } catch {
            // drop on conversion failure
        }
    }

    /// Returns the current speech authorization status, prompting only when it is undetermined.
    private func requestAuthorizationIfNeeded() async -> SFSpeechRecognizerAuthorizationStatus {
        let current = SFSpeechRecognizer.authorizationStatus()
        guard current == .notDetermined else { return current }
        return await withCheckedContinuation { continuation in
            SFSpeechRecognizer.requestAuthorization { status in
                continuation.resume(returning: status)
            }
        }
    }
}
|
||||
@@ -0,0 +1,63 @@
|
||||
import CoreMedia
|
||||
import Foundation
|
||||
import NaturalLanguage
|
||||
|
||||
extension AttributedString {
    /// Splits the receiver into sentence-sized pieces that each carry one combined audio time range.
    ///
    /// Sentences come from a sentence tokenizer over the plain text. When `maxLength` is
    /// given, a sentence longer than that is split further at word boundaries, packing as
    /// many consecutive words into each piece as fit within `maxLength` characters
    /// (a single word longer than `maxLength` still forms its own piece).
    ///
    /// - Parameter maxLength: Maximum characters per piece, or `nil` for whole sentences.
    /// - Returns: One attributed string per piece, holding the piece's characters and a
    ///   time range running from the start of its first timed run to the end of its last.
    ///   Pieces with no non-whitespace timed run are omitted.
    public func sentences(maxLength: Int? = nil) -> [AttributedString] {
        let plainText = String(characters)

        // Maps a range of `plainText` onto the attributed string; `nil` if an index has no counterpart.
        func attributedRange(_ stringRange: Range<String.Index>) -> Range<AttributedString.Index>? {
            guard
                let lower = AttributedString.Index(stringRange.lowerBound, within: self),
                let upper = AttributedString.Index(stringRange.upperBound, within: self)
            else { return nil }
            return lower..<upper
        }

        let sentenceTokenizer = NLTokenizer(unit: .sentence)
        sentenceTokenizer.string = plainText
        let sentenceRanges: [(Range<String.Index>, Range<AttributedString.Index>)] = sentenceTokenizer
            .tokens(for: plainText.startIndex..<plainText.endIndex)
            .compactMap { stringRange in
                attributedRange(stringRange).map { (stringRange, $0) }
            }

        let pieceRanges = sentenceRanges.flatMap { entry -> [Range<AttributedString.Index>] in
            let (sentenceStringRange, sentenceRange) = entry
            guard let maxLength, self[sentenceRange].characters.count > maxLength else {
                return [sentenceRange]
            }

            let wordTokenizer = NLTokenizer(unit: .word)
            wordTokenizer.string = plainText
            var wordRanges = wordTokenizer.tokens(for: sentenceStringRange).compactMap(attributedRange)
            guard !wordRanges.isEmpty else { return [sentenceRange] }

            // Stretch the first and last word to the sentence bounds so leading and
            // trailing non-word characters (such as punctuation) are not lost.
            wordRanges[0] = sentenceRange.lowerBound..<wordRanges[0].upperBound
            wordRanges[wordRanges.count - 1] = wordRanges[wordRanges.count - 1].lowerBound..<sentenceRange.upperBound

            var pieces: [Range<AttributedString.Index>] = []
            for wordRange in wordRanges {
                // Measure the merged span itself: it also contains the gap between the
                // previous piece and this word, which a sum of the two lengths would miss.
                if let last = pieces.last,
                   self[last.lowerBound..<wordRange.upperBound].characters.count <= maxLength
                {
                    pieces[pieces.count - 1] = last.lowerBound..<wordRange.upperBound
                } else {
                    pieces.append(wordRange)
                }
            }
            return pieces
        }

        return pieceRanges.compactMap { range -> AttributedString? in
            // Only runs with visible text contribute timing.
            let timeRanges = self[range].runs
                .filter { run in
                    !String(self[run.range].characters)
                        .trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
                }
                .compactMap(\.audioTimeRange)
            guard let first = timeRanges.first, let last = timeRanges.last else { return nil }

            var attributes = AttributeContainer()
            attributes[AttributeScopes.SpeechAttributes.TimeRangeAttribute.self] = CMTimeRange(
                start: first.start,
                end: last.end)
            return AttributedString(self[range].characters, attributes: attributes)
        }
    }
}
|
||||
41
Swabble/Sources/SwabbleCore/Support/Logging.swift
Normal file
41
Swabble/Sources/SwabbleCore/Support/Logging.swift
Normal file
@@ -0,0 +1,41 @@
|
||||
import Foundation
|
||||
|
||||
/// Log severities, ordered from least (`trace`) to most (`error`) severe.
public enum LogLevel: String, Comparable, CaseIterable, Sendable {
    case trace
    case debug
    case info
    case warn
    case error

    /// Numeric severity used for ordering; larger means more severe.
    var rank: Int {
        switch self {
        case .trace:
            return 0
        case .debug:
            return 1
        case .info:
            return 2
        case .warn:
            return 3
        case .error:
            return 4
        }
    }

    /// Orders levels by severity.
    public static func < (lhs: LogLevel, rhs: LogLevel) -> Bool {
        let (left, right) = (lhs.rank, rhs.rank)
        return left < right
    }
}
|
||||
|
||||
/// Minimal level-filtered logger that prints to standard output.
public struct Logger: Sendable {
    /// Minimum level that is printed; anything below it is dropped.
    public let level: LogLevel

    /// - Parameter level: Minimum level to print.
    public init(level: LogLevel) { self.level = level }

    /// Prints `message` as `[LEVEL] <ISO 8601 timestamp> | message` when `level` meets the threshold.
    ///
    /// - Parameters:
    ///   - level: Severity of this message.
    ///   - message: Text to print.
    public func log(_ level: LogLevel, _ message: String) {
        guard level >= self.level else { return }
        // Format the date directly rather than allocating a new formatter for every line.
        let timestamp = Date().ISO8601Format()
        print("[\(level.rawValue.uppercased())] \(timestamp) | \(message)")
    }

    public func trace(_ msg: String) { log(.trace, msg) }
    public func debug(_ msg: String) { log(.debug, msg) }
    public func info(_ msg: String) { log(.info, msg) }
    public func warn(_ msg: String) { log(.warn, msg) }
    public func error(_ msg: String) { log(.error, msg) }
}
|
||||
|
||||
extension LogLevel {
    /// Creates a level from a configuration string, ignoring letter case.
    ///
    /// - Parameter configValue: Level name such as `"info"` or `"WARN"`.
    /// - Returns: `nil` when the lowercased value matches no case.
    public init?(configValue: String) {
        let normalized = configValue.lowercased()
        self.init(rawValue: normalized)
    }
}
|
||||
45
Swabble/Sources/SwabbleCore/Support/OutputFormat.swift
Normal file
45
Swabble/Sources/SwabbleCore/Support/OutputFormat.swift
Normal file
@@ -0,0 +1,45 @@
|
||||
import CoreMedia
|
||||
import Foundation
|
||||
|
||||
/// Output formats a transcript can be rendered in.
public enum OutputFormat: String {
    case txt
    case srt

    /// Whether rendering this format requires audio time ranges on the transcript.
    public var needsAudioTimeRange: Bool {
        // Exhaustive on purpose: a new format must decide this explicitly.
        switch self {
        case .srt: true
        case .txt: false
        }
    }

    /// Renders `transcript` in this format.
    ///
    /// - Parameters:
    ///   - transcript: Transcript to render.
    ///   - maxLength: Maximum characters per subtitle piece; only used for `.srt`.
    /// - Returns: The plain text for `.txt`. For `.srt`, numbered cues separated by blank
    ///   lines, built from the transcript pieces that carry an audio time range.
    public func text(for transcript: AttributedString, maxLength: Int) -> String {
        switch self {
        case .txt:
            return String(transcript.characters)
        case .srt:
            let cues = transcript.sentences(maxLength: maxLength)
                .compactMap { (sentence: AttributedString) -> (CMTimeRange, String)? in
                    guard let timeRange = sentence.audioTimeRange else { return nil }
                    return (timeRange, String(sentence.characters))
                }
            return cues.enumerated().map { index, cue in
                let (timeRange, text) = cue
                return """

                \(index + 1)
                \(Self.srtTimestamp(timeRange.start.seconds)) --> \(Self.srtTimestamp(timeRange.end.seconds))
                \(text.trimmingCharacters(in: .whitespacesAndNewlines))

                """
            }.joined().trimmingCharacters(in: .whitespacesAndNewlines)
        }
    }

    /// Formats seconds as an SRT timestamp, `HH:MM:SS,mmm`.
    ///
    /// Works in whole milliseconds, rounded to nearest, so binary floating-point error
    /// cannot drop a millisecond (1.001 s prints `,001`) and rounding carries into the
    /// seconds. Non-finite or negative input is rendered as zero instead of trapping.
    private static func srtTimestamp(_ timeInterval: TimeInterval) -> String {
        guard timeInterval.isFinite, timeInterval > 0 else { return "00:00:00,000" }
        let totalMilliseconds = Int((timeInterval * 1000).rounded())
        let milliseconds = totalMilliseconds % 1000
        let totalSeconds = totalMilliseconds / 1000
        let seconds = totalSeconds % 60
        let minutes = (totalSeconds / 60) % 60
        let hours = totalSeconds / 3600
        return String(format: "%02d:%02d:%02d,%03d", hours, minutes, seconds, milliseconds)
    }
}
|
||||
46
Swabble/Sources/SwabbleCore/Support/TranscriptsStore.swift
Normal file
46
Swabble/Sources/SwabbleCore/Support/TranscriptsStore.swift
Normal file
@@ -0,0 +1,46 @@
|
||||
import Foundation
|
||||
|
||||
/// Keeps the most recent transcripts in memory and mirrors them to
/// `~/Library/Application Support/swabble/transcripts.log`, one entry per line.
public actor TranscriptsStore {
    public static let shared = TranscriptsStore()

    private var entries: [String] = []
    /// Maximum number of entries retained in memory and on disk.
    private let limit: Int
    private let fileURL: URL

    /// Creates the store and loads the tail of any existing log file.
    ///
    /// - Parameter limit: Maximum number of entries to retain. Defaults to 100;
    ///   negative values are treated as 0.
    public init(limit: Int = 100) {
        let capacity = max(limit, 0)
        let directory = FileManager.default.homeDirectoryForCurrentUser
            .appendingPathComponent("Library/Application Support/swabble", isDirectory: true)
        // Best effort: without the directory the store still works in memory.
        try? FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
        let logURL = directory.appendingPathComponent("transcripts.log")

        self.limit = capacity
        self.fileURL = logURL
        if let data = try? Data(contentsOf: logURL),
           let text = String(data: data, encoding: .utf8)
        {
            self.entries = text.split(separator: "\n").map(String.init).suffix(capacity)
        }
    }

    /// Appends `text`, drops the oldest entries beyond the limit, and rewrites the log file.
    ///
    /// - Parameter text: Transcript to record.
    public func append(text: String) {
        entries.append(text)
        if entries.count > limit {
            entries.removeFirst(entries.count - limit)
        }
        let body = entries.joined(separator: "\n")
        // The whole file is rewritten on every append, so write atomically: an interrupted
        // write must not leave a truncated log. Failures stay best-effort.
        try? body.write(to: fileURL, atomically: true, encoding: .utf8)
    }

    /// The retained entries, oldest first.
    public func latest() -> [String] { entries }
}
|
||||
|
||||
extension String {
    /// Appends the string plus a trailing newline to the file at `url`, creating the file if needed.
    ///
    /// - Parameter url: File to append to.
    /// - Throws: Any error raised while opening, seeking, or writing.
    private func appendLine(to url: URL) throws {
        let data = Data((self + "\n").utf8)
        guard FileManager.default.fileExists(atPath: url.path) else {
            try data.write(to: url)
            return
        }
        let handle = try FileHandle(forWritingTo: url)
        // Close on every exit path so the descriptor is not leaked when seeking or writing throws.
        defer { try? handle.close() }
        try handle.seekToEnd()
        try handle.write(contentsOf: data)
    }
}
|
||||
Reference in New Issue
Block a user