From 83262a67b1cedec1c64dee3581d4aa5b72f7eafb Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 30 Dec 2025 12:48:09 +0100 Subject: [PATCH] refactor: extract elevenlabs kit --- apps/shared/ClawdisKit/Package.swift | 7 +- .../ClawdisKit/ElevenLabsKitShim.swift | 9 + .../Sources/ClawdisKit/ElevenLabsTTS.swift | 331 -------------- .../ClawdisKit/PCMStreamingAudioPlayer.swift | 145 ------ .../ClawdisKit/StreamingAudioPlayer.swift | 429 ------------------ .../ClawdisKit/TalkTTSValidation.swift | 51 --- .../ElevenLabsTTSValidationTests.swift | 3 +- .../PCMStreamingAudioPlayerTests.swift | 26 -- .../TalkTTSValidationTests.swift | 45 -- 9 files changed, 16 insertions(+), 1030 deletions(-) create mode 100644 apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsKitShim.swift delete mode 100644 apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsTTS.swift delete mode 100644 apps/shared/ClawdisKit/Sources/ClawdisKit/PCMStreamingAudioPlayer.swift delete mode 100644 apps/shared/ClawdisKit/Sources/ClawdisKit/StreamingAudioPlayer.swift delete mode 100644 apps/shared/ClawdisKit/Sources/ClawdisKit/TalkTTSValidation.swift delete mode 100644 apps/shared/ClawdisKit/Tests/ClawdisKitTests/PCMStreamingAudioPlayerTests.swift delete mode 100644 apps/shared/ClawdisKit/Tests/ClawdisKitTests/TalkTTSValidationTests.swift diff --git a/apps/shared/ClawdisKit/Package.swift b/apps/shared/ClawdisKit/Package.swift index 3d7c4a784..c3054011a 100644 --- a/apps/shared/ClawdisKit/Package.swift +++ b/apps/shared/ClawdisKit/Package.swift @@ -12,10 +12,15 @@ let package = Package( .library(name: "ClawdisKit", targets: ["ClawdisKit"]), .library(name: "ClawdisChatUI", targets: ["ClawdisChatUI"]), ], + dependencies: [ + .package(path: "../../../ElevenLabsKit"), + ], targets: [ .target( name: "ClawdisKit", - dependencies: [], + dependencies: [ + .product(name: "ElevenLabsKit", package: "ElevenLabsKit"), + ], resources: [ .process("Resources"), ], diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsKitShim.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsKitShim.swift new file mode 100644 index 000000000..07fe91ac3 --- /dev/null +++ b/apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsKitShim.swift @@ -0,0 +1,9 @@ +@_exported import ElevenLabsKit + +public typealias ElevenLabsVoice = ElevenLabsKit.ElevenLabsVoice +public typealias ElevenLabsTTSRequest = ElevenLabsKit.ElevenLabsTTSRequest +public typealias ElevenLabsTTSClient = ElevenLabsKit.ElevenLabsTTSClient +public typealias TalkTTSValidation = ElevenLabsKit.TalkTTSValidation +public typealias StreamingAudioPlayer = ElevenLabsKit.StreamingAudioPlayer +public typealias PCMStreamingAudioPlayer = ElevenLabsKit.PCMStreamingAudioPlayer +public typealias StreamingPlaybackResult = ElevenLabsKit.StreamingPlaybackResult diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsTTS.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsTTS.swift deleted file mode 100644 index 38e602094..000000000 --- a/apps/shared/ClawdisKit/Sources/ClawdisKit/ElevenLabsTTS.swift +++ /dev/null @@ -1,331 +0,0 @@ -import Foundation - -public struct ElevenLabsVoice: Decodable, Sendable { - public let voiceId: String - public let name: String? - - enum CodingKeys: String, CodingKey { - case voiceId = "voice_id" - case name - } -} - -public struct ElevenLabsTTSRequest: Sendable { - public var text: String - public var modelId: String? - public var outputFormat: String? - public var speed: Double? - public var stability: Double? - public var similarity: Double? - public var style: Double? - public var speakerBoost: Bool? - public var seed: UInt32? - public var normalize: String? - public var language: String? - public var latencyTier: Int? - - public init( - text: String, - modelId: String? = nil, - outputFormat: String? = nil, - speed: Double? = nil, - stability: Double? = nil, - similarity: Double? = nil, - style: Double? = nil, - speakerBoost: Bool? = nil, - seed: UInt32? = nil, - normalize: String? = nil, - language: String? = nil, - latencyTier: Int? = nil) - { - self.text = text - self.modelId = modelId - self.outputFormat = outputFormat - self.speed = speed - self.stability = stability - self.similarity = similarity - self.style = style - self.speakerBoost = speakerBoost - self.seed = seed - self.normalize = normalize - self.language = language - self.latencyTier = latencyTier - } -} - -public struct ElevenLabsTTSClient: Sendable { - public var apiKey: String - public var requestTimeoutSeconds: TimeInterval - public var listVoicesTimeoutSeconds: TimeInterval - public var baseUrl: URL - - public init( - apiKey: String, - requestTimeoutSeconds: TimeInterval = 45, - listVoicesTimeoutSeconds: TimeInterval = 15, - baseUrl: URL = URL(string: "https://api.elevenlabs.io")!) - { - self.apiKey = apiKey - self.requestTimeoutSeconds = requestTimeoutSeconds - self.listVoicesTimeoutSeconds = listVoicesTimeoutSeconds - self.baseUrl = baseUrl - } - - public func synthesizeWithHardTimeout( - voiceId: String, - request: ElevenLabsTTSRequest, - hardTimeoutSeconds: TimeInterval) async throws -> Data - { - try await withThrowingTaskGroup(of: Data.self) { group in - group.addTask { - try await self.synthesize(voiceId: voiceId, request: request) - } - group.addTask { - try await Task.sleep(nanoseconds: UInt64(hardTimeoutSeconds * 1_000_000_000)) - throw NSError(domain: "ElevenLabsTTS", code: 408, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs TTS timed out after \(hardTimeoutSeconds)s", - ]) - } - let data = try await group.next()! - group.cancelAll() - return data - } - } - - public func synthesize(voiceId: String, request: ElevenLabsTTSRequest) async throws -> Data { - var url = self.baseUrl - url.appendPathComponent("v1") - url.appendPathComponent("text-to-speech") - url.appendPathComponent(voiceId) - - let body = try JSONSerialization.data(withJSONObject: Self.buildPayload(request), options: []) - - var lastError: Error? - for attempt in 0..<3 { - var req = URLRequest(url: url) - req.httpMethod = "POST" - req.httpBody = body - req.timeoutInterval = self.requestTimeoutSeconds - req.setValue("application/json", forHTTPHeaderField: "Content-Type") - req.setValue("audio/mpeg", forHTTPHeaderField: "Accept") - req.setValue(self.apiKey, forHTTPHeaderField: "xi-api-key") - - do { - let (data, response) = try await URLSession.shared.data(for: req) - if let http = response as? HTTPURLResponse { - let contentType = (http.value(forHTTPHeaderField: "Content-Type") ?? "unknown").lowercased() - if http.statusCode == 429 || http.statusCode >= 500 { - let message = Self.truncatedErrorBody(data) - lastError = NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs retryable failure: \(http.statusCode) ct=\(contentType) \(message)", - ]) - if attempt < 2 { - let retryAfter = Double(http.value(forHTTPHeaderField: "Retry-After") ?? "") - let baseDelay = [0.25, 0.75, 1.5][attempt] - let delaySeconds = max(baseDelay, retryAfter ?? 0) - try? await Task.sleep(nanoseconds: UInt64(delaySeconds * 1_000_000_000)) - continue - } - throw lastError! - } - - if http.statusCode >= 400 { - let message = Self.truncatedErrorBody(data) - throw NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs failed: \(http.statusCode) ct=\(contentType) \(message)", - ]) - } - - if !contentType.contains("audio") { - let message = Self.truncatedErrorBody(data) - throw NSError(domain: "ElevenLabsTTS", code: 415, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs returned non-audio ct=\(contentType) \(message)", - ]) - } - } - return data - } catch { - lastError = error - if attempt < 2 { - try? await Task.sleep(nanoseconds: UInt64([0.25, 0.75, 1.5][attempt] * 1_000_000_000)) - continue - } - throw error - } - } - throw lastError ?? NSError(domain: "ElevenLabsTTS", code: 1, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs failed", - ]) - } - - public func streamSynthesize( - voiceId: String, - request: ElevenLabsTTSRequest) -> AsyncThrowingStream - { - AsyncThrowingStream { continuation in - let task = Task { - do { - let url = Self.streamingURL( - baseUrl: self.baseUrl, - voiceId: voiceId, - latencyTier: request.latencyTier) - let body = try JSONSerialization.data(withJSONObject: Self.buildPayload(request), options: []) - - var req = URLRequest(url: url) - req.httpMethod = "POST" - req.httpBody = body - req.timeoutInterval = self.requestTimeoutSeconds - req.setValue("application/json", forHTTPHeaderField: "Content-Type") - req.setValue("audio/mpeg", forHTTPHeaderField: "Accept") - req.setValue(self.apiKey, forHTTPHeaderField: "xi-api-key") - - let (bytes, response) = try await URLSession.shared.bytes(for: req) - guard let http = response as? HTTPURLResponse else { - throw NSError(domain: "ElevenLabsTTS", code: 1, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs invalid response", - ]) - } - - let contentType = (http.value(forHTTPHeaderField: "Content-Type") ?? "unknown").lowercased() - if http.statusCode >= 400 { - let message = try await Self.readErrorBody(bytes: bytes) - throw NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs failed: \(http.statusCode) ct=\(contentType) \(message)", - ]) - } - if !contentType.contains("audio") { - let message = try await Self.readErrorBody(bytes: bytes) - throw NSError(domain: "ElevenLabsTTS", code: 415, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs returned non-audio ct=\(contentType) \(message)", - ]) - } - - var buffer = Data() - buffer.reserveCapacity(16_384) - for try await byte in bytes { - buffer.append(byte) - if buffer.count >= 8_192 { - continuation.yield(buffer) - buffer.removeAll(keepingCapacity: true) - } - } - if !buffer.isEmpty { - continuation.yield(buffer) - } - continuation.finish() - } catch { - continuation.finish(throwing: error) - } - } - - continuation.onTermination = { _ in - task.cancel() - } - } - } - - public func listVoices() async throws -> [ElevenLabsVoice] { - var url = self.baseUrl - url.appendPathComponent("v1") - url.appendPathComponent("voices") - - var req = URLRequest(url: url) - req.httpMethod = "GET" - req.timeoutInterval = self.listVoicesTimeoutSeconds - req.setValue(self.apiKey, forHTTPHeaderField: "xi-api-key") - - let (data, response) = try await URLSession.shared.data(for: req) - if let http = response as? HTTPURLResponse, http.statusCode >= 400 { - let message = Self.truncatedErrorBody(data) - throw NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [ - NSLocalizedDescriptionKey: "ElevenLabs voices failed: \(http.statusCode) \(message)", - ]) - } - - struct VoicesResponse: Decodable { let voices: [ElevenLabsVoice] } - return try JSONDecoder().decode(VoicesResponse.self, from: data).voices - } - - public static func validatedOutputFormat(_ value: String?) -> String? { - let trimmed = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmed.isEmpty else { return nil } - guard trimmed.hasPrefix("mp3_") || trimmed.hasPrefix("pcm_") else { return nil } - return trimmed - } - - public static func validatedLanguage(_ value: String?) -> String? { - let normalized = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased() - guard normalized.count == 2, normalized.allSatisfy({ $0 >= "a" && $0 <= "z" }) else { return nil } - return normalized - } - - public static func validatedNormalize(_ value: String?) -> String? { - let normalized = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased() - guard ["auto", "on", "off"].contains(normalized) else { return nil } - return normalized - } - - private static func buildPayload(_ request: ElevenLabsTTSRequest) -> [String: Any] { - var payload: [String: Any] = ["text": request.text] - if let modelId = request.modelId?.trimmingCharacters(in: .whitespacesAndNewlines), !modelId.isEmpty { - payload["model_id"] = modelId - } - if let outputFormat = request.outputFormat?.trimmingCharacters(in: .whitespacesAndNewlines), !outputFormat.isEmpty { - payload["output_format"] = outputFormat - } - if let seed = request.seed { - payload["seed"] = seed - } - if let normalize = request.normalize { - payload["apply_text_normalization"] = normalize - } - if let language = request.language { - payload["language_code"] = language - } - - var voiceSettings: [String: Any] = [:] - if let speed = request.speed { voiceSettings["speed"] = speed } - if let stability = request.stability { voiceSettings["stability"] = stability } - if let similarity = request.similarity { voiceSettings["similarity_boost"] = similarity } - if let style = request.style { voiceSettings["style"] = style } - if let speakerBoost = request.speakerBoost { voiceSettings["use_speaker_boost"] = speakerBoost } - if !voiceSettings.isEmpty { - payload["voice_settings"] = voiceSettings - } - return payload - } - - private static func truncatedErrorBody(_ data: Data) -> String { - let raw = String(data: data.prefix(4096), encoding: .utf8) ?? "unknown" - return raw.replacingOccurrences(of: "\n", with: " ").replacingOccurrences(of: "\r", with: " ") - } - - private static func streamingURL(baseUrl: URL, voiceId: String, latencyTier: Int?) -> URL { - var url = baseUrl - url.appendPathComponent("v1") - url.appendPathComponent("text-to-speech") - url.appendPathComponent(voiceId) - url.appendPathComponent("stream") - - guard let latencyTier else { return url } - let latencyItem = URLQueryItem( - name: "optimize_streaming_latency", - value: "\(latencyTier)") - guard var components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { - return url - } - var items = components.queryItems ?? [] - items.append(latencyItem) - components.queryItems = items - return components.url ?? url - } - - private static func readErrorBody(bytes: URLSession.AsyncBytes) async throws -> String { - var data = Data() - for try await byte in bytes { - data.append(byte) - if data.count >= 4096 { break } - } - return truncatedErrorBody(data) - } -} diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/PCMStreamingAudioPlayer.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/PCMStreamingAudioPlayer.swift deleted file mode 100644 index 05d122974..000000000 --- a/apps/shared/ClawdisKit/Sources/ClawdisKit/PCMStreamingAudioPlayer.swift +++ /dev/null @@ -1,145 +0,0 @@ -import AVFoundation -import Foundation -import OSLog - -@MainActor -public final class PCMStreamingAudioPlayer { - public static let shared = PCMStreamingAudioPlayer() - - private let logger = Logger(subsystem: "com.steipete.clawdis", category: "talk.tts.pcm") - private var engine = AVAudioEngine() - private var player = AVAudioPlayerNode() - private var format: AVAudioFormat? - private var pendingBuffers: Int = 0 - private var inputFinished = false - private var continuation: CheckedContinuation? - - public init() { - self.engine.attach(self.player) - } - - public func play(stream: AsyncThrowingStream, sampleRate: Double) async -> StreamingPlaybackResult { - self.stopInternal() - - let format = AVAudioFormat( - commonFormat: .pcmFormatInt16, - sampleRate: sampleRate, - channels: 1, - interleaved: true) - - guard let format else { - return StreamingPlaybackResult(finished: false, interruptedAt: nil) - } - self.configure(format: format) - - return await withCheckedContinuation { continuation in - self.continuation = continuation - self.pendingBuffers = 0 - self.inputFinished = false - - Task.detached { [weak self] in - guard let self else { return } - do { - for try await chunk in stream { - await self.enqueuePCM(chunk, format: format) - } - await self.finishInput() - } catch { - await self.fail(error) - } - } - } - } - - public func stop() -> Double? { - let interruptedAt = self.currentTimeSeconds() - self.stopInternal() - self.finish(StreamingPlaybackResult(finished: false, interruptedAt: interruptedAt)) - return interruptedAt - } - - private func configure(format: AVAudioFormat) { - if self.format?.sampleRate != format.sampleRate || self.format?.commonFormat != format.commonFormat { - self.engine.stop() - self.engine = AVAudioEngine() - self.player = AVAudioPlayerNode() - self.engine.attach(self.player) - } - self.format = format - if self.engine.attachedNodes.contains(self.player) { - self.engine.connect(self.player, to: self.engine.mainMixerNode, format: format) - } - } - - private func enqueuePCM(_ data: Data, format: AVAudioFormat) async { - guard !data.isEmpty else { return } - let frameCount = data.count / MemoryLayout.size - guard frameCount > 0 else { return } - guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(frameCount)) else { - return - } - buffer.frameLength = AVAudioFrameCount(frameCount) - - data.withUnsafeBytes { raw in - guard let src = raw.baseAddress else { return } - let audioBuffer = buffer.audioBufferList.pointee.mBuffers - if let dst = audioBuffer.mData { - memcpy(dst, src, frameCount * MemoryLayout.size) - } - } - - self.pendingBuffers += 1 - Task.detached { [weak self] in - guard let self else { return } - await self.player.scheduleBuffer(buffer) - await MainActor.run { - self.pendingBuffers = max(0, self.pendingBuffers - 1) - if self.inputFinished && self.pendingBuffers == 0 { - self.finish(StreamingPlaybackResult(finished: true, interruptedAt: nil)) - } - } - } - - if !self.player.isPlaying { - do { - try self.engine.start() - self.player.play() - } catch { - self.logger.error("pcm engine start failed: \(error.localizedDescription, privacy: .public)") - self.fail(error) - } - } - } - - private func finishInput() { - self.inputFinished = true - if self.pendingBuffers == 0 { - self.finish(StreamingPlaybackResult(finished: true, interruptedAt: nil)) - } - } - - private func fail(_ error: Error) { - self.logger.error("pcm stream failed: \(error.localizedDescription, privacy: .public)") - self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil)) - } - - private func stopInternal() { - self.player.stop() - self.engine.stop() - self.pendingBuffers = 0 - self.inputFinished = false - } - - private func finish(_ result: StreamingPlaybackResult) { - let continuation = self.continuation - self.continuation = nil - continuation?.resume(returning: result) - } - - private func currentTimeSeconds() -> Double? { - guard let nodeTime = self.player.lastRenderTime, - let playerTime = self.player.playerTime(forNodeTime: nodeTime) - else { return nil } - return Double(playerTime.sampleTime) / playerTime.sampleRate - } -} diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/StreamingAudioPlayer.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/StreamingAudioPlayer.swift deleted file mode 100644 index c7d1d2f5c..000000000 --- a/apps/shared/ClawdisKit/Sources/ClawdisKit/StreamingAudioPlayer.swift +++ /dev/null @@ -1,429 +0,0 @@ -import AudioToolbox -import Foundation -import OSLog - -public struct StreamingPlaybackResult: Sendable { - public let finished: Bool - public let interruptedAt: Double? - - public init(finished: Bool, interruptedAt: Double?) { - self.finished = finished - self.interruptedAt = interruptedAt - } -} - -@MainActor -public final class StreamingAudioPlayer: NSObject { - public static let shared = StreamingAudioPlayer() - - private let logger = Logger(subsystem: "com.steipete.clawdis", category: "talk.tts.stream") - private var playback: Playback? - - public func play(stream: AsyncThrowingStream) async -> StreamingPlaybackResult { - self.stopInternal() - - let playback = Playback(logger: self.logger) - self.playback = playback - - return await withCheckedContinuation { continuation in - playback.setContinuation(continuation) - playback.start() - - Task.detached { - do { - for try await chunk in stream { - playback.append(chunk) - } - playback.finishInput() - } catch { - playback.fail(error) - } - } - } - } - - public func stop() -> Double? { - guard let playback else { return nil } - let interruptedAt = playback.stop(immediate: true) - self.finish(playback: playback, result: StreamingPlaybackResult(finished: false, interruptedAt: interruptedAt)) - return interruptedAt - } - - private func stopInternal() { - guard let playback else { return } - let interruptedAt = playback.stop(immediate: true) - self.finish(playback: playback, result: StreamingPlaybackResult(finished: false, interruptedAt: interruptedAt)) - } - - private func finish(playback: Playback, result: StreamingPlaybackResult) { - playback.finish(result) - guard self.playback === playback else { return } - self.playback = nil - } -} - -private final class Playback: @unchecked Sendable { - private static let bufferCount: Int = 3 - private static let bufferSize: Int = 32 * 1024 - - private let logger: Logger - private let lock = NSLock() - private let parseQueue = DispatchQueue(label: "talk.stream.parse") - fileprivate let bufferLock = NSLock() - fileprivate let bufferSemaphore = DispatchSemaphore(value: bufferCount) - - private var continuation: CheckedContinuation? - private var finished = false - - private var audioFileStream: AudioFileStreamID? - private var audioQueue: AudioQueueRef? - fileprivate var audioFormat: AudioStreamBasicDescription? - fileprivate var maxPacketSize: UInt32 = 0 - - fileprivate var availableBuffers: [AudioQueueBufferRef] = [] - private var currentBuffer: AudioQueueBufferRef? - private var currentBufferSize: Int = 0 - private var currentPacketDescs: [AudioStreamPacketDescription] = [] - - private var isRunning = false - fileprivate var inputFinished = false - private var startRequested = false - - private var sampleRate: Double = 0 - - init(logger: Logger) { - self.logger = logger - } - - func setContinuation(_ continuation: CheckedContinuation) { - self.lock.lock() - self.continuation = continuation - self.lock.unlock() - } - - func start() { - let selfPtr = Unmanaged.passUnretained(self).toOpaque() - let status = AudioFileStreamOpen( - selfPtr, - propertyListenerProc, - packetsProc, - kAudioFileMP3Type, - &self.audioFileStream) - if status != noErr { - self.logger.error("talk stream open failed: \(status)") - self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil)) - } - } - - func append(_ data: Data) { - guard !data.isEmpty else { return } - self.parseQueue.async { [weak self] in - guard let self else { return } - guard let audioFileStream = self.audioFileStream else { return } - let status = data.withUnsafeBytes { bytes in - AudioFileStreamParseBytes( - audioFileStream, - UInt32(bytes.count), - bytes.baseAddress, - []) - } - if status != noErr { - self.logger.error("talk stream parse failed: \(status)") - self.fail(NSError(domain: "StreamingAudio", code: Int(status))) - } - } - } - - func finishInput() { - self.parseQueue.async { [weak self] in - guard let self else { return } - self.inputFinished = true - if self.audioQueue == nil { - self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil)) - return - } - self.enqueueCurrentBuffer(flushOnly: true) - _ = self.stop(immediate: false) - } - } - - func fail(_ error: Error) { - self.logger.error("talk stream failed: \(error.localizedDescription, privacy: .public)") - _ = self.stop(immediate: true) - self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil)) - } - - func stop(immediate: Bool) -> Double? { - guard let audioQueue else { return nil } - let interruptedAt = self.currentTimeSeconds() - AudioQueueStop(audioQueue, immediate) - return interruptedAt - } - - fileprivate func finish(_ result: StreamingPlaybackResult) { - let continuation: CheckedContinuation? - self.lock.lock() - if self.finished { - continuation = nil - } else { - self.finished = true - continuation = self.continuation - self.continuation = nil - } - self.lock.unlock() - - continuation?.resume(returning: result) - self.teardown() - } - - private func teardown() { - if let audioQueue { - AudioQueueDispose(audioQueue, true) - self.audioQueue = nil - } - if let audioFileStream { - AudioFileStreamClose(audioFileStream) - self.audioFileStream = nil - } - self.bufferLock.lock() - self.availableBuffers.removeAll() - self.bufferLock.unlock() - self.currentBuffer = nil - self.currentPacketDescs.removeAll() - } - - fileprivate func setupQueueIfNeeded(_ asbd: AudioStreamBasicDescription) { - guard self.audioQueue == nil else { return } - - var format = asbd - self.audioFormat = format - self.sampleRate = format.mSampleRate - - let selfPtr = Unmanaged.passUnretained(self).toOpaque() - let status = AudioQueueNewOutput( - &format, - outputCallbackProc, - selfPtr, - nil, - nil, - 0, - &self.audioQueue) - if status != noErr { - self.logger.error("talk queue create failed: \(status)") - self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil)) - return - } - - if let audioQueue { - AudioQueueAddPropertyListener(audioQueue, kAudioQueueProperty_IsRunning, isRunningCallbackProc, selfPtr) - } - - if let audioFileStream { - var cookieSize: UInt32 = 0 - var writable: DarwinBoolean = false - let cookieStatus = AudioFileStreamGetPropertyInfo( - audioFileStream, - kAudioFileStreamProperty_MagicCookieData, - &cookieSize, - &writable) - if cookieStatus == noErr, cookieSize > 0, let audioQueue { - var cookie = [UInt8](repeating: 0, count: Int(cookieSize)) - let readStatus = AudioFileStreamGetProperty( - audioFileStream, - kAudioFileStreamProperty_MagicCookieData, - &cookieSize, - &cookie) - if readStatus == noErr { - AudioQueueSetProperty(audioQueue, kAudioQueueProperty_MagicCookie, cookie, cookieSize) - } - } - } - - if let audioQueue { - for _ in 0.. 0 else { return } - - buffer.pointee.mAudioDataByteSize = UInt32(self.currentBufferSize) - let packetCount = UInt32(self.currentPacketDescs.count) - - let status = self.currentPacketDescs.withUnsafeBufferPointer { descPtr in - AudioQueueEnqueueBuffer(audioQueue, buffer, packetCount, descPtr.baseAddress) - } - if status != noErr { - self.logger.error("talk queue enqueue failed: \(status)") - } else { - if !self.startRequested { - self.startRequested = true - let startStatus = AudioQueueStart(audioQueue, nil) - if startStatus != noErr { - self.logger.error("talk queue start failed: \(startStatus)") - } - } - } - - self.currentBuffer = nil - self.currentBufferSize = 0 - self.currentPacketDescs.removeAll(keepingCapacity: true) - if !flushOnly { - self.bufferSemaphore.wait() - self.bufferLock.lock() - let next = self.availableBuffers.popLast() - self.bufferLock.unlock() - if let next { self.currentBuffer = next } - } - } - - fileprivate func handlePackets( - numberBytes: UInt32, - numberPackets: UInt32, - inputData: UnsafeRawPointer, - packetDescriptions: UnsafeMutablePointer?) - { - if self.audioQueue == nil, let format = self.audioFormat { - self.setupQueueIfNeeded(format) - } - - if self.audioQueue == nil { - return - } - - if self.currentBuffer == nil { - self.bufferSemaphore.wait() - self.bufferLock.lock() - self.currentBuffer = self.availableBuffers.popLast() - self.bufferLock.unlock() - self.currentBufferSize = 0 - self.currentPacketDescs.removeAll(keepingCapacity: true) - } - - let bytes = inputData.assumingMemoryBound(to: UInt8.self) - let packetCount = Int(numberPackets) - for index in 0.. Self.bufferSize { - continue - } - - if self.currentBufferSize + packetSize > Self.bufferSize { - self.enqueueCurrentBuffer() - } - - guard let buffer = self.currentBuffer else { continue } - let dest = buffer.pointee.mAudioData.advanced(by: self.currentBufferSize) - memcpy(dest, bytes.advanced(by: packetOffset), packetSize) - - let desc = AudioStreamPacketDescription( - mStartOffset: Int64(self.currentBufferSize), - mVariableFramesInPacket: 0, - mDataByteSize: UInt32(packetSize)) - self.currentPacketDescs.append(desc) - self.currentBufferSize += packetSize - } - } - - private func currentTimeSeconds() -> Double? { - guard let audioQueue, sampleRate > 0 else { return nil } - var timeStamp = AudioTimeStamp() - let status = AudioQueueGetCurrentTime(audioQueue, nil, &timeStamp, nil) - if status != noErr { return nil } - if timeStamp.mSampleTime.isNaN { return nil } - return timeStamp.mSampleTime / sampleRate - } -} - -private func propertyListenerProc( - inClientData: UnsafeMutableRawPointer, - inAudioFileStream: AudioFileStreamID, - inPropertyID: AudioFileStreamPropertyID, - ioFlags: UnsafeMutablePointer) -{ - let playback = Unmanaged.fromOpaque(inClientData).takeUnretainedValue() - - if inPropertyID == kAudioFileStreamProperty_DataFormat { - var format = AudioStreamBasicDescription() - var size = UInt32(MemoryLayout.size) - let status = AudioFileStreamGetProperty(inAudioFileStream, inPropertyID, &size, &format) - if status == noErr { - playback.audioFormat = format - playback.setupQueueIfNeeded(format) - } - } else if inPropertyID == kAudioFileStreamProperty_PacketSizeUpperBound { - var maxPacketSize: UInt32 = 0 - var size = UInt32(MemoryLayout.size) - let status = AudioFileStreamGetProperty(inAudioFileStream, inPropertyID, &size, &maxPacketSize) - if status == noErr { - playback.maxPacketSize = maxPacketSize - } - } -} - -private func packetsProc( - inClientData: UnsafeMutableRawPointer, - inNumberBytes: UInt32, - inNumberPackets: UInt32, - inInputData: UnsafeRawPointer, - inPacketDescriptions: UnsafeMutablePointer?) -{ - let playback = Unmanaged.fromOpaque(inClientData).takeUnretainedValue() - playback.handlePackets( - numberBytes: inNumberBytes, - numberPackets: inNumberPackets, - inputData: inInputData, - packetDescriptions: inPacketDescriptions) -} - -private func outputCallbackProc( - inUserData: UnsafeMutableRawPointer?, - inAQ: AudioQueueRef, - inBuffer: AudioQueueBufferRef) -{ - guard let inUserData else { return } - let playback = Unmanaged.fromOpaque(inUserData).takeUnretainedValue() - playback.bufferLock.lock() - playback.availableBuffers.append(inBuffer) - playback.bufferLock.unlock() - playback.bufferSemaphore.signal() -} - -private func isRunningCallbackProc( - inUserData: UnsafeMutableRawPointer?, - inAQ: AudioQueueRef, - inID: AudioQueuePropertyID) -{ - guard let inUserData else { return } - guard inID == kAudioQueueProperty_IsRunning else { return } - - let playback = Unmanaged.fromOpaque(inUserData).takeUnretainedValue() - var running: UInt32 = 0 - var size = UInt32(MemoryLayout.size) - let status = AudioQueueGetProperty(inAQ, kAudioQueueProperty_IsRunning, &running, &size) - if status != noErr { return } - - if running == 0, playback.inputFinished { - playback.finish(StreamingPlaybackResult(finished: true, interruptedAt: nil)) - } -} diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/TalkTTSValidation.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/TalkTTSValidation.swift deleted file mode 100644 index e010f9197..000000000 --- a/apps/shared/ClawdisKit/Sources/ClawdisKit/TalkTTSValidation.swift +++ /dev/null @@ -1,51 +0,0 @@ -public enum TalkTTSValidation: Sendable { - private static let v3StabilityValues: Set = [0.0, 0.5, 1.0] - - public static func resolveSpeed(speed: Double?, rateWPM: Int?) -> Double? { - if let rateWPM, rateWPM > 0 { - let resolved = Double(rateWPM) / 175.0 - if resolved <= 0.5 || resolved >= 2.0 { return nil } - return resolved - } - if let speed { - if speed <= 0.5 || speed >= 2.0 { return nil } - return speed - } - return nil - } - - public static func validatedUnit(_ value: Double?) -> Double? { - guard let value else { return nil } - if value < 0 || value > 1 { return nil } - return value - } - - public static func validatedStability(_ value: Double?, modelId: String?) -> Double? { - guard let value else { return nil } - let normalizedModel = (modelId ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased() - if normalizedModel == "eleven_v3" { - return v3StabilityValues.contains(value) ? value : nil - } - return validatedUnit(value) - } - - public static func validatedSeed(_ value: Int?) -> UInt32? { - guard let value else { return nil } - if value < 0 || value > 4294967295 { return nil } - return UInt32(value) - } - - public static func validatedLatencyTier(_ value: Int?) -> Int? { - guard let value else { return nil } - if value < 0 || value > 4 { return nil } - return value - } - - public static func pcmSampleRate(from outputFormat: String?) -> Double? { - let trimmed = (outputFormat ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased() - guard trimmed.hasPrefix("pcm_") else { return nil } - let parts = trimmed.split(separator: "_", maxSplits: 1) - guard parts.count == 2, let rate = Double(parts[1]), rate > 0 else { return nil } - return rate - } -} diff --git a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/ElevenLabsTTSValidationTests.swift b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/ElevenLabsTTSValidationTests.swift index 716c50508..507ca6e76 100644 --- a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/ElevenLabsTTSValidationTests.swift +++ b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/ElevenLabsTTSValidationTests.swift @@ -4,7 +4,7 @@ import XCTest final class ElevenLabsTTSValidationTests: XCTestCase { func testValidatedOutputFormatAllowsOnlyMp3Presets() { XCTAssertEqual(ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128"), "mp3_44100_128") - XCTAssertNil(ElevenLabsTTSClient.validatedOutputFormat("pcm_16000")) + XCTAssertEqual(ElevenLabsTTSClient.validatedOutputFormat("pcm_16000"), "pcm_16000") } func testValidatedLanguageAcceptsTwoLetterCodes() { @@ -17,4 +17,3 @@ final class ElevenLabsTTSValidationTests: XCTestCase { XCTAssertNil(ElevenLabsTTSClient.validatedNormalize("maybe")) } } - diff --git a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/PCMStreamingAudioPlayerTests.swift b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/PCMStreamingAudioPlayerTests.swift deleted file mode 100644 index c502bfdd2..000000000 --- a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/PCMStreamingAudioPlayerTests.swift +++ /dev/null @@ -1,26 +0,0 @@ -import XCTest -@testable import ClawdisKit - -final class PCMStreamingAudioPlayerTests: XCTestCase { - @MainActor - func testStopDuringPCMStreamReturnsInterruptedResult() async { - var continuation: AsyncThrowingStream.Continuation? - let stream = AsyncThrowingStream { cont in - continuation = cont - let samples = Data(repeating: 0, count: 44_100) - cont.yield(samples) - } - - let task = Task { @MainActor in - await PCMStreamingAudioPlayer.shared.play(stream: stream, sampleRate: 44_100) - } - - try? await Task.sleep(nanoseconds: 120_000_000) - let interruptedAt = PCMStreamingAudioPlayer.shared.stop() - continuation?.finish() - - let result = await task.value - XCTAssertFalse(result.finished) - XCTAssertNotNil(interruptedAt) - } -} diff --git a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/TalkTTSValidationTests.swift b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/TalkTTSValidationTests.swift deleted file mode 100644 index d3426d768..000000000 --- a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/TalkTTSValidationTests.swift +++ /dev/null @@ -1,45 +0,0 @@ -import XCTest -@testable import ClawdisKit - -final class TalkTTSValidationTests: XCTestCase { - func testResolveSpeedUsesRateWPMWhenProvided() { - let resolved = TalkTTSValidation.resolveSpeed(speed: nil, rateWPM: 175) - XCTAssertNotNil(resolved) - XCTAssertEqual(resolved ?? 0, 1.0, accuracy: 0.0001) - XCTAssertNil(TalkTTSValidation.resolveSpeed(speed: nil, rateWPM: 400)) - } - - func testValidatedUnitBounds() { - XCTAssertEqual(TalkTTSValidation.validatedUnit(0), 0) - XCTAssertEqual(TalkTTSValidation.validatedUnit(1), 1) - XCTAssertNil(TalkTTSValidation.validatedUnit(-0.01)) - XCTAssertNil(TalkTTSValidation.validatedUnit(1.01)) - } - - func testValidatedStability() { - XCTAssertEqual(TalkTTSValidation.validatedStability(0, modelId: "eleven_v3"), 0) - XCTAssertEqual(TalkTTSValidation.validatedStability(0.5, modelId: "eleven_v3"), 0.5) - XCTAssertEqual(TalkTTSValidation.validatedStability(1, modelId: "eleven_v3"), 1) - XCTAssertNil(TalkTTSValidation.validatedStability(0.7, modelId: "eleven_v3")) - XCTAssertEqual(TalkTTSValidation.validatedStability(0.7, modelId: "eleven_multilingual_v2"), 0.7) - } - - func testValidatedSeedBounds() { - XCTAssertEqual(TalkTTSValidation.validatedSeed(0), 0) - XCTAssertEqual(TalkTTSValidation.validatedSeed(1234), 1234) - XCTAssertNil(TalkTTSValidation.validatedSeed(-1)) - } - - func testValidatedLatencyTier() { - XCTAssertEqual(TalkTTSValidation.validatedLatencyTier(0), 0) - XCTAssertEqual(TalkTTSValidation.validatedLatencyTier(4), 4) - XCTAssertNil(TalkTTSValidation.validatedLatencyTier(-1)) - XCTAssertNil(TalkTTSValidation.validatedLatencyTier(5)) - } - - func testPcmSampleRateParse() { - XCTAssertEqual(TalkTTSValidation.pcmSampleRate(from: "pcm_44100"), 44100) - XCTAssertNil(TalkTTSValidation.pcmSampleRate(from: "mp3_44100_128")) - XCTAssertNil(TalkTTSValidation.pcmSampleRate(from: "pcm_bad")) - } -}