refactor: extract elevenlabs kit

This commit is contained in:
Peter Steinberger
2025-12-30 12:48:09 +01:00
parent 66952a682d
commit 83262a67b1
9 changed files with 16 additions and 1030 deletions

View File

@@ -12,10 +12,15 @@ let package = Package(
.library(name: "ClawdisKit", targets: ["ClawdisKit"]),
.library(name: "ClawdisChatUI", targets: ["ClawdisChatUI"]),
],
dependencies: [
.package(path: "../../../ElevenLabsKit"),
],
targets: [
.target(
name: "ClawdisKit",
dependencies: [],
dependencies: [
.product(name: "ElevenLabsKit", package: "ElevenLabsKit"),
],
resources: [
.process("Resources"),
],

View File

@@ -0,0 +1,9 @@
@_exported import ElevenLabsKit
public typealias ElevenLabsVoice = ElevenLabsKit.ElevenLabsVoice
public typealias ElevenLabsTTSRequest = ElevenLabsKit.ElevenLabsTTSRequest
public typealias ElevenLabsTTSClient = ElevenLabsKit.ElevenLabsTTSClient
public typealias TalkTTSValidation = ElevenLabsKit.TalkTTSValidation
public typealias StreamingAudioPlayer = ElevenLabsKit.StreamingAudioPlayer
public typealias PCMStreamingAudioPlayer = ElevenLabsKit.PCMStreamingAudioPlayer
public typealias StreamingPlaybackResult = ElevenLabsKit.StreamingPlaybackResult

View File

@@ -1,331 +0,0 @@
import Foundation
public struct ElevenLabsVoice: Decodable, Sendable {
public let voiceId: String
public let name: String?
enum CodingKeys: String, CodingKey {
case voiceId = "voice_id"
case name
}
}
public struct ElevenLabsTTSRequest: Sendable {
public var text: String
public var modelId: String?
public var outputFormat: String?
public var speed: Double?
public var stability: Double?
public var similarity: Double?
public var style: Double?
public var speakerBoost: Bool?
public var seed: UInt32?
public var normalize: String?
public var language: String?
public var latencyTier: Int?
public init(
text: String,
modelId: String? = nil,
outputFormat: String? = nil,
speed: Double? = nil,
stability: Double? = nil,
similarity: Double? = nil,
style: Double? = nil,
speakerBoost: Bool? = nil,
seed: UInt32? = nil,
normalize: String? = nil,
language: String? = nil,
latencyTier: Int? = nil)
{
self.text = text
self.modelId = modelId
self.outputFormat = outputFormat
self.speed = speed
self.stability = stability
self.similarity = similarity
self.style = style
self.speakerBoost = speakerBoost
self.seed = seed
self.normalize = normalize
self.language = language
self.latencyTier = latencyTier
}
}
public struct ElevenLabsTTSClient: Sendable {
public var apiKey: String
public var requestTimeoutSeconds: TimeInterval
public var listVoicesTimeoutSeconds: TimeInterval
public var baseUrl: URL
public init(
apiKey: String,
requestTimeoutSeconds: TimeInterval = 45,
listVoicesTimeoutSeconds: TimeInterval = 15,
baseUrl: URL = URL(string: "https://api.elevenlabs.io")!)
{
self.apiKey = apiKey
self.requestTimeoutSeconds = requestTimeoutSeconds
self.listVoicesTimeoutSeconds = listVoicesTimeoutSeconds
self.baseUrl = baseUrl
}
public func synthesizeWithHardTimeout(
voiceId: String,
request: ElevenLabsTTSRequest,
hardTimeoutSeconds: TimeInterval) async throws -> Data
{
try await withThrowingTaskGroup(of: Data.self) { group in
group.addTask {
try await self.synthesize(voiceId: voiceId, request: request)
}
group.addTask {
try await Task.sleep(nanoseconds: UInt64(hardTimeoutSeconds * 1_000_000_000))
throw NSError(domain: "ElevenLabsTTS", code: 408, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs TTS timed out after \(hardTimeoutSeconds)s",
])
}
let data = try await group.next()!
group.cancelAll()
return data
}
}
public func synthesize(voiceId: String, request: ElevenLabsTTSRequest) async throws -> Data {
var url = self.baseUrl
url.appendPathComponent("v1")
url.appendPathComponent("text-to-speech")
url.appendPathComponent(voiceId)
let body = try JSONSerialization.data(withJSONObject: Self.buildPayload(request), options: [])
var lastError: Error?
for attempt in 0..<3 {
var req = URLRequest(url: url)
req.httpMethod = "POST"
req.httpBody = body
req.timeoutInterval = self.requestTimeoutSeconds
req.setValue("application/json", forHTTPHeaderField: "Content-Type")
req.setValue("audio/mpeg", forHTTPHeaderField: "Accept")
req.setValue(self.apiKey, forHTTPHeaderField: "xi-api-key")
do {
let (data, response) = try await URLSession.shared.data(for: req)
if let http = response as? HTTPURLResponse {
let contentType = (http.value(forHTTPHeaderField: "Content-Type") ?? "unknown").lowercased()
if http.statusCode == 429 || http.statusCode >= 500 {
let message = Self.truncatedErrorBody(data)
lastError = NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs retryable failure: \(http.statusCode) ct=\(contentType) \(message)",
])
if attempt < 2 {
let retryAfter = Double(http.value(forHTTPHeaderField: "Retry-After") ?? "")
let baseDelay = [0.25, 0.75, 1.5][attempt]
let delaySeconds = max(baseDelay, retryAfter ?? 0)
try? await Task.sleep(nanoseconds: UInt64(delaySeconds * 1_000_000_000))
continue
}
throw lastError!
}
if http.statusCode >= 400 {
let message = Self.truncatedErrorBody(data)
throw NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs failed: \(http.statusCode) ct=\(contentType) \(message)",
])
}
if !contentType.contains("audio") {
let message = Self.truncatedErrorBody(data)
throw NSError(domain: "ElevenLabsTTS", code: 415, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs returned non-audio ct=\(contentType) \(message)",
])
}
}
return data
} catch {
lastError = error
if attempt < 2 {
try? await Task.sleep(nanoseconds: UInt64([0.25, 0.75, 1.5][attempt] * 1_000_000_000))
continue
}
throw error
}
}
throw lastError ?? NSError(domain: "ElevenLabsTTS", code: 1, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs failed",
])
}
public func streamSynthesize(
voiceId: String,
request: ElevenLabsTTSRequest) -> AsyncThrowingStream<Data, Error>
{
AsyncThrowingStream { continuation in
let task = Task {
do {
let url = Self.streamingURL(
baseUrl: self.baseUrl,
voiceId: voiceId,
latencyTier: request.latencyTier)
let body = try JSONSerialization.data(withJSONObject: Self.buildPayload(request), options: [])
var req = URLRequest(url: url)
req.httpMethod = "POST"
req.httpBody = body
req.timeoutInterval = self.requestTimeoutSeconds
req.setValue("application/json", forHTTPHeaderField: "Content-Type")
req.setValue("audio/mpeg", forHTTPHeaderField: "Accept")
req.setValue(self.apiKey, forHTTPHeaderField: "xi-api-key")
let (bytes, response) = try await URLSession.shared.bytes(for: req)
guard let http = response as? HTTPURLResponse else {
throw NSError(domain: "ElevenLabsTTS", code: 1, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs invalid response",
])
}
let contentType = (http.value(forHTTPHeaderField: "Content-Type") ?? "unknown").lowercased()
if http.statusCode >= 400 {
let message = try await Self.readErrorBody(bytes: bytes)
throw NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs failed: \(http.statusCode) ct=\(contentType) \(message)",
])
}
if !contentType.contains("audio") {
let message = try await Self.readErrorBody(bytes: bytes)
throw NSError(domain: "ElevenLabsTTS", code: 415, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs returned non-audio ct=\(contentType) \(message)",
])
}
var buffer = Data()
buffer.reserveCapacity(16_384)
for try await byte in bytes {
buffer.append(byte)
if buffer.count >= 8_192 {
continuation.yield(buffer)
buffer.removeAll(keepingCapacity: true)
}
}
if !buffer.isEmpty {
continuation.yield(buffer)
}
continuation.finish()
} catch {
continuation.finish(throwing: error)
}
}
continuation.onTermination = { _ in
task.cancel()
}
}
}
public func listVoices() async throws -> [ElevenLabsVoice] {
var url = self.baseUrl
url.appendPathComponent("v1")
url.appendPathComponent("voices")
var req = URLRequest(url: url)
req.httpMethod = "GET"
req.timeoutInterval = self.listVoicesTimeoutSeconds
req.setValue(self.apiKey, forHTTPHeaderField: "xi-api-key")
let (data, response) = try await URLSession.shared.data(for: req)
if let http = response as? HTTPURLResponse, http.statusCode >= 400 {
let message = Self.truncatedErrorBody(data)
throw NSError(domain: "ElevenLabsTTS", code: http.statusCode, userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs voices failed: \(http.statusCode) \(message)",
])
}
struct VoicesResponse: Decodable { let voices: [ElevenLabsVoice] }
return try JSONDecoder().decode(VoicesResponse.self, from: data).voices
}
public static func validatedOutputFormat(_ value: String?) -> String? {
let trimmed = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
guard !trimmed.isEmpty else { return nil }
guard trimmed.hasPrefix("mp3_") || trimmed.hasPrefix("pcm_") else { return nil }
return trimmed
}
public static func validatedLanguage(_ value: String?) -> String? {
let normalized = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
guard normalized.count == 2, normalized.allSatisfy({ $0 >= "a" && $0 <= "z" }) else { return nil }
return normalized
}
public static func validatedNormalize(_ value: String?) -> String? {
let normalized = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
guard ["auto", "on", "off"].contains(normalized) else { return nil }
return normalized
}
private static func buildPayload(_ request: ElevenLabsTTSRequest) -> [String: Any] {
var payload: [String: Any] = ["text": request.text]
if let modelId = request.modelId?.trimmingCharacters(in: .whitespacesAndNewlines), !modelId.isEmpty {
payload["model_id"] = modelId
}
if let outputFormat = request.outputFormat?.trimmingCharacters(in: .whitespacesAndNewlines), !outputFormat.isEmpty {
payload["output_format"] = outputFormat
}
if let seed = request.seed {
payload["seed"] = seed
}
if let normalize = request.normalize {
payload["apply_text_normalization"] = normalize
}
if let language = request.language {
payload["language_code"] = language
}
var voiceSettings: [String: Any] = [:]
if let speed = request.speed { voiceSettings["speed"] = speed }
if let stability = request.stability { voiceSettings["stability"] = stability }
if let similarity = request.similarity { voiceSettings["similarity_boost"] = similarity }
if let style = request.style { voiceSettings["style"] = style }
if let speakerBoost = request.speakerBoost { voiceSettings["use_speaker_boost"] = speakerBoost }
if !voiceSettings.isEmpty {
payload["voice_settings"] = voiceSettings
}
return payload
}
private static func truncatedErrorBody(_ data: Data) -> String {
let raw = String(data: data.prefix(4096), encoding: .utf8) ?? "unknown"
return raw.replacingOccurrences(of: "\n", with: " ").replacingOccurrences(of: "\r", with: " ")
}
private static func streamingURL(baseUrl: URL, voiceId: String, latencyTier: Int?) -> URL {
var url = baseUrl
url.appendPathComponent("v1")
url.appendPathComponent("text-to-speech")
url.appendPathComponent(voiceId)
url.appendPathComponent("stream")
guard let latencyTier else { return url }
let latencyItem = URLQueryItem(
name: "optimize_streaming_latency",
value: "\(latencyTier)")
guard var components = URLComponents(url: url, resolvingAgainstBaseURL: false) else {
return url
}
var items = components.queryItems ?? []
items.append(latencyItem)
components.queryItems = items
return components.url ?? url
}
private static func readErrorBody(bytes: URLSession.AsyncBytes) async throws -> String {
var data = Data()
for try await byte in bytes {
data.append(byte)
if data.count >= 4096 { break }
}
return truncatedErrorBody(data)
}
}

View File

@@ -1,145 +0,0 @@
import AVFoundation
import Foundation
import OSLog
@MainActor
public final class PCMStreamingAudioPlayer {
public static let shared = PCMStreamingAudioPlayer()
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "talk.tts.pcm")
private var engine = AVAudioEngine()
private var player = AVAudioPlayerNode()
private var format: AVAudioFormat?
private var pendingBuffers: Int = 0
private var inputFinished = false
private var continuation: CheckedContinuation<StreamingPlaybackResult, Never>?
public init() {
self.engine.attach(self.player)
}
public func play(stream: AsyncThrowingStream<Data, Error>, sampleRate: Double) async -> StreamingPlaybackResult {
self.stopInternal()
let format = AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: sampleRate,
channels: 1,
interleaved: true)
guard let format else {
return StreamingPlaybackResult(finished: false, interruptedAt: nil)
}
self.configure(format: format)
return await withCheckedContinuation { continuation in
self.continuation = continuation
self.pendingBuffers = 0
self.inputFinished = false
Task.detached { [weak self] in
guard let self else { return }
do {
for try await chunk in stream {
await self.enqueuePCM(chunk, format: format)
}
await self.finishInput()
} catch {
await self.fail(error)
}
}
}
}
public func stop() -> Double? {
let interruptedAt = self.currentTimeSeconds()
self.stopInternal()
self.finish(StreamingPlaybackResult(finished: false, interruptedAt: interruptedAt))
return interruptedAt
}
private func configure(format: AVAudioFormat) {
if self.format?.sampleRate != format.sampleRate || self.format?.commonFormat != format.commonFormat {
self.engine.stop()
self.engine = AVAudioEngine()
self.player = AVAudioPlayerNode()
self.engine.attach(self.player)
}
self.format = format
if self.engine.attachedNodes.contains(self.player) {
self.engine.connect(self.player, to: self.engine.mainMixerNode, format: format)
}
}
private func enqueuePCM(_ data: Data, format: AVAudioFormat) async {
guard !data.isEmpty else { return }
let frameCount = data.count / MemoryLayout<Int16>.size
guard frameCount > 0 else { return }
guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(frameCount)) else {
return
}
buffer.frameLength = AVAudioFrameCount(frameCount)
data.withUnsafeBytes { raw in
guard let src = raw.baseAddress else { return }
let audioBuffer = buffer.audioBufferList.pointee.mBuffers
if let dst = audioBuffer.mData {
memcpy(dst, src, frameCount * MemoryLayout<Int16>.size)
}
}
self.pendingBuffers += 1
Task.detached { [weak self] in
guard let self else { return }
await self.player.scheduleBuffer(buffer)
await MainActor.run {
self.pendingBuffers = max(0, self.pendingBuffers - 1)
if self.inputFinished && self.pendingBuffers == 0 {
self.finish(StreamingPlaybackResult(finished: true, interruptedAt: nil))
}
}
}
if !self.player.isPlaying {
do {
try self.engine.start()
self.player.play()
} catch {
self.logger.error("pcm engine start failed: \(error.localizedDescription, privacy: .public)")
self.fail(error)
}
}
}
private func finishInput() {
self.inputFinished = true
if self.pendingBuffers == 0 {
self.finish(StreamingPlaybackResult(finished: true, interruptedAt: nil))
}
}
private func fail(_ error: Error) {
self.logger.error("pcm stream failed: \(error.localizedDescription, privacy: .public)")
self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil))
}
private func stopInternal() {
self.player.stop()
self.engine.stop()
self.pendingBuffers = 0
self.inputFinished = false
}
private func finish(_ result: StreamingPlaybackResult) {
let continuation = self.continuation
self.continuation = nil
continuation?.resume(returning: result)
}
private func currentTimeSeconds() -> Double? {
guard let nodeTime = self.player.lastRenderTime,
let playerTime = self.player.playerTime(forNodeTime: nodeTime)
else { return nil }
return Double(playerTime.sampleTime) / playerTime.sampleRate
}
}

View File

@@ -1,429 +0,0 @@
import AudioToolbox
import Foundation
import OSLog
public struct StreamingPlaybackResult: Sendable {
public let finished: Bool
public let interruptedAt: Double?
public init(finished: Bool, interruptedAt: Double?) {
self.finished = finished
self.interruptedAt = interruptedAt
}
}
@MainActor
public final class StreamingAudioPlayer: NSObject {
public static let shared = StreamingAudioPlayer()
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "talk.tts.stream")
private var playback: Playback?
public func play(stream: AsyncThrowingStream<Data, Error>) async -> StreamingPlaybackResult {
self.stopInternal()
let playback = Playback(logger: self.logger)
self.playback = playback
return await withCheckedContinuation { continuation in
playback.setContinuation(continuation)
playback.start()
Task.detached {
do {
for try await chunk in stream {
playback.append(chunk)
}
playback.finishInput()
} catch {
playback.fail(error)
}
}
}
}
public func stop() -> Double? {
guard let playback else { return nil }
let interruptedAt = playback.stop(immediate: true)
self.finish(playback: playback, result: StreamingPlaybackResult(finished: false, interruptedAt: interruptedAt))
return interruptedAt
}
private func stopInternal() {
guard let playback else { return }
let interruptedAt = playback.stop(immediate: true)
self.finish(playback: playback, result: StreamingPlaybackResult(finished: false, interruptedAt: interruptedAt))
}
private func finish(playback: Playback, result: StreamingPlaybackResult) {
playback.finish(result)
guard self.playback === playback else { return }
self.playback = nil
}
}
private final class Playback: @unchecked Sendable {
private static let bufferCount: Int = 3
private static let bufferSize: Int = 32 * 1024
private let logger: Logger
private let lock = NSLock()
private let parseQueue = DispatchQueue(label: "talk.stream.parse")
fileprivate let bufferLock = NSLock()
fileprivate let bufferSemaphore = DispatchSemaphore(value: bufferCount)
private var continuation: CheckedContinuation<StreamingPlaybackResult, Never>?
private var finished = false
private var audioFileStream: AudioFileStreamID?
private var audioQueue: AudioQueueRef?
fileprivate var audioFormat: AudioStreamBasicDescription?
fileprivate var maxPacketSize: UInt32 = 0
fileprivate var availableBuffers: [AudioQueueBufferRef] = []
private var currentBuffer: AudioQueueBufferRef?
private var currentBufferSize: Int = 0
private var currentPacketDescs: [AudioStreamPacketDescription] = []
private var isRunning = false
fileprivate var inputFinished = false
private var startRequested = false
private var sampleRate: Double = 0
init(logger: Logger) {
self.logger = logger
}
func setContinuation(_ continuation: CheckedContinuation<StreamingPlaybackResult, Never>) {
self.lock.lock()
self.continuation = continuation
self.lock.unlock()
}
func start() {
let selfPtr = Unmanaged.passUnretained(self).toOpaque()
let status = AudioFileStreamOpen(
selfPtr,
propertyListenerProc,
packetsProc,
kAudioFileMP3Type,
&self.audioFileStream)
if status != noErr {
self.logger.error("talk stream open failed: \(status)")
self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil))
}
}
func append(_ data: Data) {
guard !data.isEmpty else { return }
self.parseQueue.async { [weak self] in
guard let self else { return }
guard let audioFileStream = self.audioFileStream else { return }
let status = data.withUnsafeBytes { bytes in
AudioFileStreamParseBytes(
audioFileStream,
UInt32(bytes.count),
bytes.baseAddress,
[])
}
if status != noErr {
self.logger.error("talk stream parse failed: \(status)")
self.fail(NSError(domain: "StreamingAudio", code: Int(status)))
}
}
}
func finishInput() {
self.parseQueue.async { [weak self] in
guard let self else { return }
self.inputFinished = true
if self.audioQueue == nil {
self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil))
return
}
self.enqueueCurrentBuffer(flushOnly: true)
_ = self.stop(immediate: false)
}
}
func fail(_ error: Error) {
self.logger.error("talk stream failed: \(error.localizedDescription, privacy: .public)")
_ = self.stop(immediate: true)
self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil))
}
func stop(immediate: Bool) -> Double? {
guard let audioQueue else { return nil }
let interruptedAt = self.currentTimeSeconds()
AudioQueueStop(audioQueue, immediate)
return interruptedAt
}
fileprivate func finish(_ result: StreamingPlaybackResult) {
let continuation: CheckedContinuation<StreamingPlaybackResult, Never>?
self.lock.lock()
if self.finished {
continuation = nil
} else {
self.finished = true
continuation = self.continuation
self.continuation = nil
}
self.lock.unlock()
continuation?.resume(returning: result)
self.teardown()
}
private func teardown() {
if let audioQueue {
AudioQueueDispose(audioQueue, true)
self.audioQueue = nil
}
if let audioFileStream {
AudioFileStreamClose(audioFileStream)
self.audioFileStream = nil
}
self.bufferLock.lock()
self.availableBuffers.removeAll()
self.bufferLock.unlock()
self.currentBuffer = nil
self.currentPacketDescs.removeAll()
}
fileprivate func setupQueueIfNeeded(_ asbd: AudioStreamBasicDescription) {
guard self.audioQueue == nil else { return }
var format = asbd
self.audioFormat = format
self.sampleRate = format.mSampleRate
let selfPtr = Unmanaged.passUnretained(self).toOpaque()
let status = AudioQueueNewOutput(
&format,
outputCallbackProc,
selfPtr,
nil,
nil,
0,
&self.audioQueue)
if status != noErr {
self.logger.error("talk queue create failed: \(status)")
self.finish(StreamingPlaybackResult(finished: false, interruptedAt: nil))
return
}
if let audioQueue {
AudioQueueAddPropertyListener(audioQueue, kAudioQueueProperty_IsRunning, isRunningCallbackProc, selfPtr)
}
if let audioFileStream {
var cookieSize: UInt32 = 0
var writable: DarwinBoolean = false
let cookieStatus = AudioFileStreamGetPropertyInfo(
audioFileStream,
kAudioFileStreamProperty_MagicCookieData,
&cookieSize,
&writable)
if cookieStatus == noErr, cookieSize > 0, let audioQueue {
var cookie = [UInt8](repeating: 0, count: Int(cookieSize))
let readStatus = AudioFileStreamGetProperty(
audioFileStream,
kAudioFileStreamProperty_MagicCookieData,
&cookieSize,
&cookie)
if readStatus == noErr {
AudioQueueSetProperty(audioQueue, kAudioQueueProperty_MagicCookie, cookie, cookieSize)
}
}
}
if let audioQueue {
for _ in 0..<Self.bufferCount {
var buffer: AudioQueueBufferRef?
let allocStatus = AudioQueueAllocateBuffer(audioQueue, UInt32(Self.bufferSize), &buffer)
if allocStatus == noErr, let buffer {
self.bufferLock.lock()
self.availableBuffers.append(buffer)
self.bufferLock.unlock()
}
}
}
}
private func enqueueCurrentBuffer(flushOnly: Bool = false) {
guard let audioQueue, let buffer = self.currentBuffer else { return }
guard self.currentBufferSize > 0 else { return }
buffer.pointee.mAudioDataByteSize = UInt32(self.currentBufferSize)
let packetCount = UInt32(self.currentPacketDescs.count)
let status = self.currentPacketDescs.withUnsafeBufferPointer { descPtr in
AudioQueueEnqueueBuffer(audioQueue, buffer, packetCount, descPtr.baseAddress)
}
if status != noErr {
self.logger.error("talk queue enqueue failed: \(status)")
} else {
if !self.startRequested {
self.startRequested = true
let startStatus = AudioQueueStart(audioQueue, nil)
if startStatus != noErr {
self.logger.error("talk queue start failed: \(startStatus)")
}
}
}
self.currentBuffer = nil
self.currentBufferSize = 0
self.currentPacketDescs.removeAll(keepingCapacity: true)
if !flushOnly {
self.bufferSemaphore.wait()
self.bufferLock.lock()
let next = self.availableBuffers.popLast()
self.bufferLock.unlock()
if let next { self.currentBuffer = next }
}
}
fileprivate func handlePackets(
numberBytes: UInt32,
numberPackets: UInt32,
inputData: UnsafeRawPointer,
packetDescriptions: UnsafeMutablePointer<AudioStreamPacketDescription>?)
{
if self.audioQueue == nil, let format = self.audioFormat {
self.setupQueueIfNeeded(format)
}
if self.audioQueue == nil {
return
}
if self.currentBuffer == nil {
self.bufferSemaphore.wait()
self.bufferLock.lock()
self.currentBuffer = self.availableBuffers.popLast()
self.bufferLock.unlock()
self.currentBufferSize = 0
self.currentPacketDescs.removeAll(keepingCapacity: true)
}
let bytes = inputData.assumingMemoryBound(to: UInt8.self)
let packetCount = Int(numberPackets)
for index in 0..<packetCount {
let packetOffset: Int
let packetSize: Int
if let packetDescriptions {
packetOffset = Int(packetDescriptions[index].mStartOffset)
packetSize = Int(packetDescriptions[index].mDataByteSize)
} else {
let size = Int(numberBytes) / packetCount
packetOffset = index * size
packetSize = size
}
if packetSize > Self.bufferSize {
continue
}
if self.currentBufferSize + packetSize > Self.bufferSize {
self.enqueueCurrentBuffer()
}
guard let buffer = self.currentBuffer else { continue }
let dest = buffer.pointee.mAudioData.advanced(by: self.currentBufferSize)
memcpy(dest, bytes.advanced(by: packetOffset), packetSize)
let desc = AudioStreamPacketDescription(
mStartOffset: Int64(self.currentBufferSize),
mVariableFramesInPacket: 0,
mDataByteSize: UInt32(packetSize))
self.currentPacketDescs.append(desc)
self.currentBufferSize += packetSize
}
}
private func currentTimeSeconds() -> Double? {
guard let audioQueue, sampleRate > 0 else { return nil }
var timeStamp = AudioTimeStamp()
let status = AudioQueueGetCurrentTime(audioQueue, nil, &timeStamp, nil)
if status != noErr { return nil }
if timeStamp.mSampleTime.isNaN { return nil }
return timeStamp.mSampleTime / sampleRate
}
}
private func propertyListenerProc(
inClientData: UnsafeMutableRawPointer,
inAudioFileStream: AudioFileStreamID,
inPropertyID: AudioFileStreamPropertyID,
ioFlags: UnsafeMutablePointer<AudioFileStreamPropertyFlags>)
{
let playback = Unmanaged<Playback>.fromOpaque(inClientData).takeUnretainedValue()
if inPropertyID == kAudioFileStreamProperty_DataFormat {
var format = AudioStreamBasicDescription()
var size = UInt32(MemoryLayout<AudioStreamBasicDescription>.size)
let status = AudioFileStreamGetProperty(inAudioFileStream, inPropertyID, &size, &format)
if status == noErr {
playback.audioFormat = format
playback.setupQueueIfNeeded(format)
}
} else if inPropertyID == kAudioFileStreamProperty_PacketSizeUpperBound {
var maxPacketSize: UInt32 = 0
var size = UInt32(MemoryLayout<UInt32>.size)
let status = AudioFileStreamGetProperty(inAudioFileStream, inPropertyID, &size, &maxPacketSize)
if status == noErr {
playback.maxPacketSize = maxPacketSize
}
}
}
private func packetsProc(
inClientData: UnsafeMutableRawPointer,
inNumberBytes: UInt32,
inNumberPackets: UInt32,
inInputData: UnsafeRawPointer,
inPacketDescriptions: UnsafeMutablePointer<AudioStreamPacketDescription>?)
{
let playback = Unmanaged<Playback>.fromOpaque(inClientData).takeUnretainedValue()
playback.handlePackets(
numberBytes: inNumberBytes,
numberPackets: inNumberPackets,
inputData: inInputData,
packetDescriptions: inPacketDescriptions)
}
private func outputCallbackProc(
inUserData: UnsafeMutableRawPointer?,
inAQ: AudioQueueRef,
inBuffer: AudioQueueBufferRef)
{
guard let inUserData else { return }
let playback = Unmanaged<Playback>.fromOpaque(inUserData).takeUnretainedValue()
playback.bufferLock.lock()
playback.availableBuffers.append(inBuffer)
playback.bufferLock.unlock()
playback.bufferSemaphore.signal()
}
private func isRunningCallbackProc(
inUserData: UnsafeMutableRawPointer?,
inAQ: AudioQueueRef,
inID: AudioQueuePropertyID)
{
guard let inUserData else { return }
guard inID == kAudioQueueProperty_IsRunning else { return }
let playback = Unmanaged<Playback>.fromOpaque(inUserData).takeUnretainedValue()
var running: UInt32 = 0
var size = UInt32(MemoryLayout<UInt32>.size)
let status = AudioQueueGetProperty(inAQ, kAudioQueueProperty_IsRunning, &running, &size)
if status != noErr { return }
if running == 0, playback.inputFinished {
playback.finish(StreamingPlaybackResult(finished: true, interruptedAt: nil))
}
}

View File

@@ -1,51 +0,0 @@
public enum TalkTTSValidation: Sendable {
private static let v3StabilityValues: Set<Double> = [0.0, 0.5, 1.0]
public static func resolveSpeed(speed: Double?, rateWPM: Int?) -> Double? {
if let rateWPM, rateWPM > 0 {
let resolved = Double(rateWPM) / 175.0
if resolved <= 0.5 || resolved >= 2.0 { return nil }
return resolved
}
if let speed {
if speed <= 0.5 || speed >= 2.0 { return nil }
return speed
}
return nil
}
public static func validatedUnit(_ value: Double?) -> Double? {
guard let value else { return nil }
if value < 0 || value > 1 { return nil }
return value
}
public static func validatedStability(_ value: Double?, modelId: String?) -> Double? {
guard let value else { return nil }
let normalizedModel = (modelId ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
if normalizedModel == "eleven_v3" {
return v3StabilityValues.contains(value) ? value : nil
}
return validatedUnit(value)
}
public static func validatedSeed(_ value: Int?) -> UInt32? {
guard let value else { return nil }
if value < 0 || value > 4294967295 { return nil }
return UInt32(value)
}
public static func validatedLatencyTier(_ value: Int?) -> Int? {
guard let value else { return nil }
if value < 0 || value > 4 { return nil }
return value
}
public static func pcmSampleRate(from outputFormat: String?) -> Double? {
let trimmed = (outputFormat ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
guard trimmed.hasPrefix("pcm_") else { return nil }
let parts = trimmed.split(separator: "_", maxSplits: 1)
guard parts.count == 2, let rate = Double(parts[1]), rate > 0 else { return nil }
return rate
}
}

View File

@@ -4,7 +4,7 @@ import XCTest
final class ElevenLabsTTSValidationTests: XCTestCase {
func testValidatedOutputFormatAllowsOnlyMp3Presets() {
XCTAssertEqual(ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128"), "mp3_44100_128")
XCTAssertNil(ElevenLabsTTSClient.validatedOutputFormat("pcm_16000"))
XCTAssertEqual(ElevenLabsTTSClient.validatedOutputFormat("pcm_16000"), "pcm_16000")
}
func testValidatedLanguageAcceptsTwoLetterCodes() {
@@ -17,4 +17,3 @@ final class ElevenLabsTTSValidationTests: XCTestCase {
XCTAssertNil(ElevenLabsTTSClient.validatedNormalize("maybe"))
}
}

View File

@@ -1,26 +0,0 @@
import XCTest
@testable import ClawdisKit
final class PCMStreamingAudioPlayerTests: XCTestCase {
@MainActor
func testStopDuringPCMStreamReturnsInterruptedResult() async {
var continuation: AsyncThrowingStream<Data, Error>.Continuation?
let stream = AsyncThrowingStream<Data, Error> { cont in
continuation = cont
let samples = Data(repeating: 0, count: 44_100)
cont.yield(samples)
}
let task = Task { @MainActor in
await PCMStreamingAudioPlayer.shared.play(stream: stream, sampleRate: 44_100)
}
try? await Task.sleep(nanoseconds: 120_000_000)
let interruptedAt = PCMStreamingAudioPlayer.shared.stop()
continuation?.finish()
let result = await task.value
XCTAssertFalse(result.finished)
XCTAssertNotNil(interruptedAt)
}
}

View File

@@ -1,45 +0,0 @@
import XCTest
@testable import ClawdisKit
final class TalkTTSValidationTests: XCTestCase {
func testResolveSpeedUsesRateWPMWhenProvided() {
let resolved = TalkTTSValidation.resolveSpeed(speed: nil, rateWPM: 175)
XCTAssertNotNil(resolved)
XCTAssertEqual(resolved ?? 0, 1.0, accuracy: 0.0001)
XCTAssertNil(TalkTTSValidation.resolveSpeed(speed: nil, rateWPM: 400))
}
func testValidatedUnitBounds() {
XCTAssertEqual(TalkTTSValidation.validatedUnit(0), 0)
XCTAssertEqual(TalkTTSValidation.validatedUnit(1), 1)
XCTAssertNil(TalkTTSValidation.validatedUnit(-0.01))
XCTAssertNil(TalkTTSValidation.validatedUnit(1.01))
}
func testValidatedStability() {
XCTAssertEqual(TalkTTSValidation.validatedStability(0, modelId: "eleven_v3"), 0)
XCTAssertEqual(TalkTTSValidation.validatedStability(0.5, modelId: "eleven_v3"), 0.5)
XCTAssertEqual(TalkTTSValidation.validatedStability(1, modelId: "eleven_v3"), 1)
XCTAssertNil(TalkTTSValidation.validatedStability(0.7, modelId: "eleven_v3"))
XCTAssertEqual(TalkTTSValidation.validatedStability(0.7, modelId: "eleven_multilingual_v2"), 0.7)
}
func testValidatedSeedBounds() {
XCTAssertEqual(TalkTTSValidation.validatedSeed(0), 0)
XCTAssertEqual(TalkTTSValidation.validatedSeed(1234), 1234)
XCTAssertNil(TalkTTSValidation.validatedSeed(-1))
}
func testValidatedLatencyTier() {
XCTAssertEqual(TalkTTSValidation.validatedLatencyTier(0), 0)
XCTAssertEqual(TalkTTSValidation.validatedLatencyTier(4), 4)
XCTAssertNil(TalkTTSValidation.validatedLatencyTier(-1))
XCTAssertNil(TalkTTSValidation.validatedLatencyTier(5))
}
func testPcmSampleRateParse() {
XCTAssertEqual(TalkTTSValidation.pcmSampleRate(from: "pcm_44100"), 44100)
XCTAssertNil(TalkTTSValidation.pcmSampleRate(from: "mp3_44100_128"))
XCTAssertNil(TalkTTSValidation.pcmSampleRate(from: "pcm_bad"))
}
}