Files
clawdbot/apps/shared/ClawdbotKit/Sources/ClawdbotKit/TalkSystemSpeechSynthesizer.swift
2026-01-04 16:24:17 +01:00

117 lines
3.6 KiB
Swift

import AVFoundation
import Foundation
@MainActor
public final class TalkSystemSpeechSynthesizer: NSObject {
public enum SpeakError: Error {
case canceled
}
public static let shared = TalkSystemSpeechSynthesizer()
private let synth = AVSpeechSynthesizer()
private var speakContinuation: CheckedContinuation<Void, Error>?
private var currentUtterance: AVSpeechUtterance?
private var currentToken = UUID()
private var watchdog: Task<Void, Never>?
public var isSpeaking: Bool { self.synth.isSpeaking }
override private init() {
super.init()
self.synth.delegate = self
}
public func stop() {
self.currentToken = UUID()
self.watchdog?.cancel()
self.watchdog = nil
self.synth.stopSpeaking(at: .immediate)
self.finishCurrent(with: SpeakError.canceled)
}
public func speak(text: String, language: String? = nil) async throws {
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
guard !trimmed.isEmpty else { return }
self.stop()
let token = UUID()
self.currentToken = token
let utterance = AVSpeechUtterance(string: trimmed)
if let language, let voice = AVSpeechSynthesisVoice(language: language) {
utterance.voice = voice
}
self.currentUtterance = utterance
let estimatedSeconds = max(3.0, min(180.0, Double(trimmed.count) * 0.08))
self.watchdog?.cancel()
self.watchdog = Task { @MainActor [weak self] in
guard let self else { return }
try? await Task.sleep(nanoseconds: UInt64(estimatedSeconds * 1_000_000_000))
if Task.isCancelled { return }
guard self.currentToken == token else { return }
if self.synth.isSpeaking {
self.synth.stopSpeaking(at: .immediate)
}
self.finishCurrent(
with: NSError(domain: "TalkSystemSpeechSynthesizer", code: 408, userInfo: [
NSLocalizedDescriptionKey: "system TTS timed out after \(estimatedSeconds)s",
]))
}
try await withTaskCancellationHandler(operation: {
try await withCheckedThrowingContinuation { cont in
self.speakContinuation = cont
self.synth.speak(utterance)
}
}, onCancel: {
Task { @MainActor in
self.stop()
}
})
if self.currentToken != token {
throw SpeakError.canceled
}
}
private func handleFinish(error: Error?) {
guard self.currentUtterance != nil else { return }
self.watchdog?.cancel()
self.watchdog = nil
self.finishCurrent(with: error)
}
private func finishCurrent(with error: Error?) {
self.currentUtterance = nil
let cont = self.speakContinuation
self.speakContinuation = nil
if let error {
cont?.resume(throwing: error)
} else {
cont?.resume(returning: ())
}
}
}
extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate {
public nonisolated func speechSynthesizer(
_ synthesizer: AVSpeechSynthesizer,
didFinish utterance: AVSpeechUtterance)
{
Task { @MainActor in
self.handleFinish(error: nil)
}
}
public nonisolated func speechSynthesizer(
_ synthesizer: AVSpeechSynthesizer,
didCancel utterance: AVSpeechUtterance)
{
Task { @MainActor in
self.handleFinish(error: SpeakError.canceled)
}
}
}