chore: lint and format cleanup

This commit is contained in:
Peter Steinberger
2026-01-04 16:24:10 +01:00
parent fd95ededaa
commit 026a25d164
45 changed files with 627 additions and 496 deletions

View File

@@ -436,14 +436,49 @@ actor TalkModeRuntime {
}
}
// swiftlint:disable:next cyclomatic_complexity function_body_length
private func playAssistant(text: String) async {
guard let input = await self.preparePlaybackInput(text: text) else { return }
do {
if let apiKey = input.apiKey, !apiKey.isEmpty, let voiceId = input.voiceId {
try await self.playElevenLabs(input: input, apiKey: apiKey, voiceId: voiceId)
} else {
try await self.playSystemVoice(input: input)
}
} catch {
self.ttsLogger
.error(
"talk TTS failed: \(error.localizedDescription, privacy: .public); " +
"falling back to system voice")
do {
try await self.playSystemVoice(input: input)
} catch {
self.ttsLogger.error("talk system voice failed: \(error.localizedDescription, privacy: .public)")
}
}
if self.phase == .speaking {
self.phase = .thinking
await MainActor.run { TalkModeController.shared.updatePhase(.thinking) }
}
}
private struct TalkPlaybackInput {
let generation: Int
let cleanedText: String
let directive: TalkDirective?
let apiKey: String?
let voiceId: String?
let language: String?
let synthTimeoutSeconds: Double
}
private func preparePlaybackInput(text: String) async -> TalkPlaybackInput? {
let gen = self.lifecycleGeneration
let parse = TalkDirectiveParser.parse(text)
let directive = parse.directive
let cleaned = parse.stripped.trimmingCharacters(in: .whitespacesAndNewlines)
guard !cleaned.isEmpty else { return }
guard self.isCurrent(gen) else { return }
guard !cleaned.isEmpty else { return nil }
guard self.isCurrent(gen) else { return nil }
if !parse.unknownKeys.isEmpty {
self.logger
@@ -504,116 +539,123 @@ actor TalkModeRuntime {
let synthTimeoutSeconds = max(20.0, min(90.0, Double(cleaned.count) * 0.12))
do {
if let apiKey, !apiKey.isEmpty, let voiceId {
let desiredOutputFormat = directive?.outputFormat ?? self.defaultOutputFormat ?? "pcm_44100"
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(desiredOutputFormat)
if outputFormat == nil, !desiredOutputFormat.isEmpty {
self.logger
.warning(
"talk output_format unsupported for local playback: " +
"\(desiredOutputFormat, privacy: .public)")
}
guard self.isCurrent(gen) else { return nil }
let modelId = directive?.modelId ?? self.currentModelId ?? self.defaultModelId
func makeRequest(outputFormat: String?) -> ElevenLabsTTSRequest {
ElevenLabsTTSRequest(
text: cleaned,
modelId: modelId,
outputFormat: outputFormat,
speed: TalkTTSValidation.resolveSpeed(speed: directive?.speed, rateWPM: directive?.rateWPM),
stability: TalkTTSValidation.validatedStability(directive?.stability, modelId: modelId),
similarity: TalkTTSValidation.validatedUnit(directive?.similarity),
style: TalkTTSValidation.validatedUnit(directive?.style),
speakerBoost: directive?.speakerBoost,
seed: TalkTTSValidation.validatedSeed(directive?.seed),
normalize: ElevenLabsTTSClient.validatedNormalize(directive?.normalize),
language: language,
latencyTier: TalkTTSValidation.validatedLatencyTier(directive?.latencyTier))
}
return TalkPlaybackInput(
generation: gen,
cleanedText: cleaned,
directive: directive,
apiKey: apiKey,
voiceId: voiceId,
language: language,
synthTimeoutSeconds: synthTimeoutSeconds)
}
let request = makeRequest(outputFormat: outputFormat)
self.ttsLogger.info("talk TTS synth timeout=\(synthTimeoutSeconds, privacy: .public)s")
let client = ElevenLabsTTSClient(apiKey: apiKey)
let stream = client.streamSynthesize(voiceId: voiceId, request: request)
guard self.isCurrent(gen) else { return }
if self.interruptOnSpeech {
await self.startRecognition()
guard self.isCurrent(gen) else { return }
}
await MainActor.run { TalkModeController.shared.updatePhase(.speaking) }
self.phase = .speaking
let sampleRate = TalkTTSValidation.pcmSampleRate(from: outputFormat)
var result: StreamingPlaybackResult
if let sampleRate {
self.lastPlaybackWasPCM = true
result = await self.playPCM(stream: stream, sampleRate: sampleRate)
if !result.finished, result.interruptedAt == nil {
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
self.ttsLogger.warning("talk pcm playback failed; retrying mp3")
self.lastPlaybackWasPCM = false
let mp3Stream = client.streamSynthesize(
voiceId: voiceId,
request: makeRequest(outputFormat: mp3Format))
result = await self.playMP3(stream: mp3Stream)
}
} else {
self.lastPlaybackWasPCM = false
result = await self.playMP3(stream: stream)
}
self.ttsLogger
.info(
"talk audio result finished=\(result.finished, privacy: .public) " +
"interruptedAt=\(String(describing: result.interruptedAt), privacy: .public)")
if !result.finished, result.interruptedAt == nil {
throw NSError(domain: "StreamingAudioPlayer", code: 1, userInfo: [
NSLocalizedDescriptionKey: "audio playback failed",
])
}
if !result.finished, let interruptedAt = result.interruptedAt, self.phase == .speaking {
if self.interruptOnSpeech {
self.lastInterruptedAtSeconds = interruptedAt
}
}
} else {
self.ttsLogger.info("talk system voice start chars=\(cleaned.count, privacy: .public)")
if self.interruptOnSpeech {
await self.startRecognition()
guard self.isCurrent(gen) else { return }
}
await MainActor.run { TalkModeController.shared.updatePhase(.speaking) }
self.phase = .speaking
await TalkSystemSpeechSynthesizer.shared.stop()
try await TalkSystemSpeechSynthesizer.shared.speak(text: cleaned, language: language)
self.ttsLogger.info("talk system voice done")
}
} catch {
self.ttsLogger
.error(
"talk TTS failed: \(error.localizedDescription, privacy: .public); " +
"falling back to system voice")
do {
if self.interruptOnSpeech {
await self.startRecognition()
guard self.isCurrent(gen) else { return }
}
await MainActor.run { TalkModeController.shared.updatePhase(.speaking) }
self.phase = .speaking
await TalkSystemSpeechSynthesizer.shared.stop()
try await TalkSystemSpeechSynthesizer.shared.speak(text: cleaned, language: language)
} catch {
self.ttsLogger.error("talk system voice failed: \(error.localizedDescription, privacy: .public)")
}
private func playElevenLabs(input: TalkPlaybackInput, apiKey: String, voiceId: String) async throws {
let desiredOutputFormat = input.directive?.outputFormat ?? self.defaultOutputFormat ?? "pcm_44100"
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(desiredOutputFormat)
if outputFormat == nil, !desiredOutputFormat.isEmpty {
self.logger
.warning(
"talk output_format unsupported for local playback: " +
"\(desiredOutputFormat, privacy: .public)")
}
if self.phase == .speaking {
self.phase = .thinking
await MainActor.run { TalkModeController.shared.updatePhase(.thinking) }
let modelId = input.directive?.modelId ?? self.currentModelId ?? self.defaultModelId
func makeRequest(outputFormat: String?) -> ElevenLabsTTSRequest {
ElevenLabsTTSRequest(
text: input.cleanedText,
modelId: modelId,
outputFormat: outputFormat,
speed: TalkTTSValidation.resolveSpeed(
speed: input.directive?.speed,
rateWPM: input.directive?.rateWPM),
stability: TalkTTSValidation.validatedStability(
input.directive?.stability,
modelId: modelId),
similarity: TalkTTSValidation.validatedUnit(input.directive?.similarity),
style: TalkTTSValidation.validatedUnit(input.directive?.style),
speakerBoost: input.directive?.speakerBoost,
seed: TalkTTSValidation.validatedSeed(input.directive?.seed),
normalize: ElevenLabsTTSClient.validatedNormalize(input.directive?.normalize),
language: input.language,
latencyTier: TalkTTSValidation.validatedLatencyTier(input.directive?.latencyTier))
}
let request = makeRequest(outputFormat: outputFormat)
self.ttsLogger.info("talk TTS synth timeout=\(input.synthTimeoutSeconds, privacy: .public)s")
let client = ElevenLabsTTSClient(apiKey: apiKey)
let stream = client.streamSynthesize(voiceId: voiceId, request: request)
guard self.isCurrent(input.generation) else { return }
if self.interruptOnSpeech, ! await self.prepareForPlayback(generation: input.generation) { return }
await MainActor.run { TalkModeController.shared.updatePhase(.speaking) }
self.phase = .speaking
let result = await self.playRemoteStream(
client: client,
voiceId: voiceId,
outputFormat: outputFormat,
makeRequest: makeRequest,
stream: stream)
self.ttsLogger
.info(
"talk audio result finished=\(result.finished, privacy: .public) " +
"interruptedAt=\(String(describing: result.interruptedAt), privacy: .public)")
if !result.finished, result.interruptedAt == nil {
throw NSError(domain: "StreamingAudioPlayer", code: 1, userInfo: [
NSLocalizedDescriptionKey: "audio playback failed",
])
}
if !result.finished, let interruptedAt = result.interruptedAt, self.phase == .speaking {
if self.interruptOnSpeech {
self.lastInterruptedAtSeconds = interruptedAt
}
}
}
private func playRemoteStream(
client: ElevenLabsTTSClient,
voiceId: String,
outputFormat: String?,
makeRequest: (String?) -> ElevenLabsTTSRequest,
stream: AsyncThrowingStream<Data, Error>) async -> StreamingPlaybackResult
{
let sampleRate = TalkTTSValidation.pcmSampleRate(from: outputFormat)
if let sampleRate {
self.lastPlaybackWasPCM = true
let result = await self.playPCM(stream: stream, sampleRate: sampleRate)
if result.finished || result.interruptedAt != nil {
return result
}
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
self.ttsLogger.warning("talk pcm playback failed; retrying mp3")
self.lastPlaybackWasPCM = false
let mp3Stream = client.streamSynthesize(
voiceId: voiceId,
request: makeRequest(mp3Format))
return await self.playMP3(stream: mp3Stream)
}
self.lastPlaybackWasPCM = false
return await self.playMP3(stream: stream)
}
private func playSystemVoice(input: TalkPlaybackInput) async throws {
self.ttsLogger.info("talk system voice start chars=\(input.cleanedText.count, privacy: .public)")
if self.interruptOnSpeech, ! await self.prepareForPlayback(generation: input.generation) { return }
await MainActor.run { TalkModeController.shared.updatePhase(.speaking) }
self.phase = .speaking
await TalkSystemSpeechSynthesizer.shared.stop()
try await TalkSystemSpeechSynthesizer.shared.speak(
text: input.cleanedText,
language: input.language)
self.ttsLogger.info("talk system voice done")
}
private func prepareForPlayback(generation: Int) async -> Bool {
await self.startRecognition()
return self.isCurrent(generation)
}
private func resolveVoiceId(preferred: String?, apiKey: String) async -> String? {
@@ -682,6 +724,9 @@ actor TalkModeRuntime {
self.phase = .thinking
await MainActor.run { TalkModeController.shared.updatePhase(.thinking) }
}
}
extension TalkModeRuntime {
// MARK: - Audio playback (MainActor helpers)