fix(talk): harden playback, interrupts, and timeouts
@@ -13,8 +13,13 @@
 - macOS Talk Mode: orb overlay refresh, ElevenLabs request logging, API key status in settings, and auto-select first voice when none is configured.
 - macOS Talk Mode: add hard timeout around ElevenLabs TTS synthesis to avoid getting stuck “speaking” forever on hung requests.
 - macOS Talk Mode: avoid stuck playback when the audio player never starts (fail-fast + watchdog).
+- macOS Talk Mode: fix audio stop ordering so disabling Talk Mode always stops in-flight playback.
+- macOS Talk Mode: throttle audio-level updates (avoid per-buffer task creation) to reduce CPU/task churn.
 - macOS Talk Mode: increase overlay window size so wave rings don’t clip; close button is hover-only and closer to the orb.
 - Talk Mode: wait for chat history to surface the assistant reply before starting TTS (macOS/iOS/Android).
+- iOS Talk Mode: fix chat completion wait to time out even if no events arrive (prevents “Thinking…” hangs).
+- iOS Talk Mode: keep recognition running during playback to support interrupt-on-speech.
+- iOS Talk Mode: preserve directive voice/model overrides across config reloads and add ElevenLabs request timeouts.
 - iOS/Android Talk Mode: explicitly `chat.subscribe` when Talk Mode is active, so completion events arrive even if the Chat UI isn’t open.
 - Chat UI: refresh history when another client finishes a run in the same session, so Talk Mode + Voice Wake transcripts appear consistently.
 - Gateway: `voice.transcript` now also maps agent bus output to `chat` events, ensuring chat UIs refresh for voice-triggered runs.
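Most of the hardening below follows one structured-concurrency shape: race the real operation against a sleeping watchdog in a task group, take whichever child finishes first, and cancel the loser. Distilled as a sketch (the helper name and signature are illustrative, not part of this commit):

    func raced<T: Sendable>(
        _ operation: @escaping @Sendable () async -> T,
        timeoutSeconds: Double,
        fallback: T) async -> T
    {
        await withTaskGroup(of: T.self) { group in
            group.addTask { await operation() }     // the real work
            group.addTask {                         // the watchdog
                try? await Task.sleep(nanoseconds: UInt64(timeoutSeconds * 1_000_000_000))
                return fallback
            }
            let winner = await group.next() ?? fallback   // first child to finish wins
            group.cancelAll()                             // cancel the loser
            return winner
        }
    }

The waitForChatCompletion and ElevenLabs synthesis hunks below are concrete instances: the first returns a .timeout sentinel, the second throws instead, but the race is the same.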
@@ -2,8 +2,8 @@ import AVFAudio
 import ClawdisKit
 import Foundation
 import Observation
-import Speech
 import OSLog
+import Speech
 
 @MainActor
 @Observable
@@ -29,6 +29,8 @@ final class TalkModeManager: NSObject {
     private var currentVoiceId: String?
     private var defaultModelId: String?
     private var currentModelId: String?
+    private var voiceOverrideActive = false
+    private var modelOverrideActive = false
     private var defaultOutputFormat: String?
     private var apiKey: String?
     private var interruptOnSpeech: Bool = true
@@ -101,6 +103,12 @@ final class TalkModeManager: NSObject {
         self.silenceTask = nil
         self.stopRecognition()
         self.stopSpeaking()
+        self.lastInterruptedAtSeconds = nil
+        do {
+            try AVAudioSession.sharedInstance().setActive(false, options: [.notifyOthersOnDeactivation])
+        } catch {
+            self.logger.warning("audio session deactivate failed: \(error.localizedDescription, privacy: .public)")
+        }
         Task { await self.unsubscribeAllChats() }
     }
 
@@ -109,6 +117,7 @@ final class TalkModeManager: NSObject {
     }
 
     private func startRecognition() throws {
+        self.stopRecognition()
         self.speechRecognizer = SFSpeechRecognizer()
         guard let recognizer = self.speechRecognizer else {
             throw NSError(domain: "TalkMode", code: 1, userInfo: [
@@ -132,7 +141,10 @@ final class TalkModeManager: NSObject {
         self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
             guard let self else { return }
             if let error {
-                self.statusText = "Speech error: \(error.localizedDescription)"
+                if !self.isSpeaking {
+                    self.statusText = "Speech error: \(error.localizedDescription)"
+                }
+                self.logger.debug("speech recognition error: \(error.localizedDescription, privacy: .public)")
             }
             guard let result else { return }
             let transcript = result.bestTranscription.formattedString
@@ -189,7 +201,7 @@ final class TalkModeManager: NSObject {
     }
 
     private func checkSilence() async {
-        guard self.isListening else { return }
+        guard self.isListening, !self.isSpeaking else { return }
         let transcript = self.lastTranscript.trimmingCharacters(in: .whitespacesAndNewlines)
         guard !transcript.isEmpty else { return }
         guard let lastHeard else { return }
@@ -219,10 +231,21 @@ final class TalkModeManager: NSObject {
         self.logger.info("chat.send start chars=\(prompt.count, privacy: .public)")
         let runId = try await self.sendChat(prompt, bridge: bridge)
         self.logger.info("chat.send ok runId=\(runId, privacy: .public)")
-        let ok = await self.waitForChatFinal(runId: runId, bridge: bridge)
-        if !ok {
-            self.statusText = "No reply"
-            self.logger.warning("chat final timeout runId=\(runId, privacy: .public)")
+        let completion = await self.waitForChatCompletion(runId: runId, bridge: bridge, timeoutSeconds: 120)
+        guard completion == .final else {
+            switch completion {
+            case .timeout:
+                self.statusText = "No reply"
+                self.logger.warning("chat completion timeout runId=\(runId, privacy: .public)")
+            case .aborted:
+                self.statusText = "Aborted"
+                self.logger.warning("chat completion aborted runId=\(runId, privacy: .public)")
+            case .error:
+                self.statusText = "Chat error"
+                self.logger.warning("chat completion error runId=\(runId, privacy: .public)")
+            case .final:
+                break
+            }
             await self.start()
             return
         }
@@ -259,7 +282,9 @@ final class TalkModeManager: NSObject {
             self.chatSubscribedSessionKeys.insert(key)
             self.logger.info("chat.subscribe ok sessionKey=\(key, privacy: .public)")
         } catch {
-            self.logger.warning("chat.subscribe failed sessionKey=\(key, privacy: .public) err=\(error.localizedDescription, privacy: .public)")
+            self.logger
+                .warning(
+                    "chat.subscribe failed sessionKey=\(key, privacy: .public) err=\(error.localizedDescription, privacy: .public)")
         }
     }
 
@@ -294,6 +319,22 @@ final class TalkModeManager: NSObject {
         return lines.joined(separator: "\n")
     }
 
+    private enum ChatCompletionState: CustomStringConvertible {
+        case final
+        case aborted
+        case error
+        case timeout
+
+        var description: String {
+            switch self {
+            case .final: "final"
+            case .aborted: "aborted"
+            case .error: "error"
+            case .timeout: "timeout"
+            }
+        }
+    }
+
     private func sendChat(_ message: String, bridge: BridgeSession) async throws -> String {
         struct SendResponse: Decodable { let runId: String }
         let payload: [String: Any] = [
@@ -310,20 +351,39 @@ final class TalkModeManager: NSObject {
         return decoded.runId
     }
 
-    private func waitForChatFinal(runId: String, bridge: BridgeSession) async -> Bool {
+    private func waitForChatCompletion(
+        runId: String,
+        bridge: BridgeSession,
+        timeoutSeconds: Int = 120) async -> ChatCompletionState
+    {
         let stream = await bridge.subscribeServerEvents(bufferingNewest: 200)
-        let timeout = Date().addingTimeInterval(120)
-        for await evt in stream {
-            if Date() > timeout { return false }
-            guard evt.event == "chat", let payload = evt.payloadJSON else { continue }
-            guard let data = payload.data(using: .utf8) else { continue }
-            guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else { continue }
-            if (json["runId"] as? String) != runId { continue }
-            if let state = json["state"] as? String, state == "final" {
-                return true
+        return await withTaskGroup(of: ChatCompletionState.self) { group in
+            group.addTask { [runId] in
+                for await evt in stream {
+                    if Task.isCancelled { return .timeout }
+                    guard evt.event == "chat", let payload = evt.payloadJSON else { continue }
+                    guard let data = payload.data(using: .utf8) else { continue }
+                    guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else { continue }
+                    if (json["runId"] as? String) != runId { continue }
+                    if let state = json["state"] as? String {
+                        switch state {
+                        case "final": return .final
+                        case "aborted": return .aborted
+                        case "error": return .error
+                        default: break
+                        }
+                    }
+                }
+                return .timeout
             }
+            group.addTask {
+                try? await Task.sleep(nanoseconds: UInt64(timeoutSeconds) * 1_000_000_000)
+                return .timeout
+            }
+            let result = await group.next() ?? .timeout
+            group.cancelAll()
+            return result
         }
-        return false
     }
 
     private func waitForAssistantText(
@@ -370,11 +430,13 @@ final class TalkModeManager: NSObject {
         if let voice = directive?.voiceId {
             if directive?.once != true {
                 self.currentVoiceId = voice
+                self.voiceOverrideActive = true
             }
         }
         if let model = directive?.modelId {
             if directive?.once != true {
                 self.currentModelId = model
+                self.modelOverrideActive = true
             }
         }
 
@@ -394,16 +456,21 @@ final class TalkModeManager: NSObject {
             return
         }
 
-        self.statusText = "Speaking…"
+        self.statusText = "Generating voice…"
         self.isSpeaking = true
         self.lastSpokenText = cleaned
 
        do {
             let started = Date()
+            let desiredOutputFormat = directive?.outputFormat ?? self.defaultOutputFormat
+            let outputFormat = TalkModeRuntime.validatedOutputFormat(desiredOutputFormat)
+            if outputFormat == nil, let desiredOutputFormat, !desiredOutputFormat.isEmpty {
+                self.logger.warning("talk output_format unsupported for local playback: \(desiredOutputFormat, privacy: .public)")
+            }
             let request = ElevenLabsRequest(
                 text: cleaned,
                 modelId: directive?.modelId ?? self.currentModelId ?? self.defaultModelId,
-                outputFormat: directive?.outputFormat ?? self.defaultOutputFormat,
+                outputFormat: outputFormat,
                 speed: TalkModeRuntime.resolveSpeed(
                     speed: directive?.speed,
                     rateWPM: directive?.rateWPM),
@@ -414,16 +481,43 @@ final class TalkModeManager: NSObject {
                 seed: TalkModeRuntime.validatedSeed(directive?.seed),
                 normalize: TalkModeRuntime.validatedNormalize(directive?.normalize),
                 language: TalkModeRuntime.validatedLanguage(directive?.language))
-            let audio = try await ElevenLabsClient(apiKey: apiKey).synthesize(
-                voiceId: voiceId,
-                request: request)
-            self.logger.info("elevenlabs ok bytes=\(audio.count, privacy: .public) dur=\(Date().timeIntervalSince(started), privacy: .public)s")
+
+            let synthTimeoutSeconds = max(20.0, min(90.0, Double(cleaned.count) * 0.12))
+            let client = ElevenLabsClient(apiKey: apiKey)
+            let audio = try await withThrowingTaskGroup(of: Data.self) { group in
+                group.addTask {
+                    try await client.synthesize(voiceId: voiceId, request: request)
+                }
+                group.addTask {
+                    try await Task.sleep(nanoseconds: UInt64(synthTimeoutSeconds * 1_000_000_000))
+                    throw NSError(domain: "TalkTTS", code: 408, userInfo: [
+                        NSLocalizedDescriptionKey: "ElevenLabs TTS timed out after \(synthTimeoutSeconds)s",
+                    ])
+                }
+                let data = try await group.next()!
+                group.cancelAll()
+                return data
+            }
+            self.logger
+                .info(
+                    "elevenlabs ok bytes=\(audio.count, privacy: .public) dur=\(Date().timeIntervalSince(started), privacy: .public)s")
+
+            if self.interruptOnSpeech {
+                do {
+                    try self.startRecognition()
+                } catch {
+                    self.logger.warning("startRecognition during speak failed: \(error.localizedDescription, privacy: .public)")
+                }
+            }
+
+            self.statusText = "Speaking…"
             try await self.playAudio(data: audio)
         } catch {
             self.statusText = "Speak failed: \(error.localizedDescription)"
             self.logger.error("speak failed: \(error.localizedDescription, privacy: .public)")
         }
 
+        self.stopRecognition()
         self.isSpeaking = false
     }
 
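The synthesis deadline above scales with reply length at 0.12 s per character, clamped to a 20–90 s band. Spot-checking the formula with a few illustrative lengths (values computed here, not taken from the diff):

    let lengths = [50, 300, 1_000]
    let deadlines = lengths.map { max(20.0, min(90.0, Double($0) * 0.12)) }
    // [20.0, 36.0, 90.0] — short replies keep a 20 s floor; very long ones cap at 90 s.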
@@ -440,9 +534,11 @@ final class TalkModeManager: NSObject {
         self.logger.info("play done")
     }
 
-    private func stopSpeaking() {
+    private func stopSpeaking(storeInterruption: Bool = true) {
         guard self.isSpeaking else { return }
-        self.lastInterruptedAtSeconds = self.player?.currentTime
+        if storeInterruption {
+            self.lastInterruptedAtSeconds = self.player?.currentTime
+        }
         self.player?.stop()
         self.player = nil
         self.isSpeaking = false
@@ -465,9 +561,13 @@ final class TalkModeManager: NSObject {
         guard let config = json["config"] as? [String: Any] else { return }
         let talk = config["talk"] as? [String: Any]
         self.defaultVoiceId = (talk?["voiceId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
-        self.currentVoiceId = self.defaultVoiceId
+        if !self.voiceOverrideActive {
+            self.currentVoiceId = self.defaultVoiceId
+        }
         self.defaultModelId = (talk?["modelId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
-        self.currentModelId = self.defaultModelId
+        if !self.modelOverrideActive {
+            self.currentModelId = self.defaultModelId
+        }
         self.defaultOutputFormat = (talk?["outputFormat"] as? String)?
             .trimmingCharacters(in: .whitespacesAndNewlines)
         self.apiKey = (talk?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
@@ -561,6 +661,7 @@ private struct ElevenLabsClient {
         var req = URLRequest(url: url)
         req.httpMethod = "POST"
         req.httpBody = body
+        req.timeoutInterval = 45
         req.setValue("application/json", forHTTPHeaderField: "Content-Type")
         req.setValue("audio/mpeg", forHTTPHeaderField: "Accept")
         req.setValue(self.apiKey, forHTTPHeaderField: "xi-api-key")
@@ -614,4 +715,10 @@ private enum TalkModeRuntime {
         guard normalized.count == 2, normalized.allSatisfy({ $0 >= "a" && $0 <= "z" }) else { return nil }
         return normalized
     }
+
+    static func validatedOutputFormat(_ value: String?) -> String? {
+        let trimmed = value?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+        guard !trimmed.isEmpty else { return nil }
+        return trimmed.hasPrefix("mp3_") ? trimmed : nil
+    }
 }
@@ -38,8 +38,7 @@ struct TalkOrbOverlay: View {
             .frame(width: 136, height: 136)
             .overlay(
                 Circle()
-                    .stroke(seam.opacity(0.35), lineWidth: 1)
-            )
+                    .stroke(seam.opacity(0.35), lineWidth: 1))
             .shadow(color: seam.opacity(0.32), radius: 26, x: 0, y: 0)
             .shadow(color: Color.black.opacity(0.50), radius: 22, x: 0, y: 10)
     }
@@ -58,9 +57,7 @@ struct TalkOrbOverlay: View {
                     Capsule()
                         .fill(Color.black.opacity(0.40))
                         .overlay(
-                            Capsule().stroke(seam.opacity(0.22), lineWidth: 1)
-                        )
-                )
+                            Capsule().stroke(seam.opacity(0.22), lineWidth: 1)))
             }
         }
         .padding(28)
@@ -71,4 +68,3 @@ struct TalkOrbOverlay: View {
             .accessibilityLabel("Talk Mode \(status)")
     }
 }
-
@@ -57,3 +57,4 @@ Sources/Voice/VoiceWakePreferences.swift
 ../shared/ClawdisKit/Sources/ClawdisKit/TalkDirective.swift
 ../../Swabble/Sources/SwabbleKit/WakeWordGate.swift
 Sources/Voice/TalkModeManager.swift
+Sources/Voice/TalkOrbOverlay.swift
@@ -106,8 +106,13 @@ final class TalkAudioPlayer: NSObject, @preconcurrency AVAudioPlayerDelegate {
     }
 
     private func stopInternal() {
-        self.playback?.cancelWatchdog()
-        self.playback = nil
+        if let playback = self.playback {
+            let interruptedAt = self.player?.currentTime
+            self.finish(
+                playback: playback,
+                result: TalkPlaybackResult(finished: false, interruptedAt: interruptedAt))
+            return
+        }
         self.player?.stop()
         self.player = nil
     }
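The reworked stopInternal pairs with the fail-fast/watchdog changelog entry: when a caller is still parked inside play(data:), stop must resume that caller exactly once with finished: false instead of dropping it, or the caller awaits forever. A reduced sketch of the invariant, with hypothetical internals (the real TalkAudioPlayer fields differ):

    @MainActor
    final class MiniPlayer {
        private var pending: CheckedContinuation<Bool, Never>?

        func play() async -> Bool {
            self.stop()                         // settle any parked caller first
            return await withCheckedContinuation { continuation in
                self.pending = continuation     // parked until finish or stop
            }
        }

        func stop() {
            // Resume the awaiting caller exactly once, reporting "not finished".
            self.pending?.resume(returning: false)
            self.pending = nil
        }
    }

The playDoesNotHangWhenPlayIsCalledTwice test at the end of this commit exercises exactly this: a second play (or a stop) must settle the first await.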
@@ -11,16 +11,37 @@ actor TalkModeRuntime {
     private let logger = Logger(subsystem: "com.steipete.clawdis", category: "talk.runtime")
     private let ttsLogger = Logger(subsystem: "com.steipete.clawdis", category: "talk.tts")
 
+    private final class RMSMeter: @unchecked Sendable {
+        private let lock = NSLock()
+        private var latestRMS: Double = 0
+
+        func set(_ rms: Double) {
+            self.lock.lock()
+            self.latestRMS = rms
+            self.lock.unlock()
+        }
+
+        func get() -> Double {
+            self.lock.lock()
+            let value = self.latestRMS
+            self.lock.unlock()
+            return value
+        }
+    }
+
     private var recognizer: SFSpeechRecognizer?
     private var audioEngine: AVAudioEngine?
     private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
     private var recognitionTask: SFSpeechRecognitionTask?
     private var recognitionGeneration: Int = 0
+    private var rmsTask: Task<Void, Never>?
+    private let rmsMeter = RMSMeter()
+
     private var captureTask: Task<Void, Never>?
     private var silenceTask: Task<Void, Never>?
     private var phase: TalkModePhase = .idle
     private var isEnabled = false
+    private var lifecycleGeneration: Int = 0
+
     private var lastHeard: Date?
     private var noiseFloorRMS: Double = 1e-4
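RMSMeter is the throttling half of the "avoid per-buffer task creation" entry: the AVAudioEngine tap is a synchronous callback on an audio thread, so rather than spawning a Task per buffer it overwrites a lock-guarded cell that a single ticker polls at its own rate. The same handoff written generically (a sketch; LatestValue is not in this codebase):

    import Foundation

    // A lock-guarded "latest value" cell: non-async producers overwrite it,
    // one async consumer polls it. No task is created per callback.
    final class LatestValue<Value: Sendable>: @unchecked Sendable {
        private let lock = NSLock()
        private var value: Value

        init(_ initial: Value) { self.value = initial }

        func set(_ newValue: Value) {
            self.lock.lock()
            self.value = newValue
            self.lock.unlock()
        }

        func get() -> Value {
            self.lock.lock()
            let value = self.value
            self.lock.unlock()
            return value
        }
    }

Dropped intermediate values are the point here: the orb only needs the most recent level, so overwriting is cheaper than queueing.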
@@ -49,6 +70,7 @@ actor TalkModeRuntime {
     func setEnabled(_ enabled: Bool) async {
         guard enabled != self.isEnabled else { return }
         self.isEnabled = enabled
+        self.lifecycleGeneration &+= 1
         if enabled {
             await self.start()
         } else {
@@ -56,14 +78,21 @@
         }
     }
 
+    private func isCurrent(_ generation: Int) -> Bool {
+        generation == self.lifecycleGeneration && self.isEnabled
+    }
+
     private func start() async {
+        let gen = self.lifecycleGeneration
         guard voiceWakeSupported else { return }
         guard PermissionManager.voiceWakePermissionsGranted() else {
             self.logger.debug("talk runtime not starting: permissions missing")
             return
         }
         await self.reloadConfig()
+        guard self.isCurrent(gen) else { return }
         await self.startRecognition()
+        guard self.isCurrent(gen) else { return }
         self.phase = .listening
         await MainActor.run { TalkModeController.shared.updatePhase(.listening) }
         self.startSilenceMonitor()
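lifecycleGeneration guards every suspension point in start: the actor may interleave setEnabled(false)/setEnabled(true) while start is awaiting, so each entry snapshots the counter and every resume re-checks it before touching state. The pattern distilled into a standalone actor (names illustrative, not from the diff):

    actor GenerationGuarded {
        private var generation = 0
        private var enabled = false

        func setEnabled(_ on: Bool) {
            self.enabled = on
            self.generation &+= 1                      // invalidate in-flight work
        }

        private func isCurrent(_ snapshot: Int) -> Bool {
            snapshot == self.generation && self.enabled
        }

        func run(step: @Sendable () async -> Void) async {
            let gen = self.generation                  // snapshot before suspending
            await step()                               // actor re-entrancy point
            guard self.isCurrent(gen) else { return }  // stale resume: bail out
            // Safe to mutate state here; no toggle happened in between.
        }
    }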
@@ -74,12 +103,15 @@
         self.captureTask = nil
         self.silenceTask?.cancel()
         self.silenceTask = nil
+
+        // Stop audio before changing phase (stopSpeaking is gated on .speaking).
+        await self.stopSpeaking(reason: .manual)
+
         self.lastTranscript = ""
         self.lastHeard = nil
         self.lastSpeechEnergyAt = nil
         self.phase = .idle
         await self.stopRecognition()
-        await self.stopSpeaking(reason: .manual)
         await MainActor.run {
             TalkModeController.shared.updateLevel(0)
             TalkModeController.shared.updatePhase(.idle)
@@ -120,12 +152,11 @@
         let input = audioEngine.inputNode
         let format = input.outputFormat(forBus: 0)
         input.removeTap(onBus: 0)
-        input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak self, weak request] buffer, _ in
+        let meter = self.rmsMeter
+        input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request, meter] buffer, _ in
             request?.append(buffer)
             if let rms = Self.rmsLevel(buffer: buffer) {
-                Task.detached { [weak self] in
-                    await self?.noteAudioLevel(rms: rms)
-                }
+                meter.set(rms)
             }
         }
 
@@ -137,6 +168,8 @@
             return
         }
 
+        self.startRMSTicker(meter: meter)
+
         self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self, generation] result, error in
             guard let self else { return }
             let segments = result?.bestTranscription.segments ?? []
@@ -161,6 +194,19 @@
         self.audioEngine?.stop()
         self.audioEngine = nil
         self.recognizer = nil
+        self.rmsTask?.cancel()
+        self.rmsTask = nil
+    }
+
+    private func startRMSTicker(meter: RMSMeter) {
+        self.rmsTask?.cancel()
+        self.rmsTask = Task { [weak self, meter] in
+            while let self {
+                try? await Task.sleep(nanoseconds: 50_000_000)
+                if Task.isCancelled { return }
+                await self.noteAudioLevel(rms: meter.get())
+            }
+        }
     }
 
     private func handleRecognition(_ update: RecognitionUpdate) async {
@@ -241,43 +287,42 @@
     // MARK: - Gateway + TTS
 
     private func sendAndSpeak(_ transcript: String) async {
+        let gen = self.lifecycleGeneration
         await self.reloadConfig()
+        guard self.isCurrent(gen) else { return }
         let prompt = self.buildPrompt(transcript: transcript)
+        let sessionKey = await GatewayConnection.shared.mainSessionKey()
         let runId = UUID().uuidString
         let startedAt = Date().timeIntervalSince1970
-        self.logger.info("talk send start runId=\(runId, privacy: .public) chars=\(prompt.count, privacy: .public)")
+        self.logger.info(
+            "talk send start runId=\(runId, privacy: .public) session=\(sessionKey, privacy: .public) chars=\(prompt.count, privacy: .public)")
 
         do {
             let response = try await GatewayConnection.shared.chatSend(
-                sessionKey: "main",
+                sessionKey: sessionKey,
                 message: prompt,
                 thinking: "low",
                 idempotencyKey: runId,
                 attachments: [])
-            self.logger.info("talk chat.send ok runId=\(response.runId, privacy: .public)")
-            let completion = await self.waitForChatCompletion(
-                runId: response.runId,
-                timeoutSeconds: 120)
-            self.logger.info("talk chat completion runId=\(response.runId, privacy: .public) state=\(String(describing: completion), privacy: .public)")
-            guard completion == .final else {
-                await self.startListening()
-                await self.startRecognition()
-                return
-            }
-
+            guard self.isCurrent(gen) else { return }
+            self.logger.info(
+                "talk chat.send ok runId=\(response.runId, privacy: .public) session=\(sessionKey, privacy: .public)")
             guard let assistantText = await self.waitForAssistantText(
-                sessionKey: "main",
+                sessionKey: sessionKey,
                 since: startedAt,
-                timeoutSeconds: 12)
+                timeoutSeconds: 45)
             else {
-                self.logger.warning("talk assistant text missing after completion")
+                self.logger.warning("talk assistant text missing after timeout")
                 await self.startListening()
                 await self.startRecognition()
                 return
             }
+            guard self.isCurrent(gen) else { return }
 
             self.logger.info("talk assistant text len=\(assistantText.count, privacy: .public)")
             await self.playAssistant(text: assistantText)
+            guard self.isCurrent(gen) else { return }
             await self.startListening()
             await self.startRecognition()
             return
@@ -306,54 +351,6 @@ actor TalkModeRuntime {
         return lines.joined(separator: "\n")
     }
 
-    private enum ChatCompletionState: CustomStringConvertible {
-        case final
-        case aborted
-        case error
-        case timeout
-
-        var description: String {
-            switch self {
-            case .final: return "final"
-            case .aborted: return "aborted"
-            case .error: return "error"
-            case .timeout: return "timeout"
-            }
-        }
-    }
-
-    private func waitForChatCompletion(runId: String, timeoutSeconds: Int) async -> ChatCompletionState {
-        let stream = await GatewayConnection.shared.subscribe()
-        return await withTaskGroup(of: ChatCompletionState.self) { group in
-            group.addTask { [runId] in
-                for await push in stream {
-                    if case let .event(evt) = push, evt.event == "chat", let payload = evt.payload {
-                        if let chat = try? JSONDecoder().decode(
-                            ClawdisChatEventPayload.self,
-                            from: JSONEncoder().encode(payload))
-                        {
-                            guard chat.runId == runId else { continue }
-                            switch chat.state {
-                            case .some("final"): return .final
-                            case .some("aborted"): return .aborted
-                            case .some("error"): return .error
-                            default: break
-                            }
-                        }
-                    }
-                }
-                return .timeout
-            }
-            group.addTask {
-                try? await Task.sleep(nanoseconds: UInt64(timeoutSeconds) * 1_000_000_000)
-                return .timeout
-            }
-            let result = await group.next() ?? .timeout
-            group.cancelAll()
-            return result
-        }
-    }
-
     private func waitForAssistantText(
         sessionKey: String,
         since: Double,
@@ -394,10 +391,12 @@ actor TalkModeRuntime {
     }
 
     private func playAssistant(text: String) async {
+        let gen = self.lifecycleGeneration
         let parse = TalkDirectiveParser.parse(text)
         let directive = parse.directive
         let cleaned = parse.stripped.trimmingCharacters(in: .whitespacesAndNewlines)
         guard !cleaned.isEmpty else { return }
+        guard self.isCurrent(gen) else { return }
 
         if !parse.unknownKeys.isEmpty {
             self.logger.warning("talk directive ignored keys: \(parse.unknownKeys.joined(separator: ","), privacy: .public)")
@@ -435,9 +434,11 @@ actor TalkModeRuntime {
             self.logger.error("talk missing voiceId; set talk.voiceId or ELEVENLABS_VOICE_ID")
             return
         }
+        guard self.isCurrent(gen) else { return }
         self.ttsLogger.info("talk TTS request voiceId=\(voiceId, privacy: .public) chars=\(cleaned.count, privacy: .public)")
 
         await self.startRecognition()
+        guard self.isCurrent(gen) else { return }
         await MainActor.run { TalkModeController.shared.updatePhase(.speaking) }
         self.phase = .speaking
         self.lastSpokenText = cleaned
@@ -450,7 +451,7 @@ actor TalkModeRuntime {
         let request = ElevenLabsRequest(
             text: cleaned,
             modelId: directive?.modelId ?? self.currentModelId ?? self.defaultModelId,
-            outputFormat: directive?.outputFormat ?? self.defaultOutputFormat,
+            outputFormat: Self.validatedOutputFormat(directive?.outputFormat ?? self.defaultOutputFormat, logger: self.logger),
             speed: resolvedSpeed,
             stability: Self.validatedUnit(directive?.stability, name: "stability", logger: self.logger),
             similarity: Self.validatedUnit(directive?.similarity, name: "similarity", logger: self.logger),
@@ -479,6 +480,7 @@ actor TalkModeRuntime {
                 group.cancelAll()
                 return data
             }
+            guard self.isCurrent(gen) else { return }
             self.ttsLogger.info("talk TTS response bytes=\(audio.count, privacy: .public)")
             let result = await TalkAudioPlayer.shared.play(data: audio)
             self.ttsLogger.info("talk audio result finished=\(result.finished, privacy: .public) interruptedAt=\(String(describing: result.interruptedAt), privacy: .public)")
@@ -491,8 +493,10 @@ actor TalkModeRuntime {
             self.logger.error("talk TTS failed: \(error.localizedDescription, privacy: .public)")
         }
 
-        self.phase = .thinking
-        await MainActor.run { TalkModeController.shared.updatePhase(.thinking) }
+        if self.phase == .speaking {
+            self.phase = .thinking
+            await MainActor.run { TalkModeController.shared.updatePhase(.thinking) }
+        }
     }
 
     private func resolveVoiceId(preferred: String?, apiKey: String) async -> String? {
@@ -523,11 +527,18 @@ actor TalkModeRuntime {
     }
 
     func stopSpeaking(reason: TalkStopReason) async {
-        guard self.phase == .speaking else { return }
         let interruptedAt = await MainActor.run { TalkAudioPlayer.shared.stop() }
+        guard self.phase == .speaking else { return }
         if reason == .speech, let interruptedAt {
             self.lastInterruptedAtSeconds = interruptedAt
         }
+        if reason == .manual {
+            return
+        }
+        if reason == .speech || reason == .userTap {
+            await self.startListening()
+            return
+        }
         self.phase = .thinking
         await MainActor.run { TalkModeController.shared.updatePhase(.thinking) }
     }
@@ -718,6 +729,16 @@ actor TalkModeRuntime {
         }
         return normalized
     }
+
+    private static func validatedOutputFormat(_ value: String?, logger: Logger) -> String? {
+        let trimmed = value?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+        guard !trimmed.isEmpty else { return nil }
+        guard trimmed.hasPrefix("mp3_") else {
+            logger.warning("talk output_format unsupported for local playback: \(trimmed, privacy: .public)")
+            return nil
+        }
+        return trimmed
+    }
 }
 
 private struct ElevenLabsRequest {
@@ -14,6 +14,24 @@ import Testing
 
         #expect(true)
     }
+
+    @MainActor
+    @Test func playDoesNotHangWhenPlayIsCalledTwice() async throws {
+        let wav = makeWav16Mono(sampleRate: 8000, samples: 800)
+        defer { _ = TalkAudioPlayer.shared.stop() }
+
+        let first = Task { @MainActor in
+            await TalkAudioPlayer.shared.play(data: wav)
+        }
+
+        await Task.yield()
+        _ = await TalkAudioPlayer.shared.play(data: wav)
+
+        _ = try await withTimeout(seconds: 2.0) {
+            await first.value
+        }
+        #expect(true)
+    }
 }
 
 private struct TimeoutError: Error {}
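The new test awaits the first play through a withTimeout helper; the diff only shows the TimeoutError type it relies on, so here is one plausible shape for that helper, assuming it races the body against a throwing sleeper (a sketch, not necessarily the project's implementation):

    func withTimeout<T: Sendable>(
        seconds: Double,
        _ body: @escaping @Sendable () async throws -> T) async throws -> T
    {
        try await withThrowingTaskGroup(of: T.self) { group in
            group.addTask { try await body() }
            group.addTask {
                try await Task.sleep(nanoseconds: UInt64(seconds * 1_000_000_000))
                throw TimeoutError()                  // deadline fired first
            }
            guard let value = try await group.next() else { throw TimeoutError() }
            group.cancelAll()                         // tear down the loser
            return value
        }
    }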