fix(voice): sync talk mode chat events
This commit is contained in:
@@ -84,20 +84,24 @@ class TalkModeManager(
|
|||||||
private var session: BridgeSession? = null
|
private var session: BridgeSession? = null
|
||||||
private var pendingRunId: String? = null
|
private var pendingRunId: String? = null
|
||||||
private var pendingFinal: CompletableDeferred<Boolean>? = null
|
private var pendingFinal: CompletableDeferred<Boolean>? = null
|
||||||
|
private var chatSubscribed = false
|
||||||
|
|
||||||
private var player: MediaPlayer? = null
|
private var player: MediaPlayer? = null
|
||||||
private var currentAudioFile: File? = null
|
private var currentAudioFile: File? = null
|
||||||
|
|
||||||
fun attachSession(session: BridgeSession) {
|
fun attachSession(session: BridgeSession) {
|
||||||
this.session = session
|
this.session = session
|
||||||
|
chatSubscribed = false
|
||||||
}
|
}
|
||||||
|
|
||||||
fun setEnabled(enabled: Boolean) {
|
fun setEnabled(enabled: Boolean) {
|
||||||
if (_isEnabled.value == enabled) return
|
if (_isEnabled.value == enabled) return
|
||||||
_isEnabled.value = enabled
|
_isEnabled.value = enabled
|
||||||
if (enabled) {
|
if (enabled) {
|
||||||
|
Log.d(tag, "enabled")
|
||||||
start()
|
start()
|
||||||
} else {
|
} else {
|
||||||
|
Log.d(tag, "disabled")
|
||||||
stop()
|
stop()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -127,9 +131,11 @@ class TalkModeManager(
|
|||||||
if (_isListening.value) return@post
|
if (_isListening.value) return@post
|
||||||
stopRequested = false
|
stopRequested = false
|
||||||
listeningMode = true
|
listeningMode = true
|
||||||
|
Log.d(tag, "start")
|
||||||
|
|
||||||
if (!SpeechRecognizer.isRecognitionAvailable(context)) {
|
if (!SpeechRecognizer.isRecognitionAvailable(context)) {
|
||||||
_statusText.value = "Speech recognizer unavailable"
|
_statusText.value = "Speech recognizer unavailable"
|
||||||
|
Log.w(tag, "speech recognizer unavailable")
|
||||||
return@post
|
return@post
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,6 +144,7 @@ class TalkModeManager(
|
|||||||
PackageManager.PERMISSION_GRANTED
|
PackageManager.PERMISSION_GRANTED
|
||||||
if (!micOk) {
|
if (!micOk) {
|
||||||
_statusText.value = "Microphone permission required"
|
_statusText.value = "Microphone permission required"
|
||||||
|
Log.w(tag, "microphone permission required")
|
||||||
return@post
|
return@post
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,8 +153,10 @@ class TalkModeManager(
|
|||||||
recognizer = SpeechRecognizer.createSpeechRecognizer(context).also { it.setRecognitionListener(listener) }
|
recognizer = SpeechRecognizer.createSpeechRecognizer(context).also { it.setRecognitionListener(listener) }
|
||||||
startListeningInternal(markListening = true)
|
startListeningInternal(markListening = true)
|
||||||
startSilenceMonitor()
|
startSilenceMonitor()
|
||||||
|
Log.d(tag, "listening")
|
||||||
} catch (err: Throwable) {
|
} catch (err: Throwable) {
|
||||||
_statusText.value = "Start failed: ${err.message ?: err::class.simpleName}"
|
_statusText.value = "Start failed: ${err.message ?: err::class.simpleName}"
|
||||||
|
Log.w(tag, "start failed: ${err.message ?: err::class.simpleName}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -164,6 +173,7 @@ class TalkModeManager(
|
|||||||
_isListening.value = false
|
_isListening.value = false
|
||||||
_statusText.value = "Off"
|
_statusText.value = "Off"
|
||||||
stopSpeaking()
|
stopSpeaking()
|
||||||
|
chatSubscribed = false
|
||||||
|
|
||||||
mainHandler.post {
|
mainHandler.post {
|
||||||
recognizer?.cancel()
|
recognizer?.cancel()
|
||||||
@@ -264,28 +274,36 @@ class TalkModeManager(
|
|||||||
val bridge = session
|
val bridge = session
|
||||||
if (bridge == null) {
|
if (bridge == null) {
|
||||||
_statusText.value = "Bridge not connected"
|
_statusText.value = "Bridge not connected"
|
||||||
|
Log.w(tag, "finalize: bridge not connected")
|
||||||
start()
|
start()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
val startedAt = System.currentTimeMillis().toDouble() / 1000.0
|
val startedAt = System.currentTimeMillis().toDouble() / 1000.0
|
||||||
|
subscribeChatIfNeeded(bridge = bridge, sessionKey = "main")
|
||||||
|
Log.d(tag, "chat.send start chars=${prompt.length}")
|
||||||
val runId = sendChat(prompt, bridge)
|
val runId = sendChat(prompt, bridge)
|
||||||
|
Log.d(tag, "chat.send ok runId=$runId")
|
||||||
val ok = waitForChatFinal(runId)
|
val ok = waitForChatFinal(runId)
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
_statusText.value = "No reply"
|
_statusText.value = "No reply"
|
||||||
|
Log.w(tag, "chat final timeout runId=$runId")
|
||||||
start()
|
start()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
val assistant = waitForAssistantText(bridge, startedAt, 12_000)
|
val assistant = waitForAssistantText(bridge, startedAt, 12_000)
|
||||||
if (assistant.isNullOrBlank()) {
|
if (assistant.isNullOrBlank()) {
|
||||||
_statusText.value = "No reply"
|
_statusText.value = "No reply"
|
||||||
|
Log.w(tag, "assistant text timeout runId=$runId")
|
||||||
start()
|
start()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
Log.d(tag, "assistant text ok chars=${assistant.length}")
|
||||||
playAssistant(assistant)
|
playAssistant(assistant)
|
||||||
} catch (err: Throwable) {
|
} catch (err: Throwable) {
|
||||||
_statusText.value = "Talk failed: ${err.message ?: err::class.simpleName}"
|
_statusText.value = "Talk failed: ${err.message ?: err::class.simpleName}"
|
||||||
|
Log.w(tag, "finalize failed: ${err.message ?: err::class.simpleName}")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_isEnabled.value) {
|
if (_isEnabled.value) {
|
||||||
@@ -293,6 +311,19 @@ class TalkModeManager(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private suspend fun subscribeChatIfNeeded(bridge: BridgeSession, sessionKey: String) {
|
||||||
|
if (chatSubscribed) return
|
||||||
|
val key = sessionKey.trim()
|
||||||
|
if (key.isEmpty()) return
|
||||||
|
try {
|
||||||
|
bridge.sendEvent("chat.subscribe", """{"sessionKey":"$key"}""")
|
||||||
|
chatSubscribed = true
|
||||||
|
Log.d(tag, "chat.subscribe ok sessionKey=$key")
|
||||||
|
} catch (err: Throwable) {
|
||||||
|
Log.w(tag, "chat.subscribe failed sessionKey=$key err=${err.message ?: err::class.java.simpleName}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private fun buildPrompt(transcript: String): String {
|
private fun buildPrompt(transcript: String): String {
|
||||||
val lines = mutableListOf(
|
val lines = mutableListOf(
|
||||||
"Talk Mode active. Reply in a concise, spoken tone.",
|
"Talk Mode active. Reply in a concise, spoken tone.",
|
||||||
@@ -410,6 +441,7 @@ class TalkModeManager(
|
|||||||
val voiceId = directive?.voiceId ?: currentVoiceId ?: defaultVoiceId
|
val voiceId = directive?.voiceId ?: currentVoiceId ?: defaultVoiceId
|
||||||
if (voiceId.isNullOrBlank()) {
|
if (voiceId.isNullOrBlank()) {
|
||||||
_statusText.value = "Missing voice ID"
|
_statusText.value = "Missing voice ID"
|
||||||
|
Log.w(tag, "missing voiceId")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -418,6 +450,7 @@ class TalkModeManager(
|
|||||||
?: System.getenv("ELEVENLABS_API_KEY")?.trim()
|
?: System.getenv("ELEVENLABS_API_KEY")?.trim()
|
||||||
if (apiKey.isNullOrEmpty()) {
|
if (apiKey.isNullOrEmpty()) {
|
||||||
_statusText.value = "Missing ELEVENLABS_API_KEY"
|
_statusText.value = "Missing ELEVENLABS_API_KEY"
|
||||||
|
Log.w(tag, "missing ELEVENLABS_API_KEY")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -427,6 +460,7 @@ class TalkModeManager(
|
|||||||
ensureInterruptListener()
|
ensureInterruptListener()
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
val ttsStarted = SystemClock.elapsedRealtime()
|
||||||
val request =
|
val request =
|
||||||
ElevenLabsRequest(
|
ElevenLabsRequest(
|
||||||
text = cleaned,
|
text = cleaned,
|
||||||
@@ -442,9 +476,11 @@ class TalkModeManager(
|
|||||||
language = TalkModeRuntime.validatedLanguage(directive?.language),
|
language = TalkModeRuntime.validatedLanguage(directive?.language),
|
||||||
)
|
)
|
||||||
val audio = synthesize(voiceId = voiceId, apiKey = apiKey, request = request)
|
val audio = synthesize(voiceId = voiceId, apiKey = apiKey, request = request)
|
||||||
|
Log.d(tag, "elevenlabs ok bytes=${audio.size} durMs=${SystemClock.elapsedRealtime() - ttsStarted}")
|
||||||
playAudio(audio)
|
playAudio(audio)
|
||||||
} catch (err: Throwable) {
|
} catch (err: Throwable) {
|
||||||
_statusText.value = "Speak failed: ${err.message ?: err::class.simpleName}"
|
_statusText.value = "Speak failed: ${err.message ?: err::class.simpleName}"
|
||||||
|
Log.w(tag, "speak failed: ${err.message ?: err::class.simpleName}")
|
||||||
}
|
}
|
||||||
|
|
||||||
_isSpeaking.value = false
|
_isSpeaking.value = false
|
||||||
@@ -480,11 +516,13 @@ class TalkModeManager(
|
|||||||
player.prepareAsync()
|
player.prepareAsync()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Log.d(tag, "play start")
|
||||||
try {
|
try {
|
||||||
finished.await()
|
finished.await()
|
||||||
} finally {
|
} finally {
|
||||||
cleanupPlayer()
|
cleanupPlayer()
|
||||||
}
|
}
|
||||||
|
Log.d(tag, "play done")
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun stopSpeaking(resetInterrupt: Boolean = true) {
|
private fun stopSpeaking(resetInterrupt: Boolean = true) {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import ClawdisKit
|
|||||||
import Foundation
|
import Foundation
|
||||||
import Observation
|
import Observation
|
||||||
import Speech
|
import Speech
|
||||||
|
import OSLog
|
||||||
|
|
||||||
@MainActor
|
@MainActor
|
||||||
@Observable
|
@Observable
|
||||||
@@ -36,6 +37,9 @@ final class TalkModeManager: NSObject {
|
|||||||
private let silenceWindow: TimeInterval = 0.7
|
private let silenceWindow: TimeInterval = 0.7
|
||||||
|
|
||||||
private var player: AVAudioPlayer?
|
private var player: AVAudioPlayer?
|
||||||
|
private var chatSubscribedSessionKeys = Set<String>()
|
||||||
|
|
||||||
|
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "TalkMode")
|
||||||
|
|
||||||
func attachBridge(_ bridge: BridgeSession) {
|
func attachBridge(_ bridge: BridgeSession) {
|
||||||
self.bridge = bridge
|
self.bridge = bridge
|
||||||
@@ -44,8 +48,10 @@ final class TalkModeManager: NSObject {
|
|||||||
func setEnabled(_ enabled: Bool) {
|
func setEnabled(_ enabled: Bool) {
|
||||||
self.isEnabled = enabled
|
self.isEnabled = enabled
|
||||||
if enabled {
|
if enabled {
|
||||||
|
self.logger.info("enabled")
|
||||||
Task { await self.start() }
|
Task { await self.start() }
|
||||||
} else {
|
} else {
|
||||||
|
self.logger.info("disabled")
|
||||||
self.stop()
|
self.stop()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -54,14 +60,17 @@ final class TalkModeManager: NSObject {
|
|||||||
guard self.isEnabled else { return }
|
guard self.isEnabled else { return }
|
||||||
if self.isListening { return }
|
if self.isListening { return }
|
||||||
|
|
||||||
|
self.logger.info("start")
|
||||||
self.statusText = "Requesting permissions…"
|
self.statusText = "Requesting permissions…"
|
||||||
let micOk = await Self.requestMicrophonePermission()
|
let micOk = await Self.requestMicrophonePermission()
|
||||||
guard micOk else {
|
guard micOk else {
|
||||||
|
self.logger.warning("start blocked: microphone permission denied")
|
||||||
self.statusText = "Microphone permission denied"
|
self.statusText = "Microphone permission denied"
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
let speechOk = await Self.requestSpeechPermission()
|
let speechOk = await Self.requestSpeechPermission()
|
||||||
guard speechOk else {
|
guard speechOk else {
|
||||||
|
self.logger.warning("start blocked: speech permission denied")
|
||||||
self.statusText = "Speech recognition permission denied"
|
self.statusText = "Speech recognition permission denied"
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -73,9 +82,12 @@ final class TalkModeManager: NSObject {
|
|||||||
self.isListening = true
|
self.isListening = true
|
||||||
self.statusText = "Listening"
|
self.statusText = "Listening"
|
||||||
self.startSilenceMonitor()
|
self.startSilenceMonitor()
|
||||||
|
await self.subscribeChatIfNeeded(sessionKey: "main")
|
||||||
|
self.logger.info("listening")
|
||||||
} catch {
|
} catch {
|
||||||
self.isListening = false
|
self.isListening = false
|
||||||
self.statusText = "Start failed: \(error.localizedDescription)"
|
self.statusText = "Start failed: \(error.localizedDescription)"
|
||||||
|
self.logger.error("start failed: \(error.localizedDescription, privacy: .public)")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,6 +101,11 @@ final class TalkModeManager: NSObject {
|
|||||||
self.silenceTask = nil
|
self.silenceTask = nil
|
||||||
self.stopRecognition()
|
self.stopRecognition()
|
||||||
self.stopSpeaking()
|
self.stopSpeaking()
|
||||||
|
Task { await self.unsubscribeAllChats() }
|
||||||
|
}
|
||||||
|
|
||||||
|
func userTappedOrb() {
|
||||||
|
self.stopSpeaking()
|
||||||
}
|
}
|
||||||
|
|
||||||
private func startRecognition() throws {
|
private func startRecognition() throws {
|
||||||
@@ -191,16 +208,21 @@ final class TalkModeManager: NSObject {
|
|||||||
let prompt = self.buildPrompt(transcript: transcript)
|
let prompt = self.buildPrompt(transcript: transcript)
|
||||||
guard let bridge else {
|
guard let bridge else {
|
||||||
self.statusText = "Bridge not connected"
|
self.statusText = "Bridge not connected"
|
||||||
|
self.logger.warning("finalize: bridge not connected")
|
||||||
await self.start()
|
await self.start()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
let startedAt = Date().timeIntervalSince1970
|
let startedAt = Date().timeIntervalSince1970
|
||||||
|
await self.subscribeChatIfNeeded(sessionKey: "main")
|
||||||
|
self.logger.info("chat.send start chars=\(prompt.count, privacy: .public)")
|
||||||
let runId = try await self.sendChat(prompt, bridge: bridge)
|
let runId = try await self.sendChat(prompt, bridge: bridge)
|
||||||
|
self.logger.info("chat.send ok runId=\(runId, privacy: .public)")
|
||||||
let ok = await self.waitForChatFinal(runId: runId, bridge: bridge)
|
let ok = await self.waitForChatFinal(runId: runId, bridge: bridge)
|
||||||
if !ok {
|
if !ok {
|
||||||
self.statusText = "No reply"
|
self.statusText = "No reply"
|
||||||
|
self.logger.warning("chat final timeout runId=\(runId, privacy: .public)")
|
||||||
await self.start()
|
await self.start()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -211,17 +233,50 @@ final class TalkModeManager: NSObject {
|
|||||||
timeoutSeconds: 12)
|
timeoutSeconds: 12)
|
||||||
else {
|
else {
|
||||||
self.statusText = "No reply"
|
self.statusText = "No reply"
|
||||||
|
self.logger.warning("assistant text timeout runId=\(runId, privacy: .public)")
|
||||||
await self.start()
|
await self.start()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
self.logger.info("assistant text ok chars=\(assistantText.count, privacy: .public)")
|
||||||
await self.playAssistant(text: assistantText)
|
await self.playAssistant(text: assistantText)
|
||||||
} catch {
|
} catch {
|
||||||
self.statusText = "Talk failed: \(error.localizedDescription)"
|
self.statusText = "Talk failed: \(error.localizedDescription)"
|
||||||
|
self.logger.error("finalize failed: \(error.localizedDescription, privacy: .public)")
|
||||||
}
|
}
|
||||||
|
|
||||||
await self.start()
|
await self.start()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private func subscribeChatIfNeeded(sessionKey: String) async {
|
||||||
|
let key = sessionKey.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
|
guard !key.isEmpty else { return }
|
||||||
|
guard let bridge else { return }
|
||||||
|
guard !self.chatSubscribedSessionKeys.contains(key) else { return }
|
||||||
|
|
||||||
|
do {
|
||||||
|
let payload = "{\"sessionKey\":\"\(key)\"}"
|
||||||
|
try await bridge.sendEvent(event: "chat.subscribe", payloadJSON: payload)
|
||||||
|
self.chatSubscribedSessionKeys.insert(key)
|
||||||
|
self.logger.info("chat.subscribe ok sessionKey=\(key, privacy: .public)")
|
||||||
|
} catch {
|
||||||
|
self.logger.warning("chat.subscribe failed sessionKey=\(key, privacy: .public) err=\(error.localizedDescription, privacy: .public)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private func unsubscribeAllChats() async {
|
||||||
|
guard let bridge else { return }
|
||||||
|
let keys = self.chatSubscribedSessionKeys
|
||||||
|
self.chatSubscribedSessionKeys.removeAll()
|
||||||
|
for key in keys {
|
||||||
|
do {
|
||||||
|
let payload = "{\"sessionKey\":\"\(key)\"}"
|
||||||
|
try await bridge.sendEvent(event: "chat.unsubscribe", payloadJSON: payload)
|
||||||
|
} catch {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private func buildPrompt(transcript: String) -> String {
|
private func buildPrompt(transcript: String) -> String {
|
||||||
var lines: [String] = [
|
var lines: [String] = [
|
||||||
"Talk Mode active. Reply in a concise, spoken tone.",
|
"Talk Mode active. Reply in a concise, spoken tone.",
|
||||||
@@ -326,6 +381,7 @@ final class TalkModeManager: NSObject {
|
|||||||
let voiceId = directive?.voiceId ?? self.currentVoiceId ?? self.defaultVoiceId
|
let voiceId = directive?.voiceId ?? self.currentVoiceId ?? self.defaultVoiceId
|
||||||
guard let voiceId, !voiceId.isEmpty else {
|
guard let voiceId, !voiceId.isEmpty else {
|
||||||
self.statusText = "Missing voice ID"
|
self.statusText = "Missing voice ID"
|
||||||
|
self.logger.error("missing voiceId")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -334,6 +390,7 @@ final class TalkModeManager: NSObject {
|
|||||||
ProcessInfo.processInfo.environment["ELEVENLABS_API_KEY"]
|
ProcessInfo.processInfo.environment["ELEVENLABS_API_KEY"]
|
||||||
guard let apiKey = resolvedKey, !apiKey.isEmpty else {
|
guard let apiKey = resolvedKey, !apiKey.isEmpty else {
|
||||||
self.statusText = "Missing ELEVENLABS_API_KEY"
|
self.statusText = "Missing ELEVENLABS_API_KEY"
|
||||||
|
self.logger.error("missing ELEVENLABS_API_KEY")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -342,6 +399,7 @@ final class TalkModeManager: NSObject {
|
|||||||
self.lastSpokenText = cleaned
|
self.lastSpokenText = cleaned
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
let started = Date()
|
||||||
let request = ElevenLabsRequest(
|
let request = ElevenLabsRequest(
|
||||||
text: cleaned,
|
text: cleaned,
|
||||||
modelId: directive?.modelId ?? self.currentModelId ?? self.defaultModelId,
|
modelId: directive?.modelId ?? self.currentModelId ?? self.defaultModelId,
|
||||||
@@ -359,9 +417,11 @@ final class TalkModeManager: NSObject {
|
|||||||
let audio = try await ElevenLabsClient(apiKey: apiKey).synthesize(
|
let audio = try await ElevenLabsClient(apiKey: apiKey).synthesize(
|
||||||
voiceId: voiceId,
|
voiceId: voiceId,
|
||||||
request: request)
|
request: request)
|
||||||
|
self.logger.info("elevenlabs ok bytes=\(audio.count, privacy: .public) dur=\(Date().timeIntervalSince(started), privacy: .public)s")
|
||||||
try await self.playAudio(data: audio)
|
try await self.playAudio(data: audio)
|
||||||
} catch {
|
} catch {
|
||||||
self.statusText = "Speak failed: \(error.localizedDescription)"
|
self.statusText = "Speak failed: \(error.localizedDescription)"
|
||||||
|
self.logger.error("speak failed: \(error.localizedDescription, privacy: .public)")
|
||||||
}
|
}
|
||||||
|
|
||||||
self.isSpeaking = false
|
self.isSpeaking = false
|
||||||
@@ -372,10 +432,12 @@ final class TalkModeManager: NSObject {
|
|||||||
let player = try AVAudioPlayer(data: data)
|
let player = try AVAudioPlayer(data: data)
|
||||||
self.player = player
|
self.player = player
|
||||||
player.prepareToPlay()
|
player.prepareToPlay()
|
||||||
|
self.logger.info("play start")
|
||||||
player.play()
|
player.play()
|
||||||
while player.isPlaying {
|
while player.isPlaying {
|
||||||
try? await Task.sleep(nanoseconds: 120_000_000)
|
try? await Task.sleep(nanoseconds: 120_000_000)
|
||||||
}
|
}
|
||||||
|
self.logger.info("play done")
|
||||||
}
|
}
|
||||||
|
|
||||||
private func stopSpeaking() {
|
private func stopSpeaking() {
|
||||||
|
|||||||
@@ -473,7 +473,10 @@ extension GatewayConnection {
|
|||||||
params["attachments"] = AnyCodable(encoded)
|
params["attachments"] = AnyCodable(encoded)
|
||||||
}
|
}
|
||||||
|
|
||||||
return try await self.requestDecoded(method: .chatSend, params: params)
|
return try await self.requestDecoded(
|
||||||
|
method: .chatSend,
|
||||||
|
params: params,
|
||||||
|
timeoutMs: Double(timeoutMs))
|
||||||
}
|
}
|
||||||
|
|
||||||
func chatAbort(sessionKey: String, runId: String) async throws -> Bool {
|
func chatAbort(sessionKey: String, runId: String) async throws -> Bool {
|
||||||
|
|||||||
@@ -293,8 +293,15 @@ public final class ClawdisChatViewModel {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if let runId = chat.runId, !self.pendingRuns.contains(runId) {
|
let isOurRun = chat.runId.flatMap { self.pendingRuns.contains($0) } ?? false
|
||||||
// Ignore events for other runs.
|
if !isOurRun {
|
||||||
|
// Keep multiple clients in sync: if another client finishes a run for our session, refresh history.
|
||||||
|
switch chat.state {
|
||||||
|
case "final", "aborted", "error":
|
||||||
|
Task { await self.refreshHistoryAfterRun() }
|
||||||
|
default:
|
||||||
|
break
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -435,7 +435,10 @@ async function waitForSystemEvent(timeoutMs = 2000) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
describe("gateway server", () => {
|
describe("gateway server", () => {
|
||||||
test("voicewake.get returns defaults and voicewake.set broadcasts", async () => {
|
test(
|
||||||
|
"voicewake.get returns defaults and voicewake.set broadcasts",
|
||||||
|
{ timeout: 15_000 },
|
||||||
|
async () => {
|
||||||
const homeDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-home-"));
|
const homeDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-home-"));
|
||||||
const prevHome = process.env.HOME;
|
const prevHome = process.env.HOME;
|
||||||
process.env.HOME = homeDir;
|
process.env.HOME = homeDir;
|
||||||
@@ -486,7 +489,8 @@ describe("gateway server", () => {
|
|||||||
} else {
|
} else {
|
||||||
process.env.HOME = prevHome;
|
process.env.HOME = prevHome;
|
||||||
}
|
}
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
test("models.list returns model catalog", async () => {
|
test("models.list returns model catalog", async () => {
|
||||||
piSdkMock.enabled = true;
|
piSdkMock.enabled = true;
|
||||||
@@ -3328,6 +3332,90 @@ describe("gateway server", () => {
|
|||||||
await server.close();
|
await server.close();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("bridge voice transcript triggers chat events for webchat clients", async () => {
|
||||||
|
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-gw-"));
|
||||||
|
testSessionStorePath = path.join(dir, "sessions.json");
|
||||||
|
await fs.writeFile(
|
||||||
|
testSessionStorePath,
|
||||||
|
JSON.stringify(
|
||||||
|
{
|
||||||
|
main: {
|
||||||
|
sessionId: "sess-main",
|
||||||
|
updatedAt: Date.now(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
null,
|
||||||
|
2,
|
||||||
|
),
|
||||||
|
"utf-8",
|
||||||
|
);
|
||||||
|
|
||||||
|
const { server, ws } = await startServerWithClient();
|
||||||
|
await connectOk(ws, {
|
||||||
|
client: {
|
||||||
|
name: "webchat",
|
||||||
|
version: "1.0.0",
|
||||||
|
platform: "test",
|
||||||
|
mode: "webchat",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const bridgeCall = bridgeStartCalls.at(-1);
|
||||||
|
expect(bridgeCall?.onEvent).toBeDefined();
|
||||||
|
|
||||||
|
const isVoiceFinalChatEvent = (o: unknown) => {
|
||||||
|
if (!o || typeof o !== "object") return false;
|
||||||
|
const rec = o as Record<string, unknown>;
|
||||||
|
if (rec.type !== "event" || rec.event !== "chat") return false;
|
||||||
|
if (!rec.payload || typeof rec.payload !== "object") return false;
|
||||||
|
const payload = rec.payload as Record<string, unknown>;
|
||||||
|
const runId = typeof payload.runId === "string" ? payload.runId : "";
|
||||||
|
const state = typeof payload.state === "string" ? payload.state : "";
|
||||||
|
return runId.startsWith("voice-") && state === "final";
|
||||||
|
};
|
||||||
|
|
||||||
|
const finalChatP = onceMessage<{
|
||||||
|
type: "event";
|
||||||
|
event: string;
|
||||||
|
payload?: unknown;
|
||||||
|
}>(ws, isVoiceFinalChatEvent, 8000);
|
||||||
|
|
||||||
|
await bridgeCall?.onEvent?.("ios-node", {
|
||||||
|
event: "voice.transcript",
|
||||||
|
payloadJSON: JSON.stringify({ text: "hello", sessionKey: "main" }),
|
||||||
|
});
|
||||||
|
|
||||||
|
emitAgentEvent({
|
||||||
|
runId: "sess-main",
|
||||||
|
seq: 1,
|
||||||
|
ts: Date.now(),
|
||||||
|
stream: "assistant",
|
||||||
|
data: { text: "hi from agent" },
|
||||||
|
});
|
||||||
|
emitAgentEvent({
|
||||||
|
runId: "sess-main",
|
||||||
|
seq: 2,
|
||||||
|
ts: Date.now(),
|
||||||
|
stream: "job",
|
||||||
|
data: { state: "done" },
|
||||||
|
});
|
||||||
|
|
||||||
|
const evt = await finalChatP;
|
||||||
|
const payload =
|
||||||
|
evt.payload && typeof evt.payload === "object"
|
||||||
|
? (evt.payload as Record<string, unknown>)
|
||||||
|
: {};
|
||||||
|
expect(payload.sessionKey).toBe("main");
|
||||||
|
const message =
|
||||||
|
payload.message && typeof payload.message === "object"
|
||||||
|
? (payload.message as Record<string, unknown>)
|
||||||
|
: {};
|
||||||
|
expect(message.role).toBe("assistant");
|
||||||
|
|
||||||
|
ws.close();
|
||||||
|
await server.close();
|
||||||
|
});
|
||||||
|
|
||||||
test("bridge chat.abort cancels while saving the session store", async () => {
|
test("bridge chat.abort cancels while saving the session store", async () => {
|
||||||
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-gw-"));
|
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-gw-"));
|
||||||
testSessionStorePath = path.join(dir, "sessions.json");
|
testSessionStorePath = path.join(dir, "sessions.json");
|
||||||
|
|||||||
@@ -3064,6 +3064,13 @@ export async function startGatewayServer(
|
|||||||
await saveSessionStore(storePath, store);
|
await saveSessionStore(storePath, store);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure chat UI clients refresh when this run completes (even though it wasn't started via chat.send).
|
||||||
|
// This maps agent bus events (keyed by sessionId) to chat events (keyed by clientRunId).
|
||||||
|
chatRunSessions.set(sessionId, {
|
||||||
|
sessionKey,
|
||||||
|
clientRunId: `voice-${randomUUID()}`,
|
||||||
|
});
|
||||||
|
|
||||||
void agentCommand(
|
void agentCommand(
|
||||||
{
|
{
|
||||||
message: text,
|
message: text,
|
||||||
|
|||||||
Reference in New Issue
Block a user