diff --git a/apps/macos/Sources/Clawdis/VoicePushToTalk.swift b/apps/macos/Sources/Clawdis/VoicePushToTalk.swift index ff3d98cf6..8c2ef24c0 100644 --- a/apps/macos/Sources/Clawdis/VoicePushToTalk.swift +++ b/apps/macos/Sources/Clawdis/VoicePushToTalk.swift @@ -116,8 +116,8 @@ actor VoicePushToTalk { self.triggerChimePlayed = false self.finalized = false self.timeoutTask?.cancel(); self.timeoutTask = nil - let snapshot = await MainActor.run { VoiceWakeOverlayController.shared.snapshot() } - self.adoptedPrefix = snapshot.isVisible ? snapshot.text.trimmingCharacters(in: .whitespacesAndNewlines) : "" + let snapshot = await MainActor.run { VoiceSessionCoordinator.shared.snapshot() } + self.adoptedPrefix = snapshot.visible ? snapshot.text.trimmingCharacters(in: .whitespacesAndNewlines) : "" self.logger.info("ptt begin adopted_prefix_len=\(self.adoptedPrefix.count, privacy: .public)") if config.triggerChime != .none { self.triggerChimePlayed = true @@ -131,10 +131,11 @@ actor VoicePushToTalk { volatile: "", isFinal: false) self.overlayToken = await MainActor.run { - VoiceWakeOverlayController.shared.startSession( + VoiceSessionCoordinator.shared.startSession( source: .pushToTalk, - transcript: adoptedPrefix, - attributed: adoptedAttributed) + text: adoptedPrefix, + attributed: adoptedAttributed, + forwardEnabled: true) } do { @@ -222,9 +223,9 @@ actor VoicePushToTalk { let attributed = Self.makeAttributed(committed: committedWithPrefix, volatile: self.volatile, isFinal: isFinal) if let token = self.overlayToken { await MainActor.run { - VoiceWakeOverlayController.shared.updatePartial( + VoiceSessionCoordinator.shared.updatePartial( token: token, - transcript: snapshot, + text: snapshot, attributed: attributed) } } @@ -243,10 +244,6 @@ actor VoicePushToTalk { }() let finalText = Self.join(self.adoptedPrefix, finalRecognized) - let attributed = Self.makeAttributed( - committed: Self.join(self.adoptedPrefix, self.committed), - volatile: self.volatile, - isFinal: true) let forward: VoiceWakeForwardConfig = if let cached = self.activeConfig?.forwardConfig { cached } else { @@ -258,18 +255,15 @@ actor VoicePushToTalk { let logger = self.logger await MainActor.run { logger.info("ptt finalize reason=\(reason, privacy: .public) len=\(finalText.count, privacy: .public)") - if finalText.isEmpty { - VoiceWakeOverlayController.shared.dismiss(token: token, reason: .empty) - } else if let token { - VoiceWakeOverlayController.shared.presentFinal( + if let token { + VoiceSessionCoordinator.shared.finalize( token: token, - transcript: finalText, + text: finalText, forwardConfig: forward, - autoSendAfter: nil, sendChime: chime, - attributed: attributed) - VoiceWakeOverlayController.shared.sendNow(token: token, sendChime: chime) - } else { + autoSendAfter: nil) + VoiceSessionCoordinator.shared.sendNow(token: token, reason: reason) + } else if !finalText.isEmpty, forward.enabled { if chime != .none { VoiceWakeChimePlayer.play(chime, reason: "ptt.fallback_send") } diff --git a/apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift b/apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift new file mode 100644 index 000000000..75f4db841 --- /dev/null +++ b/apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift @@ -0,0 +1,137 @@ +import AppKit +import Foundation +import OSLog + +@MainActor +final class VoiceSessionCoordinator: ObservableObject { + static let shared = VoiceSessionCoordinator() + + enum Source: String { case wakeWord, pushToTalk } + + struct Session { + let token: UUID + let source: Source + var text: String + var attributed: NSAttributedString? + var isFinal: Bool + var forwardConfig: VoiceWakeForwardConfig? + var sendChime: VoiceWakeChime + var autoSendDelay: TimeInterval? + } + + private let logger = Logger(subsystem: "com.steipete.clawdis", category: "voicewake.coordinator") + private var session: Session? + private var autoSendTask: Task? + + // MARK: - API + + func startSession( + source: Source, + text: String, + attributed: NSAttributedString? = nil, + forwardEnabled: Bool = false + ) -> UUID { + // If a send is in-flight, ignore new sessions to avoid token churn. + if VoiceWakeOverlayController.shared.model.isSending { + self.logger.info("coordinator drop start while sending") + return self.session?.token ?? UUID() + } + + let token = UUID() + self.logger.info("coordinator start token=\(token.uuidString) source=\(source.rawValue) len=\(text.count)") + let attributedText = attributed ?? VoiceWakeOverlayController.shared.makeAttributed(from: text) + self.session = Session( + token: token, + source: source, + text: text, + attributed: attributedText, + isFinal: false, + forwardConfig: forwardEnabled ? AppStateStore.shared.voiceWakeForwardConfig : nil, + sendChime: .none, + autoSendDelay: nil) + VoiceWakeOverlayController.shared.startSession( + source: VoiceWakeOverlayController.Source(rawValue: source.rawValue) ?? .wakeWord, + transcript: text, + attributed: attributedText, + forwardEnabled: forwardEnabled, + isFinal: false) + return token + } + + func updatePartial(token: UUID, text: String, attributed: NSAttributedString? = nil) { + guard let session, session.token == token else { return } + self.session?.text = text + self.session?.attributed = attributed + VoiceWakeOverlayController.shared.updatePartial(token: token, transcript: text, attributed: attributed) + } + + func finalize( + token: UUID, + text: String, + forwardConfig: VoiceWakeForwardConfig, + sendChime: VoiceWakeChime, + autoSendAfter: TimeInterval?) + { + guard let session, session.token == token else { return } + self.logger.info("coordinator finalize token=\(token.uuidString) len=\(text.count) autoSendAfter=\(autoSendAfter ?? -1)") + self.autoSendTask?.cancel(); self.autoSendTask = nil + self.session?.text = text + self.session?.isFinal = true + self.session?.forwardConfig = forwardConfig + self.session?.sendChime = sendChime + self.session?.autoSendDelay = autoSendAfter + + let attributed = VoiceWakeOverlayController.shared.makeAttributed(from: text) + VoiceWakeOverlayController.shared.presentFinal( + token: token, + transcript: text, + forwardConfig: forwardConfig, + autoSendAfter: autoSendAfter, + sendChime: sendChime, + attributed: attributed) + } + + func sendNow(token: UUID, reason: String = "explicit") { + guard let session, session.token == token else { return } + let text = session.text.trimmingCharacters(in: .whitespacesAndNewlines) + guard let forward = session.forwardConfig, forward.enabled else { + self.logger.info("coordinator sendNow \(reason) no forward config -> dismiss") + VoiceWakeOverlayController.shared.dismiss(token: token, reason: .explicit, outcome: .empty) + self.clearSession() + return + } + guard !text.isEmpty else { + self.logger.info("coordinator sendNow \(reason) empty -> dismiss") + VoiceWakeOverlayController.shared.dismiss(token: token, reason: .empty, outcome: .empty) + self.clearSession() + return + } + VoiceWakeOverlayController.shared.sendNow(token: token, sendChime: session.sendChime) + Task.detached { + _ = await VoiceWakeForwarder.forward(transcript: VoiceWakeForwarder.prefixedTranscript(text), config: forward) + } + } + + func dismiss(token: UUID, reason: VoiceWakeOverlayController.DismissReason, outcome: VoiceWakeOverlayController.SendOutcome) { + guard let session, session.token == token else { return } + VoiceWakeOverlayController.shared.dismiss(token: token, reason: reason, outcome: outcome) + self.clearSession() + } + + func updateLevel(token: UUID, _ level: Double) { + guard let session, session.token == token else { return } + VoiceWakeOverlayController.shared.updateLevel(token: token, level) + } + + func snapshot() -> (token: UUID?, text: String, visible: Bool) { + (self.session?.token, self.session?.text ?? "", VoiceWakeOverlayController.shared.isVisible) + } + + // MARK: - Private + + private func clearSession() { + self.session = nil + self.autoSendTask?.cancel() + self.autoSendTask = nil + } +} diff --git a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift index 9a1fbc0c1..1a2c1dfc1 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift @@ -417,7 +417,7 @@ final class VoiceWakeOverlayController: ObservableObject { } } - private func makeAttributed(from text: String) -> NSAttributedString { + func makeAttributed(from text: String) -> NSAttributedString { NSAttributedString( string: text, attributes: [ diff --git a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift index e9c4f5edc..e11130574 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift @@ -172,7 +172,11 @@ actor VoiceWakeRuntime { self.overlayToken = nil guard dismissOverlay else { return } Task { @MainActor in - VoiceWakeOverlayController.shared.dismiss(token: token) + if let token { + VoiceSessionCoordinator.shared.dismiss(token: token, reason: .explicit, outcome: .empty) + } else { + VoiceWakeOverlayController.shared.dismiss() + } } } @@ -218,9 +222,9 @@ actor VoiceWakeRuntime { let snapshot = self.committedTranscript + self.volatileTranscript if let token = self.overlayToken { await MainActor.run { - VoiceWakeOverlayController.shared.updatePartial( + VoiceSessionCoordinator.shared.updatePartial( token: token, - transcript: snapshot, + text: snapshot, attributed: attributed) } } @@ -271,10 +275,11 @@ actor VoiceWakeRuntime { volatile: self.volatileTranscript, isFinal: false) self.overlayToken = await MainActor.run { - VoiceWakeOverlayController.shared.startSession( + VoiceSessionCoordinator.shared.startSession( source: .wakeWord, - transcript: snapshot, - attributed: attributed) + text: snapshot, + attributed: attributed, + forwardEnabled: true) } // Keep the "ears" boosted for the capture window so the status icon animates while recording. @@ -326,27 +331,20 @@ actor VoiceWakeRuntime { await MainActor.run { AppStateStore.shared.stopVoiceEars() } if let token = self.overlayToken { - await MainActor.run { VoiceWakeOverlayController.shared.updateLevel(token: token, 0) } + await MainActor.run { VoiceSessionCoordinator.shared.updateLevel(token: token, 0) } } let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig } - // Auto-send should fire as soon as the silence threshold is satisfied (2s after speech, 5s after trigger-only). - // Keep the overlay visible during capture; once we finalize, we dispatch immediately. let delay: TimeInterval = 0.0 - let finalAttributed = Self.makeAttributed( - committed: finalTranscript, - volatile: "", - isFinal: true) let sendChime = finalTranscript.isEmpty ? .none : config.sendChime if let token = self.overlayToken { await MainActor.run { - VoiceWakeOverlayController.shared.presentFinal( + VoiceSessionCoordinator.shared.finalize( token: token, - transcript: finalTranscript, + text: finalTranscript, forwardConfig: forwardConfig, - autoSendAfter: delay, sendChime: sendChime, - attributed: finalAttributed) + autoSendAfter: delay) } } else if forwardConfig.enabled, !finalTranscript.isEmpty { if sendChime != .none { @@ -380,7 +378,7 @@ actor VoiceWakeRuntime { let clamped = min(1.0, max(0.0, rms / max(self.minSpeechRMS, threshold))) if let token = self.overlayToken { Task { @MainActor in - VoiceWakeOverlayController.shared.updateLevel(token: token, clamped) + VoiceSessionCoordinator.shared.updateLevel(token: token, clamped) } } }