Coordinator: centralize voice sessions for wake and push-to-talk

This commit is contained in:
Peter Steinberger
2025-12-09 05:41:31 +01:00
parent fc1d58b631
commit 9ef1545d06
4 changed files with 168 additions and 39 deletions

View File

@@ -116,8 +116,8 @@ actor VoicePushToTalk {
self.triggerChimePlayed = false
self.finalized = false
self.timeoutTask?.cancel(); self.timeoutTask = nil
let snapshot = await MainActor.run { VoiceWakeOverlayController.shared.snapshot() }
self.adoptedPrefix = snapshot.isVisible ? snapshot.text.trimmingCharacters(in: .whitespacesAndNewlines) : ""
let snapshot = await MainActor.run { VoiceSessionCoordinator.shared.snapshot() }
self.adoptedPrefix = snapshot.visible ? snapshot.text.trimmingCharacters(in: .whitespacesAndNewlines) : ""
self.logger.info("ptt begin adopted_prefix_len=\(self.adoptedPrefix.count, privacy: .public)")
if config.triggerChime != .none {
self.triggerChimePlayed = true
@@ -131,10 +131,11 @@ actor VoicePushToTalk {
volatile: "",
isFinal: false)
self.overlayToken = await MainActor.run {
VoiceWakeOverlayController.shared.startSession(
VoiceSessionCoordinator.shared.startSession(
source: .pushToTalk,
transcript: adoptedPrefix,
attributed: adoptedAttributed)
text: adoptedPrefix,
attributed: adoptedAttributed,
forwardEnabled: true)
}
do {
@@ -222,9 +223,9 @@ actor VoicePushToTalk {
let attributed = Self.makeAttributed(committed: committedWithPrefix, volatile: self.volatile, isFinal: isFinal)
if let token = self.overlayToken {
await MainActor.run {
VoiceWakeOverlayController.shared.updatePartial(
VoiceSessionCoordinator.shared.updatePartial(
token: token,
transcript: snapshot,
text: snapshot,
attributed: attributed)
}
}
@@ -243,10 +244,6 @@ actor VoicePushToTalk {
}()
let finalText = Self.join(self.adoptedPrefix, finalRecognized)
let attributed = Self.makeAttributed(
committed: Self.join(self.adoptedPrefix, self.committed),
volatile: self.volatile,
isFinal: true)
let forward: VoiceWakeForwardConfig = if let cached = self.activeConfig?.forwardConfig {
cached
} else {
@@ -258,18 +255,15 @@ actor VoicePushToTalk {
let logger = self.logger
await MainActor.run {
logger.info("ptt finalize reason=\(reason, privacy: .public) len=\(finalText.count, privacy: .public)")
if finalText.isEmpty {
VoiceWakeOverlayController.shared.dismiss(token: token, reason: .empty)
} else if let token {
VoiceWakeOverlayController.shared.presentFinal(
if let token {
VoiceSessionCoordinator.shared.finalize(
token: token,
transcript: finalText,
text: finalText,
forwardConfig: forward,
autoSendAfter: nil,
sendChime: chime,
attributed: attributed)
VoiceWakeOverlayController.shared.sendNow(token: token, sendChime: chime)
} else {
autoSendAfter: nil)
VoiceSessionCoordinator.shared.sendNow(token: token, reason: reason)
} else if !finalText.isEmpty, forward.enabled {
if chime != .none {
VoiceWakeChimePlayer.play(chime, reason: "ptt.fallback_send")
}

View File

@@ -0,0 +1,137 @@
import AppKit
import Foundation
import OSLog
/// Main-actor singleton that tracks the single active voice session (wake word or
/// push-to-talk) and relays every session event to `VoiceWakeOverlayController.shared`.
///
/// `startSession` hands out a token; every later call must present that token.
/// Calls whose token does not match the current session are silently ignored.
@MainActor
final class VoiceSessionCoordinator: ObservableObject {
    static let shared = VoiceSessionCoordinator()

    /// Origin of a voice session. The raw value is also used to look up the
    /// matching `VoiceWakeOverlayController.Source` case in `startSession`.
    enum Source: String { case wakeWord, pushToTalk }

    /// State the coordinator keeps for the currently active session.
    struct Session {
        /// Identifier callers must present on every follow-up call.
        let token: UUID
        let source: Source
        /// Latest transcript text (partial until `finalize` is called).
        var text: String
        var attributed: NSAttributedString?
        /// Set to `true` by `finalize`.
        var isFinal: Bool
        /// Forwarding configuration; `nil` makes `sendNow` dismiss instead of forwarding.
        var forwardConfig: VoiceWakeForwardConfig?
        var sendChime: VoiceWakeChime
        /// Stored by `finalize`; not read anywhere inside this class.
        var autoSendDelay: TimeInterval?
    }

    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "voicewake.coordinator")
    private var session: Session?
    // NOTE(review): this task is only ever cancelled and reset to nil in this class;
    // nothing visible here assigns it. Confirm whether scheduling is still planned.
    private var autoSendTask: Task<Void, Never>?

    // MARK: - API

    /// Begins a new session and shows it in the overlay.
    ///
    /// If the overlay reports that a send is in flight, no new session is created.
    /// In that case the current session's token is returned when one exists
    /// (so the caller ends up sharing that session); otherwise a fresh UUID is
    /// returned that matches no session, and follow-up calls with it are ignored.
    ///
    /// - Parameters:
    ///   - source: Where the session originated.
    ///   - text: Initial transcript text.
    ///   - attributed: Pre-styled transcript; when `nil`, one is built from `text`
    ///     via the overlay controller's `makeAttributed(from:)`.
    ///   - forwardEnabled: When `true`, the session captures
    ///     `AppStateStore.shared.voiceWakeForwardConfig` as its forward config.
    /// - Returns: The token identifying the session.
    func startSession(
        source: Source,
        text: String,
        attributed: NSAttributedString? = nil,
        forwardEnabled: Bool = false
    ) -> UUID {
        // If a send is in-flight, ignore new sessions to avoid token churn.
        if VoiceWakeOverlayController.shared.model.isSending {
            self.logger.info("coordinator drop start while sending")
            return self.session?.token ?? UUID()
        }

        let token = UUID()
        // Dynamic strings are redacted by default in unified logging; mark the
        // non-sensitive diagnostics public so the log lines stay readable.
        self.logger.info(
            "coordinator start token=\(token.uuidString, privacy: .public) source=\(source.rawValue, privacy: .public) len=\(text.count, privacy: .public)")

        let attributedText = attributed ?? VoiceWakeOverlayController.shared.makeAttributed(from: text)
        self.session = Session(
            token: token,
            source: source,
            text: text,
            attributed: attributedText,
            isFinal: false,
            forwardConfig: forwardEnabled ? AppStateStore.shared.voiceWakeForwardConfig : nil,
            sendChime: .none,
            autoSendDelay: nil)

        // NOTE(review): any value returned by the overlay's startSession is not used;
        // the coordinator's own token is what gets passed to later overlay calls.
        // Confirm the overlay accepts that token for updatePartial/presentFinal/etc.
        VoiceWakeOverlayController.shared.startSession(
            source: VoiceWakeOverlayController.Source(rawValue: source.rawValue) ?? .wakeWord,
            transcript: text,
            attributed: attributedText,
            forwardEnabled: forwardEnabled,
            isFinal: false)
        return token
    }

    /// Replaces the session's transcript with a new partial result and forwards it
    /// to the overlay. `attributed` is stored and forwarded as given, including `nil`.
    func updatePartial(token: UUID, text: String, attributed: NSAttributedString? = nil) {
        guard self.session?.token == token else { return }
        self.session?.text = text
        self.session?.attributed = attributed
        VoiceWakeOverlayController.shared.updatePartial(token: token, transcript: text, attributed: attributed)
    }

    /// Marks the session final, records the send parameters, and asks the overlay
    /// to present the final transcript.
    ///
    /// The attributed string handed to the overlay is rebuilt from `text`; the
    /// session's stored `attributed` value is left untouched.
    ///
    /// - Parameters:
    ///   - token: Token returned by `startSession`.
    ///   - text: Final transcript.
    ///   - forwardConfig: Forwarding configuration stored on the session and passed to the overlay.
    ///   - sendChime: Chime stored on the session and passed to the overlay.
    ///   - autoSendAfter: Delay stored on the session and passed through to the overlay's `presentFinal`.
    func finalize(
        token: UUID,
        text: String,
        forwardConfig: VoiceWakeForwardConfig,
        sendChime: VoiceWakeChime,
        autoSendAfter: TimeInterval?)
    {
        guard self.session?.token == token else { return }
        self.logger.info(
            "coordinator finalize token=\(token.uuidString, privacy: .public) len=\(text.count, privacy: .public) autoSendAfter=\(autoSendAfter ?? -1, privacy: .public)")

        self.autoSendTask?.cancel()
        self.autoSendTask = nil

        self.session?.text = text
        self.session?.isFinal = true
        self.session?.forwardConfig = forwardConfig
        self.session?.sendChime = sendChime
        self.session?.autoSendDelay = autoSendAfter

        let attributed = VoiceWakeOverlayController.shared.makeAttributed(from: text)
        VoiceWakeOverlayController.shared.presentFinal(
            token: token,
            transcript: text,
            forwardConfig: forwardConfig,
            autoSendAfter: autoSendAfter,
            sendChime: sendChime,
            attributed: attributed)
    }

    /// Sends the session's transcript immediately.
    ///
    /// The overlay is dismissed and the session cleared when there is no enabled
    /// forward config, or when the trimmed transcript is empty. Otherwise the overlay
    /// is told to send and the prefixed transcript is forwarded from a detached task.
    /// The session is not cleared on that path.
    ///
    /// - Parameters:
    ///   - token: Token returned by `startSession`.
    ///   - reason: Free-form label included in the log lines.
    func sendNow(token: UUID, reason: String = "explicit") {
        guard let session, session.token == token else { return }
        let text = session.text.trimmingCharacters(in: .whitespacesAndNewlines)

        guard let forward = session.forwardConfig, forward.enabled else {
            self.logger.info("coordinator sendNow \(reason, privacy: .public) no forward config -> dismiss")
            VoiceWakeOverlayController.shared.dismiss(token: token, reason: .explicit, outcome: .empty)
            self.clearSession()
            return
        }
        guard !text.isEmpty else {
            self.logger.info("coordinator sendNow \(reason, privacy: .public) empty -> dismiss")
            VoiceWakeOverlayController.shared.dismiss(token: token, reason: .empty, outcome: .empty)
            self.clearSession()
            return
        }

        VoiceWakeOverlayController.shared.sendNow(token: token, sendChime: session.sendChime)
        // Detached so the forward does not inherit this class's main-actor isolation;
        // the result is intentionally discarded.
        Task.detached {
            _ = await VoiceWakeForwarder.forward(transcript: VoiceWakeForwarder.prefixedTranscript(text), config: forward)
        }
    }

    /// Dismisses the overlay for the matching session and clears coordinator state.
    func dismiss(token: UUID, reason: VoiceWakeOverlayController.DismissReason, outcome: VoiceWakeOverlayController.SendOutcome) {
        guard self.session?.token == token else { return }
        VoiceWakeOverlayController.shared.dismiss(token: token, reason: reason, outcome: outcome)
        self.clearSession()
    }

    /// Relays an input level value to the overlay for the matching session.
    func updateLevel(token: UUID, _ level: Double) {
        guard self.session?.token == token else { return }
        VoiceWakeOverlayController.shared.updateLevel(token: token, level)
    }

    /// Returns the current session token and text (`nil` / empty string when there is
    /// no session) together with the overlay's current visibility.
    func snapshot() -> (token: UUID?, text: String, visible: Bool) {
        (self.session?.token, self.session?.text ?? "", VoiceWakeOverlayController.shared.isVisible)
    }

    // MARK: - Private

    /// Drops the current session and cancels any pending auto-send task.
    private func clearSession() {
        self.session = nil
        self.autoSendTask?.cancel()
        self.autoSendTask = nil
    }
}

View File

@@ -417,7 +417,7 @@ final class VoiceWakeOverlayController: ObservableObject {
}
}
private func makeAttributed(from text: String) -> NSAttributedString {
func makeAttributed(from text: String) -> NSAttributedString {
NSAttributedString(
string: text,
attributes: [

View File

@@ -172,7 +172,11 @@ actor VoiceWakeRuntime {
self.overlayToken = nil
guard dismissOverlay else { return }
Task { @MainActor in
VoiceWakeOverlayController.shared.dismiss(token: token)
if let token {
VoiceSessionCoordinator.shared.dismiss(token: token, reason: .explicit, outcome: .empty)
} else {
VoiceWakeOverlayController.shared.dismiss()
}
}
}
@@ -218,9 +222,9 @@ actor VoiceWakeRuntime {
let snapshot = self.committedTranscript + self.volatileTranscript
if let token = self.overlayToken {
await MainActor.run {
VoiceWakeOverlayController.shared.updatePartial(
VoiceSessionCoordinator.shared.updatePartial(
token: token,
transcript: snapshot,
text: snapshot,
attributed: attributed)
}
}
@@ -271,10 +275,11 @@ actor VoiceWakeRuntime {
volatile: self.volatileTranscript,
isFinal: false)
self.overlayToken = await MainActor.run {
VoiceWakeOverlayController.shared.startSession(
VoiceSessionCoordinator.shared.startSession(
source: .wakeWord,
transcript: snapshot,
attributed: attributed)
text: snapshot,
attributed: attributed,
forwardEnabled: true)
}
// Keep the "ears" boosted for the capture window so the status icon animates while recording.
@@ -326,27 +331,20 @@ actor VoiceWakeRuntime {
await MainActor.run { AppStateStore.shared.stopVoiceEars() }
if let token = self.overlayToken {
await MainActor.run { VoiceWakeOverlayController.shared.updateLevel(token: token, 0) }
await MainActor.run { VoiceSessionCoordinator.shared.updateLevel(token: token, 0) }
}
let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig }
// Auto-send should fire as soon as the silence threshold is satisfied (2s after speech, 5s after trigger-only).
// Keep the overlay visible during capture; once we finalize, we dispatch immediately.
let delay: TimeInterval = 0.0
let finalAttributed = Self.makeAttributed(
committed: finalTranscript,
volatile: "",
isFinal: true)
let sendChime = finalTranscript.isEmpty ? .none : config.sendChime
if let token = self.overlayToken {
await MainActor.run {
VoiceWakeOverlayController.shared.presentFinal(
VoiceSessionCoordinator.shared.finalize(
token: token,
transcript: finalTranscript,
text: finalTranscript,
forwardConfig: forwardConfig,
autoSendAfter: delay,
sendChime: sendChime,
attributed: finalAttributed)
autoSendAfter: delay)
}
} else if forwardConfig.enabled, !finalTranscript.isEmpty {
if sendChime != .none {
@@ -380,7 +378,7 @@ actor VoiceWakeRuntime {
let clamped = min(1.0, max(0.0, rms / max(self.minSpeechRMS, threshold)))
if let token = self.overlayToken {
Task { @MainActor in
VoiceWakeOverlayController.shared.updateLevel(token: token, clamped)
VoiceSessionCoordinator.shared.updateLevel(token: token, clamped)
}
}
}