Coordinator: centralize voice sessions for wake and push-to-talk
This commit is contained in:
@@ -116,8 +116,8 @@ actor VoicePushToTalk {
|
|||||||
self.triggerChimePlayed = false
|
self.triggerChimePlayed = false
|
||||||
self.finalized = false
|
self.finalized = false
|
||||||
self.timeoutTask?.cancel(); self.timeoutTask = nil
|
self.timeoutTask?.cancel(); self.timeoutTask = nil
|
||||||
let snapshot = await MainActor.run { VoiceWakeOverlayController.shared.snapshot() }
|
let snapshot = await MainActor.run { VoiceSessionCoordinator.shared.snapshot() }
|
||||||
self.adoptedPrefix = snapshot.isVisible ? snapshot.text.trimmingCharacters(in: .whitespacesAndNewlines) : ""
|
self.adoptedPrefix = snapshot.visible ? snapshot.text.trimmingCharacters(in: .whitespacesAndNewlines) : ""
|
||||||
self.logger.info("ptt begin adopted_prefix_len=\(self.adoptedPrefix.count, privacy: .public)")
|
self.logger.info("ptt begin adopted_prefix_len=\(self.adoptedPrefix.count, privacy: .public)")
|
||||||
if config.triggerChime != .none {
|
if config.triggerChime != .none {
|
||||||
self.triggerChimePlayed = true
|
self.triggerChimePlayed = true
|
||||||
@@ -131,10 +131,11 @@ actor VoicePushToTalk {
|
|||||||
volatile: "",
|
volatile: "",
|
||||||
isFinal: false)
|
isFinal: false)
|
||||||
self.overlayToken = await MainActor.run {
|
self.overlayToken = await MainActor.run {
|
||||||
VoiceWakeOverlayController.shared.startSession(
|
VoiceSessionCoordinator.shared.startSession(
|
||||||
source: .pushToTalk,
|
source: .pushToTalk,
|
||||||
transcript: adoptedPrefix,
|
text: adoptedPrefix,
|
||||||
attributed: adoptedAttributed)
|
attributed: adoptedAttributed,
|
||||||
|
forwardEnabled: true)
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@@ -222,9 +223,9 @@ actor VoicePushToTalk {
|
|||||||
let attributed = Self.makeAttributed(committed: committedWithPrefix, volatile: self.volatile, isFinal: isFinal)
|
let attributed = Self.makeAttributed(committed: committedWithPrefix, volatile: self.volatile, isFinal: isFinal)
|
||||||
if let token = self.overlayToken {
|
if let token = self.overlayToken {
|
||||||
await MainActor.run {
|
await MainActor.run {
|
||||||
VoiceWakeOverlayController.shared.updatePartial(
|
VoiceSessionCoordinator.shared.updatePartial(
|
||||||
token: token,
|
token: token,
|
||||||
transcript: snapshot,
|
text: snapshot,
|
||||||
attributed: attributed)
|
attributed: attributed)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -243,10 +244,6 @@ actor VoicePushToTalk {
|
|||||||
}()
|
}()
|
||||||
let finalText = Self.join(self.adoptedPrefix, finalRecognized)
|
let finalText = Self.join(self.adoptedPrefix, finalRecognized)
|
||||||
|
|
||||||
let attributed = Self.makeAttributed(
|
|
||||||
committed: Self.join(self.adoptedPrefix, self.committed),
|
|
||||||
volatile: self.volatile,
|
|
||||||
isFinal: true)
|
|
||||||
let forward: VoiceWakeForwardConfig = if let cached = self.activeConfig?.forwardConfig {
|
let forward: VoiceWakeForwardConfig = if let cached = self.activeConfig?.forwardConfig {
|
||||||
cached
|
cached
|
||||||
} else {
|
} else {
|
||||||
@@ -258,18 +255,15 @@ actor VoicePushToTalk {
|
|||||||
let logger = self.logger
|
let logger = self.logger
|
||||||
await MainActor.run {
|
await MainActor.run {
|
||||||
logger.info("ptt finalize reason=\(reason, privacy: .public) len=\(finalText.count, privacy: .public)")
|
logger.info("ptt finalize reason=\(reason, privacy: .public) len=\(finalText.count, privacy: .public)")
|
||||||
if finalText.isEmpty {
|
if let token {
|
||||||
VoiceWakeOverlayController.shared.dismiss(token: token, reason: .empty)
|
VoiceSessionCoordinator.shared.finalize(
|
||||||
} else if let token {
|
|
||||||
VoiceWakeOverlayController.shared.presentFinal(
|
|
||||||
token: token,
|
token: token,
|
||||||
transcript: finalText,
|
text: finalText,
|
||||||
forwardConfig: forward,
|
forwardConfig: forward,
|
||||||
autoSendAfter: nil,
|
|
||||||
sendChime: chime,
|
sendChime: chime,
|
||||||
attributed: attributed)
|
autoSendAfter: nil)
|
||||||
VoiceWakeOverlayController.shared.sendNow(token: token, sendChime: chime)
|
VoiceSessionCoordinator.shared.sendNow(token: token, reason: reason)
|
||||||
} else {
|
} else if !finalText.isEmpty, forward.enabled {
|
||||||
if chime != .none {
|
if chime != .none {
|
||||||
VoiceWakeChimePlayer.play(chime, reason: "ptt.fallback_send")
|
VoiceWakeChimePlayer.play(chime, reason: "ptt.fallback_send")
|
||||||
}
|
}
|
||||||
|
|||||||
137
apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift
Normal file
137
apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
import AppKit
|
||||||
|
import Foundation
|
||||||
|
import OSLog
|
||||||
|
|
||||||
|
/// Central owner of the active voice session used by both the wake-word and push-to-talk paths.
///
/// The coordinator tracks at most one session at a time (stored in `session`) and relays
/// start / update / finalize / send / dismiss calls to `VoiceWakeOverlayController.shared`.
/// Every call after `startSession` is keyed by the token it returned; calls whose token does
/// not match the current session are ignored.
@MainActor
final class VoiceSessionCoordinator: ObservableObject {
    static let shared = VoiceSessionCoordinator()

    /// Which capture path opened the session.
    enum Source: String { case wakeWord, pushToTalk }

    /// State recorded for the session currently in progress.
    struct Session {
        let token: UUID
        let source: Source
        var text: String
        var attributed: NSAttributedString?
        var isFinal: Bool
        var forwardConfig: VoiceWakeForwardConfig?
        var sendChime: VoiceWakeChime
        var autoSendDelay: TimeInterval?
    }

    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "voicewake.coordinator")
    private var session: Session?
    // NOTE(review): nothing in this class assigns `autoSendTask`; it is only cancelled and reset.
    // The auto-send delay itself is handed to the overlay in `finalize`.
    private var autoSendTask: Task<Void, Never>?

    // MARK: - API

    /// Opens a new session and shows it in the overlay.
    ///
    /// - Parameters:
    ///   - source: Capture path that opened the session.
    ///   - text: Initial transcript text.
    ///   - attributed: Pre-styled transcript; when `nil`, the overlay controller's
    ///     `makeAttributed(from:)` is applied to `text`.
    ///   - forwardEnabled: When `true`, the session captures the forward config currently held
    ///     by `AppStateStore.shared`; otherwise the session starts without one.
    /// - Returns: The token identifying the session. While the overlay model reports a send in
    ///   progress no new session is created and the current session's token (or a fresh,
    ///   unregistered UUID when there is none) is returned instead.
    func startSession(
        source: Source,
        text: String,
        attributed: NSAttributedString? = nil,
        forwardEnabled: Bool = false
    ) -> UUID {
        // If a send is in-flight, ignore new sessions to avoid token churn.
        if VoiceWakeOverlayController.shared.model.isSending {
            self.logger.info("coordinator drop start while sending")
            return self.session?.token ?? UUID()
        }

        let token = UUID()
        // Dynamic strings are redacted in the unified log unless marked public; the token and
        // source carry no transcript content, so expose them for diagnostics.
        self.logger.info(
            "coordinator start token=\(token.uuidString, privacy: .public) source=\(source.rawValue, privacy: .public) len=\(text.count)")
        let attributedText = attributed ?? VoiceWakeOverlayController.shared.makeAttributed(from: text)
        let forwardConfig: VoiceWakeForwardConfig? =
            forwardEnabled ? AppStateStore.shared.voiceWakeForwardConfig : nil
        self.session = Session(
            token: token,
            source: source,
            text: text,
            attributed: attributedText,
            isFinal: false,
            forwardConfig: forwardConfig,
            sendChime: .none,
            autoSendDelay: nil)
        // NOTE(review): the coordinator's own token is used for every later overlay call and
        // nothing returned by the overlay is kept here — confirm the overlay keys its session
        // on the same token.
        VoiceWakeOverlayController.shared.startSession(
            source: VoiceWakeOverlayController.Source(rawValue: source.rawValue) ?? .wakeWord,
            transcript: text,
            attributed: attributedText,
            forwardEnabled: forwardEnabled,
            isFinal: false)
        return token
    }

    /// Replaces the in-progress transcript of the matching session and forwards it to the overlay.
    ///
    /// - Parameters:
    ///   - token: Token returned by `startSession`; a mismatch makes this a no-op.
    ///   - text: Latest transcript text.
    ///   - attributed: Styled transcript, stored and passed to the overlay as given.
    func updatePartial(token: UUID, text: String, attributed: NSAttributedString? = nil) {
        guard self.session?.token == token else { return }
        self.session?.text = text
        self.session?.attributed = attributed
        VoiceWakeOverlayController.shared.updatePartial(token: token, transcript: text, attributed: attributed)
    }

    /// Marks the matching session as final, records its send settings and presents the final
    /// transcript in the overlay.
    ///
    /// - Parameters:
    ///   - token: Token returned by `startSession`; a mismatch makes this a no-op.
    ///   - text: Final transcript text.
    ///   - forwardConfig: Forwarding configuration to use for the send.
    ///   - sendChime: Chime passed to the overlay for the send.
    ///   - autoSendAfter: Delay handed to the overlay's `presentFinal`; logged as `-1` when `nil`.
    func finalize(
        token: UUID,
        text: String,
        forwardConfig: VoiceWakeForwardConfig,
        sendChime: VoiceWakeChime,
        autoSendAfter: TimeInterval?)
    {
        guard self.session?.token == token else { return }
        self.logger.info(
            "coordinator finalize token=\(token.uuidString, privacy: .public) len=\(text.count) autoSendAfter=\(autoSendAfter ?? -1)")
        self.autoSendTask?.cancel()
        self.autoSendTask = nil

        let attributed = VoiceWakeOverlayController.shared.makeAttributed(from: text)
        self.session?.text = text
        // Keep the stored attributed string in step with the final text shown by the overlay
        // instead of leaving the last partial styling behind.
        self.session?.attributed = attributed
        self.session?.isFinal = true
        self.session?.forwardConfig = forwardConfig
        self.session?.sendChime = sendChime
        self.session?.autoSendDelay = autoSendAfter

        VoiceWakeOverlayController.shared.presentFinal(
            token: token,
            transcript: text,
            forwardConfig: forwardConfig,
            autoSendAfter: autoSendAfter,
            sendChime: sendChime,
            attributed: attributed)
    }

    /// Sends the matching session's transcript, or dismisses the overlay when there is nothing
    /// to send.
    ///
    /// The session is dismissed and cleared when it has no enabled forward config or when the
    /// trimmed transcript is empty. Otherwise the overlay's `sendNow` is invoked and the
    /// prefixed transcript is forwarded from a detached task; the session is left in place.
    ///
    /// - Parameters:
    ///   - token: Token returned by `startSession`; a mismatch makes this a no-op.
    ///   - reason: Free-form label included in the log lines.
    func sendNow(token: UUID, reason: String = "explicit") {
        guard let session, session.token == token else { return }
        let text = session.text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard let forward = session.forwardConfig, forward.enabled else {
            self.logger.info("coordinator sendNow \(reason, privacy: .public) no forward config -> dismiss")
            VoiceWakeOverlayController.shared.dismiss(token: token, reason: .explicit, outcome: .empty)
            self.clearSession()
            return
        }
        guard !text.isEmpty else {
            self.logger.info("coordinator sendNow \(reason, privacy: .public) empty -> dismiss")
            VoiceWakeOverlayController.shared.dismiss(token: token, reason: .empty, outcome: .empty)
            self.clearSession()
            return
        }
        // NOTE(review): the overlay's sendNow is called in addition to the forward below —
        // confirm the overlay does not forward the transcript a second time.
        VoiceWakeOverlayController.shared.sendNow(token: token, sendChime: session.sendChime)
        Task.detached {
            _ = await VoiceWakeForwarder.forward(transcript: VoiceWakeForwarder.prefixedTranscript(text), config: forward)
        }
    }

    /// Dismisses the overlay for the matching session and clears the coordinator's state.
    ///
    /// - Parameters:
    ///   - token: Token returned by `startSession`; a mismatch makes this a no-op.
    ///   - reason: Dismiss reason passed through to the overlay.
    ///   - outcome: Send outcome passed through to the overlay.
    func dismiss(token: UUID, reason: VoiceWakeOverlayController.DismissReason, outcome: VoiceWakeOverlayController.SendOutcome) {
        guard self.session?.token == token else { return }
        VoiceWakeOverlayController.shared.dismiss(token: token, reason: reason, outcome: outcome)
        self.clearSession()
    }

    /// Passes an input level through to the overlay for the matching session.
    ///
    /// - Parameters:
    ///   - token: Token returned by `startSession`; a mismatch makes this a no-op.
    ///   - level: Level value handed to the overlay unchanged.
    func updateLevel(token: UUID, _ level: Double) {
        guard self.session?.token == token else { return }
        VoiceWakeOverlayController.shared.updateLevel(token: token, level)
    }

    /// Returns the current session token and text (empty when there is no session) together
    /// with the overlay's visibility flag.
    func snapshot() -> (token: UUID?, text: String, visible: Bool) {
        (self.session?.token, self.session?.text ?? "", VoiceWakeOverlayController.shared.isVisible)
    }

    // MARK: - Private

    /// Drops the current session and cancels any pending auto-send task.
    private func clearSession() {
        self.session = nil
        self.autoSendTask?.cancel()
        self.autoSendTask = nil
    }
}
|
||||||
@@ -417,7 +417,7 @@ final class VoiceWakeOverlayController: ObservableObject {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private func makeAttributed(from text: String) -> NSAttributedString {
|
func makeAttributed(from text: String) -> NSAttributedString {
|
||||||
NSAttributedString(
|
NSAttributedString(
|
||||||
string: text,
|
string: text,
|
||||||
attributes: [
|
attributes: [
|
||||||
|
|||||||
@@ -172,7 +172,11 @@ actor VoiceWakeRuntime {
|
|||||||
self.overlayToken = nil
|
self.overlayToken = nil
|
||||||
guard dismissOverlay else { return }
|
guard dismissOverlay else { return }
|
||||||
Task { @MainActor in
|
Task { @MainActor in
|
||||||
VoiceWakeOverlayController.shared.dismiss(token: token)
|
if let token {
|
||||||
|
VoiceSessionCoordinator.shared.dismiss(token: token, reason: .explicit, outcome: .empty)
|
||||||
|
} else {
|
||||||
|
VoiceWakeOverlayController.shared.dismiss()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,9 +222,9 @@ actor VoiceWakeRuntime {
|
|||||||
let snapshot = self.committedTranscript + self.volatileTranscript
|
let snapshot = self.committedTranscript + self.volatileTranscript
|
||||||
if let token = self.overlayToken {
|
if let token = self.overlayToken {
|
||||||
await MainActor.run {
|
await MainActor.run {
|
||||||
VoiceWakeOverlayController.shared.updatePartial(
|
VoiceSessionCoordinator.shared.updatePartial(
|
||||||
token: token,
|
token: token,
|
||||||
transcript: snapshot,
|
text: snapshot,
|
||||||
attributed: attributed)
|
attributed: attributed)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -271,10 +275,11 @@ actor VoiceWakeRuntime {
|
|||||||
volatile: self.volatileTranscript,
|
volatile: self.volatileTranscript,
|
||||||
isFinal: false)
|
isFinal: false)
|
||||||
self.overlayToken = await MainActor.run {
|
self.overlayToken = await MainActor.run {
|
||||||
VoiceWakeOverlayController.shared.startSession(
|
VoiceSessionCoordinator.shared.startSession(
|
||||||
source: .wakeWord,
|
source: .wakeWord,
|
||||||
transcript: snapshot,
|
text: snapshot,
|
||||||
attributed: attributed)
|
attributed: attributed,
|
||||||
|
forwardEnabled: true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Keep the "ears" boosted for the capture window so the status icon animates while recording.
|
// Keep the "ears" boosted for the capture window so the status icon animates while recording.
|
||||||
@@ -326,27 +331,20 @@ actor VoiceWakeRuntime {
|
|||||||
|
|
||||||
await MainActor.run { AppStateStore.shared.stopVoiceEars() }
|
await MainActor.run { AppStateStore.shared.stopVoiceEars() }
|
||||||
if let token = self.overlayToken {
|
if let token = self.overlayToken {
|
||||||
await MainActor.run { VoiceWakeOverlayController.shared.updateLevel(token: token, 0) }
|
await MainActor.run { VoiceSessionCoordinator.shared.updateLevel(token: token, 0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig }
|
let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig }
|
||||||
// Auto-send should fire as soon as the silence threshold is satisfied (2s after speech, 5s after trigger-only).
|
|
||||||
// Keep the overlay visible during capture; once we finalize, we dispatch immediately.
|
|
||||||
let delay: TimeInterval = 0.0
|
let delay: TimeInterval = 0.0
|
||||||
let finalAttributed = Self.makeAttributed(
|
|
||||||
committed: finalTranscript,
|
|
||||||
volatile: "",
|
|
||||||
isFinal: true)
|
|
||||||
let sendChime = finalTranscript.isEmpty ? .none : config.sendChime
|
let sendChime = finalTranscript.isEmpty ? .none : config.sendChime
|
||||||
if let token = self.overlayToken {
|
if let token = self.overlayToken {
|
||||||
await MainActor.run {
|
await MainActor.run {
|
||||||
VoiceWakeOverlayController.shared.presentFinal(
|
VoiceSessionCoordinator.shared.finalize(
|
||||||
token: token,
|
token: token,
|
||||||
transcript: finalTranscript,
|
text: finalTranscript,
|
||||||
forwardConfig: forwardConfig,
|
forwardConfig: forwardConfig,
|
||||||
autoSendAfter: delay,
|
|
||||||
sendChime: sendChime,
|
sendChime: sendChime,
|
||||||
attributed: finalAttributed)
|
autoSendAfter: delay)
|
||||||
}
|
}
|
||||||
} else if forwardConfig.enabled, !finalTranscript.isEmpty {
|
} else if forwardConfig.enabled, !finalTranscript.isEmpty {
|
||||||
if sendChime != .none {
|
if sendChime != .none {
|
||||||
@@ -380,7 +378,7 @@ actor VoiceWakeRuntime {
|
|||||||
let clamped = min(1.0, max(0.0, rms / max(self.minSpeechRMS, threshold)))
|
let clamped = min(1.0, max(0.0, rms / max(self.minSpeechRMS, threshold)))
|
||||||
if let token = self.overlayToken {
|
if let token = self.overlayToken {
|
||||||
Task { @MainActor in
|
Task { @MainActor in
|
||||||
VoiceWakeOverlayController.shared.updateLevel(token: token, clamped)
|
VoiceSessionCoordinator.shared.updateLevel(token: token, clamped)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user