diff --git a/apps/macos/Sources/Clawdis/VoicePushToTalk.swift b/apps/macos/Sources/Clawdis/VoicePushToTalk.swift index 4988a64b8..2f93e1b5e 100644 --- a/apps/macos/Sources/Clawdis/VoicePushToTalk.swift +++ b/apps/macos/Sources/Clawdis/VoicePushToTalk.swift @@ -151,6 +151,9 @@ actor VoicePushToTalk { VoiceWakeOverlayController.shared.dismiss() } self.isCapturing = false + // If push-to-talk fails to start after pausing wake-word, ensure we resume listening. + await VoiceWakeRuntime.shared.applyPushToTalkCooldown() + await VoiceWakeRuntime.shared.refresh(state: AppStateStore.shared) } } diff --git a/apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift b/apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift index f0c2dc8d1..0bf00713c 100644 --- a/apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift +++ b/apps/macos/Sources/Clawdis/VoiceSessionCoordinator.swift @@ -29,12 +29,6 @@ final class VoiceSessionCoordinator: ObservableObject { attributed: NSAttributedString? = nil, forwardEnabled: Bool = false) -> UUID { - // If a send is in-flight, ignore new sessions to avoid token churn. - if VoiceWakeOverlayController.shared.model.isSending { - self.logger.info("coordinator drop start while sending") - return self.session?.token ?? UUID() - } - let token = UUID() self.logger.info("coordinator start token=\(token.uuidString) source=\(source.rawValue) len=\(text.count)") let attributedText = attributed ?? VoiceWakeOverlayController.shared.makeAttributed(from: text) @@ -127,4 +121,13 @@ final class VoiceSessionCoordinator: ObservableObject { private func clearSession() { self.session = nil } + + /// Overlay dismiss completion callback (manual X, empty, auto-dismiss after send). + /// Ensures the wake-word recognizer is resumed if Voice Wake is enabled. + func overlayDidDismiss(token: UUID?) { + if let token, self.session?.token == token { + self.clearSession() + } + Task { await VoiceWakeRuntime.shared.refresh(state: AppStateStore.shared) } + } } diff --git a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift index dd6bf9a0e..f409aa02f 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift @@ -52,10 +52,6 @@ final class VoiceWakeOverlayController: ObservableObject { forwardEnabled: Bool = false, isFinal: Bool = false) -> UUID { - if self.model.isSending { - self.logger.log(level: .info, "overlay drop session_start while sending") - return self.activeToken ?? UUID() - } let message = """ overlay session_start source=\(source.rawValue) \ len=\(transcript.count) @@ -218,6 +214,7 @@ final class VoiceWakeOverlayController: ObservableObject { window.animator().alphaValue = 0 } completionHandler: { Task { @MainActor in + let dismissedToken = self.activeToken window.orderOut(nil) self.model.isVisible = false self.model.level = 0 @@ -229,6 +226,7 @@ final class VoiceWakeOverlayController: ObservableObject { AppStateStore.shared.celebrateSend() } AppStateStore.shared.stopVoiceEars() + VoiceSessionCoordinator.shared.overlayDidDismiss(token: dismissedToken) } } } diff --git a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift index 21f8d616b..2d7f4591a 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift @@ -404,8 +404,6 @@ actor VoiceWakeRuntime { private func restartRecognizerIfIdleAndOverlayHidden() async { if self.isCapturing { return } - let overlayVisible = await MainActor.run { VoiceWakeOverlayController.shared.isVisible } - if overlayVisible { return } self.restartRecognizer() } diff --git a/docs/mac/voicewake.md b/docs/mac/voicewake.md index e95440c76..f423d356e 100644 --- a/docs/mac/voicewake.md +++ b/docs/mac/voicewake.md @@ -19,6 +19,17 @@ Updated: 2025-12-12 · Owners: mac app - Overlay is driven via `VoiceWakeOverlayController` with committed/volatile coloring. - After send, recognizer restarts cleanly to listen for the next trigger. +## Lifecycle invariants +- If Voice Wake is enabled and permissions are granted, the wake-word recognizer should be listening (except during an explicit push-to-talk capture). +- Overlay visibility (including manual dismiss via the X button) must never prevent the recognizer from resuming. + +## Sticky overlay failure mode (previous) +Previously, if the overlay got stuck visible and you manually closed it, Voice Wake could appear “dead” because the runtime’s restart attempt could be blocked by overlay visibility and no subsequent restart was scheduled. + +Hardening: +- Wake runtime restart is no longer blocked by overlay visibility. +- Overlay dismiss completion triggers a `VoiceWakeRuntime.refresh(...)` via `VoiceSessionCoordinator`, so manual X-dismiss always resumes listening. + ## Push-to-talk specifics - Hotkey detection uses a global `.flagsChanged` monitor for **right Option** (`keyCode 61` + `.option`). We only observe events (no swallowing). - Capture pipeline lives in `VoicePushToTalk`: starts Speech immediately, streams partials to the overlay, and calls `VoiceWakeForwarder` on release.