From 764761cfa510f617676edd4fed443a3736859a7c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 8 Dec 2025 16:29:13 +0100 Subject: [PATCH] feat: add voice wake overlay --- .../Sources/Clawdis/VoiceWakeOverlay.swift | 291 ++++++++++++++++++ .../Sources/Clawdis/VoiceWakeRuntime.swift | 39 ++- 2 files changed, 317 insertions(+), 13 deletions(-) create mode 100644 apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift diff --git a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift new file mode 100644 index 000000000..1d27c67e3 --- /dev/null +++ b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift @@ -0,0 +1,291 @@ +import AppKit +import SwiftUI + +/// Lightweight, borderless panel that shows the current voice wake transcript near the menu bar. +@MainActor +final class VoiceWakeOverlayController: ObservableObject { + static let shared = VoiceWakeOverlayController() + + @Published private(set) var model = Model() + + struct Model { + var text: String = "" + var isFinal: Bool = false + var isVisible: Bool = false + var forwardEnabled: Bool = false + } + + private var window: NSPanel? + private var hostingView: NSHostingView? + private var autoSendTask: Task? + private var forwardConfig: VoiceWakeForwardConfig? + + private let width: CGFloat = 360 + private let padding: CGFloat = 10 + + func showPartial(transcript: String) { + self.autoSendTask?.cancel() + self.forwardConfig = nil + self.model.text = transcript + self.model.isFinal = false + self.model.forwardEnabled = false + self.present() + } + + func presentFinal(transcript: String, forwardConfig: VoiceWakeForwardConfig) { + self.autoSendTask?.cancel() + self.forwardConfig = forwardConfig + self.model.text = transcript + self.model.isFinal = true + self.model.forwardEnabled = forwardConfig.enabled + self.present() + self.scheduleAutoSend() + } + + func userBeganEditing() { + self.autoSendTask?.cancel() + } + + func updateText(_ text: String) { + self.model.text = text + } + + func sendNow() { + self.autoSendTask?.cancel() + guard let forwardConfig, forwardConfig.enabled else { + self.dismiss() + return + } + let text = self.model.text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !text.isEmpty else { + self.dismiss() + return + } + + let payload = VoiceWakeForwarder.prefixedTranscript(text) + Task.detached { + await VoiceWakeForwarder.forward(transcript: payload, config: forwardConfig) + } + self.dismiss() + } + + func dismiss(reason: DismissReason = .explicit) { + self.autoSendTask?.cancel() + guard let window else { return } + NSAnimationContext.runAnimationGroup { context in + context.duration = 0.18 + window.animator().alphaValue = 0 + } completionHandler: { + Task { @MainActor in + window.orderOut(nil) + self.model.isVisible = false + } + } + } + + enum DismissReason { case explicit, empty } + + // MARK: - Private + + private func present() { + self.ensureWindow() + self.hostingView?.rootView = VoiceWakeOverlayView(controller: self) + self.updateWindowFrame() + + guard let window else { return } + if !self.model.isVisible { + self.model.isVisible = true + window.alphaValue = 0 + window.orderFrontRegardless() + NSAnimationContext.runAnimationGroup { context in + context.duration = 0.18 + window.animator().alphaValue = 1 + } + } else { + window.orderFrontRegardless() + } + } + + private func ensureWindow() { + if self.window != nil { return } + let panel = NSPanel( + contentRect: NSRect(x: 0, y: 0, width: self.width, height: 60), + styleMask: [.nonactivatingPanel, .borderless], + backing: .buffered, + defer: false) + panel.isOpaque = false + panel.backgroundColor = .clear + panel.hasShadow = true + panel.level = .statusBar + panel.collectionBehavior = [.canJoinAllSpaces, .fullScreenAuxiliary, .transient] + panel.hidesOnDeactivate = false + panel.isMovable = false + panel.isFloatingPanel = true + panel.becomesKeyOnlyIfNeeded = true + panel.titleVisibility = .hidden + panel.titlebarAppearsTransparent = true + + let host = NSHostingView(rootView: VoiceWakeOverlayView(controller: self)) + host.translatesAutoresizingMaskIntoConstraints = false + panel.contentView = host + self.hostingView = host + self.window = panel + } + + private func updateWindowFrame() { + guard let screen = NSScreen.main, let window, let host = self.hostingView else { return } + let fit = host.fittingSize + let height = max(42, min(fit.height, 140)) + let size = NSSize(width: self.width, height: height) + let visible = screen.visibleFrame + let origin = CGPoint( + x: visible.maxX - size.width - self.padding, + y: visible.maxY - size.height - self.padding) + window.setFrame(NSRect(origin: origin, size: size), display: true, animate: false) + } + + private func scheduleAutoSend() { + guard let forwardConfig, forwardConfig.enabled else { return } + self.autoSendTask = Task { [weak self] in + try? await Task.sleep(nanoseconds: 250_000_000) + self?.sendNow() + } + } +} + +private struct VoiceWakeOverlayView: View { + @ObservedObject var controller: VoiceWakeOverlayController + @FocusState private var focused: Bool + + var body: some View { + HStack(spacing: 8) { + TranscriptTextView( + text: Binding( + get: { self.controller.model.text }, + set: { self.controller.updateText($0) }), + isFinal: self.controller.model.isFinal, + onBeginEditing: { + self.controller.userBeganEditing() + }, + onSend: { + self.controller.sendNow() + }) + .focused(self.$focused) + .frame(minHeight: 32, maxHeight: 80) + + Button { + self.controller.sendNow() + } label: { + Image(systemName: "paperplane.fill") + .imageScale(.small) + .padding(.vertical, 6) + .padding(.horizontal, 10) + .background(Color.accentColor.opacity(0.12)) + .clipShape(RoundedRectangle(cornerRadius: 8, style: .continuous)) + } + .buttonStyle(.plain) + .disabled(!self.controller.model.forwardEnabled) + .keyboardShortcut(.return, modifiers: [.command]) + } + .padding(.vertical, 8) + .padding(.horizontal, 10) + .background(.regularMaterial) + .clipShape(RoundedRectangle(cornerRadius: 12, style: .continuous)) + .shadow(color: .black.opacity(0.15), radius: 12, y: 6) + .onAppear { self.focused = false } + .onChange(of: self.controller.model.text) { _, _ in + self.focused = false + } + .onChange(of: self.controller.model.isVisible) { _, visible in + if visible { self.focused = false } + } + } +} + +private struct TranscriptTextView: NSViewRepresentable { + @Binding var text: String + var isFinal: Bool + var onBeginEditing: () -> Void + var onSend: () -> Void + + func makeCoordinator() -> Coordinator { Coordinator(self) } + + func makeNSView(context: Context) -> NSScrollView { + let textView = TranscriptNSTextView() + textView.delegate = context.coordinator + textView.drawsBackground = false + textView.isRichText = false + textView.isAutomaticQuoteSubstitutionEnabled = false + textView.isAutomaticTextReplacementEnabled = false + textView.font = .systemFont(ofSize: 13, weight: .regular) + textView.textContainerInset = NSSize(width: 2, height: 6) + textView.textContainer?.lineBreakMode = .byWordWrapping + textView.maxSize = NSSize(width: CGFloat.greatestFiniteMagnitude, height: CGFloat.greatestFiniteMagnitude) + textView.textContainer?.widthTracksTextView = true + textView.textContainer?.containerSize = NSSize(width: CGFloat.greatestFiniteMagnitude, height: CGFloat.greatestFiniteMagnitude) + textView.string = self.text + textView.onSend = { [weak textView] in + textView?.window?.makeFirstResponder(nil) + self.onSend() + } + textView.onBeginEditing = self.onBeginEditing + + let scroll = NSScrollView() + scroll.drawsBackground = false + scroll.borderType = .noBorder + scroll.hasVerticalScroller = false + scroll.hasHorizontalScroller = false + scroll.documentView = textView + return scroll + } + + func updateNSView(_ scrollView: NSScrollView, context: Context) { + guard let textView = scrollView.documentView as? TranscriptNSTextView else { return } + if textView.string != self.text { + textView.string = self.text + } + textView.textColor = self.isFinal ? .labelColor : .secondaryLabelColor + } + + final class Coordinator: NSObject, NSTextViewDelegate { + var parent: TranscriptTextView + + init(_ parent: TranscriptTextView) { self.parent = parent } + + func textDidBeginEditing(_ notification: Notification) { + self.parent.onBeginEditing() + } + + func textDidChange(_ notification: Notification) { + guard let view = notification.object as? NSTextView else { return } + self.parent.text = view.string + } + } +} + +private final class TranscriptNSTextView: NSTextView { + var onSend: (() -> Void)? + var onBeginEditing: (() -> Void)? + + override func becomeFirstResponder() -> Bool { + self.onBeginEditing?() + return super.becomeFirstResponder() + } + + override func keyDown(with event: NSEvent) { + let isReturn = event.keyCode == 36 + if isReturn && event.modifierFlags.contains(.command) { + self.onSend?() + return + } + if isReturn { + if event.modifierFlags.contains(.shift) { + super.insertNewline(nil) + return + } + self.onSend?() + return + } + super.keyDown(with: event) + } +} diff --git a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift index 3320b4b68..e07210cb7 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift @@ -117,6 +117,10 @@ actor VoiceWakeRuntime { self.audioEngine.stop() self.currentConfig = nil self.logger.debug("voicewake runtime stopped") + + Task { @MainActor in + VoiceWakeOverlayController.shared.dismiss() + } } private func configureSession(localeID: String?) { @@ -140,6 +144,9 @@ actor VoiceWakeRuntime { self.lastHeard = now if self.isCapturing { self.capturedTranscript = transcript + await MainActor.run { + VoiceWakeOverlayController.shared.showPartial(transcript: transcript) + } } } @@ -170,6 +177,10 @@ actor VoiceWakeRuntime { self.captureStartedAt = Date() self.cooldownUntil = nil + await MainActor.run { + VoiceWakeOverlayController.shared.showPartial(transcript: transcript) + } + await MainActor.run { AppStateStore.shared.triggerVoiceEars(ttl: nil) } self.captureTask?.cancel() @@ -218,26 +229,28 @@ actor VoiceWakeRuntime { await MainActor.run { AppStateStore.shared.stopVoiceEars() } - if !finalTranscript.isEmpty { - await self.send(transcript: finalTranscript, config: config) + guard !finalTranscript.isEmpty else { + await MainActor.run { VoiceWakeOverlayController.shared.dismiss(reason: .empty) } + self.cooldownUntil = Date().addingTimeInterval(self.debounceAfterSend) + self.restartRecognizer() + return + } + + let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig } + await MainActor.run { + VoiceWakeOverlayController.shared.presentFinal(transcript: finalTranscript, forwardConfig: forwardConfig) } self.cooldownUntil = Date().addingTimeInterval(self.debounceAfterSend) + self.restartRecognizer() + } + private func restartRecognizer() { // Restart the recognizer so we listen for the next trigger with a clean buffer. let current = self.currentConfig self.stop() - if let current { await self.start(with: current) } - } - - private func send(transcript: String, config: RuntimeConfig) async { - let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig } - guard forwardConfig.enabled else { return } - - let payload = VoiceWakeForwarder.prefixedTranscript(transcript) - - Task.detached { - await VoiceWakeForwarder.forward(transcript: payload, config: forwardConfig) + if let current { + Task { await self.start(with: current) } } }