From 04b5002d8f0d40de51f0911b51d35df62ff6da80 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 8 Dec 2025 22:23:24 +0100 Subject: [PATCH] macos: polish voice overlay and remote command handling --- .../Sources/Clawdis/ControlChannel.swift | 22 ++++----- apps/macos/Sources/Clawdis/Utilities.swift | 29 +++++++++--- .../Sources/Clawdis/VoiceWakeOverlay.swift | 45 ++++--------------- .../Sources/Clawdis/VoiceWakeRuntime.swift | 21 ++++++++- 4 files changed, 58 insertions(+), 59 deletions(-) diff --git a/apps/macos/Sources/Clawdis/ControlChannel.swift b/apps/macos/Sources/Clawdis/ControlChannel.swift index ad9ae3093..0105153f3 100644 --- a/apps/macos/Sources/Clawdis/ControlChannel.swift +++ b/apps/macos/Sources/Clawdis/ControlChannel.swift @@ -170,27 +170,21 @@ final class ControlChannel: ObservableObject { self.connection = conn try await withCheckedThrowingContinuation { (cont: CheckedContinuation) in - var resumed = false - let resume: (Result) -> Void = { result in - guard !resumed else { return } - resumed = true - switch result { - case .success: cont.resume(returning: ()) - case let .failure(err): cont.resume(throwing: err) - } - } - - conn.stateUpdateHandler = { state in + conn.stateUpdateHandler = { [weak self, weak conn] state in + guard let self else { return } switch state { case .ready: Task { @MainActor in self.state = .connected } - resume(.success(())) + conn?.stateUpdateHandler = nil + cont.resume(returning: ()) case let .failed(err): Task { @MainActor in self.state = .degraded(err.localizedDescription) } - resume(.failure(err)) + conn?.stateUpdateHandler = nil + cont.resume(throwing: err) case let .waiting(err): Task { @MainActor in self.state = .degraded(err.localizedDescription) } - resume(.failure(err)) + conn?.stateUpdateHandler = nil + cont.resume(throwing: err) default: break } diff --git a/apps/macos/Sources/Clawdis/Utilities.swift b/apps/macos/Sources/Clawdis/Utilities.swift index ef23841a2..63e6ee9f9 100644 --- a/apps/macos/Sources/Clawdis/Utilities.swift +++ b/apps/macos/Sources/Clawdis/Utilities.swift @@ -350,21 +350,36 @@ enum CommandResolver { // Run the real clawdis CLI on the remote host; do not fall back to clawdis-mac. let exportedPath = "/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Users/steipete/Library/pnpm:$PATH" let quotedArgs = ([subcommand] + extraArgs).map(self.shellQuote).joined(separator: " ") - let userPRJ = settings.projectRoot - let prjInit = userPRJ.isEmpty ? "" : "PRJ=\(self.shellQuote(userPRJ));" - let scriptBody = """ - PATH=\(exportedPath); - \(prjInit) + let userPRJ = settings.projectRoot.trimmingCharacters(in: .whitespacesAndNewlines) + + let projectSection: String + if userPRJ.isEmpty { + projectSection = """ DEFAULT_PRJ="$HOME/Projects/clawdis" - if [ -z "${PRJ:-}" ] && [ -d "$DEFAULT_PRJ" ]; then PRJ="$DEFAULT_PRJ"; fi - if [ -n "${PRJ:-}" ]; then + if [ -d "$DEFAULT_PRJ" ]; then + PRJ="$DEFAULT_PRJ" cd "$PRJ" || { echo "Project root not found: $PRJ"; exit 127; } fi + """ + } else { + projectSection = """ + PRJ=\(self.shellQuote(userPRJ)) + cd \(self.shellQuote(userPRJ)) || { echo "Project root not found: \(userPRJ)"; exit 127; } + """ + } + + let scriptBody = """ + PATH=\(exportedPath); + CLI=""; + \(projectSection) if command -v clawdis >/dev/null 2>&1; then + CLI="$(command -v clawdis)" clawdis \(quotedArgs); elif [ -n "${PRJ:-}" ] && [ -f "$PRJ/bin/clawdis.js" ] && command -v node >/dev/null 2>&1; then + CLI="node $PRJ/bin/clawdis.js" node "$PRJ/bin/clawdis.js" \(quotedArgs); elif command -v pnpm >/dev/null 2>&1; then + CLI="pnpm --silent clawdis" pnpm --silent clawdis \(quotedArgs); else echo "clawdis CLI missing on remote host"; exit 127; diff --git a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift index 1da5af7ea..14b69dab9 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift @@ -284,27 +284,6 @@ final class VoiceWakeOverlayController: ObservableObject { ]) } } - -private struct CloseHoverButton: View { - var onClose: () -> Void - - var body: some View { - Button(action: self.onClose) { - Image(systemName: "xmark") - .font(.system(size: 12, weight: .bold)) - .foregroundColor(Color.white.opacity(0.85)) - .frame(width: 22, height: 22) - .background(Color.black.opacity(0.42)) - .clipShape(Circle()) - .shadow(color: Color.black.opacity(0.4), radius: 8, y: 2) - } - .buttonStyle(.plain) - .focusable(false) - .contentShape(Circle()) - .padding(6) - } -} - private struct VoiceWakeOverlayView: View { @ObservedObject var controller: VoiceWakeOverlayController @FocusState private var textFocused: Bool @@ -334,7 +313,7 @@ private struct VoiceWakeOverlayView: View { self.controller.sendNow() }) .focused(self.$textFocused) - .frame(minHeight: 32, maxHeight: .infinity) + .frame(maxWidth: .infinity, minHeight: 32, maxHeight: .infinity, alignment: .topLeading) .id("editing") } else { VibrantLabelView( @@ -343,7 +322,7 @@ private struct VoiceWakeOverlayView: View { self.controller.userBeganEditing() self.textFocused = true }) - .frame(minHeight: 32, maxHeight: .infinity) + .frame(maxWidth: .infinity, minHeight: 32, maxHeight: .infinity, alignment: .topLeading) .id("display") } @@ -450,7 +429,7 @@ private struct TranscriptTextView: NSViewRepresentable { let scroll = NSScrollView() scroll.drawsBackground = false scroll.borderType = .noBorder - scroll.hasVerticalScroller = self.isOverflowing + scroll.hasVerticalScroller = true scroll.autohidesScrollers = true scroll.scrollerStyle = .overlay scroll.hasHorizontalScroller = false @@ -502,8 +481,7 @@ private struct VibrantLabelView: NSViewRepresentable { var onTap: () -> Void func makeNSView(context: Context) -> NSView { - let display = self.attributed.strippingForegroundColor() - let label = NSTextField(labelWithAttributedString: display) + let label = NSTextField(labelWithAttributedString: self.attributed) label.isEditable = false label.isBordered = false label.drawsBackground = false @@ -514,7 +492,9 @@ private struct VibrantLabelView: NSViewRepresentable { label.cell?.isScrollable = false label.setContentHuggingPriority(.defaultLow, for: .horizontal) label.setContentCompressionResistancePriority(.defaultLow, for: .horizontal) - label.textColor = .textColor + label.setContentHuggingPriority(.required, for: .vertical) + label.setContentCompressionResistancePriority(.required, for: .vertical) + label.textColor = .labelColor let container = ClickCatcher(onTap: onTap) container.addSubview(label) @@ -532,7 +512,7 @@ private struct VibrantLabelView: NSViewRepresentable { func updateNSView(_ nsView: NSView, context: Context) { guard let container = nsView as? ClickCatcher, let label = container.subviews.first as? NSTextField else { return } - label.attributedStringValue = self.attributed.strippingForegroundColor() + label.attributedStringValue = self.attributed } } @@ -588,15 +568,6 @@ private struct CloseButtonOverlay: View { .allowsHitTesting(isVisible) } } -} - -private extension NSAttributedString { - func strippingForegroundColor() -> NSAttributedString { - let mutable = NSMutableAttributedString(attributedString: self) - mutable.removeAttribute(.foregroundColor, range: NSRange(location: 0, length: mutable.length)) - return mutable - } -} private final class TranscriptNSTextView: NSTextView { var onSend: (() -> Void)? diff --git a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift index 3153c74a5..eddb50495 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift @@ -17,6 +17,7 @@ actor VoiceWakeRuntime { private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? private var recognitionTask: SFSpeechRecognitionTask? private var lastHeard: Date? + private var noiseFloorRMS: Double = 1e-4 private var captureStartedAt: Date? private var captureTask: Task? private var capturedTranscript: String = "" @@ -36,6 +37,19 @@ actor VoiceWakeRuntime { // Maximum capture duration from trigger until we force-send, to avoid runaway sessions. private let captureHardStop: TimeInterval = 120.0 private let debounceAfterSend: TimeInterval = 0.35 + // Voice activity detection parameters (RMS-based). + private let minSpeechRMS: Double = 1e-3 + private let speechBoostFactor: Double = 6.0 // how far above noise floor we require to mark speech + + /// Stops the active Speech pipeline without clearing the stored config, so we can restart cleanly. + private func haltRecognitionPipeline() { + self.recognitionTask?.cancel() + self.recognitionTask = nil + self.recognitionRequest?.endAudio() + self.recognitionRequest = nil + self.audioEngine.inputNode.removeTap(onBus: 0) + self.audioEngine.stop() + } struct RuntimeConfig: Equatable { let triggers: [String] @@ -94,8 +108,11 @@ actor VoiceWakeRuntime { let input = self.audioEngine.inputNode let format = input.outputFormat(forBus: 0) input.removeTap(onBus: 0) - input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in + input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak self, weak request] buffer, _ in request?.append(buffer) + if let rms = Self.rmsLevel(buffer: buffer) { + Task { await self?.noteAudioLevel(rms: rms) } + } } self.audioEngine.prepare() @@ -266,6 +283,8 @@ actor VoiceWakeRuntime { self.captureTask = nil let finalTranscript = self.capturedTranscript.trimmingCharacters(in: .whitespacesAndNewlines) + // Stop further recognition events so we don't retrigger immediately with buffered audio. + self.haltRecognitionPipeline() self.capturedTranscript = "" self.captureStartedAt = nil self.lastHeard = nil