macos: polish voice overlay and remote command handling

This commit is contained in:
Peter Steinberger
2025-12-08 22:23:24 +01:00
parent 9bde7a6daa
commit 04b5002d8f
4 changed files with 58 additions and 59 deletions

View File

@@ -170,27 +170,21 @@ final class ControlChannel: ObservableObject {
self.connection = conn self.connection = conn
try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
var resumed = false conn.stateUpdateHandler = { [weak self, weak conn] state in
let resume: (Result<Void, Error>) -> Void = { result in guard let self else { return }
guard !resumed else { return }
resumed = true
switch result {
case .success: cont.resume(returning: ())
case let .failure(err): cont.resume(throwing: err)
}
}
conn.stateUpdateHandler = { state in
switch state { switch state {
case .ready: case .ready:
Task { @MainActor in self.state = .connected } Task { @MainActor in self.state = .connected }
resume(.success(())) conn?.stateUpdateHandler = nil
cont.resume(returning: ())
case let .failed(err): case let .failed(err):
Task { @MainActor in self.state = .degraded(err.localizedDescription) } Task { @MainActor in self.state = .degraded(err.localizedDescription) }
resume(.failure(err)) conn?.stateUpdateHandler = nil
cont.resume(throwing: err)
case let .waiting(err): case let .waiting(err):
Task { @MainActor in self.state = .degraded(err.localizedDescription) } Task { @MainActor in self.state = .degraded(err.localizedDescription) }
resume(.failure(err)) conn?.stateUpdateHandler = nil
cont.resume(throwing: err)
default: default:
break break
} }

View File

@@ -350,21 +350,36 @@ enum CommandResolver {
// Run the real clawdis CLI on the remote host; do not fall back to clawdis-mac. // Run the real clawdis CLI on the remote host; do not fall back to clawdis-mac.
let exportedPath = "/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Users/steipete/Library/pnpm:$PATH" let exportedPath = "/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Users/steipete/Library/pnpm:$PATH"
let quotedArgs = ([subcommand] + extraArgs).map(self.shellQuote).joined(separator: " ") let quotedArgs = ([subcommand] + extraArgs).map(self.shellQuote).joined(separator: " ")
let userPRJ = settings.projectRoot let userPRJ = settings.projectRoot.trimmingCharacters(in: .whitespacesAndNewlines)
let prjInit = userPRJ.isEmpty ? "" : "PRJ=\(self.shellQuote(userPRJ));"
let scriptBody = """ let projectSection: String
PATH=\(exportedPath); if userPRJ.isEmpty {
\(prjInit) projectSection = """
DEFAULT_PRJ="$HOME/Projects/clawdis" DEFAULT_PRJ="$HOME/Projects/clawdis"
if [ -z "${PRJ:-}" ] && [ -d "$DEFAULT_PRJ" ]; then PRJ="$DEFAULT_PRJ"; fi if [ -d "$DEFAULT_PRJ" ]; then
if [ -n "${PRJ:-}" ]; then PRJ="$DEFAULT_PRJ"
cd "$PRJ" || { echo "Project root not found: $PRJ"; exit 127; } cd "$PRJ" || { echo "Project root not found: $PRJ"; exit 127; }
fi fi
"""
} else {
// Refer to the user-supplied project root only through "$PRJ" inside the remote script.
// The raw value must not be interpolated into the double-quoted echo: quotes, `$` or
// backticks in the path would be evaluated by the remote shell or break the script.
projectSection = """
PRJ=\(self.shellQuote(userPRJ))
cd "$PRJ" || { echo "Project root not found: $PRJ"; exit 127; }
"""
}
let scriptBody = """
PATH=\(exportedPath);
CLI="";
\(projectSection)
if command -v clawdis >/dev/null 2>&1; then if command -v clawdis >/dev/null 2>&1; then
CLI="$(command -v clawdis)"
clawdis \(quotedArgs); clawdis \(quotedArgs);
elif [ -n "${PRJ:-}" ] && [ -f "$PRJ/bin/clawdis.js" ] && command -v node >/dev/null 2>&1; then elif [ -n "${PRJ:-}" ] && [ -f "$PRJ/bin/clawdis.js" ] && command -v node >/dev/null 2>&1; then
CLI="node $PRJ/bin/clawdis.js"
node "$PRJ/bin/clawdis.js" \(quotedArgs); node "$PRJ/bin/clawdis.js" \(quotedArgs);
elif command -v pnpm >/dev/null 2>&1; then elif command -v pnpm >/dev/null 2>&1; then
CLI="pnpm --silent clawdis"
pnpm --silent clawdis \(quotedArgs); pnpm --silent clawdis \(quotedArgs);
else else
echo "clawdis CLI missing on remote host"; exit 127; echo "clawdis CLI missing on remote host"; exit 127;

View File

@@ -284,27 +284,6 @@ final class VoiceWakeOverlayController: ObservableObject {
]) ])
} }
} }
/// Small circular "xmark" button that calls `onClose` when activated.
private struct CloseHoverButton: View {
    /// Invoked when the button is clicked.
    var onClose: () -> Void

    var body: some View {
        Button {
            self.onClose()
        } label: {
            self.glyph
        }
        .buttonStyle(.plain)
        .focusable(false)
        // Hit-testing shape is a circle, applied before the outer padding.
        .contentShape(Circle())
        .padding(6)
    }

    /// The button's label: a bold white cross on a translucent black disc with a drop shadow.
    private var glyph: some View {
        Image(systemName: "xmark")
            .font(.system(size: 12, weight: .bold))
            .foregroundColor(Color.white.opacity(0.85))
            .frame(width: 22, height: 22)
            .background(Color.black.opacity(0.42))
            .clipShape(Circle())
            .shadow(color: Color.black.opacity(0.4), radius: 8, y: 2)
    }
}
private struct VoiceWakeOverlayView: View { private struct VoiceWakeOverlayView: View {
@ObservedObject var controller: VoiceWakeOverlayController @ObservedObject var controller: VoiceWakeOverlayController
@FocusState private var textFocused: Bool @FocusState private var textFocused: Bool
@@ -334,7 +313,7 @@ private struct VoiceWakeOverlayView: View {
self.controller.sendNow() self.controller.sendNow()
}) })
.focused(self.$textFocused) .focused(self.$textFocused)
.frame(minHeight: 32, maxHeight: .infinity) .frame(maxWidth: .infinity, minHeight: 32, maxHeight: .infinity, alignment: .topLeading)
.id("editing") .id("editing")
} else { } else {
VibrantLabelView( VibrantLabelView(
@@ -343,7 +322,7 @@ private struct VoiceWakeOverlayView: View {
self.controller.userBeganEditing() self.controller.userBeganEditing()
self.textFocused = true self.textFocused = true
}) })
.frame(minHeight: 32, maxHeight: .infinity) .frame(maxWidth: .infinity, minHeight: 32, maxHeight: .infinity, alignment: .topLeading)
.id("display") .id("display")
} }
@@ -450,7 +429,7 @@ private struct TranscriptTextView: NSViewRepresentable {
let scroll = NSScrollView() let scroll = NSScrollView()
scroll.drawsBackground = false scroll.drawsBackground = false
scroll.borderType = .noBorder scroll.borderType = .noBorder
scroll.hasVerticalScroller = self.isOverflowing scroll.hasVerticalScroller = true
scroll.autohidesScrollers = true scroll.autohidesScrollers = true
scroll.scrollerStyle = .overlay scroll.scrollerStyle = .overlay
scroll.hasHorizontalScroller = false scroll.hasHorizontalScroller = false
@@ -502,8 +481,7 @@ private struct VibrantLabelView: NSViewRepresentable {
var onTap: () -> Void var onTap: () -> Void
func makeNSView(context: Context) -> NSView { func makeNSView(context: Context) -> NSView {
let display = self.attributed.strippingForegroundColor() let label = NSTextField(labelWithAttributedString: self.attributed)
let label = NSTextField(labelWithAttributedString: display)
label.isEditable = false label.isEditable = false
label.isBordered = false label.isBordered = false
label.drawsBackground = false label.drawsBackground = false
@@ -514,7 +492,9 @@ private struct VibrantLabelView: NSViewRepresentable {
label.cell?.isScrollable = false label.cell?.isScrollable = false
label.setContentHuggingPriority(.defaultLow, for: .horizontal) label.setContentHuggingPriority(.defaultLow, for: .horizontal)
label.setContentCompressionResistancePriority(.defaultLow, for: .horizontal) label.setContentCompressionResistancePriority(.defaultLow, for: .horizontal)
label.textColor = .textColor label.setContentHuggingPriority(.required, for: .vertical)
label.setContentCompressionResistancePriority(.required, for: .vertical)
label.textColor = .labelColor
let container = ClickCatcher(onTap: onTap) let container = ClickCatcher(onTap: onTap)
container.addSubview(label) container.addSubview(label)
@@ -532,7 +512,7 @@ private struct VibrantLabelView: NSViewRepresentable {
func updateNSView(_ nsView: NSView, context: Context) { func updateNSView(_ nsView: NSView, context: Context) {
guard let container = nsView as? ClickCatcher, guard let container = nsView as? ClickCatcher,
let label = container.subviews.first as? NSTextField else { return } let label = container.subviews.first as? NSTextField else { return }
label.attributedStringValue = self.attributed.strippingForegroundColor() label.attributedStringValue = self.attributed
} }
} }
@@ -588,15 +568,6 @@ private struct CloseButtonOverlay: View {
.allowsHitTesting(isVisible) .allowsHitTesting(isVisible)
} }
} }
}
private extension NSAttributedString {
    /// Returns a copy of the receiver with the `.foregroundColor` attribute removed
    /// across its entire length; all other attributes and the text are left as they were.
    func strippingForegroundColor() -> NSAttributedString {
        let stripped = NSMutableAttributedString(attributedString: self)
        let wholeString = NSRange(location: 0, length: stripped.length)
        stripped.removeAttribute(.foregroundColor, range: wholeString)
        return stripped
    }
}
private final class TranscriptNSTextView: NSTextView { private final class TranscriptNSTextView: NSTextView {
var onSend: (() -> Void)? var onSend: (() -> Void)?

View File

@@ -17,6 +17,7 @@ actor VoiceWakeRuntime {
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask? private var recognitionTask: SFSpeechRecognitionTask?
private var lastHeard: Date? private var lastHeard: Date?
private var noiseFloorRMS: Double = 1e-4
private var captureStartedAt: Date? private var captureStartedAt: Date?
private var captureTask: Task<Void, Never>? private var captureTask: Task<Void, Never>?
private var capturedTranscript: String = "" private var capturedTranscript: String = ""
@@ -36,6 +37,19 @@ actor VoiceWakeRuntime {
// Maximum capture duration from trigger until we force-send, to avoid runaway sessions. // Maximum capture duration from trigger until we force-send, to avoid runaway sessions.
private let captureHardStop: TimeInterval = 120.0 private let captureHardStop: TimeInterval = 120.0
private let debounceAfterSend: TimeInterval = 0.35 private let debounceAfterSend: TimeInterval = 0.35
// Voice activity detection parameters (RMS-based).
private let minSpeechRMS: Double = 1e-3
private let speechBoostFactor: Double = 6.0 // how far above noise floor we require to mark speech
/// Stops the active Speech pipeline without clearing the stored config, so we can restart cleanly.
///
/// Cancels and drops the current recognition task, ends and drops the current recognition
/// request, removes the tap on bus 0 of the audio engine's input node, and stops the engine.
/// Takes no parameters and returns nothing; the task and request are optional-chained, so the
/// corresponding calls are skipped when either is already `nil`.
private func haltRecognitionPipeline() {
// Cancel the in-flight recognition task and release our reference to it.
self.recognitionTask?.cancel()
self.recognitionTask = nil
// Signal end of audio on the request, then release it.
self.recognitionRequest?.endAudio()
self.recognitionRequest = nil
// Detach the input tap on bus 0 and stop the audio engine.
self.audioEngine.inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
}
struct RuntimeConfig: Equatable { struct RuntimeConfig: Equatable {
let triggers: [String] let triggers: [String]
@@ -94,8 +108,11 @@ actor VoiceWakeRuntime {
let input = self.audioEngine.inputNode let input = self.audioEngine.inputNode
let format = input.outputFormat(forBus: 0) let format = input.outputFormat(forBus: 0)
input.removeTap(onBus: 0) input.removeTap(onBus: 0)
input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak self, weak request] buffer, _ in
request?.append(buffer) request?.append(buffer)
if let rms = Self.rmsLevel(buffer: buffer) {
Task { await self?.noteAudioLevel(rms: rms) }
}
} }
self.audioEngine.prepare() self.audioEngine.prepare()
@@ -266,6 +283,8 @@ actor VoiceWakeRuntime {
self.captureTask = nil self.captureTask = nil
let finalTranscript = self.capturedTranscript.trimmingCharacters(in: .whitespacesAndNewlines) let finalTranscript = self.capturedTranscript.trimmingCharacters(in: .whitespacesAndNewlines)
// Stop further recognition events so we don't retrigger immediately with buffered audio.
self.haltRecognitionPipeline()
self.capturedTranscript = "" self.capturedTranscript = ""
self.captureStartedAt = nil self.captureStartedAt = nil
self.lastHeard = nil self.lastHeard = nil