macos: polish voice overlay and remote command handling

Peter Steinberger
2025-12-08 22:23:24 +01:00
parent 9bde7a6daa
commit 04b5002d8f
4 changed files with 58 additions and 59 deletions

View File

@@ -170,27 +170,21 @@ final class ControlChannel: ObservableObject {
self.connection = conn
try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
var resumed = false
let resume: (Result<Void, Error>) -> Void = { result in
guard !resumed else { return }
resumed = true
switch result {
case .success: cont.resume(returning: ())
case let .failure(err): cont.resume(throwing: err)
}
}
conn.stateUpdateHandler = { state in
conn.stateUpdateHandler = { [weak self, weak conn] state in
guard let self else { return }
switch state {
case .ready:
Task { @MainActor in self.state = .connected }
resume(.success(()))
conn?.stateUpdateHandler = nil
cont.resume(returning: ())
case let .failed(err):
Task { @MainActor in self.state = .degraded(err.localizedDescription) }
resume(.failure(err))
conn?.stateUpdateHandler = nil
cont.resume(throwing: err)
case let .waiting(err):
Task { @MainActor in self.state = .degraded(err.localizedDescription) }
resume(.failure(err))
conn?.stateUpdateHandler = nil
cont.resume(throwing: err)
default:
break
}
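
Note: a CheckedContinuation must be resumed exactly once. The old code enforced that with a `resumed` flag; the new code instead detaches the state handler before resuming, so no further callbacks can fire. A minimal standalone sketch of the same pattern (`waitUntilReady` is illustrative, not from this codebase), assuming Network.framework delivers state updates serially on the connection's queue:

import Network

/// Awaits an NWConnection until it is ready. Detaching the handler before
/// the first resume guarantees the continuation is resumed exactly once.
func waitUntilReady(_ conn: NWConnection) async throws {
    try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
        conn.stateUpdateHandler = { [weak conn] state in
            switch state {
            case .ready:
                conn?.stateUpdateHandler = nil // no more callbacks, so no double resume
                cont.resume(returning: ())
            case let .failed(err), let .waiting(err):
                // .waiting is treated as fatal here, matching the change above.
                conn?.stateUpdateHandler = nil
                cont.resume(throwing: err)
            default:
                break
            }
        }
        conn.start(queue: DispatchQueue(label: "waitUntilReady"))
    }
}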

View File

@@ -350,21 +350,36 @@ enum CommandResolver {
// Run the real clawdis CLI on the remote host; do not fall back to clawdis-mac.
let exportedPath = "/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Users/steipete/Library/pnpm:$PATH"
let quotedArgs = ([subcommand] + extraArgs).map(self.shellQuote).joined(separator: " ")
let userPRJ = settings.projectRoot
let prjInit = userPRJ.isEmpty ? "" : "PRJ=\(self.shellQuote(userPRJ));"
let scriptBody = """
PATH=\(exportedPath);
\(prjInit)
let userPRJ = settings.projectRoot.trimmingCharacters(in: .whitespacesAndNewlines)
let projectSection: String
if userPRJ.isEmpty {
projectSection = """
DEFAULT_PRJ="$HOME/Projects/clawdis"
if [ -z "${PRJ:-}" ] && [ -d "$DEFAULT_PRJ" ]; then PRJ="$DEFAULT_PRJ"; fi
if [ -n "${PRJ:-}" ]; then
cd "$PRJ" || { echo "Project root not found: $PRJ"; exit 127; }
fi
"""
} else {
projectSection = """
PRJ=\(self.shellQuote(userPRJ))
cd \(self.shellQuote(userPRJ)) || { echo "Project root not found: \(userPRJ)"; exit 127; }
"""
}
let scriptBody = """
PATH=\(exportedPath);
CLI="";
\(projectSection)
if command -v clawdis >/dev/null 2>&1; then
CLI="$(command -v clawdis)"
clawdis \(quotedArgs);
elif [ -n "${PRJ:-}" ] && [ -f "$PRJ/bin/clawdis.js" ] && command -v node >/dev/null 2>&1; then
CLI="node $PRJ/bin/clawdis.js"
node "$PRJ/bin/clawdis.js" \(quotedArgs);
elif command -v pnpm >/dev/null 2>&1; then
CLI="pnpm --silent clawdis"
pnpm --silent clawdis \(quotedArgs);
else
echo "clawdis CLI missing on remote host"; exit 127;

View File

@@ -284,27 +284,6 @@ final class VoiceWakeOverlayController: ObservableObject {
])
}
}
private struct CloseHoverButton: View {
var onClose: () -> Void
var body: some View {
Button(action: self.onClose) {
Image(systemName: "xmark")
.font(.system(size: 12, weight: .bold))
.foregroundColor(Color.white.opacity(0.85))
.frame(width: 22, height: 22)
.background(Color.black.opacity(0.42))
.clipShape(Circle())
.shadow(color: Color.black.opacity(0.4), radius: 8, y: 2)
}
.buttonStyle(.plain)
.focusable(false)
.contentShape(Circle())
.padding(6)
}
}
private struct VoiceWakeOverlayView: View {
@ObservedObject var controller: VoiceWakeOverlayController
@FocusState private var textFocused: Bool
@@ -334,7 +313,7 @@ private struct VoiceWakeOverlayView: View {
self.controller.sendNow()
})
.focused(self.$textFocused)
.frame(minHeight: 32, maxHeight: .infinity)
.frame(maxWidth: .infinity, minHeight: 32, maxHeight: .infinity, alignment: .topLeading)
.id("editing")
} else {
VibrantLabelView(
@@ -343,7 +322,7 @@ private struct VoiceWakeOverlayView: View {
self.controller.userBeganEditing()
self.textFocused = true
})
.frame(minHeight: 32, maxHeight: .infinity)
.frame(maxWidth: .infinity, minHeight: 32, maxHeight: .infinity, alignment: .topLeading)
.id("display")
}
@@ -450,7 +429,7 @@ private struct TranscriptTextView: NSViewRepresentable {
let scroll = NSScrollView()
scroll.drawsBackground = false
scroll.borderType = .noBorder
scroll.hasVerticalScroller = self.isOverflowing
scroll.hasVerticalScroller = true
scroll.autohidesScrollers = true
scroll.scrollerStyle = .overlay
scroll.hasHorizontalScroller = false
@@ -502,8 +481,7 @@ private struct VibrantLabelView: NSViewRepresentable {
var onTap: () -> Void
func makeNSView(context: Context) -> NSView {
let display = self.attributed.strippingForegroundColor()
let label = NSTextField(labelWithAttributedString: display)
let label = NSTextField(labelWithAttributedString: self.attributed)
label.isEditable = false
label.isBordered = false
label.drawsBackground = false
@@ -514,7 +492,9 @@ private struct VibrantLabelView: NSViewRepresentable {
label.cell?.isScrollable = false
label.setContentHuggingPriority(.defaultLow, for: .horizontal)
label.setContentCompressionResistancePriority(.defaultLow, for: .horizontal)
label.textColor = .textColor
label.setContentHuggingPriority(.required, for: .vertical)
label.setContentCompressionResistancePriority(.required, for: .vertical)
label.textColor = .labelColor
let container = ClickCatcher(onTap: onTap)
container.addSubview(label)
@@ -532,7 +512,7 @@ private struct VibrantLabelView: NSViewRepresentable {
func updateNSView(_ nsView: NSView, context: Context) {
guard let container = nsView as? ClickCatcher,
let label = container.subviews.first as? NSTextField else { return }
label.attributedStringValue = self.attributed.strippingForegroundColor()
label.attributedStringValue = self.attributed
}
}
@@ -588,15 +568,6 @@ private struct CloseButtonOverlay: View {
.allowsHitTesting(isVisible)
}
}
}
private extension NSAttributedString {
func strippingForegroundColor() -> NSAttributedString {
let mutable = NSMutableAttributedString(attributedString: self)
mutable.removeAttribute(.foregroundColor, range: NSRange(location: 0, length: mutable.length))
return mutable
}
}
private final class TranscriptNSTextView: NSTextView {
var onSend: (() -> Void)?
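
Note: `ClickCatcher` is referenced above but is not part of this diff; presumably it is a small NSView that forwards mouse-down events to a closure. A sketch of that assumed shape (the real class may differ):

import AppKit

/// Container view that invokes a callback on mouse-down.
/// Assumed body for the `ClickCatcher` referenced above.
final class ClickCatcher: NSView {
    private let onTap: () -> Void

    init(onTap: @escaping () -> Void) {
        self.onTap = onTap
        super.init(frame: .zero)
    }

    @available(*, unavailable)
    required init?(coder: NSCoder) { fatalError("init(coder:) is not supported") }

    override func mouseDown(with event: NSEvent) {
        self.onTap()
    }
}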

View File

@@ -17,6 +17,7 @@ actor VoiceWakeRuntime {
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var lastHeard: Date?
private var noiseFloorRMS: Double = 1e-4
private var captureStartedAt: Date?
private var captureTask: Task<Void, Never>?
private var capturedTranscript: String = ""
@@ -36,6 +37,19 @@ actor VoiceWakeRuntime {
// Maximum capture duration from trigger until we force-send, to avoid runaway sessions.
private let captureHardStop: TimeInterval = 120.0
private let debounceAfterSend: TimeInterval = 0.35
// Voice activity detection parameters (RMS-based).
private let minSpeechRMS: Double = 1e-3
private let speechBoostFactor: Double = 6.0 // how far above noise floor we require to mark speech
/// Stops the active Speech pipeline without clearing the stored config, so we can restart cleanly.
private func haltRecognitionPipeline() {
self.recognitionTask?.cancel()
self.recognitionTask = nil
self.recognitionRequest?.endAudio()
self.recognitionRequest = nil
self.audioEngine.inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
}
struct RuntimeConfig: Equatable {
let triggers: [String]
@@ -94,8 +108,11 @@ actor VoiceWakeRuntime {
let input = self.audioEngine.inputNode
let format = input.outputFormat(forBus: 0)
input.removeTap(onBus: 0)
input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in
input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak self, weak request] buffer, _ in
request?.append(buffer)
if let rms = Self.rmsLevel(buffer: buffer) {
Task { await self?.noteAudioLevel(rms: rms) }
}
}
self.audioEngine.prepare()
@@ -266,6 +283,8 @@ actor VoiceWakeRuntime {
self.captureTask = nil
let finalTranscript = self.capturedTranscript.trimmingCharacters(in: .whitespacesAndNewlines)
// Stop further recognition events so we don't retrigger immediately with buffered audio.
self.haltRecognitionPipeline()
self.capturedTranscript = ""
self.captureStartedAt = nil
self.lastHeard = nil
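
Note: `Self.rmsLevel(buffer:)` and `noteAudioLevel(rms:)` fall outside this hunk. Given the declared `noiseFloorRMS`, `minSpeechRMS`, and `speechBoostFactor` parameters, speech is presumably flagged when a buffer's RMS clears max(minSpeechRMS, noiseFloorRMS * speechBoostFactor). A sketch under those assumptions (bodies assumed, not from the diff):

import AVFoundation

extension VoiceWakeRuntime {
    /// Root-mean-square level of a mono float PCM buffer, or nil if unavailable.
    /// Static, so it can be called synchronously from the audio tap closure.
    static func rmsLevel(buffer: AVAudioPCMBuffer) -> Double? {
        guard let channel = buffer.floatChannelData?[0], buffer.frameLength > 0 else { return nil }
        let n = Int(buffer.frameLength)
        var sum: Double = 0
        for i in 0..<n {
            let sample = Double(channel[i])
            sum += sample * sample
        }
        return (sum / Double(n)).squareRoot()
    }

    /// Tracks a slow-moving noise floor and flags speech when the level rises
    /// well above it. Assumed logic matching the declared VAD parameters.
    func noteAudioLevel(rms: Double) {
        // Exponential moving average keeps the floor tracking ambient noise.
        self.noiseFloorRMS = self.noiseFloorRMS * 0.95 + rms * 0.05
        let threshold = max(self.minSpeechRMS, self.noiseFloorRMS * self.speechBoostFactor)
        if rms >= threshold {
            // Assumed: refresh the silence timer whenever speech is detected.
            self.lastHeard = Date()
        }
    }
}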