fix(mac): switch push-to-talk to right option
This commit is contained in:
@@ -68,7 +68,7 @@ clawdis relay --verbose
|
||||
## macOS Companion App (Clawdis.app)
|
||||
|
||||
- **On-device Voice Wake:** listens for wake words (e.g. “Claude”) using Apple’s on-device speech recognizer (macOS 26+). macOS still shows the standard Speech/Mic permissions prompt, but audio stays on device.
|
||||
- **Push-to-talk (Cmd+Fn):** hold the hotkey to speak; the voice overlay shows live partials and sends when you release.
|
||||
- **Push-to-talk (Right Option hold):** hold right Option to speak; the voice overlay shows live partials and sends when you release.
|
||||
- **Config tab:** pick the model from your local Pi model catalog (`pi-mono/packages/ai/src/models.generated.ts`), or enter a custom model ID; edit session store path and context tokens.
|
||||
- **Voice settings:** language + additional languages, mic picker, live level meter, trigger-word table, and a built-in test harness.
|
||||
- **Menu bar toggle:** enable/disable Voice Wake from the menu bar; respects Dock-icon preference.
|
||||
|
||||
@@ -3,14 +3,13 @@ import AVFoundation
|
||||
import OSLog
|
||||
import Speech
|
||||
|
||||
/// Observes Cmd+Fn and starts a push-to-talk capture while both are held.
|
||||
/// Observes right Option and starts a push-to-talk capture while it is held.
|
||||
@MainActor
|
||||
final class VoicePushToTalkHotkey {
|
||||
static let shared = VoicePushToTalkHotkey()
|
||||
|
||||
private var monitor: Any?
|
||||
private var fnDown = false
|
||||
private var commandDown = false
|
||||
private var optionDown = false // right option only
|
||||
private var active = false
|
||||
|
||||
func setEnabled(_ enabled: Bool) {
|
||||
@@ -23,7 +22,7 @@ final class VoicePushToTalkHotkey {
|
||||
|
||||
private func startMonitoring() {
|
||||
guard self.monitor == nil else { return }
|
||||
// Listen-only global monitor; Fn only surfaces on .flagsChanged and cannot be registered as a hotkey.
|
||||
// Listen-only global monitor; receiving events requires Accessibility/Input Monitoring approval.
|
||||
self.monitor = NSEvent.addGlobalMonitorForEvents(matching: .flagsChanged) { [weak self] event in
|
||||
guard let self else { return }
|
||||
self.updateModifierState(from: event)
|
||||
@@ -35,23 +34,17 @@ final class VoicePushToTalkHotkey {
|
||||
NSEvent.removeMonitor(monitor)
|
||||
self.monitor = nil
|
||||
}
|
||||
self.fnDown = false
|
||||
self.commandDown = false
|
||||
self.optionDown = false
|
||||
self.active = false
|
||||
}
|
||||
|
||||
private func updateModifierState(from event: NSEvent) {
|
||||
switch event.keyCode {
|
||||
case 63: // Fn
|
||||
self.fnDown = event.modifierFlags.contains(.function)
|
||||
case 55, 54: // Left / Right command
|
||||
self.commandDown = event.modifierFlags.contains(.command)
|
||||
default:
|
||||
break
|
||||
// Right Option (keyCode 61) acts as a hold-to-talk modifier.
|
||||
if event.keyCode == 61 {
|
||||
self.optionDown = event.modifierFlags.contains(.option)
|
||||
}
|
||||
|
||||
// “Walkie-talkie” hold is live only while right Option stays down.
|
||||
let chordActive = self.fnDown && self.commandDown
|
||||
let chordActive = self.optionDown
|
||||
if chordActive && !self.active {
|
||||
self.active = true
|
||||
Task {
|
||||
|
||||
@@ -50,8 +50,8 @@ struct VoiceWakeSettings: View {
|
||||
.disabled(!voiceWakeSupported)
|
||||
|
||||
SettingsToggleRow(
|
||||
title: "Hold Cmd+Fn to talk",
|
||||
subtitle: "Push-to-talk mode that starts listening while you hold the hotkey and shows the preview overlay.",
|
||||
title: "Hold Right Option to talk",
|
||||
subtitle: "Push-to-talk mode that starts listening while you hold the key and shows the preview overlay.",
|
||||
binding: self.$state.voicePushToTalkEnabled)
|
||||
.disabled(!voiceWakeSupported)
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ Updated: 2025-12-08 · Owners: mac app
|
||||
|
||||
## Modes
|
||||
- **Wake-word mode** (default): always-on Speech recognizer waits for trigger tokens (`swabbleTriggerWords`). On match it starts capture, shows the overlay with partial text, and auto-sends after silence.
|
||||
- **Push-to-talk (Cmd+Fn)**: hold Cmd+Fn to capture immediately—no trigger needed. The overlay appears while held; releasing finalizes and forwards after a short delay so you can tweak text.
|
||||
- **Push-to-talk (Right Option hold)**: hold the right Option key to capture immediately—no trigger needed. The overlay appears while held; releasing finalizes and forwards after a short delay so you can tweak text.
|
||||
|
||||
## Runtime behavior (wake-word)
|
||||
- Speech recognizer lives in `VoiceWakeRuntime`.
|
||||
@@ -15,11 +15,11 @@ Updated: 2025-12-08 · Owners: mac app
|
||||
- After send, recognizer restarts cleanly to listen for the next trigger.
|
||||
|
||||
## Push-to-talk specifics
|
||||
- Hotkey detection uses a global `.flagsChanged` monitor: Fn is `keyCode 63` and flagged via `.function`; Command is `keyCode 55/54`. We only **observe** events (no swallowing).
|
||||
- Hotkey detection uses a global `.flagsChanged` monitor for **right Option** (`keyCode 61` + `.option`). We only observe events (no swallowing).
|
||||
- Capture pipeline lives in `VoicePushToTalk`: starts Speech immediately, streams partials to the overlay, and calls `VoiceWakeForwarder` on release.
|
||||
- When push-to-talk starts we pause the wake-word runtime to avoid dueling audio taps; it restarts automatically after release.
|
||||
- Permissions: requires Microphone + Speech. macOS will prompt the first time; seeing events needs Accessibility approval.
|
||||
- Fn caveat: some external keyboards don’t expose Fn; fall back to a standard shortcut if needed.
|
||||
- Permissions: requires Microphone + Speech; receiving the global modifier-key events additionally needs Accessibility/Input Monitoring approval.
|
||||
- External keyboards: some may not expose right Option as expected—offer a fallback shortcut if users report misses.
|
||||
|
||||
## User-facing settings
|
||||
- **Voice Wake** toggle: enables wake-word runtime.
|
||||
|
||||
Reference in New Issue
Block a user