feat(mac): add push-to-talk hotkey
This commit is contained in:
@@ -97,6 +97,10 @@ final class AppState: ObservableObject {
|
||||
didSet { UserDefaults.standard.set(self.voiceWakeForwardCommand, forKey: voiceWakeForwardCommandKey) }
|
||||
}
|
||||
|
||||
/// Whether the push-to-talk hotkey is enabled.
/// Every assignment is mirrored into `UserDefaults.standard` under `voicePushToTalkEnabledKey`.
@Published var voicePushToTalkEnabled: Bool {
    didSet {
        UserDefaults.standard.set(
            self.voicePushToTalkEnabled,
            forKey: voicePushToTalkEnabledKey)
    }
}
|
||||
|
||||
@Published var isWorking: Bool = false
|
||||
@Published var earBoostActive: Bool = false
|
||||
@Published var heartbeatsEnabled: Bool {
|
||||
@@ -158,6 +162,9 @@ final class AppState: ObservableObject {
|
||||
.string(forKey: voiceWakeForwardTargetKey) ?? legacyTarget
|
||||
self.voiceWakeForwardIdentity = UserDefaults.standard.string(forKey: voiceWakeForwardIdentityKey) ?? ""
|
||||
|
||||
self.voicePushToTalkEnabled = UserDefaults.standard
|
||||
.object(forKey: voicePushToTalkEnabledKey) as? Bool ?? false
|
||||
|
||||
var storedForwardCommand = UserDefaults.standard
|
||||
.string(forKey: voiceWakeForwardCommandKey) ?? defaultVoiceWakeForwardCommand
|
||||
// Guard against older prefs missing flags; the forwarder depends on these for replies.
|
||||
|
||||
@@ -20,6 +20,7 @@ let voiceWakeForwardUserKey = "clawdis.voiceWakeForwardUser"
|
||||
// Preference keys, all namespaced with the "clawdis." prefix.
// `voiceWakeForwardIdentityKey`, `voiceWakeForwardCommandKey` and `voicePushToTalkEnabledKey`
// are read/written through `UserDefaults.standard` by `AppState`; the remaining keys
// presumably follow the same pattern — confirm at their call sites.

// Voice-wake forwarding.
let voiceWakeForwardPortKey = "clawdis.voiceWakeForwardPort"
let voiceWakeForwardIdentityKey = "clawdis.voiceWakeForwardIdentity"
let voiceWakeForwardCommandKey = "clawdis.voiceWakeForwardCommand"

// Push-to-talk hotkey toggle.
let voicePushToTalkEnabledKey = "clawdis.voicePushToTalkEnabled"

// Connection / remote settings.
let connectionModeKey = "clawdis.connectionMode"
let remoteTargetKey = "clawdis.remoteTarget"
let remoteIdentityKey = "clawdis.remoteIdentity"
|
||||
|
||||
@@ -92,6 +92,12 @@ private struct MenuContent: View {
|
||||
await self.loadMicrophones(force: true)
|
||||
}
|
||||
}
|
||||
.task {
|
||||
VoicePushToTalkHotkey.shared.setEnabled(voiceWakeSupported && self.state.voicePushToTalkEnabled)
|
||||
}
|
||||
.onChange(of: self.state.voicePushToTalkEnabled) { _, enabled in
|
||||
VoicePushToTalkHotkey.shared.setEnabled(voiceWakeSupported && enabled)
|
||||
}
|
||||
}
|
||||
|
||||
private func open(tab: SettingsTab) {
|
||||
|
||||
249
apps/macos/Sources/Clawdis/VoicePushToTalk.swift
Normal file
249
apps/macos/Sources/Clawdis/VoicePushToTalk.swift
Normal file
@@ -0,0 +1,249 @@
|
||||
import AppKit
|
||||
import AVFoundation
|
||||
import OSLog
|
||||
import Speech
|
||||
|
||||
/// Observes Cmd+Fn and starts a push-to-talk capture while both are held.
///
/// The chord is detected with a listen-only global `.flagsChanged` monitor. Pressing the chord
/// calls `VoicePushToTalk.shared.begin()`; releasing either key (or disabling the hotkey while
/// the chord is held) calls `VoicePushToTalk.shared.end()`.
@MainActor
final class VoicePushToTalkHotkey {
    static let shared = VoicePushToTalkHotkey()

    /// Opaque token returned by `NSEvent.addGlobalMonitorForEvents`; `nil` while not monitoring.
    private var monitor: Any?
    /// Last observed state of the Fn modifier.
    private var fnDown = false
    /// Last observed state of the Command modifier (either side).
    private var commandDown = false
    /// `true` between the `begin()` and `end()` calls issued for the current chord press.
    private var active = false

    /// Installs the global monitor when `enabled` is `true`, removes it otherwise.
    /// Safe to call repeatedly with the same value.
    func setEnabled(_ enabled: Bool) {
        if enabled {
            self.startMonitoring()
        } else {
            self.stopMonitoring()
        }
    }

    private func startMonitoring() {
        guard self.monitor == nil else { return }
        // Listen-only global monitor; Fn only surfaces on .flagsChanged and cannot be registered as a hotkey.
        self.monitor = NSEvent.addGlobalMonitorForEvents(matching: .flagsChanged) { [weak self] event in
            guard let self else { return }
            self.updateModifierState(from: event)
        }
    }

    private func stopMonitoring() {
        if let monitor {
            NSEvent.removeMonitor(monitor)
            self.monitor = nil
        }
        self.fnDown = false
        self.commandDown = false
        // If the hotkey is disabled while the chord is still held, finish the running capture
        // instead of silently forgetting about it (no further key events will arrive).
        self.setChordActive(false)
    }

    /// Refreshes the modifier state from `event` and starts/stops the capture on chord edges.
    private func updateModifierState(from event: NSEvent) {
        // Read the complete modifier state carried by every event rather than toggling per
        // keyCode: a global monitor is not called for events delivered to this app itself, so a
        // key released while the app is frontmost would otherwise leave a stale "down" flag and
        // let a later lone Fn (or Cmd) press trigger the chord.
        let flags = event.modifierFlags.intersection(.deviceIndependentFlagsMask)
        self.fnDown = flags.contains(.function)
        self.commandDown = flags.contains(.command)

        // “Walkie-talkie” chord is live only while both keys stay down.
        self.setChordActive(self.fnDown && self.commandDown)
    }

    /// Issues `begin()` / `end()` exactly once per transition of the chord state.
    private func setChordActive(_ chordActive: Bool) {
        guard chordActive != self.active else { return }
        self.active = chordActive
        if chordActive {
            Task {
                await VoicePushToTalk.shared.begin()
            }
        } else {
            Task {
                await VoicePushToTalk.shared.end()
            }
        }
    }
}
|
||||
|
||||
/// Short-lived speech recognizer that records while the hotkey is held.
///
/// `begin()` pauses the wake-word runtime, shows the overlay and streams microphone buffers into
/// an `SFSpeechAudioBufferRecognitionRequest`; `end()` tears the capture down, presents the final
/// transcript through `VoiceWakeOverlayController` and asks the wake-word runtime to refresh.
///
/// Both entry points contain suspension points, so calls can interleave on this actor. A
/// generation counter lets a suspended `begin()` notice that `end()` (or a newer `begin()`)
/// ran in the meantime and abandon its start-up.
actor VoicePushToTalk {
    static let shared = VoicePushToTalk()

    private static let logger = Logger(subsystem: "com.steipete.clawdis", category: "voicewake.ptt")

    private var recognizer: SFSpeechRecognizer?
    private var audioEngine = AVAudioEngine()
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    /// Text taken from the last final recognition result.
    private var committed: String = ""
    /// Text from the latest partial result that extends `committed`.
    private var volatile: String = ""
    private var activeConfig: Config?
    private var isCapturing = false
    /// Bumped by every accepted `begin()` and by every `end()`; a `begin()` whose ticket no
    /// longer matches after a suspension has been superseded and must stop.
    private var generation = 0

    /// Settings snapshot taken on the main actor when a capture starts.
    private struct Config {
        // NOTE(review): `micID` is captured but never read inside this type — the engine's
        // default input node is used. Confirm whether device selection is still pending.
        let micID: String?
        let localeID: String?
        let forwardConfig: VoiceWakeForwardConfig
    }

    /// Starts a capture. Does nothing when voice wake is unsupported, a capture is already
    /// running, permissions are denied, or `end()` arrives before start-up has finished.
    func begin() async {
        guard voiceWakeSupported else { return }
        guard !self.isCapturing else { return }

        self.generation &+= 1
        let ticket = self.generation

        // Ensure permissions up front.
        let granted = await PermissionManager.ensureVoiceWakePermissions(interactive: true)
        guard granted else { return }
        // The chord may have been released (or pressed again) while the prompt was pending.
        guard ticket == self.generation, !self.isCapturing else { return }

        let config = await MainActor.run { self.makeConfig() }
        guard ticket == self.generation, !self.isCapturing else { return }

        self.activeConfig = config
        self.isCapturing = true
        await VoiceWakeRuntime.shared.pauseForPushToTalk()
        // From here on a ticket mismatch means `end()` already finalized this session.
        guard ticket == self.generation else { return }
        await MainActor.run {
            VoiceWakeOverlayController.shared.showPartial(transcript: "")
        }
        guard ticket == self.generation else { return }

        do {
            try await self.startRecognition(localeID: config.localeID)
        } catch {
            Self.logger.error("push-to-talk failed to start: \(error.localizedDescription, privacy: .public)")
            // Undo everything done so far: release audio resources, hide the overlay and
            // hand the microphone back to the wake-word runtime that was paused above.
            self.tearDownRecognition()
            self.isCapturing = false
            self.activeConfig = nil
            await MainActor.run {
                VoiceWakeOverlayController.shared.dismiss()
            }
            await self.resumeWakeRuntime()
        }
    }

    /// Stops the running capture (if any), presents the final transcript and resumes the
    /// wake-word runtime. Also cancels a `begin()` that is still starting up.
    func end() async {
        // Invalidate any `begin()` currently suspended so it does not start after the release.
        self.generation &+= 1
        guard self.isCapturing else { return }
        self.isCapturing = false

        self.tearDownRecognition()

        // Snapshot and reset the session state before the first suspension point so a capture
        // that starts while this method is awaiting cannot be clobbered afterwards.
        let committedText = self.committed
        let volatileText = self.volatile
        let cachedForward = self.activeConfig?.forwardConfig
        self.committed = ""
        self.volatile = ""
        self.activeConfig = nil

        let finalText = (committedText + volatileText).trimmingCharacters(in: .whitespacesAndNewlines)
        let attributed = Self.makeAttributed(committed: committedText, volatile: volatileText, isFinal: true)
        let forward: VoiceWakeForwardConfig
        if let cachedForward {
            forward = cachedForward
        } else {
            forward = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig }
        }

        await MainActor.run {
            VoiceWakeOverlayController.shared.presentFinal(
                transcript: finalText,
                forwardConfig: forward,
                delay: finalText.isEmpty ? 0.0 : 0.8,
                attributed: attributed)
        }

        // Resume the wake-word runtime after push-to-talk finishes.
        await self.resumeWakeRuntime()
    }

    // MARK: - Private

    /// Cancels the recognition task, closes the audio request and stops the engine.
    /// Safe to call when nothing (or only part of the pipeline) was started.
    private func tearDownRecognition() {
        self.recognitionTask?.cancel()
        self.recognitionRequest?.endAudio()
        self.recognitionRequest = nil
        self.recognitionTask = nil
        self.audioEngine.inputNode.removeTap(onBus: 0)
        self.audioEngine.stop()
    }

    /// Kicks off a `VoiceWakeRuntime.refresh` from the main actor without waiting for it.
    private func resumeWakeRuntime() async {
        _ = await MainActor.run {
            Task {
                await VoiceWakeRuntime.shared.refresh(state: AppStateStore.shared)
            }
        }
    }

    /// Builds the recognizer, taps the input node and starts streaming partial results.
    /// - Parameter localeID: Locale identifier for the recognizer; falls back to the current locale.
    /// - Throws: An `NSError` in the `VoicePushToTalk` domain when the recognizer or the audio
    ///   input is unavailable, or whatever `AVAudioEngine.start()` throws.
    private func startRecognition(localeID: String?) async throws {
        let locale = localeID.flatMap { Locale(identifier: $0) } ?? Locale(identifier: Locale.current.identifier)
        let recognizer = SFSpeechRecognizer(locale: locale)
        self.recognizer = recognizer
        guard let recognizer, recognizer.isAvailable else {
            throw NSError(domain: "VoicePushToTalk", code: 1, userInfo: [NSLocalizedDescriptionKey: "Recognizer unavailable"])
        }

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true
        self.recognitionRequest = request

        let input = self.audioEngine.inputNode
        let format = input.outputFormat(forBus: 0)
        // Without a usable input device the format reports 0 Hz / 0 channels, and installing a
        // tap with it raises an Objective-C exception; fail with a catchable error instead.
        guard format.sampleRate > 0, format.channelCount > 0 else {
            throw NSError(domain: "VoicePushToTalk", code: 2, userInfo: [NSLocalizedDescriptionKey: "Audio input unavailable"])
        }
        input.removeTap(onBus: 0)
        // Pipe raw mic buffers into the Speech request while the chord is held.
        input.installTap(onBus: 0, bufferSize: 2048, format: format) { [weak request] buffer, _ in
            request?.append(buffer)
        }

        self.audioEngine.prepare()
        try self.audioEngine.start()

        self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
            guard let self else { return }
            if let error {
                Self.logger.debug("push-to-talk error: \(error.localizedDescription, privacy: .public)")
            }
            let transcript = result?.bestTranscription.formattedString
            let isFinal = result?.isFinal ?? false
            // The callback runs outside the actor; hop back in to mutate state.
            Task.detached { [weak self, transcript, isFinal] in
                guard let self else { return }
                await self.handle(transcript: transcript, isFinal: isFinal)
            }
        }
    }

    /// Folds a recognition result into `committed` / `volatile` and refreshes the overlay.
    private func handle(transcript: String?, isFinal: Bool) async {
        // Results can still trickle in after `end()` cancelled the task; ignoring them keeps
        // them from re-showing the overlay or leaking into the next session's transcript.
        guard self.isCapturing else { return }
        guard let transcript else { return }
        if isFinal {
            self.committed = transcript
            self.volatile = ""
        } else {
            self.volatile = Self.delta(after: self.committed, current: transcript)
        }

        let attributed = Self.makeAttributed(committed: self.committed, volatile: self.volatile, isFinal: isFinal)
        let snapshot = self.committed + self.volatile
        await MainActor.run {
            VoiceWakeOverlayController.shared.showPartial(transcript: snapshot, attributed: attributed)
        }
    }

    /// Snapshots the settings needed for one capture from `AppStateStore.shared`.
    @MainActor
    private func makeConfig() -> Config {
        let state = AppStateStore.shared
        return Config(
            micID: state.voiceWakeMicID.isEmpty ? nil : state.voiceWakeMicID,
            localeID: state.voiceWakeLocaleID,
            forwardConfig: state.voiceWakeForwardConfig)
    }

    // MARK: - Test helpers

    /// Test-only access to `delta(after:current:)`.
    static func _testDelta(committed: String, current: String) -> String {
        self.delta(after: committed, current: current)
    }

    /// Test-only helper returning the foreground colors applied to the committed and the
    /// volatile part of a two-character sample string.
    static func _testAttributedColors(isFinal: Bool) -> (NSColor, NSColor) {
        let sample = self.makeAttributed(committed: "a", volatile: "b", isFinal: isFinal)
        let committedColor = sample.attribute(.foregroundColor, at: 0, effectiveRange: nil) as? NSColor ?? .clear
        let volatileColor = sample.attribute(.foregroundColor, at: 1, effectiveRange: nil) as? NSColor ?? .clear
        return (committedColor, volatileColor)
    }

    /// Returns the part of `current` that follows `committed`, or all of `current` when it
    /// does not start with `committed`.
    private static func delta(after committed: String, current: String) -> String {
        guard current.hasPrefix(committed) else { return current }
        return String(current.dropFirst(committed.count))
    }

    /// Concatenates `committed` and `volatile` into one attributed string. The committed part
    /// always uses `labelColor`; the volatile part uses `secondaryLabelColor` until `isFinal`.
    private static func makeAttributed(committed: String, volatile: String, isFinal: Bool) -> NSAttributedString {
        let full = NSMutableAttributedString()
        let committedAttr: [NSAttributedString.Key: Any] = [.foregroundColor: NSColor.labelColor]
        full.append(NSAttributedString(string: committed, attributes: committedAttr))
        let volatileColor: NSColor = isFinal ? .labelColor : .secondaryLabelColor
        let volatileAttr: [NSAttributedString.Key: Any] = [.foregroundColor: volatileColor]
        full.append(NSAttributedString(string: volatile, attributes: volatileAttr))
        return full
    }
}
|
||||
@@ -288,6 +288,10 @@ actor VoiceWakeRuntime {
|
||||
}
|
||||
}
|
||||
|
||||
/// Stops the runtime via `stop()`. `VoicePushToTalk.begin()` calls this before it starts
/// its own capture.
func pauseForPushToTalk() { self.stop() }
|
||||
|
||||
private func updateHeardBeyondTrigger(withTrimmed trimmed: String) {
|
||||
if !self.heardBeyondTrigger, !trimmed.isEmpty {
|
||||
self.heardBeyondTrigger = true
|
||||
|
||||
@@ -47,6 +47,12 @@ struct VoiceWakeSettings: View {
|
||||
binding: self.voiceWakeBinding)
|
||||
.disabled(!voiceWakeSupported)
|
||||
|
||||
SettingsToggleRow(
|
||||
title: "Hold Cmd+Fn to talk",
|
||||
subtitle: "Push-to-talk mode that starts listening while you hold the hotkey and shows the preview overlay.",
|
||||
binding: self.$state.voicePushToTalkEnabled)
|
||||
.disabled(!voiceWakeSupported)
|
||||
|
||||
if !voiceWakeSupported {
|
||||
Label("Voice Wake requires macOS 26 or newer.", systemImage: "exclamationmark.triangle.fill")
|
||||
.font(.callout)
|
||||
|
||||
24
apps/macos/Tests/ClawdisIPCTests/VoicePushToTalkTests.swift
Normal file
24
apps/macos/Tests/ClawdisIPCTests/VoicePushToTalkTests.swift
Normal file
@@ -0,0 +1,24 @@
|
||||
import Testing
|
||||
@testable import Clawdis
|
||||
|
||||
/// Exercises the pure helpers of `VoicePushToTalk` through its `_test…` entry points.
@Suite struct VoicePushToTalkTests {
    /// A partial that starts with the committed text yields only the remainder.
    @Test func deltaTrimsCommittedPrefix() {
        #expect(VoicePushToTalk._testDelta(committed: "hello ", current: "hello world again") == "world again")
    }

    /// A partial that does not start with the committed text is returned unchanged.
    @Test func deltaFallsBackWhenPrefixDiffers() {
        #expect(VoicePushToTalk._testDelta(committed: "goodbye", current: "hello world") == "hello world")
    }

    /// While the result is not final, committed and volatile text use different colors.
    @Test func attributedColorsDifferWhenNotFinal() {
        let (committedColor, volatileColor) = VoicePushToTalk._testAttributedColors(isFinal: false)
        #expect(committedColor != volatileColor)
    }

    /// Once the result is final, both parts share one color.
    @Test func attributedColorsMatchWhenFinal() {
        let (committedColor, volatileColor) = VoicePushToTalk._testAttributedColors(isFinal: true)
        #expect(committedColor == volatileColor)
    }
}
|
||||
Reference in New Issue
Block a user