feat: share wake gate via SwabbleKit
This commit is contained in:
@@ -17,6 +17,7 @@ let package = Package(
|
||||
.package(url: "https://github.com/swiftlang/swift-subprocess.git", from: "0.1.0"),
|
||||
.package(url: "https://github.com/sparkle-project/Sparkle", from: "2.8.1"),
|
||||
.package(path: "../shared/ClawdisKit"),
|
||||
.package(path: "../../Swabble"),
|
||||
.package(path: "../../Peekaboo/Core/PeekabooCore"),
|
||||
.package(path: "../../Peekaboo/Core/PeekabooAutomationKit"),
|
||||
],
|
||||
@@ -41,6 +42,7 @@ let package = Package(
|
||||
"ClawdisProtocol",
|
||||
.product(name: "ClawdisKit", package: "ClawdisKit"),
|
||||
.product(name: "ClawdisChatUI", package: "ClawdisKit"),
|
||||
.product(name: "SwabbleKit", package: "swabble"),
|
||||
.product(name: "MenuBarExtraAccess", package: "MenuBarExtraAccess"),
|
||||
.product(name: "Subprocess", package: "swift-subprocess"),
|
||||
.product(name: "Sparkle", package: "Sparkle"),
|
||||
@@ -56,7 +58,12 @@ let package = Package(
|
||||
]),
|
||||
.testTarget(
|
||||
name: "ClawdisIPCTests",
|
||||
dependencies: ["ClawdisIPC", "Clawdis", "ClawdisProtocol"],
|
||||
dependencies: [
|
||||
"ClawdisIPC",
|
||||
"Clawdis",
|
||||
"ClawdisProtocol",
|
||||
.product(name: "SwabbleKit", package: "swabble"),
|
||||
],
|
||||
swiftSettings: [
|
||||
.enableUpcomingFeature("StrictConcurrency"),
|
||||
.enableExperimentalFeature("SwiftTesting"),
|
||||
|
||||
@@ -2,6 +2,7 @@ import AVFoundation
|
||||
import Foundation
|
||||
import OSLog
|
||||
import Speech
|
||||
import SwabbleKit
|
||||
#if canImport(AppKit)
|
||||
import AppKit
|
||||
#endif
|
||||
@@ -35,6 +36,7 @@ actor VoiceWakeRuntime {
|
||||
private var currentConfig: RuntimeConfig?
|
||||
private var listeningState: ListeningState = .idle
|
||||
private var overlayToken: UUID?
|
||||
private var activeTriggerEndTime: TimeInterval?
|
||||
|
||||
// Tunables
|
||||
// Silence threshold once we've captured user speech (post-trigger).
|
||||
@@ -147,9 +149,13 @@ actor VoiceWakeRuntime {
|
||||
self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self, generation] result, error in
|
||||
guard let self else { return }
|
||||
let transcript = result?.bestTranscription.formattedString
|
||||
let segments = result.flatMap { result in
|
||||
transcript.map { WakeWordSpeechSegments.from(transcription: result.bestTranscription, transcript: $0) }
|
||||
} ?? []
|
||||
let isFinal = result?.isFinal ?? false
|
||||
Task { await self.handleRecognition(
|
||||
transcript: transcript,
|
||||
segments: segments,
|
||||
isFinal: isFinal,
|
||||
error: error,
|
||||
config: config,
|
||||
@@ -184,6 +190,7 @@ actor VoiceWakeRuntime {
|
||||
self.audioEngine = nil
|
||||
self.currentConfig = nil
|
||||
self.listeningState = .idle
|
||||
self.activeTriggerEndTime = nil
|
||||
self.logger.debug("voicewake runtime stopped")
|
||||
DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "stopped")
|
||||
|
||||
@@ -206,6 +213,7 @@ actor VoiceWakeRuntime {
|
||||
|
||||
private func handleRecognition(
|
||||
transcript: String?,
|
||||
segments: [WakeWordSegment],
|
||||
isFinal: Bool,
|
||||
error: Error?,
|
||||
config: RuntimeConfig,
|
||||
@@ -224,7 +232,11 @@ actor VoiceWakeRuntime {
|
||||
if !transcript.isEmpty {
|
||||
self.lastHeard = now
|
||||
if self.isCapturing {
|
||||
let trimmed = Self.trimmedAfterTrigger(transcript, triggers: config.triggers)
|
||||
let trimmed = Self.commandAfterTrigger(
|
||||
transcript: transcript,
|
||||
segments: segments,
|
||||
triggerEndTime: self.activeTriggerEndTime,
|
||||
triggers: config.triggers)
|
||||
self.capturedTranscript = trimmed
|
||||
self.updateHeardBeyondTrigger(withTrimmed: trimmed)
|
||||
if isFinal {
|
||||
@@ -252,37 +264,27 @@ actor VoiceWakeRuntime {
|
||||
|
||||
if self.isCapturing { return }
|
||||
|
||||
if Self.matches(text: transcript, triggers: config.triggers) {
|
||||
let gateConfig = WakeWordGateConfig(triggers: config.triggers)
|
||||
if let match = WakeWordGate.match(transcript: transcript, segments: segments, config: gateConfig) {
|
||||
if let cooldown = cooldownUntil, now < cooldown {
|
||||
return
|
||||
}
|
||||
await self.beginCapture(transcript: transcript, config: config)
|
||||
await self.beginCapture(command: match.command, triggerEndTime: match.triggerEndTime, config: config)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `text` contains any of `triggers`, ignoring letter case.
///
/// Each trigger is lowercased and stripped of surrounding whitespace and newlines before
/// the substring check; triggers that end up empty are skipped. Empty `text` never matches.
///
/// - Parameters:
///   - text: The transcript to search.
///   - triggers: Candidate wake phrases.
/// - Returns: `true` as soon as one usable trigger is found inside `text`.
private static func matches(text: String, triggers: [String]) -> Bool {
    if text.isEmpty { return false }
    let haystack = text.lowercased()
    return triggers.contains { candidate in
        let needle = candidate.lowercased().trimmingCharacters(in: .whitespacesAndNewlines)
        return !needle.isEmpty && haystack.contains(needle)
    }
}
|
||||
|
||||
private func beginCapture(transcript: String, config: RuntimeConfig) async {
|
||||
private func beginCapture(command: String, triggerEndTime: TimeInterval, config: RuntimeConfig) async {
|
||||
self.listeningState = .voiceWake
|
||||
self.isCapturing = true
|
||||
DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "beginCapture")
|
||||
let trimmed = Self.trimmedAfterTrigger(transcript, triggers: config.triggers)
|
||||
self.capturedTranscript = trimmed
|
||||
self.capturedTranscript = command
|
||||
self.committedTranscript = ""
|
||||
self.volatileTranscript = trimmed
|
||||
self.volatileTranscript = command
|
||||
self.captureStartedAt = Date()
|
||||
self.cooldownUntil = nil
|
||||
self.heardBeyondTrigger = !trimmed.isEmpty
|
||||
self.heardBeyondTrigger = !command.isEmpty
|
||||
self.triggerChimePlayed = false
|
||||
self.activeTriggerEndTime = triggerEndTime
|
||||
|
||||
if config.triggerChime != .none, !self.triggerChimePlayed {
|
||||
self.triggerChimePlayed = true
|
||||
@@ -354,6 +356,7 @@ actor VoiceWakeRuntime {
|
||||
self.lastHeard = nil
|
||||
self.heardBeyondTrigger = false
|
||||
self.triggerChimePlayed = false
|
||||
self.activeTriggerEndTime = nil
|
||||
|
||||
await MainActor.run { AppStateStore.shared.stopVoiceEars() }
|
||||
if let token = self.overlayToken {
|
||||
@@ -467,6 +470,22 @@ actor VoiceWakeRuntime {
|
||||
return text
|
||||
}
|
||||
|
||||
/// Extracts the command portion of `transcript` that follows the wake trigger.
///
/// When `triggerEndTime` is available, `WakeWordGate.commandText` is asked for the command
/// first. The result of `trimmedAfterTrigger` is used instead when there is no
/// `triggerEndTime` or when the gate returns an empty string.
///
/// - Parameters:
///   - transcript: The full recognized transcript.
///   - segments: Segments passed through to `WakeWordGate.commandText`.
///   - triggerEndTime: End time of the matched trigger, or `nil` when none is recorded.
///   - triggers: Wake phrases used by the `trimmedAfterTrigger` fallback.
/// - Returns: The command text after the trigger.
private static func commandAfterTrigger(
    transcript: String,
    segments: [WakeWordSegment],
    triggerEndTime: TimeInterval?,
    triggers: [String]) -> String
{
    if let triggerEndTime {
        let gated = WakeWordGate.commandText(
            transcript: transcript,
            segments: segments,
            triggerEndTime: triggerEndTime)
        // Only a non-empty gate result wins; otherwise fall through to the text-based trim.
        if !gated.isEmpty { return gated }
    }
    return trimmedAfterTrigger(transcript, triggers: triggers)
}
|
||||
|
||||
#if DEBUG
|
||||
static func _testTrimmedAfterTrigger(_ text: String, triggers: [String]) -> String {
|
||||
self.trimmedAfterTrigger(text, triggers: triggers)
|
||||
@@ -481,9 +500,6 @@ actor VoiceWakeRuntime {
|
||||
.attribute(.foregroundColor, at: 0, effectiveRange: nil) as? NSColor ?? .clear
|
||||
}
|
||||
|
||||
/// Forwards to the private `matches(text:triggers:)` so tests can call it.
static func _testMatches(text: String, triggers: [String]) -> Bool {
    return Self.matches(text: text, triggers: triggers)
}
|
||||
#endif
|
||||
|
||||
private static func delta(after committed: String, current: String) -> String {
|
||||
|
||||
@@ -2,6 +2,7 @@ import AVFoundation
|
||||
import Foundation
|
||||
import OSLog
|
||||
import Speech
|
||||
import SwabbleKit
|
||||
|
||||
enum VoiceWakeTestState: Equatable {
|
||||
case idle
|
||||
@@ -93,14 +94,16 @@ final class VoiceWakeTester {
|
||||
self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
|
||||
guard let self, !self.isStopping else { return }
|
||||
let text = result?.bestTranscription.formattedString ?? ""
|
||||
let matched = Self.matches(text: text, triggers: triggers)
|
||||
let segments = result.map { WakeWordSpeechSegments.from(transcription: $0.bestTranscription, transcript: text) } ?? []
|
||||
let gateConfig = WakeWordGateConfig(triggers: triggers)
|
||||
let match = WakeWordGate.match(transcript: text, segments: segments, config: gateConfig)
|
||||
let isFinal = result?.isFinal ?? false
|
||||
let errorMessage = error?.localizedDescription
|
||||
|
||||
Task { [weak self] in
|
||||
guard let self, !self.isStopping else { return }
|
||||
await self.handleResult(
|
||||
matched: matched,
|
||||
match: match,
|
||||
text: text,
|
||||
isFinal: isFinal,
|
||||
errorMessage: errorMessage,
|
||||
@@ -120,7 +123,7 @@ final class VoiceWakeTester {
|
||||
}
|
||||
|
||||
private func handleResult(
|
||||
matched: Bool,
|
||||
match: WakeWordGateMatch?,
|
||||
text: String,
|
||||
isFinal: Bool,
|
||||
errorMessage: String?,
|
||||
@@ -129,15 +132,15 @@ final class VoiceWakeTester {
|
||||
if !text.isEmpty {
|
||||
self.lastHeard = Date()
|
||||
}
|
||||
if matched, !text.isEmpty {
|
||||
if let match, !match.command.isEmpty {
|
||||
self.holdingAfterDetect = true
|
||||
self.detectedText = text
|
||||
self.logger.info("voice wake detected; forwarding (len=\(text.count))")
|
||||
self.detectedText = match.command
|
||||
self.logger.info("voice wake detected; forwarding (len=\(match.command.count))")
|
||||
await MainActor.run { AppStateStore.shared.triggerVoiceEars(ttl: nil) }
|
||||
Task.detached {
|
||||
await VoiceWakeForwarder.forward(transcript: text)
|
||||
await VoiceWakeForwarder.forward(transcript: match.command)
|
||||
}
|
||||
Task { @MainActor in onUpdate(.detected(text)) }
|
||||
Task { @MainActor in onUpdate(.detected(match.command)) }
|
||||
self.holdUntilSilence(onUpdate: onUpdate)
|
||||
return
|
||||
}
|
||||
@@ -187,15 +190,6 @@ final class VoiceWakeTester {
|
||||
_ = preferredMicID
|
||||
}
|
||||
|
||||
/// Reports whether `text` contains any of `triggers`, ignoring letter case.
///
/// Triggers are trimmed of surrounding whitespace and newlines, and blank triggers are
/// skipped. Without that, a whitespace-only trigger would match any transcript containing
/// a space, and a padded trigger such as `" claude "` would fail to match "hello claude!".
///
/// - Parameters:
///   - text: The transcript to search.
///   - triggers: Candidate wake phrases.
/// - Returns: `true` when at least one usable trigger occurs in `text`.
private static func matches(text: String, triggers: [String]) -> Bool {
    guard !text.isEmpty else { return false }
    let lowered = text.lowercased()
    return triggers.contains { trigger in
        let needle = trigger.lowercased().trimmingCharacters(in: .whitespacesAndNewlines)
        return !needle.isEmpty && lowered.contains(needle)
    }
}
|
||||
|
||||
/// Forwards to the private `matches(text:triggers:)` so tests can call it.
static func _testMatches(text: String, triggers: [String]) -> Bool {
    return Self.matches(text: text, triggers: triggers)
}
|
||||
|
||||
private nonisolated static func ensurePermissions() async throws -> Bool {
|
||||
let speechStatus = SFSpeechRecognizer.authorizationStatus()
|
||||
if speechStatus == .notDetermined {
|
||||
|
||||
@@ -1,23 +1,9 @@
|
||||
import Foundation
|
||||
import Testing
|
||||
import SwabbleKit
|
||||
@testable import Clawdis
|
||||
|
||||
@Suite struct VoiceWakeRuntimeTests {
|
||||
/// A mixed-case trigger matches lowercase speech; unrelated speech does not match.
@Test func matchesIsCaseInsensitive() {
    let wakeWords = ["ClAwD", "buddy"]
    let hit = VoiceWakeRuntime._testMatches(text: "hey clawd are you there", triggers: wakeWords)
    let miss = VoiceWakeRuntime._testMatches(text: "nothing to see", triggers: wakeWords)
    #expect(hit)
    #expect(!miss)
}
|
||||
|
||||
/// A trigger padded with spaces still matches the bare word in the transcript.
@Test func matchesIgnoresWhitespace() {
    let padded = [" claude "]
    let didMatch = VoiceWakeRuntime._testMatches(text: "hello claude!", triggers: padded)
    #expect(didMatch)
}
|
||||
|
||||
/// Whitespace-only and empty triggers never produce a match.
@Test func matchesSkipsEmptyTriggers() {
    let blanks = [" ", ""]
    let didMatch = VoiceWakeRuntime._testMatches(text: "hello", triggers: blanks)
    #expect(!didMatch)
}
|
||||
|
||||
@Test func trimsAfterTriggerKeepsPostSpeech() {
|
||||
let triggers = ["claude", "clawd"]
|
||||
let text = "hey Claude how are you"
|
||||
@@ -48,4 +34,46 @@ import Testing
|
||||
let text = "claude write a note"
|
||||
#expect(VoiceWakeRuntime._testHasContentAfterTrigger(text, triggers: triggers))
|
||||
}
|
||||
|
||||
/// The gate returns no match when the command follows the trigger too closely.
@Test func gateRequiresGapBetweenTriggerAndCommand() {
    let spoken = "hey clawd do thing"
    // Tuples are (word, start, duration). "clawd" starts at 0.2 and lasts 0.1, and "do"
    // starts at 0.35, so the pause is far shorter than the 0.3 `minPostTriggerGap` below.
    let timings: [(String, TimeInterval, TimeInterval)] = [
        ("hey", 0.0, 0.1),
        ("clawd", 0.2, 0.1),
        ("do", 0.35, 0.1),
        ("thing", 0.5, 0.1),
    ]
    let spokenSegments = makeSegments(transcript: spoken, words: timings)
    let gate = WakeWordGateConfig(triggers: ["clawd"], minPostTriggerGap: 0.3)
    let match = WakeWordGate.match(transcript: spoken, segments: spokenSegments, config: gate)
    #expect(match == nil)
}
|
||||
|
||||
/// With a long enough pause after the trigger, the gate matches and yields the command text.
@Test func gateAcceptsGapAndExtractsCommand() {
    let spoken = "hey clawd do thing"
    // Tuples are (word, start, duration). "clawd" starts at 0.2 and lasts 0.1, and "do"
    // starts at 0.9, leaving a pause well above the 0.3 `minPostTriggerGap` below.
    let timings: [(String, TimeInterval, TimeInterval)] = [
        ("hey", 0.0, 0.1),
        ("clawd", 0.2, 0.1),
        ("do", 0.9, 0.1),
        ("thing", 1.1, 0.1),
    ]
    let spokenSegments = makeSegments(transcript: spoken, words: timings)
    let gate = WakeWordGateConfig(triggers: ["clawd"], minPostTriggerGap: 0.3)
    let command = WakeWordGate.match(transcript: spoken, segments: spokenSegments, config: gate)?.command
    #expect(command == "do thing")
}
|
||||
}
|
||||
|
||||
/// Builds `WakeWordSegment` fixtures from `(word, start, duration)` tuples.
///
/// Each word is located in `transcript` starting from the end of the previously found
/// word, so repeated words resolve to successive occurrences. A word that cannot be found
/// gets a `nil` range and leaves the search position unchanged.
private func makeSegments(
    transcript: String,
    words: [(String, TimeInterval, TimeInterval)])
    -> [WakeWordSegment]
{
    var cursor = transcript.startIndex
    return words.map { entry in
        let (text, begin, length) = entry
        let found = transcript.range(of: text, range: cursor..<transcript.endIndex)
        if let found { cursor = found.upperBound }
        return WakeWordSegment(text: text, start: begin, duration: length, range: found)
    }
}
|
||||
|
||||
@@ -1,15 +1,47 @@
|
||||
import Foundation
|
||||
import Testing
|
||||
@testable import Clawdis
|
||||
import SwabbleKit
|
||||
|
||||
struct VoiceWakeTesterTests {
|
||||
@Test func matchesIsCaseInsensitiveAndSubstring() {
|
||||
let triggers = ["Claude", "wake word"]
|
||||
#expect(VoiceWakeTester._testMatches(text: "hey claude are you there", triggers: triggers))
|
||||
#expect(VoiceWakeTester._testMatches(text: "this has wake word inside", triggers: triggers))
|
||||
/// The gate returns no match when the command follows the trigger too closely.
@Test func matchRespectsGapRequirement() {
    let spoken = "hey claude do thing"
    // Tuples are (word, start, duration). "claude" starts at 0.2 and lasts 0.1, and "do"
    // starts at 0.35, so the pause is far shorter than the 0.3 `minPostTriggerGap` below.
    let timings: [(String, TimeInterval, TimeInterval)] = [
        ("hey", 0.0, 0.1),
        ("claude", 0.2, 0.1),
        ("do", 0.35, 0.1),
        ("thing", 0.5, 0.1),
    ]
    let spokenSegments = makeSegments(transcript: spoken, words: timings)
    let gate = WakeWordGateConfig(triggers: ["claude"], minPostTriggerGap: 0.3)
    let match = WakeWordGate.match(transcript: spoken, segments: spokenSegments, config: gate)
    #expect(match == nil)
}
|
||||
|
||||
@Test func matchesReturnsFalseWhenNoTrigger() {
|
||||
let triggers = ["claude"]
|
||||
#expect(!VoiceWakeTester._testMatches(text: "random text", triggers: triggers))
|
||||
/// With a long enough pause after the trigger, the gate matches and yields the command text.
@Test func matchReturnsCommandAfterGap() {
    let spoken = "hey claude do thing"
    // Tuples are (word, start, duration). "claude" starts at 0.2 and lasts 0.1, and "do"
    // starts at 0.8, leaving a pause well above the 0.3 `minPostTriggerGap` below.
    let timings: [(String, TimeInterval, TimeInterval)] = [
        ("hey", 0.0, 0.1),
        ("claude", 0.2, 0.1),
        ("do", 0.8, 0.1),
        ("thing", 1.0, 0.1),
    ]
    let spokenSegments = makeSegments(transcript: spoken, words: timings)
    let gate = WakeWordGateConfig(triggers: ["claude"], minPostTriggerGap: 0.3)
    let command = WakeWordGate.match(transcript: spoken, segments: spokenSegments, config: gate)?.command
    #expect(command == "do thing")
}
|
||||
}
|
||||
|
||||
/// Builds `WakeWordSegment` fixtures from `(word, start, duration)` tuples.
///
/// Words are searched for in `transcript` left to right: each lookup begins where the
/// previous hit ended. A word with no hit is still emitted, with a `nil` range, and the
/// search position stays where it was.
private func makeSegments(
    transcript: String,
    words: [(String, TimeInterval, TimeInterval)])
    -> [WakeWordSegment]
{
    var resumeAt = transcript.startIndex
    return words.reduce(into: [WakeWordSegment]()) { built, entry in
        let located = transcript.range(of: entry.0, range: resumeAt..<transcript.endIndex)
        built.append(WakeWordSegment(text: entry.0, start: entry.1, duration: entry.2, range: located))
        if let located { resumeAt = located.upperBound }
    }
}
|
||||
|
||||
Reference in New Issue
Block a user