From 99a31021348fd1b3858c4a9c51bb24ddee60add3 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 9 Dec 2025 03:25:55 +0100 Subject: [PATCH] Docs: voice overlay plan and fix web mocks --- .../Sources/Clawdis/VoiceWakeChime.swift | 78 ++++++++++++++++--- docs/mac/voice-overlay.md | 43 ++++++++++ src/auto-reply/reply.triggers.test.ts | 1 + src/commands/status.test.ts | 2 + src/infra/control-channel.ts | 4 +- 5 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 docs/mac/voice-overlay.md diff --git a/apps/macos/Sources/Clawdis/VoiceWakeChime.swift b/apps/macos/Sources/Clawdis/VoiceWakeChime.swift index e2d9c6c18..2782d5cd5 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeChime.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeChime.swift @@ -28,19 +28,71 @@ enum VoiceWakeChime: Codable, Equatable, Sendable { struct VoiceWakeChimeCatalog { /// Options shown in the picker. - static let systemOptions: [String] = [ - "Glass", // default - "Ping", - "Pop", - "Frog", - "Submarine", - "Funk", - "Tink", - ] + static let systemOptions: [String] = { + let discovered = Self.discoveredSoundMap.keys + let fallback: [String] = [ + "Glass", // default + "Ping", + "Pop", + "Frog", + "Submarine", + "Funk", + "Tink", + "Basso", + "Blow", + "Bottle", + "Hero", + "Morse", + "Purr", + "Sosumi", + "Mail Sent", + ] + + // Keep Glass first, then present the rest alphabetically without duplicates. + var names = Set(discovered).union(fallback) + names.remove("Glass") + let sorted = names.sorted { $0.localizedCaseInsensitiveCompare($1) == .orderedAscending } + return ["Glass"] + sorted + }() static func displayName(for raw: String) -> String { return raw } + + static func url(for name: String) -> URL? { + return self.discoveredSoundMap[name] + } + + private static let allowedExtensions: Set = [ + "aif", "aiff", "caf", "wav", "m4a", "mp3", + ] + + private static let searchRoots: [URL] = [ + FileManager.default.homeDirectoryForCurrentUser.appendingPathComponent("Library/Sounds"), + URL(fileURLWithPath: "/Library/Sounds"), + URL(fileURLWithPath: "/System/Applications/Mail.app/Contents/Resources"), // Mail “swoosh” + URL(fileURLWithPath: "/System/Library/Sounds"), + ] + + private static let discoveredSoundMap: [String: URL] = { + var map: [String: URL] = [:] + for root in self.searchRoots { + guard let contents = try? FileManager.default.contentsOfDirectory( + at: root, + includingPropertiesForKeys: nil, + options: [.skipsHiddenFiles]) + else { continue } + + for url in contents where self.allowedExtensions.contains(url.pathExtension.lowercased()) { + let name = url.deletingPathExtension().lastPathComponent + // Preserve the first match in priority order. + if map[name] == nil { + map[name] = url + } + } + } + return map + }() } @MainActor @@ -62,7 +114,13 @@ enum VoiceWakeChimePlayer { case .none: return nil case let .system(name): - return NSSound(named: NSSound.Name(name)) + if let named = NSSound(named: NSSound.Name(name)) { + return named + } + if let url = VoiceWakeChimeCatalog.url(for: name) { + return NSSound(contentsOf: url, byReference: false) + } + return nil case let .custom(_, bookmark): var stale = false diff --git a/docs/mac/voice-overlay.md b/docs/mac/voice-overlay.md new file mode 100644 index 000000000..be741fc6e --- /dev/null +++ b/docs/mac/voice-overlay.md @@ -0,0 +1,43 @@ +## Voice Overlay Lifecycle (macOS) + +Audience: macOS app contributors. Goal: keep the voice overlay predictable when wake-word and push-to-talk overlap. + +### Current intent +- If the overlay is already visible from wake-word and the user presses the hotkey, the hotkey session *adopts* the existing text instead of resetting it. The overlay stays up while the hotkey is held. When the user releases: send if there is trimmed text, otherwise dismiss. +- Wake-word alone still auto-sends on silence; push-to-talk sends immediately on release. + +### Proposed architecture (to implement next) +1. **VoiceSessionCoordinator (actor)** + - Owns exactly one `VoiceSession` at a time. + - API (token-based): `beginWakeCapture`, `beginPushToTalk`, `updatePartial`, `endCapture`, `cancel`, `applyCooldown`. + - Drops callbacks that carry stale tokens (prevents old recognizers from reopening the overlay). +2. **VoiceSession (model)** + - Fields: `token`, `source` (wakeWord|pushToTalk), committed/volatile text, chime flags, timers (auto-send, idle), `overlayMode` (display|editing|sending), cooldown deadline. +3. **Overlay binding** + - `VoiceSessionPublisher` (`ObservableObject`) mirrors the active session into SwiftUI. + - `VoiceWakeOverlayView` renders only via the publisher; it never mutates global singletons directly. + - Overlay user actions (`sendNow`, `dismiss`, `edit`) call back into the coordinator with the session token. +4. **Unified send path** + - On `endCapture`: if trimmed text is empty → dismiss; else `performSend(session:)` (plays send chime once, forwards, dismisses). + - Push-to-talk: no delay; wake-word: optional delay for auto-send. + - Apply a short cooldown to the wake runtime after push-to-talk finishes so wake-word doesn’t immediately retrigger. +5. **Logging** + - Coordinator emits `.info` logs in subsystem `com.steipete.clawdis`, categories `voicewake.overlay` and `voicewake.chime`. + - Key events: `session_started`, `adopted_by_push_to_talk`, `partial`, `finalized`, `send`, `dismiss`, `cancel`, `cooldown`. + +### Debugging checklist +- Stream logs while reproducing a sticky overlay: + + ```bash + sudo log stream --predicate 'subsystem == "com.steipete.clawdis" AND category CONTAINS "voicewake"' --level info --style compact + ``` +- Verify only one active session token; stale callbacks should be dropped by the coordinator. +- Ensure push-to-talk release always calls `endCapture` with the active token; if text is empty, expect `dismiss` without chime or send. + +### Migration steps (suggested) +1. Add `VoiceSessionCoordinator`, `VoiceSession`, and `VoiceSessionPublisher`. +2. Refactor `VoiceWakeRuntime` to create/update/end sessions instead of touching `VoiceWakeOverlayController` directly. +3. Refactor `VoicePushToTalk` to adopt existing sessions and call `endCapture` on release; apply runtime cooldown. +4. Wire `VoiceWakeOverlayController` to the publisher; remove direct calls from runtime/PTT. +5. Add integration tests for session adoption, cooldown, and empty-text dismissal. + diff --git a/src/auto-reply/reply.triggers.test.ts b/src/auto-reply/reply.triggers.test.ts index 063372934..85b474fdc 100644 --- a/src/auto-reply/reply.triggers.test.ts +++ b/src/auto-reply/reply.triggers.test.ts @@ -6,6 +6,7 @@ import { getReplyFromConfig } from "./reply.js"; const webMocks = vi.hoisted(() => ({ webAuthExists: vi.fn().mockResolvedValue(true), getWebAuthAgeMs: vi.fn().mockReturnValue(120_000), + readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }), })); vi.mock("../web/session.js", () => webMocks); diff --git a/src/commands/status.test.ts b/src/commands/status.test.ts index 0185ab443..ad672b2ff 100644 --- a/src/commands/status.test.ts +++ b/src/commands/status.test.ts @@ -7,6 +7,7 @@ const mocks = vi.hoisted(() => ({ resolveStorePath: vi.fn().mockReturnValue("/tmp/sessions.json"), webAuthExists: vi.fn().mockResolvedValue(true), getWebAuthAgeMs: vi.fn().mockReturnValue(5000), + readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }), logWebSelfId: vi.fn(), })); @@ -17,6 +18,7 @@ vi.mock("../config/sessions.js", () => ({ vi.mock("../web/session.js", () => ({ webAuthExists: mocks.webAuthExists, getWebAuthAgeMs: mocks.getWebAuthAgeMs, + readWebSelfId: mocks.readWebSelfId, logWebSelfId: mocks.logWebSelfId, })); vi.mock("../config/config.js", () => ({ diff --git a/src/infra/control-channel.ts b/src/infra/control-channel.ts index 8dbb3412f..8500f8ec6 100644 --- a/src/infra/control-channel.ts +++ b/src/infra/control-channel.ts @@ -179,7 +179,9 @@ export async function startControlChannel( respond(undefined, false, `unknown method: ${parsed.method}`); break; } - logDebug(`control: ${parsed.method} responded in ${Date.now() - started}ms`); + logDebug( + `control: ${parsed.method} responded in ${Date.now() - started}ms`, + ); } catch (err) { logError( `control: ${parsed.method} failed in ${Date.now() - started}ms: ${String(err)}`,