Docs: voice overlay plan and fix web mocks

This commit is contained in:
Peter Steinberger
2025-12-09 03:25:55 +01:00
parent 3a42979e53
commit 99a3102134
5 changed files with 117 additions and 11 deletions

View File

@@ -28,19 +28,71 @@ enum VoiceWakeChime: Codable, Equatable, Sendable {
struct VoiceWakeChimeCatalog { struct VoiceWakeChimeCatalog {
/// Options shown in the picker. /// Options shown in the picker.
static let systemOptions: [String] = [ static let systemOptions: [String] = {
"Glass", // default let discovered = Self.discoveredSoundMap.keys
"Ping", let fallback: [String] = [
"Pop", "Glass", // default
"Frog", "Ping",
"Submarine", "Pop",
"Funk", "Frog",
"Tink", "Submarine",
] "Funk",
"Tink",
"Basso",
"Blow",
"Bottle",
"Hero",
"Morse",
"Purr",
"Sosumi",
"Mail Sent",
]
// Keep Glass first, then present the rest alphabetically without duplicates.
var names = Set(discovered).union(fallback)
names.remove("Glass")
let sorted = names.sorted { $0.localizedCaseInsensitiveCompare($1) == .orderedAscending }
return ["Glass"] + sorted
}()
static func displayName(for raw: String) -> String { static func displayName(for raw: String) -> String {
return raw return raw
} }
static func url(for name: String) -> URL? {
return self.discoveredSoundMap[name]
}
private static let allowedExtensions: Set<String> = [
"aif", "aiff", "caf", "wav", "m4a", "mp3",
]
private static let searchRoots: [URL] = [
FileManager.default.homeDirectoryForCurrentUser.appendingPathComponent("Library/Sounds"),
URL(fileURLWithPath: "/Library/Sounds"),
URL(fileURLWithPath: "/System/Applications/Mail.app/Contents/Resources"), // Mail swoosh
URL(fileURLWithPath: "/System/Library/Sounds"),
]
private static let discoveredSoundMap: [String: URL] = {
var map: [String: URL] = [:]
for root in self.searchRoots {
guard let contents = try? FileManager.default.contentsOfDirectory(
at: root,
includingPropertiesForKeys: nil,
options: [.skipsHiddenFiles])
else { continue }
for url in contents where self.allowedExtensions.contains(url.pathExtension.lowercased()) {
let name = url.deletingPathExtension().lastPathComponent
// Preserve the first match in priority order.
if map[name] == nil {
map[name] = url
}
}
}
return map
}()
} }
@MainActor @MainActor
@@ -62,7 +114,13 @@ enum VoiceWakeChimePlayer {
case .none: case .none:
return nil return nil
case let .system(name): case let .system(name):
return NSSound(named: NSSound.Name(name)) if let named = NSSound(named: NSSound.Name(name)) {
return named
}
if let url = VoiceWakeChimeCatalog.url(for: name) {
return NSSound(contentsOf: url, byReference: false)
}
return nil
case let .custom(_, bookmark): case let .custom(_, bookmark):
var stale = false var stale = false

43
docs/mac/voice-overlay.md Normal file
View File

@@ -0,0 +1,43 @@
## Voice Overlay Lifecycle (macOS)
Audience: macOS app contributors. Goal: keep the voice overlay predictable when wake-word and push-to-talk overlap.
### Current intent
- If the overlay is already visible from wake-word and the user presses the hotkey, the hotkey session *adopts* the existing text instead of resetting it. The overlay stays up while the hotkey is held. When the user releases: send if there is trimmed text, otherwise dismiss.
- Wake-word alone still auto-sends on silence; push-to-talk sends immediately on release.
### Proposed architecture (to implement next)
1. **VoiceSessionCoordinator (actor)**
- Owns exactly one `VoiceSession` at a time.
- API (token-based): `beginWakeCapture`, `beginPushToTalk`, `updatePartial`, `endCapture`, `cancel`, `applyCooldown`.
- Drops callbacks that carry stale tokens (prevents old recognizers from reopening the overlay).
2. **VoiceSession (model)**
- Fields: `token`, `source` (wakeWord|pushToTalk), committed/volatile text, chime flags, timers (auto-send, idle), `overlayMode` (display|editing|sending), cooldown deadline.
3. **Overlay binding**
- `VoiceSessionPublisher` (`ObservableObject`) mirrors the active session into SwiftUI.
- `VoiceWakeOverlayView` renders only via the publisher; it never mutates global singletons directly.
- Overlay user actions (`sendNow`, `dismiss`, `edit`) call back into the coordinator with the session token.
4. **Unified send path**
- On `endCapture`: if trimmed text is empty → dismiss; else `performSend(session:)` (plays send chime once, forwards, dismisses).
- Push-to-talk: no delay; wake-word: optional delay for auto-send.
- Apply a short cooldown to the wake runtime after push-to-talk finishes so wake-word doesn't immediately retrigger.
5. **Logging**
- Coordinator emits `.info` logs in subsystem `com.steipete.clawdis`, categories `voicewake.overlay` and `voicewake.chime`.
- Key events: `session_started`, `adopted_by_push_to_talk`, `partial`, `finalized`, `send`, `dismiss`, `cancel`, `cooldown`.
### Debugging checklist
- Stream logs while reproducing a sticky overlay:
```bash
sudo log stream --predicate 'subsystem == "com.steipete.clawdis" AND category CONTAINS "voicewake"' --level info --style compact
```
- Verify only one active session token; stale callbacks should be dropped by the coordinator.
- Ensure push-to-talk release always calls `endCapture` with the active token; if text is empty, expect `dismiss` without chime or send.
### Migration steps (suggested)
1. Add `VoiceSessionCoordinator`, `VoiceSession`, and `VoiceSessionPublisher`.
2. Refactor `VoiceWakeRuntime` to create/update/end sessions instead of touching `VoiceWakeOverlayController` directly.
3. Refactor `VoicePushToTalk` to adopt existing sessions and call `endCapture` on release; apply runtime cooldown.
4. Wire `VoiceWakeOverlayController` to the publisher; remove direct calls from runtime/PTT.
5. Add integration tests for session adoption, cooldown, and empty-text dismissal.

View File

@@ -6,6 +6,7 @@ import { getReplyFromConfig } from "./reply.js";
const webMocks = vi.hoisted(() => ({ const webMocks = vi.hoisted(() => ({
webAuthExists: vi.fn().mockResolvedValue(true), webAuthExists: vi.fn().mockResolvedValue(true),
getWebAuthAgeMs: vi.fn().mockReturnValue(120_000), getWebAuthAgeMs: vi.fn().mockReturnValue(120_000),
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
})); }));
vi.mock("../web/session.js", () => webMocks); vi.mock("../web/session.js", () => webMocks);

View File

@@ -7,6 +7,7 @@ const mocks = vi.hoisted(() => ({
resolveStorePath: vi.fn().mockReturnValue("/tmp/sessions.json"), resolveStorePath: vi.fn().mockReturnValue("/tmp/sessions.json"),
webAuthExists: vi.fn().mockResolvedValue(true), webAuthExists: vi.fn().mockResolvedValue(true),
getWebAuthAgeMs: vi.fn().mockReturnValue(5000), getWebAuthAgeMs: vi.fn().mockReturnValue(5000),
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
logWebSelfId: vi.fn(), logWebSelfId: vi.fn(),
})); }));
@@ -17,6 +18,7 @@ vi.mock("../config/sessions.js", () => ({
vi.mock("../web/session.js", () => ({ vi.mock("../web/session.js", () => ({
webAuthExists: mocks.webAuthExists, webAuthExists: mocks.webAuthExists,
getWebAuthAgeMs: mocks.getWebAuthAgeMs, getWebAuthAgeMs: mocks.getWebAuthAgeMs,
readWebSelfId: mocks.readWebSelfId,
logWebSelfId: mocks.logWebSelfId, logWebSelfId: mocks.logWebSelfId,
})); }));
vi.mock("../config/config.js", () => ({ vi.mock("../config/config.js", () => ({

View File

@@ -179,7 +179,9 @@ export async function startControlChannel(
respond(undefined, false, `unknown method: ${parsed.method}`); respond(undefined, false, `unknown method: ${parsed.method}`);
break; break;
} }
logDebug(`control: ${parsed.method} responded in ${Date.now() - started}ms`); logDebug(
`control: ${parsed.method} responded in ${Date.now() - started}ms`,
);
} catch (err) { } catch (err) {
logError( logError(
`control: ${parsed.method} failed in ${Date.now() - started}ms: ${String(err)}`, `control: ${parsed.method} failed in ${Date.now() - started}ms: ${String(err)}`,