Docs: voice overlay plan and fix web mocks
This commit is contained in:
@@ -28,19 +28,71 @@ enum VoiceWakeChime: Codable, Equatable, Sendable {
|
|||||||
|
|
||||||
struct VoiceWakeChimeCatalog {
|
struct VoiceWakeChimeCatalog {
|
||||||
/// Options shown in the picker.
|
/// Options shown in the picker.
|
||||||
static let systemOptions: [String] = [
|
static let systemOptions: [String] = {
|
||||||
"Glass", // default
|
let discovered = Self.discoveredSoundMap.keys
|
||||||
"Ping",
|
let fallback: [String] = [
|
||||||
"Pop",
|
"Glass", // default
|
||||||
"Frog",
|
"Ping",
|
||||||
"Submarine",
|
"Pop",
|
||||||
"Funk",
|
"Frog",
|
||||||
"Tink",
|
"Submarine",
|
||||||
]
|
"Funk",
|
||||||
|
"Tink",
|
||||||
|
"Basso",
|
||||||
|
"Blow",
|
||||||
|
"Bottle",
|
||||||
|
"Hero",
|
||||||
|
"Morse",
|
||||||
|
"Purr",
|
||||||
|
"Sosumi",
|
||||||
|
"Mail Sent",
|
||||||
|
]
|
||||||
|
|
||||||
|
// Keep Glass first, then present the rest alphabetically without duplicates.
|
||||||
|
var names = Set(discovered).union(fallback)
|
||||||
|
names.remove("Glass")
|
||||||
|
let sorted = names.sorted { $0.localizedCaseInsensitiveCompare($1) == .orderedAscending }
|
||||||
|
return ["Glass"] + sorted
|
||||||
|
}()
|
||||||
|
|
||||||
static func displayName(for raw: String) -> String {
|
static func displayName(for raw: String) -> String {
|
||||||
return raw
|
return raw
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static func url(for name: String) -> URL? {
|
||||||
|
return self.discoveredSoundMap[name]
|
||||||
|
}
|
||||||
|
|
||||||
|
private static let allowedExtensions: Set<String> = [
|
||||||
|
"aif", "aiff", "caf", "wav", "m4a", "mp3",
|
||||||
|
]
|
||||||
|
|
||||||
|
private static let searchRoots: [URL] = [
|
||||||
|
FileManager.default.homeDirectoryForCurrentUser.appendingPathComponent("Library/Sounds"),
|
||||||
|
URL(fileURLWithPath: "/Library/Sounds"),
|
||||||
|
URL(fileURLWithPath: "/System/Applications/Mail.app/Contents/Resources"), // Mail “swoosh”
|
||||||
|
URL(fileURLWithPath: "/System/Library/Sounds"),
|
||||||
|
]
|
||||||
|
|
||||||
|
private static let discoveredSoundMap: [String: URL] = {
|
||||||
|
var map: [String: URL] = [:]
|
||||||
|
for root in self.searchRoots {
|
||||||
|
guard let contents = try? FileManager.default.contentsOfDirectory(
|
||||||
|
at: root,
|
||||||
|
includingPropertiesForKeys: nil,
|
||||||
|
options: [.skipsHiddenFiles])
|
||||||
|
else { continue }
|
||||||
|
|
||||||
|
for url in contents where self.allowedExtensions.contains(url.pathExtension.lowercased()) {
|
||||||
|
let name = url.deletingPathExtension().lastPathComponent
|
||||||
|
// Preserve the first match in priority order.
|
||||||
|
if map[name] == nil {
|
||||||
|
map[name] = url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return map
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
@MainActor
|
@MainActor
|
||||||
@@ -62,7 +114,13 @@ enum VoiceWakeChimePlayer {
|
|||||||
case .none:
|
case .none:
|
||||||
return nil
|
return nil
|
||||||
case let .system(name):
|
case let .system(name):
|
||||||
return NSSound(named: NSSound.Name(name))
|
if let named = NSSound(named: NSSound.Name(name)) {
|
||||||
|
return named
|
||||||
|
}
|
||||||
|
if let url = VoiceWakeChimeCatalog.url(for: name) {
|
||||||
|
return NSSound(contentsOf: url, byReference: false)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
|
||||||
case let .custom(_, bookmark):
|
case let .custom(_, bookmark):
|
||||||
var stale = false
|
var stale = false
|
||||||
|
|||||||
43
docs/mac/voice-overlay.md
Normal file
43
docs/mac/voice-overlay.md
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
## Voice Overlay Lifecycle (macOS)
|
||||||
|
|
||||||
|
Audience: macOS app contributors. Goal: keep the voice overlay predictable when wake-word and push-to-talk overlap.
|
||||||
|
|
||||||
|
### Current intent
|
||||||
|
- If the overlay is already visible from wake-word and the user presses the hotkey, the hotkey session *adopts* the existing text instead of resetting it. The overlay stays up while the hotkey is held. When the user releases the hotkey, the session sends if the trimmed text is non-empty; otherwise it dismisses the overlay.
|
||||||
|
- Wake-word alone still auto-sends on silence; push-to-talk sends immediately on release.
|
||||||
|
|
||||||
|
### Proposed architecture (to implement next)
|
||||||
|
1. **VoiceSessionCoordinator (actor)**
|
||||||
|
- Owns exactly one `VoiceSession` at a time.
|
||||||
|
- API (token-based): `beginWakeCapture`, `beginPushToTalk`, `updatePartial`, `endCapture`, `cancel`, `applyCooldown`.
|
||||||
|
- Drops callbacks that carry stale tokens (prevents old recognizers from reopening the overlay).
|
||||||
|
2. **VoiceSession (model)**
|
||||||
|
- Fields: `token`, `source` (`wakeWord` | `pushToTalk`), committed/volatile text, chime flags, timers (auto-send, idle), `overlayMode` (`display` | `editing` | `sending`), cooldown deadline.
|
||||||
|
3. **Overlay binding**
|
||||||
|
- `VoiceSessionPublisher` (`ObservableObject`) mirrors the active session into SwiftUI.
|
||||||
|
- `VoiceWakeOverlayView` renders only via the publisher; it never mutates global singletons directly.
|
||||||
|
- Overlay user actions (`sendNow`, `dismiss`, `edit`) call back into the coordinator with the session token.
|
||||||
|
4. **Unified send path**
|
||||||
|
- On `endCapture`: if trimmed text is empty → dismiss; else `performSend(session:)` (plays send chime once, forwards, dismisses).
|
||||||
|
- Push-to-talk: no delay; wake-word: optional delay for auto-send.
|
||||||
|
- Apply a short cooldown to the wake runtime after push-to-talk finishes so wake-word doesn’t immediately retrigger.
|
||||||
|
5. **Logging**
|
||||||
|
- Coordinator emits `.info` logs in subsystem `com.steipete.clawdis`, categories `voicewake.overlay` and `voicewake.chime`.
|
||||||
|
- Key events: `session_started`, `adopted_by_push_to_talk`, `partial`, `finalized`, `send`, `dismiss`, `cancel`, `cooldown`.
|
||||||
|
|
||||||
|
### Debugging checklist
|
||||||
|
- Stream logs while reproducing a sticky overlay:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo log stream --predicate 'subsystem == "com.steipete.clawdis" AND category CONTAINS "voicewake"' --level info --style compact
|
||||||
|
```
|
||||||
|
- Verify that only one session token is active at a time; the coordinator should drop stale callbacks.
|
||||||
|
- Ensure push-to-talk release always calls `endCapture` with the active token; if the trimmed text is empty, expect `dismiss` without a chime or send.
|
||||||
|
|
||||||
|
### Migration steps (suggested)
|
||||||
|
1. Add `VoiceSessionCoordinator`, `VoiceSession`, and `VoiceSessionPublisher`.
|
||||||
|
2. Refactor `VoiceWakeRuntime` to create/update/end sessions instead of touching `VoiceWakeOverlayController` directly.
|
||||||
|
3. Refactor `VoicePushToTalk` to adopt existing sessions and call `endCapture` on release; apply runtime cooldown.
|
||||||
|
4. Wire `VoiceWakeOverlayController` to the publisher; remove direct calls from runtime/PTT.
|
||||||
|
5. Add integration tests for session adoption, cooldown, and empty-text dismissal.
|
||||||
|
|
||||||
@@ -6,6 +6,7 @@ import { getReplyFromConfig } from "./reply.js";
|
|||||||
const webMocks = vi.hoisted(() => ({
|
const webMocks = vi.hoisted(() => ({
|
||||||
webAuthExists: vi.fn().mockResolvedValue(true),
|
webAuthExists: vi.fn().mockResolvedValue(true),
|
||||||
getWebAuthAgeMs: vi.fn().mockReturnValue(120_000),
|
getWebAuthAgeMs: vi.fn().mockReturnValue(120_000),
|
||||||
|
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
vi.mock("../web/session.js", () => webMocks);
|
vi.mock("../web/session.js", () => webMocks);
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ const mocks = vi.hoisted(() => ({
|
|||||||
resolveStorePath: vi.fn().mockReturnValue("/tmp/sessions.json"),
|
resolveStorePath: vi.fn().mockReturnValue("/tmp/sessions.json"),
|
||||||
webAuthExists: vi.fn().mockResolvedValue(true),
|
webAuthExists: vi.fn().mockResolvedValue(true),
|
||||||
getWebAuthAgeMs: vi.fn().mockReturnValue(5000),
|
getWebAuthAgeMs: vi.fn().mockReturnValue(5000),
|
||||||
|
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
|
||||||
logWebSelfId: vi.fn(),
|
logWebSelfId: vi.fn(),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@@ -17,6 +18,7 @@ vi.mock("../config/sessions.js", () => ({
|
|||||||
vi.mock("../web/session.js", () => ({
|
vi.mock("../web/session.js", () => ({
|
||||||
webAuthExists: mocks.webAuthExists,
|
webAuthExists: mocks.webAuthExists,
|
||||||
getWebAuthAgeMs: mocks.getWebAuthAgeMs,
|
getWebAuthAgeMs: mocks.getWebAuthAgeMs,
|
||||||
|
readWebSelfId: mocks.readWebSelfId,
|
||||||
logWebSelfId: mocks.logWebSelfId,
|
logWebSelfId: mocks.logWebSelfId,
|
||||||
}));
|
}));
|
||||||
vi.mock("../config/config.js", () => ({
|
vi.mock("../config/config.js", () => ({
|
||||||
|
|||||||
@@ -179,7 +179,9 @@ export async function startControlChannel(
|
|||||||
respond(undefined, false, `unknown method: ${parsed.method}`);
|
respond(undefined, false, `unknown method: ${parsed.method}`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
logDebug(`control: ${parsed.method} responded in ${Date.now() - started}ms`);
|
logDebug(
|
||||||
|
`control: ${parsed.method} responded in ${Date.now() - started}ms`,
|
||||||
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logError(
|
logError(
|
||||||
`control: ${parsed.method} failed in ${Date.now() - started}ms: ${String(err)}`,
|
`control: ${parsed.method} failed in ${Date.now() - started}ms: ${String(err)}`,
|
||||||
|
|||||||
Reference in New Issue
Block a user