fix: wire talk menu + mac build
This commit is contained in:
@@ -41,6 +41,7 @@
|
|||||||
- Also read the shared guardrails at `~/Projects/oracle/AGENTS.md` and `~/Projects/agent-scripts/AGENTS.MD` before making changes; align with any cross-repo rules noted there.
|
- Also read the shared guardrails at `~/Projects/oracle/AGENTS.md` and `~/Projects/agent-scripts/AGENTS.MD` before making changes; align with any cross-repo rules noted there.
|
||||||
- SwiftUI state management (iOS/macOS): prefer the `Observation` framework (`@Observable`, `@Bindable`) over `ObservableObject`/`@StateObject`; don’t introduce new `ObservableObject` unless required for compatibility, and migrate existing usages when touching related code.
|
- SwiftUI state management (iOS/macOS): prefer the `Observation` framework (`@Observable`, `@Bindable`) over `ObservableObject`/`@StateObject`; don’t introduce new `ObservableObject` unless required for compatibility, and migrate existing usages when touching related code.
|
||||||
- **Restart apps:** “restart iOS/Android apps” means rebuild (recompile/install) and relaunch, not just kill/launch.
|
- **Restart apps:** “restart iOS/Android apps” means rebuild (recompile/install) and relaunch, not just kill/launch.
|
||||||
|
- **Device checks:** before testing, check whether real devices (iOS/Android) are connected and use them; fall back to simulators/emulators only when none are available.
|
||||||
- iOS Team ID lookup: `security find-identity -p codesigning -v` → use Apple Development (…) TEAMID. Fallback: `defaults read com.apple.dt.Xcode IDEProvisioningTeamIdentifiers`.
|
- iOS Team ID lookup: `security find-identity -p codesigning -v` → use Apple Development (…) TEAMID. Fallback: `defaults read com.apple.dt.Xcode IDEProvisioningTeamIdentifiers`.
|
||||||
- A2UI bundle hash: `src/canvas-host/a2ui/.bundle.hash` is auto-generated; regenerate via `pnpm canvas:a2ui:bundle` (or `scripts/bundle-a2ui.sh`) instead of manual conflict resolution.
|
- A2UI bundle hash: `src/canvas-host/a2ui/.bundle.hash` is auto-generated; regenerate via `pnpm canvas:a2ui:bundle` (or `scripts/bundle-a2ui.sh`) instead of manual conflict resolution.
|
||||||
- Notary key file lives at `~/Library/CloudStorage/Dropbox/Backup/AppStore/AuthKey_NJF3NFGTS3.p8` (Sparkle keys live under `~/Library/CloudStorage/Dropbox/Backup/Sparkle`).
|
- Notary key file lives at `~/Library/CloudStorage/Dropbox/Backup/AppStore/AuthKey_NJF3NFGTS3.p8` (Sparkle keys live under `~/Library/CloudStorage/Dropbox/Backup/Sparkle`).
|
||||||
|
|||||||
@@ -277,14 +277,25 @@ struct ConfigSettings: View {
|
|||||||
GridRow {
|
GridRow {
|
||||||
self.gridLabel("Voice ID")
|
self.gridLabel("Voice ID")
|
||||||
VStack(alignment: .leading, spacing: 6) {
|
VStack(alignment: .leading, spacing: 6) {
|
||||||
ComboBox("ElevenLabs voice ID", text: self.$talkVoiceId) {
|
HStack(spacing: 8) {
|
||||||
ForEach(self.talkVoiceSuggestions, id: \.self) { value in
|
TextField("ElevenLabs voice ID", text: self.$talkVoiceId)
|
||||||
Text(value).tag(value)
|
.textFieldStyle(.roundedBorder)
|
||||||
|
.frame(maxWidth: .infinity)
|
||||||
|
.onChange(of: self.talkVoiceId) { _, _ in self.autosaveConfig() }
|
||||||
|
if !self.talkVoiceSuggestions.isEmpty {
|
||||||
|
Menu {
|
||||||
|
ForEach(self.talkVoiceSuggestions, id: \.self) { value in
|
||||||
|
Button(value) {
|
||||||
|
self.talkVoiceId = value
|
||||||
|
self.autosaveConfig()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} label: {
|
||||||
|
Label("Suggestions", systemImage: "chevron.up.chevron.down")
|
||||||
|
}
|
||||||
|
.fixedSize()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.textFieldStyle(.roundedBorder)
|
|
||||||
.frame(maxWidth: .infinity)
|
|
||||||
.onChange(of: self.talkVoiceId) { _, _ in self.autosaveConfig() }
|
|
||||||
Text("Defaults to ELEVENLABS_VOICE_ID / SAG_VOICE_ID if unset.")
|
Text("Defaults to ELEVENLABS_VOICE_ID / SAG_VOICE_ID if unset.")
|
||||||
.font(.footnote)
|
.font(.footnote)
|
||||||
.foregroundStyle(.secondary)
|
.foregroundStyle(.secondary)
|
||||||
|
|||||||
@@ -80,11 +80,6 @@ struct MenuContent: View {
|
|||||||
if self.showVoiceWakeMicPicker {
|
if self.showVoiceWakeMicPicker {
|
||||||
self.voiceWakeMicMenu
|
self.voiceWakeMicMenu
|
||||||
}
|
}
|
||||||
Toggle(isOn: self.talkBinding) {
|
|
||||||
Label("Talk", systemImage: "bubble.left.and.waveform")
|
|
||||||
}
|
|
||||||
.disabled(!voiceWakeSupported)
|
|
||||||
.opacity(voiceWakeSupported ? 1 : 0.5)
|
|
||||||
Divider()
|
Divider()
|
||||||
Button {
|
Button {
|
||||||
Task { @MainActor in
|
Task { @MainActor in
|
||||||
@@ -115,6 +110,13 @@ struct MenuContent: View {
|
|||||||
systemImage: "rectangle.inset.filled.on.rectangle")
|
systemImage: "rectangle.inset.filled.on.rectangle")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Button {
|
||||||
|
Task { await self.state.setTalkEnabled(!self.state.talkEnabled) }
|
||||||
|
} label: {
|
||||||
|
Label(self.state.talkEnabled ? "Stop Talk Mode" : "Talk Mode", systemImage: "bubble.left.and.waveform")
|
||||||
|
}
|
||||||
|
.disabled(!voiceWakeSupported)
|
||||||
|
.opacity(voiceWakeSupported ? 1 : 0.5)
|
||||||
Divider()
|
Divider()
|
||||||
Button("Settings…") { self.open(tab: .general) }
|
Button("Settings…") { self.open(tab: .general) }
|
||||||
.keyboardShortcut(",", modifiers: [.command])
|
.keyboardShortcut(",", modifiers: [.command])
|
||||||
@@ -344,14 +346,6 @@ struct MenuContent: View {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
private var talkBinding: Binding<Bool> {
|
|
||||||
Binding(
|
|
||||||
get: { self.state.talkEnabled },
|
|
||||||
set: { newValue in
|
|
||||||
Task { await self.state.setTalkEnabled(newValue) }
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
private var showVoiceWakeMicPicker: Bool {
|
private var showVoiceWakeMicPicker: Bool {
|
||||||
voiceWakeSupported && self.state.swabbleEnabled
|
voiceWakeSupported && self.state.swabbleEnabled
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import Foundation
|
|||||||
import OSLog
|
import OSLog
|
||||||
|
|
||||||
@MainActor
|
@MainActor
|
||||||
final class TalkAudioPlayer: NSObject, AVAudioPlayerDelegate {
|
final class TalkAudioPlayer: NSObject, @preconcurrency AVAudioPlayerDelegate {
|
||||||
static let shared = TalkAudioPlayer()
|
static let shared = TalkAudioPlayer()
|
||||||
|
|
||||||
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "talk.tts")
|
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "talk.tts")
|
||||||
|
|||||||
@@ -87,9 +87,9 @@ actor TalkModeRuntime {
|
|||||||
|
|
||||||
private struct RecognitionUpdate {
|
private struct RecognitionUpdate {
|
||||||
let transcript: String?
|
let transcript: String?
|
||||||
let segments: [SFTranscriptionSegment]
|
let hasConfidence: Bool
|
||||||
let isFinal: Bool
|
let isFinal: Bool
|
||||||
let error: Error?
|
let errorDescription: String?
|
||||||
let generation: Int
|
let generation: Int
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -136,12 +136,13 @@ actor TalkModeRuntime {
|
|||||||
|
|
||||||
self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self, generation] result, error in
|
self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self, generation] result, error in
|
||||||
guard let self else { return }
|
guard let self else { return }
|
||||||
|
let segments = result?.bestTranscription.segments ?? []
|
||||||
let transcript = result?.bestTranscription.formattedString
|
let transcript = result?.bestTranscription.formattedString
|
||||||
let update = RecognitionUpdate(
|
let update = RecognitionUpdate(
|
||||||
transcript: transcript,
|
transcript: transcript,
|
||||||
segments: result?.bestTranscription.segments ?? [],
|
hasConfidence: segments.contains { $0.confidence > 0.6 },
|
||||||
isFinal: result?.isFinal ?? false,
|
isFinal: result?.isFinal ?? false,
|
||||||
error: error,
|
errorDescription: error?.localizedDescription,
|
||||||
generation: generation)
|
generation: generation)
|
||||||
Task { await self.handleRecognition(update) }
|
Task { await self.handleRecognition(update) }
|
||||||
}
|
}
|
||||||
@@ -161,14 +162,14 @@ actor TalkModeRuntime {
|
|||||||
|
|
||||||
private func handleRecognition(_ update: RecognitionUpdate) async {
|
private func handleRecognition(_ update: RecognitionUpdate) async {
|
||||||
guard update.generation == self.recognitionGeneration else { return }
|
guard update.generation == self.recognitionGeneration else { return }
|
||||||
if let error = update.error {
|
if let errorDescription = update.errorDescription {
|
||||||
self.logger.debug("talk recognition error: \(error.localizedDescription, privacy: .public)")
|
self.logger.debug("talk recognition error: \(errorDescription, privacy: .public)")
|
||||||
}
|
}
|
||||||
guard let transcript = update.transcript else { return }
|
guard let transcript = update.transcript else { return }
|
||||||
|
|
||||||
let trimmed = transcript.trimmingCharacters(in: .whitespacesAndNewlines)
|
let trimmed = transcript.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
if self.phase == .speaking, self.interruptOnSpeech {
|
if self.phase == .speaking, self.interruptOnSpeech {
|
||||||
if await self.shouldInterrupt(transcript: trimmed, segments: update.segments) {
|
if await self.shouldInterrupt(transcript: trimmed, hasConfidence: update.hasConfidence) {
|
||||||
await self.stopSpeaking(reason: .speech)
|
await self.stopSpeaking(reason: .speech)
|
||||||
self.lastTranscript = ""
|
self.lastTranscript = ""
|
||||||
self.lastHeard = nil
|
self.lastHeard = nil
|
||||||
@@ -194,11 +195,14 @@ actor TalkModeRuntime {
|
|||||||
private func startSilenceMonitor() {
|
private func startSilenceMonitor() {
|
||||||
self.silenceTask?.cancel()
|
self.silenceTask?.cancel()
|
||||||
self.silenceTask = Task { [weak self] in
|
self.silenceTask = Task { [weak self] in
|
||||||
guard let self else { return }
|
await self?.silenceLoop()
|
||||||
while self.isEnabled {
|
}
|
||||||
try? await Task.sleep(nanoseconds: 200_000_000)
|
}
|
||||||
await self.checkSilence()
|
|
||||||
}
|
private func silenceLoop() async {
|
||||||
|
while self.isEnabled {
|
||||||
|
try? await Task.sleep(nanoseconds: 200_000_000)
|
||||||
|
await self.checkSilence()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -297,9 +301,9 @@ actor TalkModeRuntime {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private func waitForChatCompletion(runId: String, timeoutSeconds: Int) async -> ChatCompletionState {
|
private func waitForChatCompletion(runId: String, timeoutSeconds: Int) async -> ChatCompletionState {
|
||||||
await withTaskGroup(of: ChatCompletionState.self) { group in
|
let stream = await GatewayConnection.shared.subscribe()
|
||||||
|
return await withTaskGroup(of: ChatCompletionState.self) { group in
|
||||||
group.addTask { [runId] in
|
group.addTask { [runId] in
|
||||||
let stream = GatewayConnection.shared.subscribe()
|
|
||||||
for await push in stream {
|
for await push in stream {
|
||||||
if case let .event(evt) = push, evt.event == "chat", let payload = evt.payload {
|
if case let .event(evt) = push, evt.event == "chat", let payload = evt.payload {
|
||||||
if let chat = try? JSONDecoder().decode(
|
if let chat = try? JSONDecoder().decode(
|
||||||
@@ -332,13 +336,13 @@ actor TalkModeRuntime {
|
|||||||
do {
|
do {
|
||||||
let history = try await GatewayConnection.shared.chatHistory(sessionKey: sessionKey)
|
let history = try await GatewayConnection.shared.chatHistory(sessionKey: sessionKey)
|
||||||
let messages = history.messages ?? []
|
let messages = history.messages ?? []
|
||||||
let decoded = messages.compactMap { item in
|
let decoded: [ClawdisChatMessage] = messages.compactMap { item in
|
||||||
guard let data = try? JSONEncoder().encode(item) else { return nil }
|
guard let data = try? JSONEncoder().encode(item) else { return nil }
|
||||||
return try? JSONDecoder().decode(ClawdisChatMessage.self, from: data)
|
return try? JSONDecoder().decode(ClawdisChatMessage.self, from: data)
|
||||||
}
|
}
|
||||||
guard let assistant = decoded.last(where: { $0.role == "assistant" }) else { return nil }
|
guard let assistant = decoded.last(where: { $0.role == "assistant" }) else { return nil }
|
||||||
let text = assistant.content.compactMap { $0.text }.joined(separator: "\n")
|
let text = assistant.content.compactMap { $0.text }.joined(separator: "\n")
|
||||||
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
let trimmed = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
|
||||||
return trimmed.isEmpty ? nil : trimmed
|
return trimmed.isEmpty ? nil : trimmed
|
||||||
} catch {
|
} catch {
|
||||||
self.logger.error("talk history fetch failed: \(error.localizedDescription, privacy: .public)")
|
self.logger.error("talk history fetch failed: \(error.localizedDescription, privacy: .public)")
|
||||||
@@ -418,7 +422,7 @@ actor TalkModeRuntime {
|
|||||||
let audio = try await ElevenLabsClient(apiKey: apiKey).synthesize(
|
let audio = try await ElevenLabsClient(apiKey: apiKey).synthesize(
|
||||||
voiceId: voiceId,
|
voiceId: voiceId,
|
||||||
request: request)
|
request: request)
|
||||||
let result = await MainActor.run { await TalkAudioPlayer.shared.play(data: audio) }
|
let result = await TalkAudioPlayer.shared.play(data: audio)
|
||||||
if !result.finished, let interruptedAt = result.interruptedAt, self.phase == .speaking {
|
if !result.finished, let interruptedAt = result.interruptedAt, self.phase == .speaking {
|
||||||
if self.interruptOnSpeech {
|
if self.interruptOnSpeech {
|
||||||
self.lastInterruptedAtSeconds = interruptedAt
|
self.lastInterruptedAtSeconds = interruptedAt
|
||||||
@@ -533,7 +537,7 @@ actor TalkModeRuntime {
|
|||||||
return sqrt(sum / Double(frameCount))
|
return sqrt(sum / Double(frameCount))
|
||||||
}
|
}
|
||||||
|
|
||||||
private func shouldInterrupt(transcript: String, segments: [SFTranscriptionSegment]) async -> Bool {
|
private func shouldInterrupt(transcript: String, hasConfidence: Bool) async -> Bool {
|
||||||
let trimmed = transcript.trimmingCharacters(in: .whitespacesAndNewlines)
|
let trimmed = transcript.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
guard trimmed.count >= 3 else { return false }
|
guard trimmed.count >= 3 else { return false }
|
||||||
if self.isLikelyEcho(of: trimmed) { return false }
|
if self.isLikelyEcho(of: trimmed) { return false }
|
||||||
@@ -541,7 +545,6 @@ actor TalkModeRuntime {
|
|||||||
if let lastSpeechEnergyAt, now.timeIntervalSince(lastSpeechEnergyAt) > 0.35 {
|
if let lastSpeechEnergyAt, now.timeIntervalSince(lastSpeechEnergyAt) > 0.35 {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
let hasConfidence = segments.contains { $0.confidence > 0.6 }
|
|
||||||
return hasConfidence
|
return hasConfidence
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user