feat: talk mode key distribution and tts polling
This commit is contained in:
@@ -329,18 +329,26 @@ final class AppState {
|
||||
/// Turns talk mode on or off and reports the resulting state to the gateway.
///
/// The gateway is told which phase was reached on every path except preview:
/// - `"disabled"` when voice wake is unsupported or talk mode is switched off,
/// - `"enabled"` when the required permissions are (or become) granted,
/// - `"denied"` when the interactive permission request is refused.
///
/// - Parameter enabled: The requested talk mode state. The stored
///   `talkEnabled` value can end up `false` even when `true` was requested,
///   if voice wake is unsupported or permissions are not granted.
func setTalkEnabled(_ enabled: Bool) async {
    guard voiceWakeSupported else {
        self.talkEnabled = false
        await GatewayConnection.shared.talkMode(enabled: false, phase: "disabled")
        return
    }

    self.talkEnabled = enabled
    // Previews only track the local flag; they never touch permissions or the gateway.
    guard !self.isPreview else { return }

    // The earlier bare `return` here skipped the gateway notification entirely.
    if !enabled {
        await GatewayConnection.shared.talkMode(enabled: false, phase: "disabled")
        return
    }

    // Permissions already granted: no prompt needed, just announce the new state.
    if PermissionManager.voiceWakePermissionsGranted() {
        await GatewayConnection.shared.talkMode(enabled: true, phase: "enabled")
        return
    }

    // Ask interactively; the stored flag follows the outcome of the prompt.
    let granted = await PermissionManager.ensureVoiceWakePermissions(interactive: true)
    self.talkEnabled = granted
    await GatewayConnection.shared.talkMode(enabled: granted, phase: granted ? "enabled" : "denied")
}
|
||||
|
||||
// MARK: - Global wake words sync (Gateway-owned)
|
||||
|
||||
@@ -34,6 +34,7 @@ struct ConfigSettings: View {
|
||||
@State private var talkVoiceId: String = ""
|
||||
@State private var talkInterruptOnSpeech: Bool = true
|
||||
@State private var talkApiKey: String = ""
|
||||
@State private var gatewayApiKeyFound = false
|
||||
|
||||
var body: some View {
|
||||
ScrollView { self.content }
|
||||
@@ -49,6 +50,7 @@ struct ConfigSettings: View {
|
||||
self.hasLoaded = true
|
||||
self.loadConfig()
|
||||
await self.loadModels()
|
||||
await self.refreshGatewayTalkApiKey()
|
||||
self.allowAutosave = true
|
||||
}
|
||||
}
|
||||
@@ -323,6 +325,10 @@ struct ConfigSettings: View {
|
||||
Text("Using ELEVENLABS_API_KEY from the environment.")
|
||||
.font(.footnote)
|
||||
.foregroundStyle(.secondary)
|
||||
} else if self.gatewayApiKeyFound && self.talkApiKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
||||
Text("Using API key from the gateway profile.")
|
||||
.font(.footnote)
|
||||
.foregroundStyle(.secondary)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -392,6 +398,20 @@ struct ConfigSettings: View {
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches the gateway config (`config.get`, 8000 ms timeout) and records in
/// `gatewayApiKeyFound` whether its `talk.apiKey` entry holds a non-blank
/// string. Any error thrown by the request counts as "no key found".
private func refreshGatewayTalkApiKey() async {
    let snapshot: ConfigSnapshot
    do {
        snapshot = try await GatewayConnection.shared.requestDecoded(
            method: .configGet,
            params: nil,
            timeoutMs: 8000)
    } catch {
        self.gatewayApiKeyFound = false
        return
    }

    let talkSection = snapshot.config?["talk"]?.dictionaryValue
    let rawKey = talkSection?["apiKey"]?.stringValue ?? ""
    // A key made up only of whitespace is treated the same as a missing key.
    let hasKey = !rawKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
    self.gatewayApiKeyFound = hasKey
}
|
||||
|
||||
private func autosaveConfig() {
|
||||
guard self.allowAutosave else { return }
|
||||
Task { await self.saveConfig() }
|
||||
@@ -487,12 +507,14 @@ struct ConfigSettings: View {
|
||||
if !self.talkApiKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
||||
return "ElevenLabs API key: stored in config"
|
||||
}
|
||||
if self.gatewayApiKeyFound { return "ElevenLabs API key: found (gateway)" }
|
||||
return "ElevenLabs API key: missing"
|
||||
}
|
||||
|
||||
/// Status colour for the API key row: green when a key is available from the
/// environment, from the locally entered config value, or from the gateway;
/// red when none of those sources has one.
private var apiKeyStatusColor: Color {
    let hasConfigKey = !self.talkApiKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
    let hasAnyKey = self.hasEnvApiKey || hasConfigKey || self.gatewayApiKeyFound
    return hasAnyKey ? .green : .red
}
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ actor GatewayConnection {
|
||||
case providersStatus = "providers.status"
|
||||
case configGet = "config.get"
|
||||
case configSet = "config.set"
|
||||
case talkMode = "talk.mode"
|
||||
case webLoginStart = "web.login.start"
|
||||
case webLoginWait = "web.login.wait"
|
||||
case webLogout = "web.logout"
|
||||
@@ -483,6 +484,12 @@ extension GatewayConnection {
|
||||
return res.aborted ?? false
|
||||
}
|
||||
|
||||
/// Sends a `talk.mode` request to the gateway.
///
/// - Parameters:
///   - enabled: Sent as the `enabled` field of the request params.
///   - phase: Optional phase label; the `phase` field is omitted when `nil`.
///
/// The request is best-effort: any error thrown by `requestVoid` is discarded.
func talkMode(enabled: Bool, phase: String? = nil) async {
    var payload: [String: AnyCodable] = [:]
    payload["enabled"] = AnyCodable(enabled)
    if let phase {
        payload["phase"] = AnyCodable(phase)
    }
    try? await self.requestVoid(method: .talkMode, params: payload)
}
|
||||
|
||||
// MARK: - VoiceWake
|
||||
|
||||
func voiceWakeGetTriggers() async throws -> [String] {
|
||||
|
||||
@@ -20,6 +20,7 @@ final class TalkModeController {
|
||||
|
||||
/// Forwards a phase change to the overlay and, in a separate task, to the gateway.
///
/// - Parameter phase: The new talk mode phase; its raw value is what the
///   gateway receives, together with the current `talkEnabled` flag.
func updatePhase(_ phase: TalkModePhase) {
    TalkOverlayController.shared.updatePhase(phase)
    // Fire-and-forget: the overlay update above does not wait for the gateway call.
    Task {
        let isEnabled = AppStateStore.shared.talkEnabled
        await GatewayConnection.shared.talkMode(enabled: isEnabled, phase: phase.rawValue)
    }
}
|
||||
|
||||
func updateLevel(_ level: Double) {
|
||||
|
||||
@@ -244,6 +244,7 @@ actor TalkModeRuntime {
|
||||
await self.reloadConfig()
|
||||
let prompt = self.buildPrompt(transcript: transcript)
|
||||
let runId = UUID().uuidString
|
||||
let startedAt = Date().timeIntervalSince1970
|
||||
|
||||
do {
|
||||
let response = try await GatewayConnection.shared.chatSend(
|
||||
@@ -261,7 +262,11 @@ actor TalkModeRuntime {
|
||||
return
|
||||
}
|
||||
|
||||
guard let assistantText = await self.latestAssistantText(sessionKey: "main") else {
|
||||
guard let assistantText = await self.waitForAssistantText(
|
||||
sessionKey: "main",
|
||||
since: startedAt,
|
||||
timeoutSeconds: 12)
|
||||
else {
|
||||
await self.startListening()
|
||||
await self.startRecognition()
|
||||
return
|
||||
@@ -335,7 +340,22 @@ actor TalkModeRuntime {
|
||||
}
|
||||
}
|
||||
|
||||
private func latestAssistantText(sessionKey: String) async -> String? {
|
||||
/// Polls the chat history until an assistant message appears or time runs out.
///
/// - Parameters:
///   - sessionKey: Session whose history is polled.
///   - since: Timestamp passed through to `latestAssistantText(sessionKey:since:)`
///     to filter out older assistant messages.
///   - timeoutSeconds: Maximum time to keep polling, in seconds.
/// - Returns: The assistant text once one is found, or `nil` if the deadline
///   passes or the surrounding task is cancelled first.
private func waitForAssistantText(
    sessionKey: String,
    since: Double,
    timeoutSeconds: Int) async -> String?
{
    let deadline = Date().addingTimeInterval(TimeInterval(timeoutSeconds))
    while Date() < deadline {
        // Stop before issuing another history request if the task was cancelled.
        if Task.isCancelled { return nil }
        if let text = await self.latestAssistantText(sessionKey: sessionKey, since: since) {
            return text
        }
        do {
            try await Task.sleep(nanoseconds: 300_000_000)
        } catch {
            // Task.sleep throws as soon as the task is cancelled. Swallowing that
            // with `try?` would turn this loop into a tight spin of history
            // requests until the deadline, so bail out instead.
            return nil
        }
    }
    return nil
}
|
||||
|
||||
private func latestAssistantText(sessionKey: String, since: Double? = nil) async -> String? {
|
||||
do {
|
||||
let history = try await GatewayConnection.shared.chatHistory(sessionKey: sessionKey)
|
||||
let messages = history.messages ?? []
|
||||
@@ -343,7 +363,13 @@ actor TalkModeRuntime {
|
||||
guard let data = try? JSONEncoder().encode(item) else { return nil }
|
||||
return try? JSONDecoder().decode(ClawdisChatMessage.self, from: data)
|
||||
}
|
||||
guard let assistant = decoded.last(where: { $0.role == "assistant" }) else { return nil }
|
||||
let assistant = decoded.last { message in
|
||||
guard message.role == "assistant" else { return false }
|
||||
guard let since else { return true }
|
||||
guard let timestamp = message.timestamp else { return false }
|
||||
return timestamp >= since - 0.5
|
||||
}
|
||||
guard let assistant else { return nil }
|
||||
let text = assistant.content.compactMap { $0.text }.joined(separator: "\n")
|
||||
let trimmed = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
|
||||
return trimmed.isEmpty ? nil : trimmed
|
||||
|
||||
@@ -20,9 +20,9 @@ final class TalkOverlayController {
|
||||
private var window: NSPanel?
|
||||
private var hostingView: NSHostingView<TalkOverlayView>?
|
||||
|
||||
private let width: CGFloat = 120
|
||||
private let height: CGFloat = 120
|
||||
private let padding: CGFloat = 6
|
||||
private let width: CGFloat = 160
|
||||
private let height: CGFloat = 160
|
||||
private let padding: CGFloat = 8
|
||||
|
||||
func present() {
|
||||
self.ensureWindow()
|
||||
|
||||
@@ -7,12 +7,12 @@ struct TalkOverlayView: View {
|
||||
var body: some View {
|
||||
ZStack(alignment: .topLeading) {
|
||||
TalkOrbView(phase: self.controller.model.phase, level: self.controller.model.level)
|
||||
.frame(width: 80, height: 80)
|
||||
.frame(width: 96, height: 96)
|
||||
.contentShape(Rectangle())
|
||||
.onTapGesture {
|
||||
TalkModeController.shared.stopSpeaking(reason: .userTap)
|
||||
}
|
||||
.padding(16)
|
||||
.padding(26)
|
||||
|
||||
Button {
|
||||
TalkModeController.shared.exitTalkMode()
|
||||
@@ -29,7 +29,7 @@ struct TalkOverlayView: View {
|
||||
.padding(4)
|
||||
.onHover { self.hovering = $0 }
|
||||
}
|
||||
.frame(width: 120, height: 120, alignment: .center)
|
||||
.frame(width: 160, height: 160, alignment: .center)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ private struct TalkWaveRings: View {
|
||||
let phase: TalkModePhase
|
||||
let level: Double
|
||||
let time: TimeInterval
|
||||
private let ringColor = Color(red: 0.82, green: 0.94, blue: 1.0)
|
||||
|
||||
var body: some View {
|
||||
ZStack {
|
||||
@@ -80,9 +81,9 @@ private struct TalkWaveRings: View {
|
||||
let progress = (time * speed + Double(idx) * 0.28).truncatingRemainder(dividingBy: 1)
|
||||
let amplitude = phase == .speaking ? 0.95 : phase == .listening ? 0.5 + level * 0.7 : 0.35
|
||||
let scale = 0.75 + progress * amplitude + (phase == .listening ? level * 0.15 : 0)
|
||||
let alpha = phase == .speaking ? 0.55 : phase == .listening ? 0.45 + level * 0.25 : 0.28
|
||||
let alpha = phase == .speaking ? 0.72 : phase == .listening ? 0.58 + level * 0.28 : 0.4
|
||||
Circle()
|
||||
.stroke(Color.white.opacity(alpha - progress * 0.35), lineWidth: 1.2)
|
||||
.stroke(self.ringColor.opacity(alpha - progress * 0.3), lineWidth: 1.6)
|
||||
.scaleEffect(scale)
|
||||
.opacity(alpha - progress * 0.6)
|
||||
}
|
||||
@@ -97,13 +98,13 @@ private struct TalkOrbitArcs: View {
|
||||
ZStack {
|
||||
Circle()
|
||||
.trim(from: 0.08, to: 0.26)
|
||||
.stroke(Color.white.opacity(0.75), style: StrokeStyle(lineWidth: 1.4, lineCap: .round))
|
||||
.stroke(Color.white.opacity(0.88), style: StrokeStyle(lineWidth: 1.6, lineCap: .round))
|
||||
.rotationEffect(.degrees(time * 42))
|
||||
Circle()
|
||||
.trim(from: 0.62, to: 0.86)
|
||||
.stroke(Color.white.opacity(0.55), style: StrokeStyle(lineWidth: 1.2, lineCap: .round))
|
||||
.stroke(Color.white.opacity(0.7), style: StrokeStyle(lineWidth: 1.4, lineCap: .round))
|
||||
.rotationEffect(.degrees(-time * 35))
|
||||
}
|
||||
.scaleEffect(1.05)
|
||||
.scaleEffect(1.08)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user