feat: talk mode key distribution and tts polling
This commit is contained in:
@@ -10,6 +10,8 @@
|
||||
- macOS menu: add a Talk Mode action alongside the Open Dashboard/Chat/Canvas entries.
|
||||
- macOS Debug: hide “Restart Gateway” when the app won’t start a local gateway (remote mode / attach-only).
|
||||
- macOS Talk Mode: orb overlay refresh, ElevenLabs request logging, API key status in settings, and auto-select first voice when none is configured.
|
||||
- Talk Mode: wait for chat history to surface the assistant reply before starting TTS (macOS/iOS/Android).
|
||||
- Gateway config: inject `talk.apiKey` from `ELEVENLABS_API_KEY`/shell profile so nodes can fetch it on demand.
|
||||
- iOS/Android nodes: enable scrolling for loaded web pages in the Canvas WebView (default scaffold stays touch-first).
|
||||
- macOS menu: device list now uses `node.list` (devices only; no agent/tool presence entries).
|
||||
- macOS menu: device list now shows connected nodes only.
|
||||
|
||||
@@ -76,6 +76,7 @@ class TalkModeManager(
|
||||
private var defaultModelId: String? = null
|
||||
private var currentModelId: String? = null
|
||||
private var defaultOutputFormat: String? = null
|
||||
private var apiKey: String? = null
|
||||
private var interruptOnSpeech: Boolean = true
|
||||
private var voiceOverrideActive = false
|
||||
private var modelOverrideActive = false
|
||||
@@ -268,6 +269,7 @@ class TalkModeManager(
|
||||
}
|
||||
|
||||
try {
|
||||
val startedAt = System.currentTimeMillis().toDouble() / 1000.0
|
||||
val runId = sendChat(prompt, bridge)
|
||||
val ok = waitForChatFinal(runId)
|
||||
if (!ok) {
|
||||
@@ -275,7 +277,7 @@ class TalkModeManager(
|
||||
start()
|
||||
return
|
||||
}
|
||||
val assistant = fetchLatestAssistantText(bridge)
|
||||
val assistant = waitForAssistantText(bridge, startedAt, 12_000)
|
||||
if (assistant.isNullOrBlank()) {
|
||||
_statusText.value = "No reply"
|
||||
start()
|
||||
@@ -345,13 +347,34 @@ class TalkModeManager(
|
||||
return result
|
||||
}
|
||||
|
||||
private suspend fun fetchLatestAssistantText(bridge: BridgeSession): String? {
|
||||
/**
 * Polls chat history until an assistant reply is available or the timeout runs out.
 *
 * Each iteration asks [fetchLatestAssistantText] for the newest assistant text
 * (forwarding [sinceSeconds]) and, if nothing usable came back, suspends for
 * 300 ms before trying again.
 *
 * @param bridge session used to issue the history requests.
 * @param sinceSeconds lower-bound timestamp forwarded to [fetchLatestAssistantText].
 * @param timeoutMs maximum polling time, measured with `SystemClock.elapsedRealtime()`.
 * @return the first non-blank assistant text, or `null` if none appeared in time.
 */
private suspend fun waitForAssistantText(
    bridge: BridgeSession,
    sinceSeconds: Double,
    timeoutMs: Long,
): String? {
    val pollingStartedAt = SystemClock.elapsedRealtime()
    while (SystemClock.elapsedRealtime() - pollingStartedAt < timeoutMs) {
        val reply = fetchLatestAssistantText(bridge, sinceSeconds)
        if (reply != null && reply.isNotBlank()) {
            return reply
        }
        // Nothing yet: back off briefly before asking the gateway again.
        delay(300)
    }
    return null
}
|
||||
|
||||
private suspend fun fetchLatestAssistantText(
|
||||
bridge: BridgeSession,
|
||||
sinceSeconds: Double? = null,
|
||||
): String? {
|
||||
val res = bridge.request("chat.history", "{\"sessionKey\":\"main\"}")
|
||||
val root = json.parseToJsonElement(res).asObjectOrNull() ?: return null
|
||||
val messages = root["messages"] as? JsonArray ?: return null
|
||||
for (item in messages.reversed()) {
|
||||
val obj = item.asObjectOrNull() ?: continue
|
||||
if (obj["role"].asStringOrNull() != "assistant") continue
|
||||
if (sinceSeconds != null) {
|
||||
val timestamp = obj["timestamp"].asDoubleOrNull()
|
||||
if (timestamp != null && timestamp < sinceSeconds - 0.5) continue
|
||||
}
|
||||
val content = obj["content"] as? JsonArray ?: continue
|
||||
val text =
|
||||
content.mapNotNull { entry ->
|
||||
@@ -390,7 +413,9 @@ class TalkModeManager(
|
||||
return
|
||||
}
|
||||
|
||||
val apiKey = System.getenv("ELEVENLABS_API_KEY")?.trim()
|
||||
val apiKey =
|
||||
apiKey?.trim()?.takeIf { it.isNotEmpty() }
|
||||
?: System.getenv("ELEVENLABS_API_KEY")?.trim()
|
||||
if (apiKey.isNullOrEmpty()) {
|
||||
_statusText.value = "Missing ELEVENLABS_API_KEY"
|
||||
return
|
||||
@@ -495,6 +520,7 @@ class TalkModeManager(
|
||||
val bridge = session ?: return
|
||||
val envVoice = System.getenv("ELEVENLABS_VOICE_ID")?.trim()
|
||||
val sagVoice = System.getenv("SAG_VOICE_ID")?.trim()
|
||||
val envKey = System.getenv("ELEVENLABS_API_KEY")?.trim()
|
||||
try {
|
||||
val res = bridge.request("config.get", "{}")
|
||||
val root = json.parseToJsonElement(res).asObjectOrNull()
|
||||
@@ -503,6 +529,7 @@ class TalkModeManager(
|
||||
val voice = talk?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val model = talk?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val outputFormat = talk?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val key = talk?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
|
||||
|
||||
defaultVoiceId = voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
||||
@@ -510,9 +537,11 @@ class TalkModeManager(
|
||||
defaultModelId = model
|
||||
if (!modelOverrideActive) currentModelId = defaultModelId
|
||||
defaultOutputFormat = outputFormat
|
||||
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
|
||||
if (interrupt != null) interruptOnSpeech = interrupt
|
||||
} catch (_: Throwable) {
|
||||
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
||||
apiKey = envKey?.takeIf { it.isNotEmpty() }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ final class TalkModeManager: NSObject {
|
||||
private var defaultModelId: String?
|
||||
private var currentModelId: String?
|
||||
private var defaultOutputFormat: String?
|
||||
private var apiKey: String?
|
||||
private var interruptOnSpeech: Bool = true
|
||||
|
||||
private var bridge: BridgeSession?
|
||||
@@ -189,6 +190,7 @@ final class TalkModeManager: NSObject {
|
||||
}
|
||||
|
||||
do {
|
||||
let startedAt = Date().timeIntervalSince1970
|
||||
let runId = try await self.sendChat(prompt, bridge: bridge)
|
||||
let ok = await self.waitForChatFinal(runId: runId, bridge: bridge)
|
||||
if !ok {
|
||||
@@ -197,7 +199,11 @@ final class TalkModeManager: NSObject {
|
||||
return
|
||||
}
|
||||
|
||||
guard let assistantText = try await self.fetchLatestAssistantText(bridge: bridge) else {
|
||||
guard let assistantText = try await self.waitForAssistantText(
|
||||
bridge: bridge,
|
||||
since: startedAt,
|
||||
timeoutSeconds: 12)
|
||||
else {
|
||||
self.statusText = "No reply"
|
||||
await self.start()
|
||||
return
|
||||
@@ -259,7 +265,22 @@ final class TalkModeManager: NSObject {
|
||||
return false
|
||||
}
|
||||
|
||||
private func fetchLatestAssistantText(bridge: BridgeSession) async throws -> String? {
|
||||
/// Polls chat history until an assistant reply shows up or the timeout elapses.
///
/// Every iteration calls `fetchLatestAssistantText(bridge:since:)`; when it yields
/// no text the loop sleeps 300 ms and tries again.
///
/// - Parameters:
///   - bridge: Session used for the history requests.
///   - since: Lower-bound timestamp forwarded to `fetchLatestAssistantText`.
///   - timeoutSeconds: Maximum wall-clock time to keep polling.
/// - Returns: The first assistant text found, or `nil` on timeout or cancellation.
/// - Throws: Any error thrown by `fetchLatestAssistantText(bridge:since:)`.
private func waitForAssistantText(
    bridge: BridgeSession,
    since: Double,
    timeoutSeconds: Int) async throws -> String?
{
    let deadline = Date().addingTimeInterval(TimeInterval(timeoutSeconds))
    while Date() < deadline {
        if let text = try await self.fetchLatestAssistantText(bridge: bridge, since: since) {
            return text
        }
        do {
            try await Task.sleep(nanoseconds: 300_000_000)
        } catch {
            // Task.sleep throws when the task is cancelled. Ignoring that (as `try?`
            // did) makes every later sleep return immediately, turning this loop
            // into a tight request storm until the deadline. Stop polling instead.
            return nil
        }
    }
    return nil
}
|
||||
|
||||
private func fetchLatestAssistantText(bridge: BridgeSession, since: Double? = nil) async throws -> String? {
|
||||
let res = try await bridge.request(
|
||||
method: "chat.history",
|
||||
paramsJSON: "{\"sessionKey\":\"main\"}",
|
||||
@@ -268,6 +289,9 @@ final class TalkModeManager: NSObject {
|
||||
guard let messages = json["messages"] as? [[String: Any]] else { return nil }
|
||||
for msg in messages.reversed() {
|
||||
guard (msg["role"] as? String) == "assistant" else { continue }
|
||||
if let since, let timestamp = msg["timestamp"] as? Double, timestamp < since - 0.5 {
|
||||
continue
|
||||
}
|
||||
guard let content = msg["content"] as? [[String: Any]] else { continue }
|
||||
let text = content.compactMap { $0["text"] as? String }.joined(separator: "\n")
|
||||
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
@@ -299,7 +323,10 @@ final class TalkModeManager: NSObject {
|
||||
return
|
||||
}
|
||||
|
||||
guard let apiKey = ProcessInfo.processInfo.environment["ELEVENLABS_API_KEY"], !apiKey.isEmpty else {
|
||||
let resolvedKey =
|
||||
(self.apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? self.apiKey : nil) ??
|
||||
ProcessInfo.processInfo.environment["ELEVENLABS_API_KEY"]
|
||||
guard let apiKey = resolvedKey, !apiKey.isEmpty else {
|
||||
self.statusText = "Missing ELEVENLABS_API_KEY"
|
||||
return
|
||||
}
|
||||
@@ -375,6 +402,7 @@ final class TalkModeManager: NSObject {
|
||||
self.currentModelId = self.defaultModelId
|
||||
self.defaultOutputFormat = (talk?["outputFormat"] as? String)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
self.apiKey = (talk?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
if let interrupt = talk?["interruptOnSpeech"] as? Bool {
|
||||
self.interruptOnSpeech = interrupt
|
||||
}
|
||||
|
||||
@@ -329,18 +329,26 @@ final class AppState {
|
||||
func setTalkEnabled(_ enabled: Bool) async {
|
||||
guard voiceWakeSupported else {
|
||||
self.talkEnabled = false
|
||||
await GatewayConnection.shared.talkMode(enabled: false, phase: "disabled")
|
||||
return
|
||||
}
|
||||
|
||||
self.talkEnabled = enabled
|
||||
guard !self.isPreview else { return }
|
||||
|
||||
if !enabled { return }
|
||||
if !enabled {
|
||||
await GatewayConnection.shared.talkMode(enabled: false, phase: "disabled")
|
||||
return
|
||||
}
|
||||
|
||||
if PermissionManager.voiceWakePermissionsGranted() { return }
|
||||
if PermissionManager.voiceWakePermissionsGranted() {
|
||||
await GatewayConnection.shared.talkMode(enabled: true, phase: "enabled")
|
||||
return
|
||||
}
|
||||
|
||||
let granted = await PermissionManager.ensureVoiceWakePermissions(interactive: true)
|
||||
self.talkEnabled = granted
|
||||
await GatewayConnection.shared.talkMode(enabled: granted, phase: granted ? "enabled" : "denied")
|
||||
}
|
||||
|
||||
// MARK: - Global wake words sync (Gateway-owned)
|
||||
|
||||
@@ -34,6 +34,7 @@ struct ConfigSettings: View {
|
||||
@State private var talkVoiceId: String = ""
|
||||
@State private var talkInterruptOnSpeech: Bool = true
|
||||
@State private var talkApiKey: String = ""
|
||||
@State private var gatewayApiKeyFound = false
|
||||
|
||||
var body: some View {
|
||||
ScrollView { self.content }
|
||||
@@ -49,6 +50,7 @@ struct ConfigSettings: View {
|
||||
self.hasLoaded = true
|
||||
self.loadConfig()
|
||||
await self.loadModels()
|
||||
await self.refreshGatewayTalkApiKey()
|
||||
self.allowAutosave = true
|
||||
}
|
||||
}
|
||||
@@ -323,6 +325,10 @@ struct ConfigSettings: View {
|
||||
Text("Using ELEVENLABS_API_KEY from the environment.")
|
||||
.font(.footnote)
|
||||
.foregroundStyle(.secondary)
|
||||
} else if self.gatewayApiKeyFound && self.talkApiKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
||||
Text("Using API key from the gateway profile.")
|
||||
.font(.footnote)
|
||||
.foregroundStyle(.secondary)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -392,6 +398,20 @@ struct ConfigSettings: View {
|
||||
}
|
||||
}
|
||||
|
||||
/// Asks the gateway for its config and records whether it carries a non-empty
/// `talk.apiKey`. Any request or decoding failure counts as "no key found".
private func refreshGatewayTalkApiKey() async {
    var found = false
    do {
        let snapshot: ConfigSnapshot = try await GatewayConnection.shared.requestDecoded(
            method: .configGet,
            params: nil,
            timeoutMs: 8000)
        let talkSection = snapshot.config?["talk"]?.dictionaryValue
        let trimmedKey = talkSection?["apiKey"]?.stringValue?
            .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        found = !trimmedKey.isEmpty
    } catch {
        found = false
    }
    self.gatewayApiKeyFound = found
}
|
||||
|
||||
private func autosaveConfig() {
|
||||
guard self.allowAutosave else { return }
|
||||
Task { await self.saveConfig() }
|
||||
@@ -487,12 +507,14 @@ struct ConfigSettings: View {
|
||||
if !self.talkApiKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
||||
return "ElevenLabs API key: stored in config"
|
||||
}
|
||||
if self.gatewayApiKeyFound { return "ElevenLabs API key: found (gateway)" }
|
||||
return "ElevenLabs API key: missing"
|
||||
}
|
||||
|
||||
private var apiKeyStatusColor: Color {
|
||||
if self.hasEnvApiKey { return .green }
|
||||
if !self.talkApiKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { return .green }
|
||||
if self.gatewayApiKeyFound { return .green }
|
||||
return .red
|
||||
}
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ actor GatewayConnection {
|
||||
case providersStatus = "providers.status"
|
||||
case configGet = "config.get"
|
||||
case configSet = "config.set"
|
||||
case talkMode = "talk.mode"
|
||||
case webLoginStart = "web.login.start"
|
||||
case webLoginWait = "web.login.wait"
|
||||
case webLogout = "web.logout"
|
||||
@@ -483,6 +484,12 @@ extension GatewayConnection {
|
||||
return res.aborted ?? false
|
||||
}
|
||||
|
||||
/// Sends a `talk.mode` request carrying the enabled flag and, when given, the phase.
/// Failures are deliberately ignored (`try?`); the call is best-effort.
func talkMode(enabled: Bool, phase: String? = nil) async {
    var payload: [String: AnyCodable] = ["enabled": AnyCodable(enabled)]
    // Assigning nil leaves the key absent, so "phase" is only sent when provided.
    payload["phase"] = phase.map { AnyCodable($0) }
    try? await self.requestVoid(method: .talkMode, params: payload)
}
|
||||
|
||||
// MARK: - VoiceWake
|
||||
|
||||
func voiceWakeGetTriggers() async throws -> [String] {
|
||||
|
||||
@@ -20,6 +20,7 @@ final class TalkModeController {
|
||||
|
||||
func updatePhase(_ phase: TalkModePhase) {
|
||||
TalkOverlayController.shared.updatePhase(phase)
|
||||
Task { await GatewayConnection.shared.talkMode(enabled: AppStateStore.shared.talkEnabled, phase: phase.rawValue) }
|
||||
}
|
||||
|
||||
func updateLevel(_ level: Double) {
|
||||
|
||||
@@ -244,6 +244,7 @@ actor TalkModeRuntime {
|
||||
await self.reloadConfig()
|
||||
let prompt = self.buildPrompt(transcript: transcript)
|
||||
let runId = UUID().uuidString
|
||||
let startedAt = Date().timeIntervalSince1970
|
||||
|
||||
do {
|
||||
let response = try await GatewayConnection.shared.chatSend(
|
||||
@@ -261,7 +262,11 @@ actor TalkModeRuntime {
|
||||
return
|
||||
}
|
||||
|
||||
guard let assistantText = await self.latestAssistantText(sessionKey: "main") else {
|
||||
guard let assistantText = await self.waitForAssistantText(
|
||||
sessionKey: "main",
|
||||
since: startedAt,
|
||||
timeoutSeconds: 12)
|
||||
else {
|
||||
await self.startListening()
|
||||
await self.startRecognition()
|
||||
return
|
||||
@@ -335,7 +340,22 @@ actor TalkModeRuntime {
|
||||
}
|
||||
}
|
||||
|
||||
private func latestAssistantText(sessionKey: String) async -> String? {
|
||||
/// Polls chat history until an assistant reply shows up or the timeout elapses.
///
/// Every iteration calls `latestAssistantText(sessionKey:since:)`; when it yields
/// no text the loop sleeps 300 ms and tries again.
///
/// - Parameters:
///   - sessionKey: Chat session whose history is inspected.
///   - since: Lower-bound timestamp forwarded to `latestAssistantText`.
///   - timeoutSeconds: Maximum wall-clock time to keep polling.
/// - Returns: The first assistant text found, or `nil` on timeout or cancellation.
private func waitForAssistantText(
    sessionKey: String,
    since: Double,
    timeoutSeconds: Int) async -> String?
{
    let deadline = Date().addingTimeInterval(TimeInterval(timeoutSeconds))
    while Date() < deadline {
        if let text = await self.latestAssistantText(sessionKey: sessionKey, since: since) {
            return text
        }
        do {
            try await Task.sleep(nanoseconds: 300_000_000)
        } catch {
            // Task.sleep throws when the task is cancelled. Ignoring that (as `try?`
            // did) makes every later sleep return immediately, turning this loop
            // into a tight request storm until the deadline. Stop polling instead.
            return nil
        }
    }
    return nil
}
|
||||
|
||||
private func latestAssistantText(sessionKey: String, since: Double? = nil) async -> String? {
|
||||
do {
|
||||
let history = try await GatewayConnection.shared.chatHistory(sessionKey: sessionKey)
|
||||
let messages = history.messages ?? []
|
||||
@@ -343,7 +363,13 @@ actor TalkModeRuntime {
|
||||
guard let data = try? JSONEncoder().encode(item) else { return nil }
|
||||
return try? JSONDecoder().decode(ClawdisChatMessage.self, from: data)
|
||||
}
|
||||
guard let assistant = decoded.last(where: { $0.role == "assistant" }) else { return nil }
|
||||
let assistant = decoded.last { message in
|
||||
guard message.role == "assistant" else { return false }
|
||||
guard let since else { return true }
|
||||
guard let timestamp = message.timestamp else { return false }
|
||||
return timestamp >= since - 0.5
|
||||
}
|
||||
guard let assistant else { return nil }
|
||||
let text = assistant.content.compactMap { $0.text }.joined(separator: "\n")
|
||||
let trimmed = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
|
||||
return trimmed.isEmpty ? nil : trimmed
|
||||
|
||||
@@ -20,9 +20,9 @@ final class TalkOverlayController {
|
||||
private var window: NSPanel?
|
||||
private var hostingView: NSHostingView<TalkOverlayView>?
|
||||
|
||||
private let width: CGFloat = 120
|
||||
private let height: CGFloat = 120
|
||||
private let padding: CGFloat = 6
|
||||
private let width: CGFloat = 160
|
||||
private let height: CGFloat = 160
|
||||
private let padding: CGFloat = 8
|
||||
|
||||
func present() {
|
||||
self.ensureWindow()
|
||||
|
||||
@@ -7,12 +7,12 @@ struct TalkOverlayView: View {
|
||||
var body: some View {
|
||||
ZStack(alignment: .topLeading) {
|
||||
TalkOrbView(phase: self.controller.model.phase, level: self.controller.model.level)
|
||||
.frame(width: 80, height: 80)
|
||||
.frame(width: 96, height: 96)
|
||||
.contentShape(Rectangle())
|
||||
.onTapGesture {
|
||||
TalkModeController.shared.stopSpeaking(reason: .userTap)
|
||||
}
|
||||
.padding(16)
|
||||
.padding(26)
|
||||
|
||||
Button {
|
||||
TalkModeController.shared.exitTalkMode()
|
||||
@@ -29,7 +29,7 @@ struct TalkOverlayView: View {
|
||||
.padding(4)
|
||||
.onHover { self.hovering = $0 }
|
||||
}
|
||||
.frame(width: 120, height: 120, alignment: .center)
|
||||
.frame(width: 160, height: 160, alignment: .center)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ private struct TalkWaveRings: View {
|
||||
let phase: TalkModePhase
|
||||
let level: Double
|
||||
let time: TimeInterval
|
||||
private let ringColor = Color(red: 0.82, green: 0.94, blue: 1.0)
|
||||
|
||||
var body: some View {
|
||||
ZStack {
|
||||
@@ -80,9 +81,9 @@ private struct TalkWaveRings: View {
|
||||
let progress = (time * speed + Double(idx) * 0.28).truncatingRemainder(dividingBy: 1)
|
||||
let amplitude = phase == .speaking ? 0.95 : phase == .listening ? 0.5 + level * 0.7 : 0.35
|
||||
let scale = 0.75 + progress * amplitude + (phase == .listening ? level * 0.15 : 0)
|
||||
let alpha = phase == .speaking ? 0.55 : phase == .listening ? 0.45 + level * 0.25 : 0.28
|
||||
let alpha = phase == .speaking ? 0.72 : phase == .listening ? 0.58 + level * 0.28 : 0.4
|
||||
Circle()
|
||||
.stroke(Color.white.opacity(alpha - progress * 0.35), lineWidth: 1.2)
|
||||
.stroke(self.ringColor.opacity(alpha - progress * 0.3), lineWidth: 1.6)
|
||||
.scaleEffect(scale)
|
||||
.opacity(alpha - progress * 0.6)
|
||||
}
|
||||
@@ -97,13 +98,13 @@ private struct TalkOrbitArcs: View {
|
||||
ZStack {
|
||||
Circle()
|
||||
.trim(from: 0.08, to: 0.26)
|
||||
.stroke(Color.white.opacity(0.75), style: StrokeStyle(lineWidth: 1.4, lineCap: .round))
|
||||
.stroke(Color.white.opacity(0.88), style: StrokeStyle(lineWidth: 1.6, lineCap: .round))
|
||||
.rotationEffect(.degrees(time * 42))
|
||||
Circle()
|
||||
.trim(from: 0.62, to: 0.86)
|
||||
.stroke(Color.white.opacity(0.55), style: StrokeStyle(lineWidth: 1.2, lineCap: .round))
|
||||
.stroke(Color.white.opacity(0.7), style: StrokeStyle(lineWidth: 1.4, lineCap: .round))
|
||||
.rotationEffect(.degrees(-time * 35))
|
||||
}
|
||||
.scaleEffect(1.05)
|
||||
.scaleEffect(1.08)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,6 +198,7 @@ Controls inbound/outbound prefixes and timestamps.
|
||||
### `talk`
|
||||
|
||||
Defaults for Talk mode (macOS/iOS/Android). Voice IDs fall back to `ELEVENLABS_VOICE_ID` or `SAG_VOICE_ID` when unset.
|
||||
`apiKey` falls back to `ELEVENLABS_API_KEY` (or the gateway’s shell profile) when unset.
|
||||
|
||||
```json5
|
||||
{
|
||||
@@ -205,6 +206,7 @@ Defaults for Talk mode (macOS/iOS/Android). Voice IDs fall back to `ELEVENLABS_V
|
||||
voiceId: "elevenlabs_voice_id",
|
||||
modelId: "eleven_v3",
|
||||
outputFormat: "mp3_44100_128",
|
||||
apiKey: "elevenlabs_api_key",
|
||||
interruptOnSpeech: true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@ Supported keys:
|
||||
"voiceId": "elevenlabs_voice_id",
|
||||
"modelId": "eleven_v3",
|
||||
"outputFormat": "mp3_44100_128",
|
||||
"apiKey": "elevenlabs_api_key",
|
||||
"interruptOnSpeech": true
|
||||
}
|
||||
}
|
||||
@@ -55,6 +56,7 @@ Supported keys:
|
||||
Defaults:
|
||||
- `interruptOnSpeech`: true
|
||||
- `voiceId`: falls back to `ELEVENLABS_VOICE_ID` / `SAG_VOICE_ID`
|
||||
- `apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available)
|
||||
|
||||
## macOS UI
|
||||
- Menu bar toggle: **Talk**
|
||||
|
||||
@@ -174,3 +174,50 @@ describe("config identity defaults", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Verifies that config snapshots receive `talk.apiKey` from the environment or,
// failing that, from a shell profile in the (temporary) home directory.
describe("talk api key fallback", () => {
  let previousEnv: string | undefined;

  beforeEach(() => {
    // Start every test with the variable unset so only the test's own setup counts.
    previousEnv = process.env.ELEVENLABS_API_KEY;
    delete process.env.ELEVENLABS_API_KEY;
  });

  afterEach(() => {
    // Assigning `undefined` to a process.env key stores the string "undefined",
    // so a variable that was originally unset must be deleted, not assigned.
    if (previousEnv === undefined) {
      delete process.env.ELEVENLABS_API_KEY;
    } else {
      process.env.ELEVENLABS_API_KEY = previousEnv;
    }
  });

  it("injects talk.apiKey from profile when config is missing", async () => {
    await withTempHome(async (home) => {
      await fs.writeFile(
        path.join(home, ".profile"),
        "export ELEVENLABS_API_KEY=profile-key\n",
        "utf-8",
      );

      // Re-import so the module is evaluated against the temporary home.
      vi.resetModules();
      const { readConfigFileSnapshot } = await import("./config.js");
      const snap = await readConfigFileSnapshot();

      expect(snap.config?.talk?.apiKey).toBe("profile-key");
      expect(snap.exists).toBe(false);
    });
  });

  it("prefers ELEVENLABS_API_KEY env over profile", async () => {
    await withTempHome(async (home) => {
      await fs.writeFile(
        path.join(home, ".profile"),
        "export ELEVENLABS_API_KEY=profile-key\n",
        "utf-8",
      );
      process.env.ELEVENLABS_API_KEY = "env-key";

      vi.resetModules();
      const { readConfigFileSnapshot } = await import("./config.js");
      const snap = await readConfigFileSnapshot();

      expect(snap.config?.talk?.apiKey).toBe("env-key");
    });
  });
});
|
||||
|
||||
@@ -226,6 +226,8 @@ export type TalkConfig = {
|
||||
modelId?: string;
|
||||
/** Default ElevenLabs output format (e.g. mp3_44100_128). */
|
||||
outputFormat?: string;
|
||||
/** ElevenLabs API key (optional; falls back to ELEVENLABS_API_KEY, then to the gateway's shell profile). */
|
||||
apiKey?: string;
|
||||
/** Stop speaking when user starts talking (default: true). */
|
||||
interruptOnSpeech?: boolean;
|
||||
};
|
||||
@@ -802,6 +804,7 @@ const ClawdisSchema = z.object({
|
||||
voiceId: z.string().optional(),
|
||||
modelId: z.string().optional(),
|
||||
outputFormat: z.string().optional(),
|
||||
apiKey: z.string().optional(),
|
||||
interruptOnSpeech: z.boolean().optional(),
|
||||
})
|
||||
.optional(),
|
||||
@@ -964,17 +967,59 @@ export function parseConfigJson5(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Looks for an `ELEVENLABS_API_KEY=...` assignment in the user's shell profiles.
 *
 * Checks `.profile`, `.zprofile`, `.zshrc` and `.bashrc` in the home directory,
 * in that order, and returns the first non-empty value found. Files that are
 * missing or unreadable are skipped.
 *
 * Value parsing:
 * - double- or single-quoted values yield the text between the quotes;
 * - unquoted values end at the first whitespace or `;`, so a trailing
 *   `# comment` or a following command is not swallowed into the key.
 *
 * @returns the trimmed key, or `null` when no profile defines one.
 */
function readTalkApiKeyFromProfile(): string | null {
  const home = os.homedir();
  const candidates = [".profile", ".zprofile", ".zshrc", ".bashrc"].map(
    (name) => path.join(home, name),
  );
  // Built once, reused for every file. Groups: 1 = "double", 2 = 'single', 3 = bare.
  const assignment =
    /^[ \t]*(?:export[ \t]+)?ELEVENLABS_API_KEY[ \t]*=[ \t]*(?:"([^"\n]*)"|'([^'\n]*)'|([^\s;"']+))/m;
  for (const candidate of candidates) {
    if (!fs.existsSync(candidate)) continue;
    try {
      const text = fs.readFileSync(candidate, "utf-8");
      const match = assignment.exec(text);
      const value = (match?.[1] ?? match?.[2] ?? match?.[3])?.trim();
      if (value) return value;
    } catch {
      // Ignore profile read errors.
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Resolves the ElevenLabs API key available to this process.
 *
 * A non-blank `ELEVENLABS_API_KEY` environment variable takes precedence;
 * otherwise the result of `readTalkApiKeyFromProfile()` is returned.
 *
 * @returns the key, or `null` when neither source provides one.
 */
function resolveTalkApiKey(): string | null {
  const fromEnv = process.env.ELEVENLABS_API_KEY?.trim();
  return fromEnv ? fromEnv : readTalkApiKeyFromProfile();
}
|
||||
|
||||
/**
 * Returns a config whose `talk.apiKey` is filled in from `resolveTalkApiKey()`
 * when the config itself does not provide one.
 *
 * The input object is never mutated: either it is returned unchanged (a key is
 * already configured, or none could be resolved) or a shallow copy with the
 * `talk` section extended is returned.
 *
 * @param config validated configuration to augment.
 * @returns `config` itself, or a copy carrying the resolved `talk.apiKey`.
 */
function applyTalkApiKey(config: ClawdisConfig): ClawdisConfig {
  // An explicitly configured key wins. Check it first so the env/profile lookup
  // (which reads files from disk) is skipped when it cannot affect the result.
  if (config.talk?.apiKey?.trim()) return config;

  const resolved = resolveTalkApiKey();
  if (!resolved) return config;

  return {
    ...config,
    talk: {
      ...config.talk,
      apiKey: resolved,
    },
  };
}
|
||||
|
||||
export async function readConfigFileSnapshot(): Promise<ConfigFileSnapshot> {
|
||||
const configPath = CONFIG_PATH_CLAWDIS;
|
||||
const exists = fs.existsSync(configPath);
|
||||
if (!exists) {
|
||||
const config = applyTalkApiKey({});
|
||||
return {
|
||||
path: configPath,
|
||||
exists: false,
|
||||
raw: null,
|
||||
parsed: {},
|
||||
valid: true,
|
||||
config: {},
|
||||
config,
|
||||
issues: [],
|
||||
};
|
||||
}
|
||||
@@ -1015,7 +1060,7 @@ export async function readConfigFileSnapshot(): Promise<ConfigFileSnapshot> {
|
||||
raw,
|
||||
parsed: parsedRes.parsed,
|
||||
valid: true,
|
||||
config: validated.config,
|
||||
config: applyTalkApiKey(validated.config),
|
||||
issues: [],
|
||||
};
|
||||
} catch (err) {
|
||||
|
||||
@@ -95,6 +95,8 @@ import {
|
||||
SnapshotSchema,
|
||||
type StateVersion,
|
||||
StateVersionSchema,
|
||||
type TalkModeParams,
|
||||
TalkModeParamsSchema,
|
||||
type TickEvent,
|
||||
TickEventSchema,
|
||||
type WakeParams,
|
||||
@@ -169,6 +171,8 @@ export const validateConfigGetParams = ajv.compile<ConfigGetParams>(
|
||||
export const validateConfigSetParams = ajv.compile<ConfigSetParams>(
|
||||
ConfigSetParamsSchema,
|
||||
);
|
||||
export const validateTalkModeParams =
|
||||
ajv.compile<TalkModeParams>(TalkModeParamsSchema);
|
||||
export const validateProvidersStatusParams = ajv.compile<ProvidersStatusParams>(
|
||||
ProvidersStatusParamsSchema,
|
||||
);
|
||||
@@ -297,6 +301,7 @@ export type {
|
||||
NodePairApproveParams,
|
||||
ConfigGetParams,
|
||||
ConfigSetParams,
|
||||
TalkModeParams,
|
||||
ProvidersStatusParams,
|
||||
WebLoginStartParams,
|
||||
WebLoginWaitParams,
|
||||
|
||||
@@ -339,6 +339,14 @@ export const ConfigSetParamsSchema = Type.Object(
|
||||
{ additionalProperties: false },
|
||||
);
|
||||
|
||||
export const TalkModeParamsSchema = Type.Object(
|
||||
{
|
||||
enabled: Type.Boolean(),
|
||||
phase: Type.Optional(Type.String()),
|
||||
},
|
||||
{ additionalProperties: false },
|
||||
);
|
||||
|
||||
export const ProvidersStatusParamsSchema = Type.Object(
|
||||
{
|
||||
probe: Type.Optional(Type.Boolean()),
|
||||
@@ -668,6 +676,7 @@ export const ProtocolSchemas: Record<string, TSchema> = {
|
||||
SessionsCompactParams: SessionsCompactParamsSchema,
|
||||
ConfigGetParams: ConfigGetParamsSchema,
|
||||
ConfigSetParams: ConfigSetParamsSchema,
|
||||
TalkModeParams: TalkModeParamsSchema,
|
||||
ProvidersStatusParams: ProvidersStatusParamsSchema,
|
||||
WebLoginStartParams: WebLoginStartParamsSchema,
|
||||
WebLoginWaitParams: WebLoginWaitParamsSchema,
|
||||
@@ -724,6 +733,7 @@ export type SessionsDeleteParams = Static<typeof SessionsDeleteParamsSchema>;
|
||||
export type SessionsCompactParams = Static<typeof SessionsCompactParamsSchema>;
|
||||
export type ConfigGetParams = Static<typeof ConfigGetParamsSchema>;
|
||||
export type ConfigSetParams = Static<typeof ConfigSetParamsSchema>;
|
||||
export type TalkModeParams = Static<typeof TalkModeParamsSchema>;
|
||||
export type ProvidersStatusParams = Static<typeof ProvidersStatusParamsSchema>;
|
||||
export type WebLoginStartParams = Static<typeof WebLoginStartParamsSchema>;
|
||||
export type WebLoginWaitParams = Static<typeof WebLoginWaitParamsSchema>;
|
||||
|
||||
@@ -393,6 +393,7 @@ import {
|
||||
validateSkillsInstallParams,
|
||||
validateSkillsStatusParams,
|
||||
validateSkillsUpdateParams,
|
||||
validateTalkModeParams,
|
||||
validateWakeParams,
|
||||
validateWebLoginStartParams,
|
||||
validateWebLoginWaitParams,
|
||||
@@ -469,6 +470,7 @@ const METHODS = [
|
||||
"status",
|
||||
"config.get",
|
||||
"config.set",
|
||||
"talk.mode",
|
||||
"models.list",
|
||||
"skills.status",
|
||||
"skills.install",
|
||||
@@ -518,6 +520,7 @@ const EVENTS = [
|
||||
"chat",
|
||||
"presence",
|
||||
"tick",
|
||||
"talk.mode",
|
||||
"shutdown",
|
||||
"health",
|
||||
"heartbeat",
|
||||
@@ -2379,6 +2382,25 @@ export async function startGatewayServer(
|
||||
}),
|
||||
};
|
||||
}
|
||||
case "talk.mode": {
|
||||
const params = parseParams();
|
||||
if (!validateTalkModeParams(params)) {
|
||||
return {
|
||||
ok: false,
|
||||
error: {
|
||||
code: ErrorCodes.INVALID_REQUEST,
|
||||
message: `invalid talk.mode params: ${formatValidationErrors(validateTalkModeParams.errors)}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
const payload = {
|
||||
enabled: (params as { enabled: boolean }).enabled,
|
||||
phase: (params as { phase?: string }).phase ?? null,
|
||||
ts: Date.now(),
|
||||
};
|
||||
broadcast("talk.mode", payload, { dropIfSlow: true });
|
||||
return { ok: true, payloadJSON: JSON.stringify(payload) };
|
||||
}
|
||||
case "models.list": {
|
||||
const params = parseParams();
|
||||
if (!validateModelsListParams(params)) {
|
||||
@@ -4615,6 +4637,28 @@ export async function startGatewayServer(
|
||||
);
|
||||
break;
|
||||
}
|
||||
case "talk.mode": {
|
||||
const params = (req.params ?? {}) as Record<string, unknown>;
|
||||
if (!validateTalkModeParams(params)) {
|
||||
respond(
|
||||
false,
|
||||
undefined,
|
||||
errorShape(
|
||||
ErrorCodes.INVALID_REQUEST,
|
||||
`invalid talk.mode params: ${formatValidationErrors(validateTalkModeParams.errors)}`,
|
||||
),
|
||||
);
|
||||
break;
|
||||
}
|
||||
const payload = {
|
||||
enabled: (params as { enabled: boolean }).enabled,
|
||||
phase: (params as { phase?: string }).phase ?? null,
|
||||
ts: Date.now(),
|
||||
};
|
||||
broadcast("talk.mode", payload, { dropIfSlow: true });
|
||||
respond(true, payload, undefined);
|
||||
break;
|
||||
}
|
||||
case "skills.status": {
|
||||
const params = (req.params ?? {}) as Record<string, unknown>;
|
||||
if (!validateSkillsStatusParams(params)) {
|
||||
|
||||
Reference in New Issue
Block a user