feat: move TTS into core (#1559) (thanks @Glucksberg)

Author: Peter Steinberger
Date: 2026-01-24 07:57:46 +00:00
parent aef88cd9f1
commit d9a467fe3b
26 changed files with 1522 additions and 1649 deletions


@@ -1,4 +1,5 @@
import type { QueueDropPolicy, QueueMode, QueueModeByProvider } from "./types.queue.js";
import type { TtsConfig } from "./types.tts.js";
export type GroupChatConfig = {
  mentionPatterns?: string[];
@@ -81,6 +82,8 @@ export type MessagesConfig = {
  ackReactionScope?: "group-mentions" | "group-all" | "direct" | "all";
  /** Remove ack reaction after reply is sent (default: false). */
  removeAckAfterReply?: boolean;
  /** Text-to-speech settings for outbound replies. */
  tts?: TtsConfig;
};
export type NativeCommandsSetting = boolean | "auto";
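For illustration only (not part of the commit): a minimal sketch of how the new tts field might be populated on a MessagesConfig value; the import path is an assumption made for this sketch.

import type { MessagesConfig } from "./types.messages.js"; // path assumed for this sketch

// Speak only final replies, preferring ElevenLabs with automatic fallback.
const messages: MessagesConfig = {
  removeAckAfterReply: false,
  tts: {
    enabled: true,
    mode: "final",
    provider: "elevenlabs",
  },
};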


@@ -23,5 +23,6 @@ export * from "./types.signal.js";
export * from "./types.skills.js";
export * from "./types.slack.js";
export * from "./types.telegram.js";
export * from "./types.tts.js";
export * from "./types.tools.js";
export * from "./types.whatsapp.js";

src/config/types.tts.ts (new file)

@@ -0,0 +1,30 @@
export type TtsProvider = "elevenlabs" | "openai";
export type TtsMode = "final" | "all";
export type TtsConfig = {
  /** Enable auto-TTS (can be overridden by local prefs). */
  enabled?: boolean;
  /** Apply TTS to final replies only or to all replies (tool/block/final). */
  mode?: TtsMode;
  /** Primary TTS provider (fallbacks are automatic). */
  provider?: TtsProvider;
  /** ElevenLabs configuration. */
  elevenlabs?: {
    apiKey?: string;
    voiceId?: string;
    modelId?: string;
  };
  /** OpenAI configuration. */
  openai?: {
    apiKey?: string;
    model?: string;
    voice?: string;
  };
  /** Optional path for local TTS user preferences JSON. */
  prefsPath?: string;
  /** Hard cap for text sent to TTS (chars). */
  maxTextLength?: number;
  /** API request timeout (ms). */
  timeoutMs?: number;
};
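As a rough illustration (not from the commit), a fully populated TtsConfig could look like the following; every value is a placeholder and the environment variable names are assumptions.

import type { TtsConfig } from "./types.tts.js";

const ttsExample: TtsConfig = {
  enabled: true,
  mode: "all",
  provider: "openai",
  elevenlabs: {
    apiKey: process.env.ELEVENLABS_API_KEY, // placeholder env var
    voiceId: "voice-id-placeholder",
    modelId: "model-id-placeholder",
  },
  openai: {
    apiKey: process.env.OPENAI_API_KEY, // placeholder env var
    model: "tts-model-placeholder",
    voice: "voice-name-placeholder",
  },
  prefsPath: "./tts-prefs.json", // placeholder path
  maxTextLength: 2000, // placeholder character cap
  timeoutMs: 15000, // within the 1000–120000 ms bounds enforced by the schema below
};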


@@ -155,6 +155,36 @@ export const MarkdownConfigSchema = z
  .strict()
  .optional();
export const TtsProviderSchema = z.enum(["elevenlabs", "openai"]);
export const TtsModeSchema = z.enum(["final", "all"]);
export const TtsConfigSchema = z
  .object({
    enabled: z.boolean().optional(),
    mode: TtsModeSchema.optional(),
    provider: TtsProviderSchema.optional(),
    elevenlabs: z
      .object({
        apiKey: z.string().optional(),
        voiceId: z.string().optional(),
        modelId: z.string().optional(),
      })
      .strict()
      .optional(),
    openai: z
      .object({
        apiKey: z.string().optional(),
        model: z.string().optional(),
        voice: z.string().optional(),
      })
      .strict()
      .optional(),
    prefsPath: z.string().optional(),
    maxTextLength: z.number().int().min(1).optional(),
    timeoutMs: z.number().int().min(1000).max(120000).optional(),
  })
  .strict()
  .optional();
export const HumanDelaySchema = z
  .object({
    mode: z.union([z.literal("off"), z.literal("natural"), z.literal("custom")]).optional(),
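A quick validation sketch (illustrative, not from the commit): TtsConfigSchema is exported from zod-schema.core.js per the import hunk below, and because the object is .strict(), unknown keys and out-of-enum providers are rejected.

import { TtsConfigSchema } from "./zod-schema.core.js";

// Valid: enum values and numeric bounds match the schema above.
const ok = TtsConfigSchema.safeParse({
  enabled: true,
  mode: "final",
  provider: "openai",
  timeoutMs: 30000,
});
console.log(ok.success); // true

// Invalid: "azure" is not a TtsProvider, and speakingRate is an unknown key under .strict().
const bad = TtsConfigSchema.safeParse({ provider: "azure", speakingRate: 1.2 });
console.log(bad.success); // false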


@@ -5,6 +5,7 @@ import {
  InboundDebounceSchema,
  NativeCommandsSettingSchema,
  QueueSchema,
  TtsConfigSchema,
} from "./zod-schema.core.js";
const SessionResetConfigSchema = z
@@ -90,6 +91,7 @@ export const MessagesSchema = z
    ackReaction: z.string().optional(),
    ackReactionScope: z.enum(["group-mentions", "group-all", "direct", "all"]).optional(),
    removeAckAfterReply: z.boolean().optional(),
    tts: TtsConfigSchema,
  })
  .strict()
  .optional();
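To sketch the end-to-end effect (module path assumed), a messages block whose tts.timeoutMs falls outside the 1000–120000 ms range now fails validation:

import { MessagesSchema } from "./zod-schema.messages.js"; // path assumed for this sketch

const result = MessagesSchema.safeParse({
  removeAckAfterReply: true,
  tts: { enabled: true, timeoutMs: 500 }, // below the 1000 ms minimum
});
console.log(result.success); // false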