From 6a9301c27d179804a2477ab957804b2534af7cc2 Mon Sep 17 00:00:00 2001
From: zhixian
Date: Sun, 25 Jan 2026 17:04:20 +0900
Subject: [PATCH] feat(tts): support custom OpenAI-compatible TTS endpoints
 (#1701)

* feat(tts): support custom OpenAI-compatible TTS endpoints

Add OPENAI_TTS_BASE_URL environment variable to allow using self-hosted
or third-party OpenAI-compatible TTS services like Kokoro, LocalAI, or
OpenedAI-Speech.

Changes:
- Add OPENAI_TTS_BASE_URL env var (defaults to OpenAI official API)
- Relax model/voice validation when using custom endpoints
- Add tts-1 and tts-1-hd to the model allowlist

This enables users to:
- Use local TTS for privacy and cost savings
- Use models with better non-English language support (Chinese, Japanese)
- Reduce latency with local inference

Example usage:
OPENAI_TTS_BASE_URL=http://localhost:8880/v1

Tested with Kokoro-FastAPI.

* fix: strip trailing slashes from OPENAI_TTS_BASE_URL

Address review feedback: normalize the base URL by removing trailing
slashes to prevent double-slash paths like /v1//audio/speech which cause
404 errors on some OpenAI-compatible servers.

* style: format code with oxfmt

* test: update tests for expanded OpenAI TTS model list

- Accept tts-1 and tts-1-hd as valid models
- Update OPENAI_TTS_MODELS length expectation to 3

---------

Co-authored-by: zhixian
---
 src/tts/tts.test.ts | 14 ++++++++------
 src/tts/tts.ts      | 18 ++++++++++++++++--
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/tts/tts.test.ts b/src/tts/tts.test.ts
index a8c9dce9c..8462cba01 100644
--- a/src/tts/tts.test.ts
+++ b/src/tts/tts.test.ts
@@ -109,13 +109,13 @@ describe("tts", () => {
   });
 
   describe("isValidOpenAIModel", () => {
-    it("accepts gpt-4o-mini-tts model", () => {
+    it("accepts supported models", () => {
       expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
+      expect(isValidOpenAIModel("tts-1")).toBe(true);
+      expect(isValidOpenAIModel("tts-1-hd")).toBe(true);
     });
 
-    it("rejects other models", () => {
-      expect(isValidOpenAIModel("tts-1")).toBe(false);
-      expect(isValidOpenAIModel("tts-1-hd")).toBe(false);
+    it("rejects unsupported models", () => {
       expect(isValidOpenAIModel("invalid")).toBe(false);
       expect(isValidOpenAIModel("")).toBe(false);
       expect(isValidOpenAIModel("gpt-4")).toBe(false);
@@ -123,9 +123,11 @@
   });
 
   describe("OPENAI_TTS_MODELS", () => {
-    it("contains only gpt-4o-mini-tts", () => {
+    it("contains supported models", () => {
       expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
-      expect(OPENAI_TTS_MODELS).toHaveLength(1);
+      expect(OPENAI_TTS_MODELS).toContain("tts-1");
+      expect(OPENAI_TTS_MODELS).toContain("tts-1-hd");
+      expect(OPENAI_TTS_MODELS).toHaveLength(3);
     });
 
     it("is a non-empty array", () => {
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index 5fa06f8d4..5f911ec14 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -736,7 +736,17 @@ function parseTtsDirectives(
   };
 }
 
-export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"] as const;
+export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as const;
+
+/**
+ * Custom OpenAI-compatible TTS endpoint.
+ * When set, model/voice validation is relaxed to allow non-OpenAI models.
+ * Example: OPENAI_TTS_BASE_URL=http://localhost:8880/v1
+ */
+const OPENAI_TTS_BASE_URL = (
+  process.env.OPENAI_TTS_BASE_URL?.trim() || "https://api.openai.com/v1"
+).replace(/\/+$/, "");
+const isCustomOpenAIEndpoint = OPENAI_TTS_BASE_URL !== "https://api.openai.com/v1";
 export const OPENAI_TTS_VOICES = [
   "alloy",
   "ash",
@@ -752,10 +762,14 @@
 type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
 
 function isValidOpenAIModel(model: string): boolean {
+  // Allow any model when using custom endpoint (e.g., Kokoro, LocalAI)
+  if (isCustomOpenAIEndpoint) return true;
   return OPENAI_TTS_MODELS.includes(model as (typeof OPENAI_TTS_MODELS)[number]);
 }
 
 function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
+  // Allow any voice when using custom endpoint (e.g., Kokoro Chinese voices)
+  if (isCustomOpenAIEndpoint) return true;
   return OPENAI_TTS_VOICES.includes(voice as OpenAiTtsVoice);
 }
 
@@ -982,7 +996,7 @@ async function openaiTTS(params: {
   const timeout = setTimeout(() => controller.abort(), timeoutMs);
 
   try {
-    const response = await fetch("https://api.openai.com/v1/audio/speech", {
+    const response = await fetch(`${OPENAI_TTS_BASE_URL}/audio/speech`, {
       method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
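
Reviewer note: below is a minimal, standalone sketch of the base-URL handling this
patch describes, assuming only the normalization regex and the /audio/speech path
visible in the diff. The helpers normalizeBaseUrl and speechEndpoint are hypothetical
names used for illustration; they do not exist in src/tts/tts.ts.

// Illustrative TypeScript sketch (not part of the patch): shows how the
// trailing-slash normalization keeps `${base}/audio/speech` free of
// double-slash paths such as /v1//audio/speech.

const DEFAULT_OPENAI_BASE = "https://api.openai.com/v1";

// Hypothetical helper mirroring the inline normalization in the patch.
function normalizeBaseUrl(raw: string | undefined): string {
  return (raw?.trim() || DEFAULT_OPENAI_BASE).replace(/\/+$/, "");
}

// Hypothetical helper showing how the speech endpoint URL is derived.
function speechEndpoint(baseUrl: string): string {
  return `${baseUrl}/audio/speech`;
}

// Local Kokoro-FastAPI server, with a trailing slash in the env value:
console.log(speechEndpoint(normalizeBaseUrl("http://localhost:8880/v1/")));
// -> http://localhost:8880/v1/audio/speech

// Unset variable falls back to the official OpenAI API:
console.log(speechEndpoint(normalizeBaseUrl(undefined)));
// -> https://api.openai.com/v1/audio/speech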