feat(tts): support custom OpenAI-compatible TTS endpoints (#1701)
* feat(tts): support custom OpenAI-compatible TTS endpoints

  Add the OPENAI_TTS_BASE_URL environment variable to allow using self-hosted or third-party OpenAI-compatible TTS services such as Kokoro, LocalAI, or OpenedAI-Speech.

  Changes:
  - Add OPENAI_TTS_BASE_URL env var (defaults to the official OpenAI API)
  - Relax model/voice validation when using custom endpoints
  - Add tts-1 and tts-1-hd to the model allowlist

  This enables users to:
  - Use local TTS for privacy and cost savings
  - Use models with better non-English language support (Chinese, Japanese)
  - Reduce latency with local inference

  Example usage: OPENAI_TTS_BASE_URL=http://localhost:8880/v1

  Tested with Kokoro-FastAPI.

* fix: strip trailing slashes from OPENAI_TTS_BASE_URL

  Address review feedback: normalize the base URL by removing trailing slashes to prevent double-slash paths like /v1//audio/speech, which cause 404 errors on some OpenAI-compatible servers.

* style: format code with oxfmt

* test: update tests for expanded OpenAI TTS model list

  - Accept tts-1 and tts-1-hd as valid models
  - Update OPENAI_TTS_MODELS length expectation to 3

---------

Co-authored-by: zhixian <zhixian@bunker.local>
This commit is contained in:
@@ -109,13 +109,13 @@ describe("tts", () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
describe("isValidOpenAIModel", () => {
|
describe("isValidOpenAIModel", () => {
|
||||||
it("accepts gpt-4o-mini-tts model", () => {
|
it("accepts supported models", () => {
|
||||||
expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
|
expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
|
||||||
|
expect(isValidOpenAIModel("tts-1")).toBe(true);
|
||||||
|
expect(isValidOpenAIModel("tts-1-hd")).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("rejects other models", () => {
|
it("rejects unsupported models", () => {
|
||||||
expect(isValidOpenAIModel("tts-1")).toBe(false);
|
|
||||||
expect(isValidOpenAIModel("tts-1-hd")).toBe(false);
|
|
||||||
expect(isValidOpenAIModel("invalid")).toBe(false);
|
expect(isValidOpenAIModel("invalid")).toBe(false);
|
||||||
expect(isValidOpenAIModel("")).toBe(false);
|
expect(isValidOpenAIModel("")).toBe(false);
|
||||||
expect(isValidOpenAIModel("gpt-4")).toBe(false);
|
expect(isValidOpenAIModel("gpt-4")).toBe(false);
|
||||||
@@ -123,9 +123,11 @@ describe("tts", () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
describe("OPENAI_TTS_MODELS", () => {
|
describe("OPENAI_TTS_MODELS", () => {
|
||||||
it("contains only gpt-4o-mini-tts", () => {
|
it("contains supported models", () => {
|
||||||
expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
|
expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
|
||||||
expect(OPENAI_TTS_MODELS).toHaveLength(1);
|
expect(OPENAI_TTS_MODELS).toContain("tts-1");
|
||||||
|
expect(OPENAI_TTS_MODELS).toContain("tts-1-hd");
|
||||||
|
expect(OPENAI_TTS_MODELS).toHaveLength(3);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("is a non-empty array", () => {
|
it("is a non-empty array", () => {
|
||||||
|
|||||||
@@ -736,7 +736,17 @@ function parseTtsDirectives(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"] as const;
|
export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Custom OpenAI-compatible TTS endpoint.
|
||||||
|
* When set, model/voice validation is relaxed to allow non-OpenAI models.
|
||||||
|
* Example: OPENAI_TTS_BASE_URL=http://localhost:8880/v1
|
||||||
|
*/
|
||||||
|
const OPENAI_TTS_BASE_URL = (
|
||||||
|
process.env.OPENAI_TTS_BASE_URL?.trim() || "https://api.openai.com/v1"
|
||||||
|
).replace(/\/+$/, "");
|
||||||
|
const isCustomOpenAIEndpoint = OPENAI_TTS_BASE_URL !== "https://api.openai.com/v1";
|
||||||
export const OPENAI_TTS_VOICES = [
|
export const OPENAI_TTS_VOICES = [
|
||||||
"alloy",
|
"alloy",
|
||||||
"ash",
|
"ash",
|
||||||
@@ -752,10 +762,14 @@ export const OPENAI_TTS_VOICES = [
|
|||||||
type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
|
type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
|
||||||
|
|
||||||
function isValidOpenAIModel(model: string): boolean {
|
function isValidOpenAIModel(model: string): boolean {
|
||||||
|
// Allow any model when using custom endpoint (e.g., Kokoro, LocalAI)
|
||||||
|
if (isCustomOpenAIEndpoint) return true;
|
||||||
return OPENAI_TTS_MODELS.includes(model as (typeof OPENAI_TTS_MODELS)[number]);
|
return OPENAI_TTS_MODELS.includes(model as (typeof OPENAI_TTS_MODELS)[number]);
|
||||||
}
|
}
|
||||||
|
|
||||||
function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
|
function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
|
||||||
|
// Allow any voice when using custom endpoint (e.g., Kokoro Chinese voices)
|
||||||
|
if (isCustomOpenAIEndpoint) return true;
|
||||||
return OPENAI_TTS_VOICES.includes(voice as OpenAiTtsVoice);
|
return OPENAI_TTS_VOICES.includes(voice as OpenAiTtsVoice);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -982,7 +996,7 @@ async function openaiTTS(params: {
|
|||||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch("https://api.openai.com/v1/audio/speech", {
|
const response = await fetch(`${OPENAI_TTS_BASE_URL}/audio/speech`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
Authorization: `Bearer ${apiKey}`,
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
|||||||
Reference in New Issue
Block a user