feat(telegram-tts): add auto-TTS hook and provider switching
- Integrate message_sending hook into Telegram delivery path
- Send text first, then audio as voice message after
- Add /tts_provider command to switch between OpenAI and ElevenLabs
- Implement automatic fallback when primary provider fails
- Use gpt-4o-mini-tts as default OpenAI model
- Add hook integration to route-reply.ts for other channels

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
46e6546bb9
commit
df09e583aa
@@ -47,6 +47,7 @@ interface TtsConfig {
|
|||||||
interface UserPreferences {
|
interface UserPreferences {
|
||||||
tts?: {
|
tts?: {
|
||||||
enabled?: boolean;
|
enabled?: boolean;
|
||||||
|
provider?: "openai" | "elevenlabs";
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,10 +73,26 @@ function isValidVoiceId(voiceId: string): boolean {
|
|||||||
* Validates OpenAI voice name.
|
* Validates OpenAI voice name.
|
||||||
*/
|
*/
|
||||||
function isValidOpenAIVoice(voice: string): boolean {
|
function isValidOpenAIVoice(voice: string): boolean {
|
||||||
const validVoices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
|
const validVoices = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"];
|
||||||
return validVoices.includes(voice);
|
return validVoices.includes(voice);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Available OpenAI TTS models.
|
||||||
|
*/
|
||||||
|
const OPENAI_TTS_MODELS: readonly string[] = [
  "gpt-4o-mini-tts",
  "tts-1",
  "tts-1-hd",
];

/**
 * Validates OpenAI TTS model name.
 *
 * A model is accepted when it is listed in OPENAI_TTS_MODELS, or when it is
 * "gpt-4o-mini-tts-" followed by a non-empty suffix (presumably versioned
 * variants of the default model; the suffix itself is not checked).
 *
 * @param model - Model name to check.
 * @returns true when the name is accepted, false otherwise.
 */
function isValidOpenAIModel(model: string): boolean {
  if (OPENAI_TTS_MODELS.includes(model)) {
    return true;
  }
  // The bare prefix with nothing after it is not a model name, so require
  // at least one character beyond it.
  const variantPrefix = "gpt-4o-mini-tts-";
  return model.length > variantPrefix.length && model.startsWith(variantPrefix);
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Configuration & Preferences
|
// Configuration & Preferences
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -107,7 +124,30 @@ function setTtsEnabled(prefsPath: string, enabled: boolean): void {
|
|||||||
} catch {
|
} catch {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
prefs.tts = { enabled };
|
prefs.tts = { ...prefs.tts, enabled };
|
||||||
|
writeFileSync(prefsPath, JSON.stringify(prefs, null, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the user's preferred TTS provider from the preferences file.
 *
 * The file content is untrusted JSON, so the stored value is checked at
 * runtime instead of being trusted to match the declared return type.
 *
 * @param prefsPath - Path to the JSON preferences file.
 * @returns The stored provider when it is exactly "openai" or "elevenlabs";
 *   undefined when the file is missing, cannot be read or parsed, or holds
 *   any other value.
 */
function getTtsProvider(prefsPath: string): "openai" | "elevenlabs" | undefined {
  try {
    if (!existsSync(prefsPath)) return undefined;
    const prefs: unknown = JSON.parse(readFileSync(prefsPath, "utf8"));
    // Optional chaining tolerates null, primitives and a missing `tts` key.
    const provider = (prefs as { tts?: { provider?: unknown } } | null)?.tts?.provider;
    return provider === "openai" || provider === "elevenlabs" ? provider : undefined;
  } catch {
    // Unreadable or malformed prefs are treated as "no preference set".
    return undefined;
  }
}
|
||||||
|
|
||||||
|
function setTtsProvider(prefsPath: string, provider: "openai" | "elevenlabs"): void {
|
||||||
|
let prefs: UserPreferences = {};
|
||||||
|
try {
|
||||||
|
if (existsSync(prefsPath)) {
|
||||||
|
prefs = JSON.parse(readFileSync(prefsPath, "utf8"));
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
prefs.tts = { ...prefs.tts, provider };
|
||||||
writeFileSync(prefsPath, JSON.stringify(prefs, null, 2));
|
writeFileSync(prefsPath, JSON.stringify(prefs, null, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -200,10 +240,14 @@ async function elevenLabsTTS(
|
|||||||
async function openaiTTS(
|
async function openaiTTS(
|
||||||
text: string,
|
text: string,
|
||||||
apiKey: string,
|
apiKey: string,
|
||||||
model: string = "tts-1",
|
model: string = "gpt-4o-mini-tts",
|
||||||
voice: string = "alloy",
|
voice: string = "alloy",
|
||||||
timeoutMs: number = DEFAULT_TIMEOUT_MS
|
timeoutMs: number = DEFAULT_TIMEOUT_MS
|
||||||
): Promise<Buffer> {
|
): Promise<Buffer> {
|
||||||
|
// Validate model
|
||||||
|
if (!isValidOpenAIModel(model)) {
|
||||||
|
throw new Error(`Invalid model: ${model}`);
|
||||||
|
}
|
||||||
// Validate voice
|
// Validate voice
|
||||||
if (!isValidOpenAIVoice(voice)) {
|
if (!isValidOpenAIVoice(voice)) {
|
||||||
throw new Error(`Invalid voice: ${voice}`);
|
throw new Error(`Invalid voice: ${voice}`);
|
||||||
@@ -243,18 +287,13 @@ async function openaiTTS(
|
|||||||
// Core TTS Function
|
// Core TTS Function
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
async function textToSpeech(text: string, config: TtsConfig): Promise<TtsResult> {
|
async function textToSpeech(text: string, config: TtsConfig, prefsPath?: string): Promise<TtsResult> {
|
||||||
const provider = config.provider || "elevenlabs";
|
// Get user's preferred provider (from prefs) or fall back to config
|
||||||
const apiKey = getApiKey(config, provider);
|
const userProvider = prefsPath ? getTtsProvider(prefsPath) : undefined;
|
||||||
|
const primaryProvider = userProvider || config.provider || "openai";
|
||||||
|
const fallbackProvider = primaryProvider === "openai" ? "elevenlabs" : "openai";
|
||||||
const timeoutMs = config.timeoutMs || DEFAULT_TIMEOUT_MS;
|
const timeoutMs = config.timeoutMs || DEFAULT_TIMEOUT_MS;
|
||||||
|
|
||||||
if (!apiKey) {
|
|
||||||
return {
|
|
||||||
success: false,
|
|
||||||
error: `No API key configured for ${provider}`,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const maxLen = config.maxTextLength || 4000;
|
const maxLen = config.maxTextLength || 4000;
|
||||||
if (text.length > maxLen) {
|
if (text.length > maxLen) {
|
||||||
return {
|
return {
|
||||||
@@ -263,48 +302,65 @@ async function textToSpeech(text: string, config: TtsConfig): Promise<TtsResult>
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
// Try primary provider first, then fallback
|
||||||
let audioBuffer: Buffer;
|
const providers = [primaryProvider, fallbackProvider];
|
||||||
|
let lastError: string | undefined;
|
||||||
|
|
||||||
if (provider === "elevenlabs") {
|
for (const provider of providers) {
|
||||||
audioBuffer = await elevenLabsTTS(
|
const apiKey = getApiKey(config, provider);
|
||||||
text,
|
if (!apiKey) {
|
||||||
apiKey,
|
lastError = `No API key for ${provider}`;
|
||||||
config.elevenlabs?.voiceId,
|
continue;
|
||||||
config.elevenlabs?.modelId,
|
|
||||||
timeoutMs
|
|
||||||
);
|
|
||||||
} else if (provider === "openai") {
|
|
||||||
audioBuffer = await openaiTTS(
|
|
||||||
text,
|
|
||||||
apiKey,
|
|
||||||
config.openai?.model,
|
|
||||||
config.openai?.voice,
|
|
||||||
timeoutMs
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
return { success: false, error: `Unknown provider: ${provider}` };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save to temp file
|
try {
|
||||||
const tempDir = mkdtempSync(join(tmpdir(), "tts-"));
|
let audioBuffer: Buffer;
|
||||||
const audioPath = join(tempDir, `voice-${Date.now()}.mp3`);
|
|
||||||
writeFileSync(audioPath, audioBuffer);
|
|
||||||
|
|
||||||
// Schedule cleanup after delay (file should be consumed by then)
|
if (provider === "elevenlabs") {
|
||||||
scheduleCleanup(tempDir);
|
audioBuffer = await elevenLabsTTS(
|
||||||
|
text,
|
||||||
|
apiKey,
|
||||||
|
config.elevenlabs?.voiceId,
|
||||||
|
config.elevenlabs?.modelId,
|
||||||
|
timeoutMs
|
||||||
|
);
|
||||||
|
} else if (provider === "openai") {
|
||||||
|
audioBuffer = await openaiTTS(
|
||||||
|
text,
|
||||||
|
apiKey,
|
||||||
|
config.openai?.model || "gpt-4o-mini-tts",
|
||||||
|
config.openai?.voice,
|
||||||
|
timeoutMs
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
lastError = `Unknown provider: ${provider}`;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
return { success: true, audioPath };
|
// Save to temp file
|
||||||
} catch (err) {
|
const tempDir = mkdtempSync(join(tmpdir(), "tts-"));
|
||||||
const error = err as Error;
|
const audioPath = join(tempDir, `voice-${Date.now()}.mp3`);
|
||||||
if (error.name === "AbortError") {
|
writeFileSync(audioPath, audioBuffer);
|
||||||
return { success: false, error: "TTS request timed out" };
|
|
||||||
|
// Schedule cleanup after delay (file should be consumed by then)
|
||||||
|
scheduleCleanup(tempDir);
|
||||||
|
|
||||||
|
return { success: true, audioPath };
|
||||||
|
} catch (err) {
|
||||||
|
const error = err as Error;
|
||||||
|
if (error.name === "AbortError") {
|
||||||
|
lastError = `${provider}: request timed out`;
|
||||||
|
} else {
|
||||||
|
lastError = `${provider}: ${error.message}`;
|
||||||
|
}
|
||||||
|
// Continue to try fallback provider
|
||||||
}
|
}
|
||||||
return {
|
|
||||||
success: false,
|
|
||||||
error: `TTS conversion failed: ${error.message}`,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: `TTS conversion failed: ${lastError || "no providers available"}`,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -364,7 +420,7 @@ Do NOT add extra text around the MEDIA directive.`,
|
|||||||
const text = params.text;
|
const text = params.text;
|
||||||
log.info(`[${PLUGIN_ID}] speak() called, length: ${text.length}`);
|
log.info(`[${PLUGIN_ID}] speak() called, length: ${text.length}`);
|
||||||
|
|
||||||
const result = await textToSpeech(text, config);
|
const result = await textToSpeech(text, config, prefsPath);
|
||||||
|
|
||||||
if (result.success && result.audioPath) {
|
if (result.success && result.audioPath) {
|
||||||
log.info(`[${PLUGIN_ID}] Audio generated: ${result.audioPath}`);
|
log.info(`[${PLUGIN_ID}] Audio generated: ${result.audioPath}`);
|
||||||
@@ -396,12 +452,18 @@ Do NOT add extra text around the MEDIA directive.`,
|
|||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
|
|
||||||
// tts.status - Check if TTS is enabled
|
// tts.status - Check if TTS is enabled
|
||||||
api.registerGatewayMethod("tts.status", async () => ({
|
api.registerGatewayMethod("tts.status", async () => {
|
||||||
enabled: isTtsEnabled(prefsPath),
|
const userProvider = getTtsProvider(prefsPath);
|
||||||
provider: config.provider,
|
const activeProvider = userProvider || config.provider || "openai";
|
||||||
prefsPath,
|
return {
|
||||||
hasApiKey: !!getApiKey(config, config.provider || "elevenlabs"),
|
enabled: isTtsEnabled(prefsPath),
|
||||||
}));
|
provider: activeProvider,
|
||||||
|
fallbackProvider: activeProvider === "openai" ? "elevenlabs" : "openai",
|
||||||
|
prefsPath,
|
||||||
|
hasOpenAIKey: !!getApiKey(config, "openai"),
|
||||||
|
hasElevenLabsKey: !!getApiKey(config, "elevenlabs"),
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
// tts.enable - Enable TTS mode
|
// tts.enable - Enable TTS mode
|
||||||
api.registerGatewayMethod("tts.enable", async () => {
|
api.registerGatewayMethod("tts.enable", async () => {
|
||||||
@@ -423,41 +485,196 @@ Do NOT add extra text around the MEDIA directive.`,
|
|||||||
if (typeof params?.text !== "string" || params.text.length === 0) {
|
if (typeof params?.text !== "string" || params.text.length === 0) {
|
||||||
return { ok: false, error: "Invalid or missing 'text' parameter" };
|
return { ok: false, error: "Invalid or missing 'text' parameter" };
|
||||||
}
|
}
|
||||||
const result = await textToSpeech(params.text, config);
|
const result = await textToSpeech(params.text, config, prefsPath);
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
return { ok: true, audioPath: result.audioPath };
|
return { ok: true, audioPath: result.audioPath };
|
||||||
}
|
}
|
||||||
return { ok: false, error: result.error };
|
return { ok: false, error: result.error };
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// tts.setProvider - Set primary TTS provider
|
||||||
|
api.registerGatewayMethod("tts.setProvider", async (params: { provider?: unknown }) => {
|
||||||
|
if (params?.provider !== "openai" && params?.provider !== "elevenlabs") {
|
||||||
|
return { ok: false, error: "Invalid provider. Use 'openai' or 'elevenlabs'" };
|
||||||
|
}
|
||||||
|
setTtsProvider(prefsPath, params.provider);
|
||||||
|
log.info(`[${PLUGIN_ID}] Provider set to ${params.provider} via RPC`);
|
||||||
|
return { ok: true, provider: params.provider };
|
||||||
|
});
|
||||||
|
|
||||||
// tts.providers - List available providers and their status
|
// tts.providers - List available providers and their status
|
||||||
api.registerGatewayMethod("tts.providers", async () => ({
|
api.registerGatewayMethod("tts.providers", async () => {
|
||||||
providers: [
|
const userProvider = getTtsProvider(prefsPath);
|
||||||
{
|
return {
|
||||||
id: "elevenlabs",
|
providers: [
|
||||||
name: "ElevenLabs",
|
{
|
||||||
configured: !!getApiKey(config, "elevenlabs"),
|
id: "openai",
|
||||||
models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_monolingual_v1"],
|
name: "OpenAI",
|
||||||
},
|
configured: !!getApiKey(config, "openai"),
|
||||||
{
|
models: ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"],
|
||||||
id: "openai",
|
voices: ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
|
||||||
name: "OpenAI",
|
},
|
||||||
configured: !!getApiKey(config, "openai"),
|
{
|
||||||
models: ["tts-1", "tts-1-hd"],
|
id: "elevenlabs",
|
||||||
voices: ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
|
name: "ElevenLabs",
|
||||||
},
|
configured: !!getApiKey(config, "elevenlabs"),
|
||||||
],
|
models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_monolingual_v1"],
|
||||||
active: config.provider,
|
},
|
||||||
}));
|
],
|
||||||
|
active: userProvider || config.provider || "openai",
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// Plugin Commands (LLM-free, intercepted automatically)
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
|
// /tts_on - Enable TTS mode
|
||||||
|
api.registerCommand({
|
||||||
|
name: "tts_on",
|
||||||
|
description: "Enable text-to-speech for responses",
|
||||||
|
handler: () => {
|
||||||
|
setTtsEnabled(prefsPath, true);
|
||||||
|
log.info(`[${PLUGIN_ID}] TTS enabled via /tts_on command`);
|
||||||
|
return { text: "🔊 TTS ativado! Agora vou responder em áudio." };
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// /tts_off - Disable TTS mode
|
||||||
|
api.registerCommand({
|
||||||
|
name: "tts_off",
|
||||||
|
description: "Disable text-to-speech for responses",
|
||||||
|
handler: () => {
|
||||||
|
setTtsEnabled(prefsPath, false);
|
||||||
|
log.info(`[${PLUGIN_ID}] TTS disabled via /tts_off command`);
|
||||||
|
return { text: "🔇 TTS desativado. Voltando ao modo texto." };
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// /audio <text> - Convert text to audio immediately
|
||||||
|
api.registerCommand({
|
||||||
|
name: "audio",
|
||||||
|
description: "Convert text to audio message",
|
||||||
|
acceptsArgs: true,
|
||||||
|
handler: async (ctx) => {
|
||||||
|
const text = ctx.args?.trim();
|
||||||
|
if (!text) {
|
||||||
|
return { text: "❌ Uso: /audio <texto para converter em áudio>" };
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info(`[${PLUGIN_ID}] /audio command, text length: ${text.length}`);
|
||||||
|
const result = await textToSpeech(text, config, prefsPath);
|
||||||
|
|
||||||
|
if (result.success && result.audioPath) {
|
||||||
|
log.info(`[${PLUGIN_ID}] Audio generated: ${result.audioPath}`);
|
||||||
|
return { text: `MEDIA:${result.audioPath}` };
|
||||||
|
}
|
||||||
|
|
||||||
|
log.error(`[${PLUGIN_ID}] /audio failed: ${result.error}`);
|
||||||
|
return { text: `❌ Erro ao gerar áudio: ${result.error}` };
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// /tts_provider [openai|elevenlabs] - Set or show TTS provider
|
||||||
|
api.registerCommand({
  name: "tts_provider",
  description: "Set or show TTS provider (openai or elevenlabs)",
  acceptsArgs: true,
  /**
   * With no argument, reports the current primary provider, its fallback and
   * which API keys are configured. With "openai" or "elevenlabs", stores that
   * provider in the preferences file and confirms the change.
   */
  handler: (ctx) => {
    // The fallback is always "the other" of the two supported providers.
    const fallbackFor = (provider: string): "openai" | "elevenlabs" =>
      provider === "openai" ? "elevenlabs" : "openai";

    // Show the model from config rather than a fixed name, so the message
    // matches what is passed to the TTS calls. The ElevenLabs fallback label
    // is the one the original message used; the provider's real default is
    // not visible here (TODO confirm against elevenLabsTTS).
    const modelLabel = (provider: string): string =>
      provider === "openai"
        ? `(${config.openai?.model || "gpt-4o-mini-tts"})`
        : `(${config.elevenlabs?.modelId || "eleven_multilingual_v2"})`;

    const arg = ctx.args?.trim().toLowerCase();

    if (!arg) {
      // Show current provider
      const currentProvider = getTtsProvider(prefsPath) || config.provider || "openai";
      const fallback = fallbackFor(currentProvider);
      const hasOpenAI = !!getApiKey(config, "openai");
      const hasElevenLabs = !!getApiKey(config, "elevenlabs");
      return {
        text: `🎙️ **TTS Provider**\n\n` +
          `Primário: **${currentProvider}** ${modelLabel(currentProvider)}\n` +
          `Fallback: ${fallback}\n\n` +
          `OpenAI: ${hasOpenAI ? "✅ configurado" : "❌ sem API key"}\n` +
          `ElevenLabs: ${hasElevenLabs ? "✅ configurado" : "❌ sem API key"}\n\n` +
          `Uso: /tts_provider openai ou /tts_provider elevenlabs`,
      };
    }

    if (arg !== "openai" && arg !== "elevenlabs") {
      return { text: "❌ Provedor inválido. Use: /tts_provider openai ou /tts_provider elevenlabs" };
    }

    setTtsProvider(prefsPath, arg);
    const fallback = fallbackFor(arg);
    log.info(`[${PLUGIN_ID}] Provider set to ${arg} via /tts_provider command`);
    return {
      text: `✅ Provedor TTS alterado!\n\n` +
        `Primário: **${arg}** ${modelLabel(arg)}\n` +
        `Fallback: ${fallback}`,
    };
  },
});
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// Auto-TTS Hook (message_sending)
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
|
// Automatically convert text responses to audio when TTS is enabled
|
||||||
|
api.on("message_sending", async (event) => {
|
||||||
|
// Check if TTS is enabled
|
||||||
|
if (!isTtsEnabled(prefsPath)) {
|
||||||
|
return; // TTS disabled, don't modify message
|
||||||
|
}
|
||||||
|
|
||||||
|
const content = event.content?.trim();
|
||||||
|
if (!content) {
|
||||||
|
return; // Empty content, skip
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip if already contains MEDIA directive (avoid double conversion)
|
||||||
|
if (content.includes("MEDIA:")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip very short messages (likely errors or status)
|
||||||
|
if (content.length < 10) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info(`[${PLUGIN_ID}] Auto-TTS: Converting ${content.length} chars`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await textToSpeech(content, config, prefsPath);
|
||||||
|
|
||||||
|
if (result.success && result.audioPath) {
|
||||||
|
log.info(`[${PLUGIN_ID}] Auto-TTS: Audio generated: ${result.audioPath}`);
|
||||||
|
// Return modified content with MEDIA directive
|
||||||
|
// Only the MEDIA directive is returned as content; the original text is not included in it
|
||||||
|
return {
|
||||||
|
content: `MEDIA:${result.audioPath}`,
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
log.warn(`[${PLUGIN_ID}] Auto-TTS: Failed - ${result.error}`);
|
||||||
|
// On failure, send original text without audio
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
const error = err as Error;
|
||||||
|
log.error(`[${PLUGIN_ID}] Auto-TTS error: ${error.message}`);
|
||||||
|
// On error, send original text
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
// Startup
|
// Startup
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
|
|
||||||
const ttsEnabled = isTtsEnabled(prefsPath);
|
const ttsEnabled = isTtsEnabled(prefsPath);
|
||||||
const hasKey = !!getApiKey(config, config.provider || "elevenlabs");
|
const userProvider = getTtsProvider(prefsPath);
|
||||||
|
const activeProvider = userProvider || config.provider || "openai";
|
||||||
|
const hasKey = !!getApiKey(config, activeProvider);
|
||||||
|
|
||||||
log.info(`[${PLUGIN_ID}] Ready. TTS: ${ttsEnabled ? "ON" : "OFF"}, API Key: ${hasKey ? "OK" : "MISSING"}`);
|
log.info(`[${PLUGIN_ID}] Ready. TTS: ${ttsEnabled ? "ON" : "OFF"}, Provider: ${activeProvider}, API Key: ${hasKey ? "OK" : "MISSING"}`);
|
||||||
|
|
||||||
if (!hasKey) {
|
if (!hasKey) {
|
||||||
log.warn(
|
log.warn(
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
import { resolveSessionAgentId } from "../../agents/agent-scope.js";
|
import { resolveSessionAgentId } from "../../agents/agent-scope.js";
|
||||||
import { resolveEffectiveMessagesConfig } from "../../agents/identity.js";
|
import { resolveEffectiveMessagesConfig } from "../../agents/identity.js";
|
||||||
import { normalizeChannelId } from "../../channels/plugins/index.js";
|
import { normalizeChannelId } from "../../channels/plugins/index.js";
|
||||||
|
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||||
import type { ClawdbotConfig } from "../../config/config.js";
|
import type { ClawdbotConfig } from "../../config/config.js";
|
||||||
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
|
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
|
||||||
import type { OriginatingChannelType } from "../templating.js";
|
import type { OriginatingChannelType } from "../templating.js";
|
||||||
@@ -72,14 +73,56 @@ export async function routeReply(params: RouteReplyParams): Promise<RouteReplyRe
|
|||||||
});
|
});
|
||||||
if (!normalized) return { ok: true };
|
if (!normalized) return { ok: true };
|
||||||
|
|
||||||
const text = normalized.text ?? "";
|
let text = normalized.text ?? "";
|
||||||
const mediaUrls = (normalized.mediaUrls?.filter(Boolean) ?? []).length
|
let mediaUrls = (normalized.mediaUrls?.filter(Boolean) ?? []).length
|
||||||
? (normalized.mediaUrls?.filter(Boolean) as string[])
|
? (normalized.mediaUrls?.filter(Boolean) as string[])
|
||||||
: normalized.mediaUrl
|
: normalized.mediaUrl
|
||||||
? [normalized.mediaUrl]
|
? [normalized.mediaUrl]
|
||||||
: [];
|
: [];
|
||||||
const replyToId = normalized.replyToId;
|
const replyToId = normalized.replyToId;
|
||||||
|
|
||||||
|
// Run message_sending hook (allows plugins to modify or cancel)
|
||||||
|
// Give plugins a chance to cancel the reply or rewrite its content before it
// is routed. Skipped when no hook runner is registered, when the text is
// blank, or when the channel id does not normalize.
const hookRunner = getGlobalHookRunner();
const normalizedChannel = normalizeChannelId(channel);
if (hookRunner && text.trim() && normalizedChannel) {
  try {
    const hookResult = await hookRunner.runMessageSending(
      {
        to,
        content: text,
        metadata: { channel, accountId, threadId },
      },
      {
        channelId: normalizedChannel,
        accountId: accountId ?? undefined,
        conversationId: to,
      },
    );

    // Check if hook wants to cancel the message
    if (hookResult?.cancel) {
      return { ok: true }; // Silently cancel
    }

    // Check if hook modified the content
    if (hookResult?.content !== undefined) {
      // A line of the form "MEDIA:<path>" asks for <path> to be sent as media.
      const mediaDirective = /^MEDIA:(.+)$/m;
      const mediaMatch = hookResult.content.match(mediaDirective);
      if (mediaMatch) {
        const mediaPath = mediaMatch[1].trim();
        // Append instead of overwriting, so media already attached to the
        // reply is not dropped when a hook contributes an extra file.
        if (mediaPath && !mediaUrls.includes(mediaPath)) {
          mediaUrls = [...mediaUrls, mediaPath];
        }
        // Remove MEDIA: directive from text; whatever else the hook returned
        // (possibly nothing) becomes the reply text.
        text = hookResult.content.replace(mediaDirective, "").trim();
      } else {
        text = hookResult.content;
      }
    }
  } catch {
    // Hook errors shouldn't block message sending
  }
}
|
||||||
|
|
||||||
// Skip empty replies.
|
// Skip empty replies.
|
||||||
if (!text.trim() && mediaUrls.length === 0) {
|
if (!text.trim() && mediaUrls.length === 0) {
|
||||||
return { ok: true };
|
return { ok: true };
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import { mediaKindFromMime } from "../../media/constants.js";
|
|||||||
import { fetchRemoteMedia } from "../../media/fetch.js";
|
import { fetchRemoteMedia } from "../../media/fetch.js";
|
||||||
import { isGifMedia } from "../../media/mime.js";
|
import { isGifMedia } from "../../media/mime.js";
|
||||||
import { saveMediaBuffer } from "../../media/store.js";
|
import { saveMediaBuffer } from "../../media/store.js";
|
||||||
|
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||||
import type { RuntimeEnv } from "../../runtime.js";
|
import type { RuntimeEnv } from "../../runtime.js";
|
||||||
import { loadWebMedia } from "../../web/media.js";
|
import { loadWebMedia } from "../../web/media.js";
|
||||||
import { resolveTelegramVoiceSend } from "../voice.js";
|
import { resolveTelegramVoiceSend } from "../voice.js";
|
||||||
@@ -39,6 +40,45 @@ export async function deliverReplies(params: {
|
|||||||
const threadParams = buildTelegramThreadParams(messageThreadId);
|
const threadParams = buildTelegramThreadParams(messageThreadId);
|
||||||
let hasReplied = false;
|
let hasReplied = false;
|
||||||
for (const reply of replies) {
|
for (const reply of replies) {
|
||||||
|
// Track if hook wants to send audio after text
|
||||||
|
let audioToSendAfter: string | undefined;
|
||||||
|
|
||||||
|
// Run message_sending hook (allows plugins like TTS to generate audio)
|
||||||
|
const hookRunner = getGlobalHookRunner();
|
||||||
|
if (hookRunner && reply?.text?.trim()) {
|
||||||
|
try {
|
||||||
|
const hookResult = await hookRunner.runMessageSending(
|
||||||
|
{
|
||||||
|
to: chatId,
|
||||||
|
content: reply.text,
|
||||||
|
metadata: { channel: "telegram", threadId: messageThreadId },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
channelId: "telegram",
|
||||||
|
accountId: undefined,
|
||||||
|
conversationId: chatId,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check if hook wants to cancel the message
|
||||||
|
if (hookResult?.cancel) {
|
||||||
|
continue; // Skip this reply
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if hook returned a MEDIA directive (TTS audio)
|
||||||
|
if (hookResult?.content !== undefined) {
|
||||||
|
const mediaMatch = hookResult.content.match(/^MEDIA:(.+)$/m);
|
||||||
|
if (mediaMatch) {
|
||||||
|
// Save audio path to send AFTER the text message
|
||||||
|
audioToSendAfter = mediaMatch[1].trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
// Hook errors shouldn't block message sending
|
||||||
|
logVerbose(`[telegram delivery] hook error: ${String(err)}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
|
const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
|
||||||
if (!reply?.text && !hasMedia) {
|
if (!reply?.text && !hasMedia) {
|
||||||
if (reply?.audioAsVoice) {
|
if (reply?.audioAsVoice) {
|
||||||
@@ -70,6 +110,25 @@ export async function deliverReplies(params: {
|
|||||||
hasReplied = true;
|
hasReplied = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Send TTS audio after text (if hook generated one)
|
||||||
|
if (audioToSendAfter) {
|
||||||
|
try {
|
||||||
|
const audioMedia = await loadWebMedia(audioToSendAfter);
|
||||||
|
const audioFile = new InputFile(audioMedia.buffer, "voice.mp3");
|
||||||
|
// Switch typing indicator to record_voice before sending
|
||||||
|
await params.onVoiceRecording?.();
|
||||||
|
const audioParams: Record<string, unknown> = {};
|
||||||
|
if (threadParams) {
|
||||||
|
audioParams.message_thread_id = threadParams.message_thread_id;
|
||||||
|
}
|
||||||
|
await bot.api.sendVoice(chatId, audioFile, audioParams);
|
||||||
|
logVerbose(`[telegram delivery] TTS audio sent: ${audioToSendAfter}`);
|
||||||
|
} catch (err) {
|
||||||
|
logVerbose(`[telegram delivery] TTS audio send failed: ${String(err)}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// media with optional caption on first item
|
// media with optional caption on first item
|
||||||
|
|||||||
Reference in New Issue
Block a user