feat(telegram-tts): add auto-TTS hook and provider switching

- Integrate message_sending hook into Telegram delivery path
- Send the text first, then send the audio as a voice message
- Add /tts_provider command to switch between OpenAI and ElevenLabs
- Implement automatic fallback when primary provider fails
- Use gpt-4o-mini-tts as default OpenAI model
- Add hook integration to route-reply.ts for other channels

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Glucksberg
2026-01-24 00:19:08 +00:00
committed by Peter Steinberger
parent 46e6546bb9
commit df09e583aa
3 changed files with 397 additions and 78 deletions

View File

@@ -14,6 +14,7 @@ import { mediaKindFromMime } from "../../media/constants.js";
import { fetchRemoteMedia } from "../../media/fetch.js";
import { isGifMedia } from "../../media/mime.js";
import { saveMediaBuffer } from "../../media/store.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import type { RuntimeEnv } from "../../runtime.js";
import { loadWebMedia } from "../../web/media.js";
import { resolveTelegramVoiceSend } from "../voice.js";
@@ -39,6 +40,45 @@ export async function deliverReplies(params: {
const threadParams = buildTelegramThreadParams(messageThreadId);
let hasReplied = false;
for (const reply of replies) {
// Track if hook wants to send audio after text
let audioToSendAfter: string | undefined;
// Run message_sending hook (allows plugins like TTS to generate audio)
const hookRunner = getGlobalHookRunner();
if (hookRunner && reply?.text?.trim()) {
try {
const hookResult = await hookRunner.runMessageSending(
{
to: chatId,
content: reply.text,
metadata: { channel: "telegram", threadId: messageThreadId },
},
{
channelId: "telegram",
accountId: undefined,
conversationId: chatId,
},
);
// Check if hook wants to cancel the message
if (hookResult?.cancel) {
continue; // Skip this reply
}
// Check if hook returned a MEDIA directive (TTS audio)
if (hookResult?.content !== undefined) {
const mediaMatch = hookResult.content.match(/^MEDIA:(.+)$/m);
if (mediaMatch) {
// Save audio path to send AFTER the text message
audioToSendAfter = mediaMatch[1].trim();
}
}
} catch (err) {
// Hook errors shouldn't block message sending
logVerbose(`[telegram delivery] hook error: ${String(err)}`);
}
}
const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
if (!reply?.text && !hasMedia) {
if (reply?.audioAsVoice) {
@@ -70,6 +110,25 @@ export async function deliverReplies(params: {
hasReplied = true;
}
}
// Send TTS audio after text (if hook generated one)
if (audioToSendAfter) {
try {
const audioMedia = await loadWebMedia(audioToSendAfter);
const audioFile = new InputFile(audioMedia.buffer, "voice.mp3");
// Switch typing indicator to record_voice before sending
await params.onVoiceRecording?.();
const audioParams: Record<string, unknown> = {};
if (threadParams) {
audioParams.message_thread_id = threadParams.message_thread_id;
}
await bot.api.sendVoice(chatId, audioFile, audioParams);
logVerbose(`[telegram delivery] TTS audio sent: ${audioToSendAfter}`);
} catch (err) {
logVerbose(`[telegram delivery] TTS audio send failed: ${String(err)}`);
}
}
continue;
}
// media with optional caption on first item