feat(telegram-tts): add auto-TTS hook and provider switching
- Integrate message_sending hook into Telegram delivery path
- Send text first, then audio as voice message after
- Add /tts_provider command to switch between OpenAI and ElevenLabs
- Implement automatic fallback when primary provider fails
- Use gpt-4o-mini-tts as default OpenAI model
- Add hook integration to route-reply.ts for other channels

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
46e6546bb9
commit
df09e583aa
@@ -10,6 +10,7 @@
|
||||
import { resolveSessionAgentId } from "../../agents/agent-scope.js";
|
||||
import { resolveEffectiveMessagesConfig } from "../../agents/identity.js";
|
||||
import { normalizeChannelId } from "../../channels/plugins/index.js";
|
||||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||
import type { ClawdbotConfig } from "../../config/config.js";
|
||||
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
|
||||
import type { OriginatingChannelType } from "../templating.js";
|
||||
@@ -72,14 +73,56 @@ export async function routeReply(params: RouteReplyParams): Promise<RouteReplyRe
|
||||
});
|
||||
if (!normalized) return { ok: true };
|
||||
|
||||
const text = normalized.text ?? "";
|
||||
const mediaUrls = (normalized.mediaUrls?.filter(Boolean) ?? []).length
|
||||
let text = normalized.text ?? "";
|
||||
let mediaUrls = (normalized.mediaUrls?.filter(Boolean) ?? []).length
|
||||
? (normalized.mediaUrls?.filter(Boolean) as string[])
|
||||
: normalized.mediaUrl
|
||||
? [normalized.mediaUrl]
|
||||
: [];
|
||||
const replyToId = normalized.replyToId;
|
||||
|
||||
// Run message_sending hook (allows plugins to modify or cancel)
|
||||
const hookRunner = getGlobalHookRunner();
|
||||
const normalizedChannel = normalizeChannelId(channel);
|
||||
if (hookRunner && text.trim() && normalizedChannel) {
|
||||
try {
|
||||
const hookResult = await hookRunner.runMessageSending(
|
||||
{
|
||||
to,
|
||||
content: text,
|
||||
metadata: { channel, accountId, threadId },
|
||||
},
|
||||
{
|
||||
channelId: normalizedChannel,
|
||||
accountId: accountId ?? undefined,
|
||||
conversationId: to,
|
||||
},
|
||||
);
|
||||
|
||||
// Check if hook wants to cancel the message
|
||||
if (hookResult?.cancel) {
|
||||
return { ok: true }; // Silently cancel
|
||||
}
|
||||
|
||||
// Check if hook modified the content
|
||||
if (hookResult?.content !== undefined) {
|
||||
// Check if the modified content contains MEDIA: directive
|
||||
const mediaMatch = hookResult.content.match(/^MEDIA:(.+)$/m);
|
||||
if (mediaMatch) {
|
||||
// Extract media path and add to mediaUrls
|
||||
const mediaPath = mediaMatch[1].trim();
|
||||
mediaUrls = [mediaPath];
|
||||
// Remove MEDIA: directive from text (send audio only)
|
||||
text = hookResult.content.replace(/^MEDIA:.+$/m, "").trim();
|
||||
} else {
|
||||
text = hookResult.content;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Hook errors shouldn't block message sending
|
||||
}
|
||||
}
|
||||
|
||||
// Skip empty replies.
|
||||
if (!text.trim() && mediaUrls.length === 0) {
|
||||
return { ok: true };
|
||||
|
||||
@@ -14,6 +14,7 @@ import { mediaKindFromMime } from "../../media/constants.js";
|
||||
import { fetchRemoteMedia } from "../../media/fetch.js";
|
||||
import { isGifMedia } from "../../media/mime.js";
|
||||
import { saveMediaBuffer } from "../../media/store.js";
|
||||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||
import type { RuntimeEnv } from "../../runtime.js";
|
||||
import { loadWebMedia } from "../../web/media.js";
|
||||
import { resolveTelegramVoiceSend } from "../voice.js";
|
||||
@@ -39,6 +40,45 @@ export async function deliverReplies(params: {
|
||||
const threadParams = buildTelegramThreadParams(messageThreadId);
|
||||
let hasReplied = false;
|
||||
for (const reply of replies) {
|
||||
// Track if hook wants to send audio after text
|
||||
let audioToSendAfter: string | undefined;
|
||||
|
||||
// Run message_sending hook (allows plugins like TTS to generate audio)
|
||||
const hookRunner = getGlobalHookRunner();
|
||||
if (hookRunner && reply?.text?.trim()) {
|
||||
try {
|
||||
const hookResult = await hookRunner.runMessageSending(
|
||||
{
|
||||
to: chatId,
|
||||
content: reply.text,
|
||||
metadata: { channel: "telegram", threadId: messageThreadId },
|
||||
},
|
||||
{
|
||||
channelId: "telegram",
|
||||
accountId: undefined,
|
||||
conversationId: chatId,
|
||||
},
|
||||
);
|
||||
|
||||
// Check if hook wants to cancel the message
|
||||
if (hookResult?.cancel) {
|
||||
continue; // Skip this reply
|
||||
}
|
||||
|
||||
// Check if hook returned a MEDIA directive (TTS audio)
|
||||
if (hookResult?.content !== undefined) {
|
||||
const mediaMatch = hookResult.content.match(/^MEDIA:(.+)$/m);
|
||||
if (mediaMatch) {
|
||||
// Save audio path to send AFTER the text message
|
||||
audioToSendAfter = mediaMatch[1].trim();
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
// Hook errors shouldn't block message sending
|
||||
logVerbose(`[telegram delivery] hook error: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
|
||||
if (!reply?.text && !hasMedia) {
|
||||
if (reply?.audioAsVoice) {
|
||||
@@ -70,6 +110,25 @@ export async function deliverReplies(params: {
|
||||
hasReplied = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Send TTS audio after text (if hook generated one)
|
||||
if (audioToSendAfter) {
|
||||
try {
|
||||
const audioMedia = await loadWebMedia(audioToSendAfter);
|
||||
const audioFile = new InputFile(audioMedia.buffer, "voice.mp3");
|
||||
// Switch typing indicator to record_voice before sending
|
||||
await params.onVoiceRecording?.();
|
||||
const audioParams: Record<string, unknown> = {};
|
||||
if (threadParams) {
|
||||
audioParams.message_thread_id = threadParams.message_thread_id;
|
||||
}
|
||||
await bot.api.sendVoice(chatId, audioFile, audioParams);
|
||||
logVerbose(`[telegram delivery] TTS audio sent: ${audioToSendAfter}`);
|
||||
} catch (err) {
|
||||
logVerbose(`[telegram delivery] TTS audio send failed: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
// media with optional caption on first item
|
||||
|
||||
Reference in New Issue
Block a user