From ce786762db2ba4d8ca8a3fd81a04cf7729819aed Mon Sep 17 00:00:00 2001 From: Jarvis Date: Thu, 8 Jan 2026 13:55:36 +0000 Subject: [PATCH] fix(telegram): guard voice note sends --- src/telegram/bot.ts | 15 ++++++++++++++- src/telegram/send.test.ts | 34 ++++++++++++++++++++++++++++++++++ src/telegram/send.ts | 13 ++++++++++++- src/telegram/voice.ts | 15 +++++++++++++++ 4 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 src/telegram/voice.ts diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts index aaba18307..4ece77cc8 100644 --- a/src/telegram/bot.ts +++ b/src/telegram/bot.ts @@ -60,6 +60,7 @@ import { resolveTelegramAccount } from "./accounts.js"; import { createTelegramDraftStream } from "./draft-stream.js"; import { resolveTelegramFetch } from "./fetch.js"; import { markdownToTelegramHtml } from "./format.js"; +import { isTelegramVoiceCompatible } from "./voice.js"; import { readTelegramAllowFromStore, upsertTelegramPairingRequest, @@ -1387,7 +1388,19 @@ async function deliverReplies(params: { ...mediaParams, }); } else if (kind === "audio") { - const useVoice = reply.audioAsVoice === true; // default false (backward compatible) + const wantsVoice = reply.audioAsVoice === true; // default false (backward compatible) + const canVoice = wantsVoice + ? isTelegramVoiceCompatible({ + contentType: media.contentType, + fileName, + }) + : false; + const useVoice = wantsVoice && canVoice; + if (wantsVoice && !canVoice) { + logVerbose( + `Telegram voice requested but media is ${media.contentType ?? "unknown"} (${fileName}); sending as audio file instead.`, + ); + } if (useVoice) { // Voice message - displays as round playable bubble (opt-in via [[audio_as_voice]]) await bot.api.sendVoice(chatId, file, { diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index 115f88851..d2172d65d 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -324,6 +324,40 @@ describe("sendMessageTelegram", () => { expect(sendAudio).not.toHaveBeenCalled(); }); + it("falls back to audio when asVoice is true but media is not voice compatible", async () => { + const chatId = "123"; + const sendAudio = vi.fn().mockResolvedValue({ + message_id: 14, + chat: { id: chatId }, + }); + const sendVoice = vi.fn().mockResolvedValue({ + message_id: 15, + chat: { id: chatId }, + }); + const api = { sendAudio, sendVoice } as unknown as { + sendAudio: typeof sendAudio; + sendVoice: typeof sendVoice; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("audio"), + contentType: "audio/mpeg", + fileName: "clip.mp3", + }); + + await sendMessageTelegram(chatId, "caption", { + token: "tok", + api, + mediaUrl: "https://example.com/clip.mp3", + asVoice: true, + }); + + expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: "caption", + }); + expect(sendVoice).not.toHaveBeenCalled(); + }); + it("includes message_thread_id for forum topic messages", async () => { const chatId = "-1001234567890"; const sendMessage = vi.fn().mockResolvedValue({ diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 8799e32f7..b293f7844 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -10,6 +10,7 @@ import { formatErrorMessage } from "../infra/errors.js"; import { recordProviderActivity } from "../infra/provider-activity.js"; import type { RetryConfig } from "../infra/retry.js"; import { createTelegramRetryRunner } from "../infra/retry-policy.js"; +import { logVerbose } from "../globals.js"; import { mediaKindFromMime } from "../media/constants.js"; import { isGifMedia } from "../media/mime.js"; import { loadWebMedia } from "../web/media.js"; @@ -20,6 +21,7 @@ import { parseTelegramTarget, stripTelegramInternalPrefixes, } from "./targets.js"; +import { resolveTelegramVoiceDecision } from "./voice.js"; type TelegramSendOpts = { token?: string; @@ -237,7 +239,16 @@ export async function sendMessageTelegram( throw wrapChatNotFound(err); }); } else if (kind === "audio") { - const useVoice = opts.asVoice === true; // default false (backward compatible) + const { useVoice, reason } = resolveTelegramVoiceDecision({ + wantsVoice: opts.asVoice === true, // default false (backward compatible) + contentType: media.contentType, + fileName, + }); + if (reason) { + logVerbose( + `Telegram voice requested but ${reason}; sending as audio file instead.`, + ); + } if (useVoice) { result = await request( () => api.sendVoice(chatId, file, mediaParams), diff --git a/src/telegram/voice.ts b/src/telegram/voice.ts new file mode 100644 index 000000000..623bc58ef --- /dev/null +++ b/src/telegram/voice.ts @@ -0,0 +1,15 @@ +import path from "node:path"; + +export function isTelegramVoiceCompatible(opts: { + contentType?: string | null; + fileName?: string | null; +}): boolean { + const mime = opts.contentType?.toLowerCase(); + if (mime && (mime.includes("ogg") || mime.includes("opus"))) { + return true; + } + const fileName = opts.fileName?.trim(); + if (!fileName) return false; + const ext = path.extname(fileName).toLowerCase(); + return ext === ".ogg" || ext === ".opus" || ext === ".oga"; +}