fix: render Telegram media captions

This commit is contained in:
Peter Steinberger
2026-01-24 03:39:21 +00:00
parent d57cb2e1a8
commit de2d986008
10 changed files with 176 additions and 80 deletions

View File

@@ -363,6 +363,7 @@ describe("createTelegramBot", () => {
expect(sendAnimationSpy).toHaveBeenCalledTimes(1);
expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), {
caption: "caption",
parse_mode: "HTML",
reply_to_message_id: undefined,
});
expect(sendPhotoSpy).not.toHaveBeenCalled();

View File

@@ -1392,6 +1392,7 @@ describe("createTelegramBot", () => {
expect(sendAnimationSpy).toHaveBeenCalledTimes(1);
expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), {
caption: "caption",
parse_mode: "HTML",
reply_to_message_id: undefined,
});
expect(sendPhotoSpy).not.toHaveBeenCalled();

View File

@@ -74,4 +74,38 @@ describe("deliverReplies", () => {
expect(sendVoice).toHaveBeenCalledTimes(1);
expect(events).toEqual(["recordVoice", "sendVoice"]);
});
it("renders markdown in media captions", async () => {
  // Arrange: a bot exposing only sendPhoto, plus a stubbed JPEG download.
  const chatId = "123";
  const sendPhoto = vi.fn().mockResolvedValue({
    message_id: 2,
    chat: { id: chatId },
  });
  const bot = { api: { sendPhoto } } as unknown as Bot;
  const runtime = { error: vi.fn(), log: vi.fn() };
  const downloadedPhoto = {
    buffer: Buffer.from("image"),
    contentType: "image/jpeg",
    fileName: "photo.jpg",
  };
  loadWebMedia.mockResolvedValueOnce(downloadedPhoto);

  // Act: deliver one media reply whose text contains markdown bold.
  const reply = { mediaUrl: "https://example.com/photo.jpg", text: "hi **boss**" };
  await deliverReplies({
    replies: [reply],
    chatId,
    token: "tok",
    runtime,
    bot,
    replyToMode: "off",
    textLimit: 4000,
  });

  // Assert: the caption arrives as HTML and the call is flagged with parse_mode.
  const expectedParams = expect.objectContaining({
    caption: "hi <b>boss</b>",
    parse_mode: "HTML",
  });
  expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), expectedParams);
});
});

View File

@@ -1,5 +1,9 @@
import { type Bot, InputFile } from "grammy";
import { markdownToTelegramChunks, markdownToTelegramHtml } from "../format.js";
import {
markdownToTelegramChunks,
markdownToTelegramHtml,
renderTelegramHtmlText,
} from "../format.js";
import { splitTelegramCaption } from "../caption.js";
import type { ReplyPayload } from "../../auto-reply/types.js";
import type { ReplyToMode } from "../../config/config.js";
@@ -87,6 +91,9 @@ export async function deliverReplies(params: {
const { caption, followUpText } = splitTelegramCaption(
isFirstMedia ? (reply.text ?? undefined) : undefined,
);
const htmlCaption = caption
? renderTelegramHtmlText(caption, { tableMode: params.tableMode })
: undefined;
if (followUpText) {
pendingFollowUpText = followUpText;
}
@@ -94,8 +101,9 @@ export async function deliverReplies(params: {
const replyToMessageId =
replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
const mediaParams: Record<string, unknown> = {
caption,
caption: htmlCaption,
reply_to_message_id: replyToMessageId,
...(htmlCaption ? { parse_mode: "HTML" } : {}),
};
if (threadParams) {
mediaParams.message_thread_id = threadParams.message_thread_id;
@@ -149,14 +157,12 @@ export async function deliverReplies(params: {
for (const chunk of chunks) {
const replyToMessageIdFollowup =
replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
await bot.api.sendMessage(
chatId,
chunk.text,
buildTelegramSendParams({
replyToMessageId: replyToMessageIdFollowup,
messageThreadId,
}),
);
await sendTelegramText(bot, chatId, chunk.html, runtime, {
replyToMessageId: replyToMessageIdFollowup,
messageThreadId,
textMode: "html",
plainText: chunk.text,
});
if (replyToId && !hasReplied) {
hasReplied = true;
}

View File

@@ -60,6 +60,15 @@ export function markdownToTelegramHtml(
return renderTelegramHtml(ir);
}
/**
 * Returns `text` as an HTML string for a Telegram send.
 *
 * @param text - Source text; interpreted according to `options.textMode`.
 * @param options - `textMode` defaults to `"markdown"`. In `"html"` mode the
 *   text is returned unchanged; otherwise it is passed to
 *   `markdownToTelegramHtml` together with `tableMode`.
 * @returns The input itself (html mode) or the converter's output (markdown mode).
 */
export function renderTelegramHtmlText(
  text: string,
  options: { textMode?: "markdown" | "html"; tableMode?: MarkdownTableMode } = {},
): string {
  const { textMode = "markdown", tableMode } = options;
  // Pre-rendered HTML is passed through without touching the converter.
  return textMode === "html" ? text : markdownToTelegramHtml(text, { tableMode });
}
export function markdownToTelegramChunks(
markdown: string,
limit: number,

View File

@@ -87,8 +87,10 @@ describe("sendMessageTelegram caption splitting", () => {
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: undefined,
});
// Then text sent as separate message (plain text, matching caption behavior)
expect(sendMessage).toHaveBeenCalledWith(chatId, longText);
// Then text sent as separate message (HTML formatting)
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
});
// Returns the text message ID (the "main" content)
expect(res.messageId).toBe("71");
});
@@ -123,12 +125,43 @@ describe("sendMessageTelegram caption splitting", () => {
// Caption should be included with media
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: shortText,
parse_mode: "HTML",
});
// No separate text message needed
expect(sendMessage).not.toHaveBeenCalled();
expect(res.messageId).toBe("72");
});
it("renders markdown in media captions", async () => {
  // Arrange: an API double with only sendPhoto, plus a stubbed JPEG download.
  const chatId = "123";
  const markdownCaption = "hi **boss**";
  const sendPhoto = vi.fn().mockResolvedValue({
    message_id: 90,
    chat: { id: chatId },
  });
  const api = { sendPhoto } as unknown as {
    sendPhoto: typeof sendPhoto;
  };
  const downloadedPhoto = {
    buffer: Buffer.from("fake-image"),
    contentType: "image/jpeg",
    fileName: "photo.jpg",
  };
  loadWebMedia.mockResolvedValueOnce(downloadedPhoto);

  // Act: send the markdown text along with a media URL.
  const sendOptions = {
    token: "tok",
    api,
    mediaUrl: "https://example.com/photo.jpg",
  };
  await sendMessageTelegram(chatId, markdownCaption, sendOptions);

  // Assert: exact params, so the caption is HTML and parse_mode is the only extra key.
  const expectedParams = {
    caption: "hi <b>boss</b>",
    parse_mode: "HTML",
  };
  expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), expectedParams);
});
it("preserves thread params when splitting long captions", async () => {
const chatId = "-1001234567890";
const longText = "C".repeat(1100);
@@ -166,8 +199,9 @@ describe("sendMessageTelegram caption splitting", () => {
message_thread_id: 271,
reply_to_message_id: 500,
});
// Text message also includes thread params (plain text, matching caption behavior)
// Text message also includes thread params (HTML formatting)
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 500,
});
@@ -209,6 +243,7 @@ describe("sendMessageTelegram caption splitting", () => {
});
// Follow-up text has the reply_markup
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
reply_markup: {
inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]],
},
@@ -253,6 +288,7 @@ describe("sendMessageTelegram caption splitting", () => {
reply_to_message_id: 500,
});
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 500,
reply_markup: {
@@ -353,6 +389,7 @@ describe("sendMessageTelegram caption splitting", () => {
// Media sent WITH reply_markup when not splitting
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: shortText,
parse_mode: "HTML",
reply_markup: {
inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]],
},

View File

@@ -94,6 +94,7 @@ describe("buildInlineKeyboard", () => {
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "photo in topic",
parse_mode: "HTML",
message_thread_id: 99,
});
});

View File

@@ -285,6 +285,7 @@ describe("sendMessageTelegram", () => {
expect(sendAnimation).toHaveBeenCalledTimes(1);
expect(sendAnimation).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption",
parse_mode: "HTML",
});
expect(res.messageId).toBe("9");
});
@@ -318,6 +319,7 @@ describe("sendMessageTelegram", () => {
expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption",
parse_mode: "HTML",
});
expect(sendVoice).not.toHaveBeenCalled();
});
@@ -354,6 +356,7 @@ describe("sendMessageTelegram", () => {
expect(sendVoice).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "voice note",
parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 500,
});
@@ -390,6 +393,7 @@ describe("sendMessageTelegram", () => {
expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption",
parse_mode: "HTML",
});
expect(sendVoice).not.toHaveBeenCalled();
});

View File

@@ -16,7 +16,7 @@ import { isGifMedia } from "../media/mime.js";
import { loadWebMedia } from "../web/media.js";
import { resolveTelegramAccount } from "./accounts.js";
import { resolveTelegramFetch } from "./fetch.js";
import { markdownToTelegramHtml } from "./format.js";
import { renderTelegramHtmlText } from "./format.js";
import { resolveMarkdownTableMode } from "../config/markdown-tables.js";
import { splitTelegramCaption } from "./caption.js";
import { recordSentMessage } from "./sent-message-cache.js";
@@ -190,6 +190,55 @@ export async function sendMessageTelegram(
);
};
const textMode = opts.textMode ?? "markdown";
const tableMode = resolveMarkdownTableMode({
cfg,
channel: "telegram",
accountId: account.accountId,
});
const renderHtmlText = (value: string) => renderTelegramHtmlText(value, { textMode, tableMode });
const sendTelegramText = async (
rawText: string,
params?: Record<string, unknown>,
fallbackText?: string,
) => {
const htmlText = renderHtmlText(rawText);
const sendParams = params
? {
parse_mode: "HTML" as const,
...params,
}
: {
parse_mode: "HTML" as const,
};
const res = await request(() => api.sendMessage(chatId, htmlText, sendParams), "message").catch(
async (err) => {
// Telegram rejects malformed HTML (e.g., unsupported tags or entities).
// When that happens, fall back to plain text so the message still delivers.
const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) {
if (opts.verbose) {
console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
}
const fallback = fallbackText ?? rawText;
const plainParams = params && Object.keys(params).length > 0 ? { ...params } : undefined;
return await request(
() =>
plainParams
? api.sendMessage(chatId, fallback, plainParams)
: api.sendMessage(chatId, fallback),
"message-plain",
).catch((err2) => {
throw wrapChatNotFound(err2);
});
}
throw wrapChatNotFound(err);
},
);
return res;
};
if (mediaUrl) {
const media = await loadWebMedia(mediaUrl, opts.maxBytes);
const kind = mediaKindFromMime(media.contentType ?? undefined);
@@ -200,21 +249,21 @@ export async function sendMessageTelegram(
const fileName = media.fileName ?? (isGif ? "animation.gif" : inferFilename(kind)) ?? "file";
const file = new InputFile(media.buffer, fileName);
const { caption, followUpText } = splitTelegramCaption(text);
const htmlCaption = caption ? renderHtmlText(caption) : undefined;
// If text exceeds Telegram's caption limit, send media without caption
// then send text as a separate follow-up message.
const needsSeparateText = Boolean(followUpText);
// When splitting, put reply_markup only on the follow-up text (the "main" content),
// not on the media message.
const mediaParams = hasThreadParams
? {
caption,
...threadParams,
...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: {
caption,
...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
};
const baseMediaParams = {
...(hasThreadParams ? threadParams : {}),
...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
};
const mediaParams = {
caption: htmlCaption,
...(htmlCaption ? { parse_mode: "HTML" as const } : {}),
...baseMediaParams,
};
let result:
| Awaited<ReturnType<typeof api.sendPhoto>>
| Awaited<ReturnType<typeof api.sendVideo>>
@@ -279,7 +328,7 @@ export async function sendMessageTelegram(
});
// If text was too long for a caption, send it as a separate follow-up message.
// Use plain text to match caption behavior (captions don't use HTML conversion).
// Use HTML conversion so markdown renders like captions.
if (needsSeparateText && followUpText) {
const textParams =
hasThreadParams || replyMarkup
@@ -288,15 +337,7 @@ export async function sendMessageTelegram(
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const textRes = await request(
() =>
textParams
? api.sendMessage(chatId, followUpText, textParams)
: api.sendMessage(chatId, followUpText),
"message",
).catch((err) => {
throw wrapChatNotFound(err);
});
const textRes = await sendTelegramText(followUpText, textParams);
// Return the text message ID as the "main" message (it's the actual content).
return {
messageId: String(textRes?.message_id ?? mediaMessageId),
@@ -310,53 +351,14 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends");
}
const textMode = opts.textMode ?? "markdown";
const tableMode = resolveMarkdownTableMode({
cfg,
channel: "telegram",
accountId: account.accountId,
});
const htmlText = textMode === "html" ? text : markdownToTelegramHtml(text, { tableMode });
const textParams = hasThreadParams
? {
parse_mode: "HTML" as const,
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: {
parse_mode: "HTML" as const,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
};
const res = await request(() => api.sendMessage(chatId, htmlText, textParams), "message").catch(
async (err) => {
// Telegram rejects malformed HTML (e.g., unsupported tags or entities).
// When that happens, fall back to plain text so the message still delivers.
const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) {
if (opts.verbose) {
console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
const textParams =
hasThreadParams || replyMarkup
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
const plainParams =
hasThreadParams || replyMarkup
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const fallbackText = opts.plainText ?? text;
return await request(
() =>
plainParams
? api.sendMessage(chatId, fallbackText, plainParams)
: api.sendMessage(chatId, fallbackText),
"message-plain",
).catch((err2) => {
throw wrapChatNotFound(err2);
});
}
throw wrapChatNotFound(err);
},
);
: undefined;
const res = await sendTelegramText(text, textParams, opts.plainText);
const messageId = String(res?.message_id ?? "unknown");
if (res?.message_id) {
recordSentMessage(chatId, res.message_id);