fix: render Telegram media captions

This commit is contained in:
Peter Steinberger
2026-01-24 03:39:21 +00:00
parent d57cb2e1a8
commit de2d986008
10 changed files with 176 additions and 80 deletions

View File

@@ -34,6 +34,7 @@ Docs: https://docs.clawd.bot
- CLI: explain when auth profiles are excluded by auth.order in probe details. - CLI: explain when auth profiles are excluded by auth.order in probe details.
- CLI: drop the em dash when the banner tagline wraps to a second line. - CLI: drop the em dash when the banner tagline wraps to a second line.
- CLI: inline auth probe errors in status rows to reduce wrapping. - CLI: inline auth probe errors in status rows to reduce wrapping.
- Telegram: render markdown in media captions. (#1478)
- Agents: honor enqueue overrides for embedded runs to avoid queue deadlocks in tests. - Agents: honor enqueue overrides for embedded runs to avoid queue deadlocks in tests.
- Daemon: use platform PATH delimiters when building minimal service paths. - Daemon: use platform PATH delimiters when building minimal service paths.
- Tests: skip embedded runner ordering assertion on Windows to avoid CI timeouts. - Tests: skip embedded runner ordering assertion on Windows to avoid CI timeouts.

View File

@@ -363,6 +363,7 @@ describe("createTelegramBot", () => {
expect(sendAnimationSpy).toHaveBeenCalledTimes(1); expect(sendAnimationSpy).toHaveBeenCalledTimes(1);
expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), { expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), {
caption: "caption", caption: "caption",
parse_mode: "HTML",
reply_to_message_id: undefined, reply_to_message_id: undefined,
}); });
expect(sendPhotoSpy).not.toHaveBeenCalled(); expect(sendPhotoSpy).not.toHaveBeenCalled();

View File

@@ -1392,6 +1392,7 @@ describe("createTelegramBot", () => {
expect(sendAnimationSpy).toHaveBeenCalledTimes(1); expect(sendAnimationSpy).toHaveBeenCalledTimes(1);
expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), { expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), {
caption: "caption", caption: "caption",
parse_mode: "HTML",
reply_to_message_id: undefined, reply_to_message_id: undefined,
}); });
expect(sendPhotoSpy).not.toHaveBeenCalled(); expect(sendPhotoSpy).not.toHaveBeenCalled();

View File

@@ -74,4 +74,38 @@ describe("deliverReplies", () => {
expect(sendVoice).toHaveBeenCalledTimes(1); expect(sendVoice).toHaveBeenCalledTimes(1);
expect(events).toEqual(["recordVoice", "sendVoice"]); expect(events).toEqual(["recordVoice", "sendVoice"]);
}); });
// A short markdown reply attached to a photo must reach Telegram as an
// HTML caption, with parse_mode set so the markup is actually rendered.
it("renders markdown in media captions", async () => {
  const targetChat = "123";
  const photoSpy = vi.fn().mockResolvedValue({
    message_id: 2,
    chat: { id: targetChat },
  });
  // Only sendPhoto is needed for an image reply, so the bot stub stays minimal.
  const stubBot = { api: { sendPhoto: photoSpy } } as unknown as Bot;
  const stubRuntime = { error: vi.fn(), log: vi.fn() };

  // The media loader is mocked so nothing is fetched over the network.
  loadWebMedia.mockResolvedValueOnce({
    buffer: Buffer.from("image"),
    contentType: "image/jpeg",
    fileName: "photo.jpg",
  });

  const mediaReply = { mediaUrl: "https://example.com/photo.jpg", text: "hi **boss**" };
  await deliverReplies({
    replies: [mediaReply],
    chatId: targetChat,
    token: "tok",
    runtime: stubRuntime,
    bot: stubBot,
    replyToMode: "off",
    textLimit: 4000,
  });

  const expectedParams = expect.objectContaining({
    caption: "hi <b>boss</b>",
    parse_mode: "HTML",
  });
  expect(photoSpy).toHaveBeenCalledWith(targetChat, expect.anything(), expectedParams);
});
}); });

View File

@@ -1,5 +1,9 @@
import { type Bot, InputFile } from "grammy"; import { type Bot, InputFile } from "grammy";
import { markdownToTelegramChunks, markdownToTelegramHtml } from "../format.js"; import {
markdownToTelegramChunks,
markdownToTelegramHtml,
renderTelegramHtmlText,
} from "../format.js";
import { splitTelegramCaption } from "../caption.js"; import { splitTelegramCaption } from "../caption.js";
import type { ReplyPayload } from "../../auto-reply/types.js"; import type { ReplyPayload } from "../../auto-reply/types.js";
import type { ReplyToMode } from "../../config/config.js"; import type { ReplyToMode } from "../../config/config.js";
@@ -87,6 +91,9 @@ export async function deliverReplies(params: {
const { caption, followUpText } = splitTelegramCaption( const { caption, followUpText } = splitTelegramCaption(
isFirstMedia ? (reply.text ?? undefined) : undefined, isFirstMedia ? (reply.text ?? undefined) : undefined,
); );
const htmlCaption = caption
? renderTelegramHtmlText(caption, { tableMode: params.tableMode })
: undefined;
if (followUpText) { if (followUpText) {
pendingFollowUpText = followUpText; pendingFollowUpText = followUpText;
} }
@@ -94,8 +101,9 @@ export async function deliverReplies(params: {
const replyToMessageId = const replyToMessageId =
replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined; replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
const mediaParams: Record<string, unknown> = { const mediaParams: Record<string, unknown> = {
caption, caption: htmlCaption,
reply_to_message_id: replyToMessageId, reply_to_message_id: replyToMessageId,
...(htmlCaption ? { parse_mode: "HTML" } : {}),
}; };
if (threadParams) { if (threadParams) {
mediaParams.message_thread_id = threadParams.message_thread_id; mediaParams.message_thread_id = threadParams.message_thread_id;
@@ -149,14 +157,12 @@ export async function deliverReplies(params: {
for (const chunk of chunks) { for (const chunk of chunks) {
const replyToMessageIdFollowup = const replyToMessageIdFollowup =
replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined; replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
await bot.api.sendMessage( await sendTelegramText(bot, chatId, chunk.html, runtime, {
chatId, replyToMessageId: replyToMessageIdFollowup,
chunk.text, messageThreadId,
buildTelegramSendParams({ textMode: "html",
replyToMessageId: replyToMessageIdFollowup, plainText: chunk.text,
messageThreadId, });
}),
);
if (replyToId && !hasReplied) { if (replyToId && !hasReplied) {
hasReplied = true; hasReplied = true;
} }

View File

@@ -60,6 +60,15 @@ export function markdownToTelegramHtml(
return renderTelegramHtml(ir); return renderTelegramHtml(ir);
} }
/**
 * Turn caller-supplied text into the HTML string that is sent to Telegram.
 *
 * @param text - Message or caption text to render.
 * @param options - `textMode` selects how `text` is interpreted and defaults
 *   to `"markdown"`; `tableMode` is forwarded to the markdown converter.
 * @returns `text` unchanged when `textMode` is `"html"`; otherwise the result
 *   of `markdownToTelegramHtml`.
 */
export function renderTelegramHtmlText(
  text: string,
  options: { textMode?: "markdown" | "html"; tableMode?: MarkdownTableMode } = {},
): string {
  const mode = options.textMode ?? "markdown";
  // Pre-rendered HTML is passed through as-is; everything else is converted.
  return mode === "html"
    ? text
    : markdownToTelegramHtml(text, { tableMode: options.tableMode });
}
export function markdownToTelegramChunks( export function markdownToTelegramChunks(
markdown: string, markdown: string,
limit: number, limit: number,

View File

@@ -87,8 +87,10 @@ describe("sendMessageTelegram caption splitting", () => {
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: undefined, caption: undefined,
}); });
// Then text sent as separate message (plain text, matching caption behavior) // Then text sent as separate message (HTML formatting)
expect(sendMessage).toHaveBeenCalledWith(chatId, longText); expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
});
// Returns the text message ID (the "main" content) // Returns the text message ID (the "main" content)
expect(res.messageId).toBe("71"); expect(res.messageId).toBe("71");
}); });
@@ -123,12 +125,43 @@ describe("sendMessageTelegram caption splitting", () => {
// Caption should be included with media // Caption should be included with media
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: shortText, caption: shortText,
parse_mode: "HTML",
}); });
// No separate text message needed // No separate text message needed
expect(sendMessage).not.toHaveBeenCalled(); expect(sendMessage).not.toHaveBeenCalled();
expect(res.messageId).toBe("72"); expect(res.messageId).toBe("72");
}); });
// Text that fits in a caption is converted from markdown to HTML and sent
// together with the photo, with parse_mode set to "HTML".
it("renders markdown in media captions", async () => {
  const chatId = "123";
  const markdownCaption = "hi **boss**";
  const photoSpy = vi.fn().mockResolvedValue({
    message_id: 90,
    chat: { id: chatId },
  });
  // Minimal API stub: only sendPhoto is provided for this image send.
  const apiStub = { sendPhoto: photoSpy } as unknown as {
    sendPhoto: typeof photoSpy;
  };

  // Mock the download so the test never touches the network.
  loadWebMedia.mockResolvedValueOnce({
    buffer: Buffer.from("fake-image"),
    contentType: "image/jpeg",
    fileName: "photo.jpg",
  });

  const sendOptions = {
    token: "tok",
    api: apiStub,
    mediaUrl: "https://example.com/photo.jpg",
  };
  await sendMessageTelegram(chatId, markdownCaption, sendOptions);

  expect(photoSpy).toHaveBeenCalledWith(chatId, expect.anything(), {
    caption: "hi <b>boss</b>",
    parse_mode: "HTML",
  });
});
it("preserves thread params when splitting long captions", async () => { it("preserves thread params when splitting long captions", async () => {
const chatId = "-1001234567890"; const chatId = "-1001234567890";
const longText = "C".repeat(1100); const longText = "C".repeat(1100);
@@ -166,8 +199,9 @@ describe("sendMessageTelegram caption splitting", () => {
message_thread_id: 271, message_thread_id: 271,
reply_to_message_id: 500, reply_to_message_id: 500,
}); });
// Text message also includes thread params (plain text, matching caption behavior) // Text message also includes thread params (HTML formatting)
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
message_thread_id: 271, message_thread_id: 271,
reply_to_message_id: 500, reply_to_message_id: 500,
}); });
@@ -209,6 +243,7 @@ describe("sendMessageTelegram caption splitting", () => {
}); });
// Follow-up text has the reply_markup // Follow-up text has the reply_markup
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
reply_markup: { reply_markup: {
inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]], inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]],
}, },
@@ -253,6 +288,7 @@ describe("sendMessageTelegram caption splitting", () => {
reply_to_message_id: 500, reply_to_message_id: 500,
}); });
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
parse_mode: "HTML",
message_thread_id: 271, message_thread_id: 271,
reply_to_message_id: 500, reply_to_message_id: 500,
reply_markup: { reply_markup: {
@@ -353,6 +389,7 @@ describe("sendMessageTelegram caption splitting", () => {
// Media sent WITH reply_markup when not splitting // Media sent WITH reply_markup when not splitting
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: shortText, caption: shortText,
parse_mode: "HTML",
reply_markup: { reply_markup: {
inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]], inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]],
}, },

View File

@@ -94,6 +94,7 @@ describe("buildInlineKeyboard", () => {
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "photo in topic", caption: "photo in topic",
parse_mode: "HTML",
message_thread_id: 99, message_thread_id: 99,
}); });
}); });

View File

@@ -285,6 +285,7 @@ describe("sendMessageTelegram", () => {
expect(sendAnimation).toHaveBeenCalledTimes(1); expect(sendAnimation).toHaveBeenCalledTimes(1);
expect(sendAnimation).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendAnimation).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption", caption: "caption",
parse_mode: "HTML",
}); });
expect(res.messageId).toBe("9"); expect(res.messageId).toBe("9");
}); });
@@ -318,6 +319,7 @@ describe("sendMessageTelegram", () => {
expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption", caption: "caption",
parse_mode: "HTML",
}); });
expect(sendVoice).not.toHaveBeenCalled(); expect(sendVoice).not.toHaveBeenCalled();
}); });
@@ -354,6 +356,7 @@ describe("sendMessageTelegram", () => {
expect(sendVoice).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendVoice).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "voice note", caption: "voice note",
parse_mode: "HTML",
message_thread_id: 271, message_thread_id: 271,
reply_to_message_id: 500, reply_to_message_id: 500,
}); });
@@ -390,6 +393,7 @@ describe("sendMessageTelegram", () => {
expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), { expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption", caption: "caption",
parse_mode: "HTML",
}); });
expect(sendVoice).not.toHaveBeenCalled(); expect(sendVoice).not.toHaveBeenCalled();
}); });

View File

@@ -16,7 +16,7 @@ import { isGifMedia } from "../media/mime.js";
import { loadWebMedia } from "../web/media.js"; import { loadWebMedia } from "../web/media.js";
import { resolveTelegramAccount } from "./accounts.js"; import { resolveTelegramAccount } from "./accounts.js";
import { resolveTelegramFetch } from "./fetch.js"; import { resolveTelegramFetch } from "./fetch.js";
import { markdownToTelegramHtml } from "./format.js"; import { renderTelegramHtmlText } from "./format.js";
import { resolveMarkdownTableMode } from "../config/markdown-tables.js"; import { resolveMarkdownTableMode } from "../config/markdown-tables.js";
import { splitTelegramCaption } from "./caption.js"; import { splitTelegramCaption } from "./caption.js";
import { recordSentMessage } from "./sent-message-cache.js"; import { recordSentMessage } from "./sent-message-cache.js";
@@ -190,6 +190,55 @@ export async function sendMessageTelegram(
); );
}; };
// Rendering settings shared by captions and text messages in this send.
// textMode falls back to "markdown" when the caller does not specify one.
const textMode = opts.textMode ?? "markdown";
// Table rendering mode resolved from config for the telegram channel and this account.
const tableMode = resolveMarkdownTableMode({
cfg,
channel: "telegram",
accountId: account.accountId,
});
// Renders a string with the textMode/tableMode resolved above.
const renderHtmlText = (value: string) => renderTelegramHtmlText(value, { textMode, tableMode });
/**
 * Send `rawText` to `chatId` as an HTML-formatted message, retrying as plain
 * text when the first attempt fails with an error matching PARSE_ERR_RE.
 *
 * @param rawText - Text to render via `renderHtmlText` before sending.
 * @param params - Optional extra send parameters, spread after
 *   `parse_mode: "HTML"`, so a `parse_mode` key in `params` would win.
 * @param fallbackText - Text used for the plain-text retry; defaults to `rawText`.
 * @returns Whatever the successful `request(...)` call resolves to.
 * @throws The failure passed through `wrapChatNotFound`, when the first error
 *   does not match PARSE_ERR_RE or when the plain-text retry also fails.
 */
const sendTelegramText = async (
rawText: string,
params?: Record<string, unknown>,
fallbackText?: string,
) => {
const htmlText = renderHtmlText(rawText);
// Always request HTML parsing on the first attempt.
const sendParams = params
? {
parse_mode: "HTML" as const,
...params,
}
: {
parse_mode: "HTML" as const,
};
// NOTE(review): only rejections of the promise returned by request() reach
// this handler; a synchronous throw from request() would bypass it — confirm
// request() never throws synchronously.
const res = await request(() => api.sendMessage(chatId, htmlText, sendParams), "message").catch(
async (err) => {
// Telegram rejects malformed HTML (e.g., unsupported tags or entities).
// When that happens, fall back to plain text so the message still delivers.
const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) {
if (opts.verbose) {
console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
}
const fallback = fallbackText ?? rawText;
// The retry reuses the caller's params without adding parse_mode; a missing or
// empty params object results in a call with no params argument at all.
const plainParams = params && Object.keys(params).length > 0 ? { ...params } : undefined;
return await request(
() =>
plainParams
? api.sendMessage(chatId, fallback, plainParams)
: api.sendMessage(chatId, fallback),
"message-plain",
).catch((err2) => {
throw wrapChatNotFound(err2);
});
}
// Errors that do not match PARSE_ERR_RE are rethrown without a retry.
throw wrapChatNotFound(err);
},
);
return res;
};
if (mediaUrl) { if (mediaUrl) {
const media = await loadWebMedia(mediaUrl, opts.maxBytes); const media = await loadWebMedia(mediaUrl, opts.maxBytes);
const kind = mediaKindFromMime(media.contentType ?? undefined); const kind = mediaKindFromMime(media.contentType ?? undefined);
@@ -200,21 +249,21 @@ export async function sendMessageTelegram(
const fileName = media.fileName ?? (isGif ? "animation.gif" : inferFilename(kind)) ?? "file"; const fileName = media.fileName ?? (isGif ? "animation.gif" : inferFilename(kind)) ?? "file";
const file = new InputFile(media.buffer, fileName); const file = new InputFile(media.buffer, fileName);
const { caption, followUpText } = splitTelegramCaption(text); const { caption, followUpText } = splitTelegramCaption(text);
const htmlCaption = caption ? renderHtmlText(caption) : undefined;
// If text exceeds Telegram's caption limit, send media without caption // If text exceeds Telegram's caption limit, send media without caption
// then send text as a separate follow-up message. // then send text as a separate follow-up message.
const needsSeparateText = Boolean(followUpText); const needsSeparateText = Boolean(followUpText);
// When splitting, put reply_markup only on the follow-up text (the "main" content), // When splitting, put reply_markup only on the follow-up text (the "main" content),
// not on the media message. // not on the media message.
const mediaParams = hasThreadParams const baseMediaParams = {
? { ...(hasThreadParams ? threadParams : {}),
caption, ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
...threadParams, };
...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), const mediaParams = {
} caption: htmlCaption,
: { ...(htmlCaption ? { parse_mode: "HTML" as const } : {}),
caption, ...baseMediaParams,
...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), };
};
let result: let result:
| Awaited<ReturnType<typeof api.sendPhoto>> | Awaited<ReturnType<typeof api.sendPhoto>>
| Awaited<ReturnType<typeof api.sendVideo>> | Awaited<ReturnType<typeof api.sendVideo>>
@@ -279,7 +328,7 @@ export async function sendMessageTelegram(
}); });
// If text was too long for a caption, send it as a separate follow-up message. // If text was too long for a caption, send it as a separate follow-up message.
// Use plain text to match caption behavior (captions don't use HTML conversion). // Use HTML conversion so markdown renders like captions.
if (needsSeparateText && followUpText) { if (needsSeparateText && followUpText) {
const textParams = const textParams =
hasThreadParams || replyMarkup hasThreadParams || replyMarkup
@@ -288,15 +337,7 @@ export async function sendMessageTelegram(
...(replyMarkup ? { reply_markup: replyMarkup } : {}), ...(replyMarkup ? { reply_markup: replyMarkup } : {}),
} }
: undefined; : undefined;
const textRes = await request( const textRes = await sendTelegramText(followUpText, textParams);
() =>
textParams
? api.sendMessage(chatId, followUpText, textParams)
: api.sendMessage(chatId, followUpText),
"message",
).catch((err) => {
throw wrapChatNotFound(err);
});
// Return the text message ID as the "main" message (it's the actual content). // Return the text message ID as the "main" message (it's the actual content).
return { return {
messageId: String(textRes?.message_id ?? mediaMessageId), messageId: String(textRes?.message_id ?? mediaMessageId),
@@ -310,53 +351,14 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) { if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends"); throw new Error("Message must be non-empty for Telegram sends");
} }
const textMode = opts.textMode ?? "markdown"; const textParams =
const tableMode = resolveMarkdownTableMode({ hasThreadParams || replyMarkup
cfg, ? {
channel: "telegram", ...threadParams,
accountId: account.accountId, ...(replyMarkup ? { reply_markup: replyMarkup } : {}),
});
const htmlText = textMode === "html" ? text : markdownToTelegramHtml(text, { tableMode });
const textParams = hasThreadParams
? {
parse_mode: "HTML" as const,
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: {
parse_mode: "HTML" as const,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
};
const res = await request(() => api.sendMessage(chatId, htmlText, textParams), "message").catch(
async (err) => {
// Telegram rejects malformed HTML (e.g., unsupported tags or entities).
// When that happens, fall back to plain text so the message still delivers.
const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) {
if (opts.verbose) {
console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
} }
const plainParams = : undefined;
hasThreadParams || replyMarkup const res = await sendTelegramText(text, textParams, opts.plainText);
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const fallbackText = opts.plainText ?? text;
return await request(
() =>
plainParams
? api.sendMessage(chatId, fallbackText, plainParams)
: api.sendMessage(chatId, fallbackText),
"message-plain",
).catch((err2) => {
throw wrapChatNotFound(err2);
});
}
throw wrapChatNotFound(err);
},
);
const messageId = String(res?.message_id ?? "unknown"); const messageId = String(res?.message_id ?? "unknown");
if (res?.message_id) { if (res?.message_id) {
recordSentMessage(chatId, res.message_id); recordSentMessage(chatId, res.message_id);