From 2140caaf67c76965093f7d3a0bb201191a8c1af7 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 8 Jan 2026 02:34:32 +0100 Subject: [PATCH] fix: telegram html formatting (#435, thanks @RandyVentures) --- CHANGELOG.md | 1 + docs/providers/telegram.md | 6 ++ src/telegram/bot.ts | 8 +- src/telegram/format.test.ts | 50 +++++++++++++ src/telegram/format.ts | 141 +++++++++++++++++++++++++++++++++++ src/telegram/monitor.test.ts | 4 +- src/telegram/send.test.ts | 31 ++++---- src/telegram/send.ts | 12 +-- 8 files changed, 230 insertions(+), 23 deletions(-) create mode 100644 src/telegram/format.test.ts create mode 100644 src/telegram/format.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 2165735ac..b3425cd15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,6 +85,7 @@ - Telegram: add draft streaming via `sendMessageDraft` with `telegram.streamMode`, plus `/reasoning stream` for draft-only reasoning. - Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377. - Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407. +- Telegram: render Telegram-safe HTML for outbound formatting and fall back to plain text on parse errors. Thanks @RandyVentures for PR #435. - iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359. - Messages: stop defaulting ack reactions to 👀 when identity emoji is missing. - Auto-reply: require slash for control commands to avoid false triggers in normal text. diff --git a/docs/providers/telegram.md b/docs/providers/telegram.md index a3cc5837f..dc63984e5 100644 --- a/docs/providers/telegram.md +++ b/docs/providers/telegram.md @@ -41,6 +41,12 @@ Multi-account support: use `telegram.accounts` with per-account tokens and optio - Replies always route back to the same Telegram chat. - Long-polling uses grammY runner with per-chat sequencing; overall concurrency is capped by `agent.maxConcurrent`. +## Formatting (Telegram HTML) +- Outbound Telegram text uses `parse_mode: "HTML"` (Telegram’s supported tag subset). +- Markdown-ish input is rendered into **Telegram-safe HTML** (bold/italic/strike/code/links); block elements are flattened to text with newlines/bullets. +- Raw HTML from models is escaped to avoid Telegram parse errors. +- If Telegram rejects the HTML payload, Clawdbot retries the same message as plain text. + ## Group activation modes By default, the bot only responds to mentions in groups (`@botname` or patterns in `routing.groupChat.mentionPatterns`). To change this behavior: diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts index 68d149e56..64591a1ee 100644 --- a/src/telegram/bot.ts +++ b/src/telegram/bot.ts @@ -52,6 +52,7 @@ import type { RuntimeEnv } from "../runtime.js"; import { loadWebMedia } from "../web/media.js"; import { resolveTelegramAccount } from "./accounts.js"; import { createTelegramDraftStream } from "./draft-stream.js"; +import { markdownToTelegramHtml } from "./format.js"; import { readTelegramAllowFromStore, upsertTelegramPairingRequest, @@ -1427,9 +1428,10 @@ async function sendTelegramText( if (threadParams) { baseParams.message_thread_id = threadParams.message_thread_id; } + const htmlText = markdownToTelegramHtml(text); try { - const res = await bot.api.sendMessage(chatId, text, { - parse_mode: "Markdown", + const res = await bot.api.sendMessage(chatId, htmlText, { + parse_mode: "HTML", ...baseParams, }); return res.message_id; @@ -1437,7 +1439,7 @@ async function sendTelegramText( const errText = formatErrorMessage(err); if (PARSE_ERR_RE.test(errText)) { runtime.log?.( - `telegram markdown parse failed; retrying without formatting: ${errText}`, + `telegram HTML parse failed; retrying without formatting: ${errText}`, ); const res = await bot.api.sendMessage(chatId, text, { ...baseParams, diff --git a/src/telegram/format.test.ts b/src/telegram/format.test.ts new file mode 100644 index 000000000..831782815 --- /dev/null +++ b/src/telegram/format.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from "vitest"; + +import { markdownToTelegramHtml } from "./format.js"; + +describe("markdownToTelegramHtml", () => { + it("renders basic inline formatting", () => { + const res = markdownToTelegramHtml("hi _there_ **boss** `code`"); + expect(res).toBe("hi there boss code"); + }); + + it("renders links as Telegram-safe HTML", () => { + const res = markdownToTelegramHtml("see [docs](https://example.com)"); + expect(res).toBe('see docs'); + }); + + it("escapes raw HTML", () => { + const res = markdownToTelegramHtml("nope"); + expect(res).toBe("<b>nope</b>"); + }); + + it("escapes unsafe characters", () => { + const res = markdownToTelegramHtml("a & b < c"); + expect(res).toBe("a & b < c"); + }); + + it("renders paragraphs with blank lines", () => { + const res = markdownToTelegramHtml("first\n\nsecond"); + expect(res).toBe("first\n\nsecond"); + }); + + it("renders lists without block HTML", () => { + const res = markdownToTelegramHtml("- one\n- two"); + expect(res).toBe("• one\n• two"); + }); + + it("renders ordered lists with numbering", () => { + const res = markdownToTelegramHtml("2. two\n3. three"); + expect(res).toBe("2. two\n3. three"); + }); + + it("flattens headings and blockquotes", () => { + const res = markdownToTelegramHtml("# Title\n\n> Quote"); + expect(res).toBe("Title\n\nQuote"); + }); + + it("renders fenced code blocks", () => { + const res = markdownToTelegramHtml("```js\nconst x = 1;\n```"); + expect(res).toBe("
const x = 1;\n
"); + }); +}); diff --git a/src/telegram/format.ts b/src/telegram/format.ts new file mode 100644 index 000000000..0e67c98a5 --- /dev/null +++ b/src/telegram/format.ts @@ -0,0 +1,141 @@ +import MarkdownIt from "markdown-it"; + +type ListState = { + type: "bullet" | "ordered"; + index: number; +}; + +type RenderEnv = { + telegramListStack?: ListState[]; + telegramLinkStack?: boolean[]; +}; + +const md = new MarkdownIt({ + html: false, + linkify: true, + breaks: false, + typographer: false, +}); + +md.enable("strikethrough"); + +const { escapeHtml } = md.utils; + +function getListStack(env: RenderEnv): ListState[] { + if (!env.telegramListStack) env.telegramListStack = []; + return env.telegramListStack; +} + +function getLinkStack(env: RenderEnv): boolean[] { + if (!env.telegramLinkStack) env.telegramLinkStack = []; + return env.telegramLinkStack; +} + +md.renderer.rules.text = (tokens, idx) => + escapeHtml(tokens[idx]?.content ?? ""); + +md.renderer.rules.softbreak = () => "\n"; +md.renderer.rules.hardbreak = () => "\n"; + +md.renderer.rules.paragraph_open = () => ""; +md.renderer.rules.paragraph_close = (_tokens, _idx, _opts, env) => { + const stack = getListStack(env as RenderEnv); + return stack.length ? "" : "\n\n"; +}; + +md.renderer.rules.heading_open = () => ""; +md.renderer.rules.heading_close = () => "\n\n"; + +md.renderer.rules.blockquote_open = () => ""; +md.renderer.rules.blockquote_close = () => "\n"; + +md.renderer.rules.bullet_list_open = (_tokens, _idx, _opts, env) => { + getListStack(env as RenderEnv).push({ type: "bullet", index: 0 }); + return ""; +}; +md.renderer.rules.bullet_list_close = (_tokens, _idx, _opts, env) => { + getListStack(env as RenderEnv).pop(); + return ""; +}; +md.renderer.rules.ordered_list_open = (tokens, idx, _opts, env) => { + const start = Number(tokens[idx]?.attrGet("start") ?? "1"); + getListStack(env as RenderEnv).push({ type: "ordered", index: start - 1 }); + return ""; +}; +md.renderer.rules.ordered_list_close = (_tokens, _idx, _opts, env) => { + getListStack(env as RenderEnv).pop(); + return ""; +}; +md.renderer.rules.list_item_open = (_tokens, _idx, _opts, env) => { + const stack = getListStack(env as RenderEnv); + const top = stack[stack.length - 1]; + if (!top) return ""; + top.index += 1; + const indent = " ".repeat(Math.max(0, stack.length - 1)); + const prefix = top.type === "ordered" ? `${top.index}. ` : "• "; + return `${indent}${prefix}`; +}; +md.renderer.rules.list_item_close = () => "\n"; + +md.renderer.rules.em_open = () => ""; +md.renderer.rules.em_close = () => ""; +md.renderer.rules.strong_open = () => ""; +md.renderer.rules.strong_close = () => ""; +md.renderer.rules.s_open = () => ""; +md.renderer.rules.s_close = () => ""; + +md.renderer.rules.code_inline = (tokens, idx) => + `${escapeHtml(tokens[idx]?.content ?? "")}`; +md.renderer.rules.code_block = (tokens, idx) => + `
${escapeHtml(tokens[idx]?.content ?? "")}
\n`; +md.renderer.rules.fence = (tokens, idx) => + `
${escapeHtml(tokens[idx]?.content ?? "")}
\n`; + +md.renderer.rules.link_open = (tokens, idx, _opts, env) => { + const href = tokens[idx]?.attrGet("href") ?? ""; + const safeHref = escapeHtml(href); + const stack = getLinkStack(env as RenderEnv); + const hasHref = Boolean(safeHref); + stack.push(hasHref); + return hasHref ? `` : ""; +}; +md.renderer.rules.link_close = (_tokens, _idx, _opts, env) => { + const stack = getLinkStack(env as RenderEnv); + const hasHref = stack.pop(); + return hasHref ? "" : ""; +}; + +md.renderer.rules.image = (tokens, idx) => { + const alt = tokens[idx]?.content ?? ""; + return escapeHtml(alt); +}; + +md.renderer.rules.html_block = (tokens, idx) => + escapeHtml(tokens[idx]?.content ?? ""); +md.renderer.rules.html_inline = (tokens, idx) => + escapeHtml(tokens[idx]?.content ?? ""); + +md.renderer.rules.table_open = () => ""; +md.renderer.rules.table_close = () => ""; +md.renderer.rules.thead_open = () => ""; +md.renderer.rules.thead_close = () => ""; +md.renderer.rules.tbody_open = () => ""; +md.renderer.rules.tbody_close = () => ""; +md.renderer.rules.tr_open = () => ""; +md.renderer.rules.tr_close = () => "\n"; +md.renderer.rules.th_open = () => ""; +md.renderer.rules.th_close = () => "\t"; +md.renderer.rules.td_open = () => ""; +md.renderer.rules.td_close = () => "\t"; + +md.renderer.rules.hr = () => "\n"; + +export function markdownToTelegramHtml(markdown: string): string { + const env: RenderEnv = {}; + const rendered = md.render(markdown ?? "", env); + return rendered + .replace(/[ \t]+\n/g, "\n") + .replace(/\t+\n/g, "\n") + .replace(/\n{3,}/g, "\n\n") + .trimEnd(); +} diff --git a/src/telegram/monitor.test.ts b/src/telegram/monitor.test.ts index 740d28d95..75f21b672 100644 --- a/src/telegram/monitor.test.ts +++ b/src/telegram/monitor.test.ts @@ -51,7 +51,7 @@ vi.mock("./bot.js", () => ({ const text = ctx.message.text ?? ctx.message.caption ?? ""; if (isGroup && !text.includes("@mybot")) return; if (!text.trim()) return; - await api.sendMessage(chatId, `echo:${text}`, { parse_mode: "Markdown" }); + await api.sendMessage(chatId, `echo:${text}`, { parse_mode: "HTML" }); }; return { on: vi.fn(), @@ -102,7 +102,7 @@ describe("monitorTelegramProvider (grammY)", () => { getFile: vi.fn(async () => ({})), }); expect(api.sendMessage).toHaveBeenCalledWith(123, "echo:hi", { - parse_mode: "Markdown", + parse_mode: "HTML", }); }); diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index 2fbe209ff..823d641eb 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -15,7 +15,7 @@ describe("sendMessageTelegram", () => { loadWebMedia.mockReset(); }); - it("falls back to plain text when Telegram rejects Markdown", async () => { + it("falls back to plain text when Telegram rejects HTML", async () => { const chatId = "123"; const parseErr = new Error( "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9", @@ -37,8 +37,8 @@ describe("sendMessageTelegram", () => { verbose: true, }); - expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "_oops_", { - parse_mode: "Markdown", + expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "oops", { + parse_mode: "HTML", }); expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_oops_"); expect(res.chatId).toBe(chatId); @@ -60,7 +60,7 @@ describe("sendMessageTelegram", () => { }); expect(sendMessage).toHaveBeenCalledWith("123", "hi", { - parse_mode: "Markdown", + parse_mode: "HTML", }); }); @@ -175,7 +175,7 @@ describe("sendMessageTelegram", () => { }); expect(sendMessage).toHaveBeenCalledWith(chatId, "hello forum", { - parse_mode: "Markdown", + parse_mode: "HTML", message_thread_id: 271, }); }); @@ -197,7 +197,7 @@ describe("sendMessageTelegram", () => { }); expect(sendMessage).toHaveBeenCalledWith(chatId, "reply text", { - parse_mode: "Markdown", + parse_mode: "HTML", reply_to_message_id: 100, }); }); @@ -220,7 +220,7 @@ describe("sendMessageTelegram", () => { }); expect(sendMessage).toHaveBeenCalledWith(chatId, "forum reply", { - parse_mode: "Markdown", + parse_mode: "HTML", message_thread_id: 271, reply_to_message_id: 500, }); @@ -249,12 +249,17 @@ describe("sendMessageTelegram", () => { replyToMessageId: 100, }); - // First call: with Markdown + thread params - expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "_bad markdown_", { - parse_mode: "Markdown", - message_thread_id: 271, - reply_to_message_id: 100, - }); + // First call: with HTML + thread params + expect(sendMessage).toHaveBeenNthCalledWith( + 1, + chatId, + "bad markdown", + { + parse_mode: "HTML", + message_thread_id: 271, + reply_to_message_id: 100, + }, + ); // Second call: plain text BUT still with thread params (critical!) expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_bad markdown_", { message_thread_id: 271, diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 9b1b01c5d..087cb4dfa 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -8,6 +8,7 @@ import { mediaKindFromMime } from "../media/constants.js"; import { isGifMedia } from "../media/mime.js"; import { loadWebMedia } from "../web/media.js"; import { resolveTelegramAccount } from "./accounts.js"; +import { markdownToTelegramHtml } from "./format.js"; type TelegramSendOpts = { token?: string; @@ -204,20 +205,21 @@ export async function sendMessageTelegram( if (!text || !text.trim()) { throw new Error("Message must be non-empty for Telegram sends"); } + const htmlText = markdownToTelegramHtml(text); const textParams = hasThreadParams - ? { parse_mode: "Markdown" as const, ...threadParams } - : { parse_mode: "Markdown" as const }; + ? { parse_mode: "HTML" as const, ...threadParams } + : { parse_mode: "HTML" as const }; const res = await request( - () => api.sendMessage(chatId, text, textParams), + () => api.sendMessage(chatId, htmlText, textParams), "message", ).catch(async (err) => { - // Telegram rejects malformed Markdown (e.g., unbalanced '_' or '*'). + // Telegram rejects malformed HTML (e.g., unsupported tags or entities). // When that happens, fall back to plain text so the message still delivers. const errText = formatErrorMessage(err); if (PARSE_ERR_RE.test(errText)) { if (opts.verbose) { console.warn( - `telegram markdown parse failed, retrying as plain text: ${errText}`, + `telegram HTML parse failed, retrying as plain text: ${errText}`, ); } return await request(