fix: telegram html formatting (#435, thanks @RandyVentures)

This commit is contained in:
Peter Steinberger
2026-01-08 02:34:32 +01:00
parent 61f5ed8bb7
commit 2140caaf67
8 changed files with 230 additions and 23 deletions

View File

@@ -85,6 +85,7 @@
- Telegram: add draft streaming via `sendMessageDraft` with `telegram.streamMode`, plus `/reasoning stream` for draft-only reasoning.
- Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377.
- Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407.
- Telegram: render Telegram-safe HTML for outbound formatting and fall back to plain text on parse errors. Thanks @RandyVentures for PR #435.
- iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359.
- Messages: stop defaulting ack reactions to 👀 when identity emoji is missing.
- Auto-reply: require slash for control commands to avoid false triggers in normal text.

View File

@@ -41,6 +41,12 @@ Multi-account support: use `telegram.accounts` with per-account tokens and optio
- Replies always route back to the same Telegram chat.
- Long-polling uses grammY runner with per-chat sequencing; overall concurrency is capped by `agent.maxConcurrent`.
## Formatting (Telegram HTML)
- Outbound Telegram text uses `parse_mode: "HTML"` (Telegram's supported tag subset).
- Markdown-ish input is rendered into **Telegram-safe HTML** (bold/italic/strike/code/links); block elements are flattened to text with newlines/bullets.
- Raw HTML from models is escaped to avoid Telegram parse errors.
- If Telegram rejects the HTML payload, Clawdbot retries the same message as plain text.
## Group activation modes
By default, the bot only responds to mentions in groups (`@botname` or patterns in `routing.groupChat.mentionPatterns`). To change this behavior:

View File

@@ -52,6 +52,7 @@ import type { RuntimeEnv } from "../runtime.js";
import { loadWebMedia } from "../web/media.js";
import { resolveTelegramAccount } from "./accounts.js";
import { createTelegramDraftStream } from "./draft-stream.js";
import { markdownToTelegramHtml } from "./format.js";
import {
readTelegramAllowFromStore,
upsertTelegramPairingRequest,
@@ -1427,9 +1428,10 @@ async function sendTelegramText(
if (threadParams) {
baseParams.message_thread_id = threadParams.message_thread_id;
}
const htmlText = markdownToTelegramHtml(text);
try {
const res = await bot.api.sendMessage(chatId, text, {
parse_mode: "Markdown",
const res = await bot.api.sendMessage(chatId, htmlText, {
parse_mode: "HTML",
...baseParams,
});
return res.message_id;
@@ -1437,7 +1439,7 @@ async function sendTelegramText(
const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) {
runtime.log?.(
`telegram markdown parse failed; retrying without formatting: ${errText}`,
`telegram HTML parse failed; retrying without formatting: ${errText}`,
);
const res = await bot.api.sendMessage(chatId, text, {
...baseParams,

View File

@@ -0,0 +1,50 @@
import { describe, expect, it } from "vitest";
import { markdownToTelegramHtml } from "./format.js";
// Unit tests for markdownToTelegramHtml. Each case feeds one Markdown
// construct through the converter and pins the exact string it returns.
describe("markdownToTelegramHtml", () => {
// Emphasis, strong and inline code become <i>, <b> and <code>.
it("renders basic inline formatting", () => {
const res = markdownToTelegramHtml("hi _there_ **boss** `code`");
expect(res).toBe("hi <i>there</i> <b>boss</b> <code>code</code>");
});
// Markdown links become <a href="..."> anchors.
it("renders links as Telegram-safe HTML", () => {
const res = markdownToTelegramHtml("see [docs](https://example.com)");
expect(res).toBe('see <a href="https://example.com">docs</a>');
});
// Tags written directly in the input are escaped, not passed through.
it("escapes raw HTML", () => {
const res = markdownToTelegramHtml("<b>nope</b>");
expect(res).toBe("&lt;b&gt;nope&lt;/b&gt;");
});
// Bare `&` and `<` in ordinary text are entity-escaped.
it("escapes unsafe characters", () => {
const res = markdownToTelegramHtml("a & b < c");
expect(res).toBe("a &amp; b &lt; c");
});
// Paragraphs are separated by one blank line and carry no <p> tags.
it("renders paragraphs with blank lines", () => {
const res = markdownToTelegramHtml("first\n\nsecond");
expect(res).toBe("first\n\nsecond");
});
// Bullet items are prefixed with "• " and emitted one per line.
it("renders lists without block HTML", () => {
const res = markdownToTelegramHtml("- one\n- two");
expect(res).toBe("• one\n• two");
});
// Ordered lists keep the starting number from the source (2 here).
it("renders ordered lists with numbering", () => {
const res = markdownToTelegramHtml("2. two\n3. three");
expect(res).toBe("2. two\n3. three");
});
// The `#` and `>` markers are dropped; only the text content remains.
it("flattens headings and blockquotes", () => {
const res = markdownToTelegramHtml("# Title\n\n> Quote");
expect(res).toBe("Title\n\nQuote");
});
// Fences become <pre><code>; the `js` info string does not appear in the output.
it("renders fenced code blocks", () => {
const res = markdownToTelegramHtml("```js\nconst x = 1;\n```");
expect(res).toBe("<pre><code>const x = 1;\n</code></pre>");
});
});

141
src/telegram/format.ts Normal file
View File

@@ -0,0 +1,141 @@
import MarkdownIt from "markdown-it";
/** Bookkeeping for one list that is open while its items are being rendered. */
type ListState = {
  /** Item counter; `list_item_open` increments it before printing the prefix. */
  index: number;
  /** Selects the item prefix: the counter for "ordered", "•" for "bullet". */
  type: "ordered" | "bullet";
};

/** Per-render scratch state carried on the `env` object handed to `md.render`. */
type RenderEnv = {
  /** One entry per unclosed `link_open`; true when an `<a>` tag was emitted. */
  telegramLinkStack?: boolean[];
  /** Lists that are currently open, innermost last. */
  telegramListStack?: ListState[];
};
// Shared markdown-it instance whose renderer rules are overridden below.
// `html: false` keeps raw HTML in the input from being passed through as
// markup; `linkify: true` turns bare URLs into links. `enable` returns the
// instance, so the strikethrough rule is switched on in the same expression.
const md = new MarkdownIt({
  breaks: false,
  html: false,
  linkify: true,
  typographer: false,
}).enable("strikethrough");

// markdown-it's own escaper, reused by every rule that emits user text.
const escapeHtml = md.utils.escapeHtml;
/**
 * Returns the stack of open lists stored on `env`.
 *
 * The stack is created and attached to `env` on first access, so every
 * later call with the same `env` returns the same array instance.
 */
function getListStack(env: RenderEnv): ListState[] {
  const existing = env.telegramListStack;
  if (existing) {
    return existing;
  }
  const created: ListState[] = [];
  env.telegramListStack = created;
  return created;
}
/**
 * Returns the stack of pending link flags stored on `env`.
 *
 * The stack is created and attached to `env` on first access, so every
 * later call with the same `env` returns the same array instance.
 */
function getLinkStack(env: RenderEnv): boolean[] {
  const existing = env.telegramLinkStack;
  if (existing) {
    return existing;
  }
  const created: boolean[] = [];
  env.telegramLinkStack = created;
  return created;
}
// Plain text is HTML-escaped before it is emitted.
md.renderer.rules.text = (tokens, idx) => {
  const token = tokens[idx];
  return escapeHtml(token?.content ?? "");
};

// Soft and hard breaks both become a bare newline (no <br> tag is emitted).
const renderLineBreak = (): string => "\n";
md.renderer.rules.softbreak = renderLineBreak;
md.renderer.rules.hardbreak = renderLineBreak;
// Block-level rules. Block elements get no HTML tags of their own; they are
// flattened to text separated by newlines, with bullets/numbers for lists.

md.renderer.rules.paragraph_open = () => "";

// Outside a list a paragraph ends with a blank line. Inside a list item the
// item's own close supplies the line break, so nothing is added when that
// close comes next. When other content follows inside the same item (a nested
// list, a second paragraph, a code block), a single newline is emitted so the
// following content is not glued onto this paragraph's text.
md.renderer.rules.paragraph_close = (tokens, idx, _opts, env) => {
  const stack = getListStack(env as RenderEnv);
  if (stack.length === 0) return "\n\n";
  return tokens[idx + 1]?.type === "list_item_close" ? "" : "\n";
};

// Headings keep only their text, followed by a blank line.
md.renderer.rules.heading_open = () => "";
md.renderer.rules.heading_close = () => "\n\n";

// Blockquotes keep only their content; the `>` marker is dropped.
md.renderer.rules.blockquote_open = () => "";
md.renderer.rules.blockquote_close = () => "\n";

// Opening a list pushes its state so nested items can tell depth and kind;
// closing pops it again. Neither emits any text.
md.renderer.rules.bullet_list_open = (_tokens, _idx, _opts, env) => {
  getListStack(env as RenderEnv).push({ type: "bullet", index: 0 });
  return "";
};
md.renderer.rules.bullet_list_close = (_tokens, _idx, _opts, env) => {
  getListStack(env as RenderEnv).pop();
  return "";
};
md.renderer.rules.ordered_list_open = (tokens, idx, _opts, env) => {
  // Numbering falls back to 1 when the token carries no `start` attribute.
  const start = Number(tokens[idx]?.attrGet("start") ?? "1");
  // Stored one below the start because list_item_open increments first.
  getListStack(env as RenderEnv).push({ type: "ordered", index: start - 1 });
  return "";
};
md.renderer.rules.ordered_list_close = (_tokens, _idx, _opts, env) => {
  getListStack(env as RenderEnv).pop();
  return "";
};

// Emits the item prefix: one space of indent per nesting level beyond the
// first, then either "N. " (ordered) or "• " (bullet).
md.renderer.rules.list_item_open = (_tokens, _idx, _opts, env) => {
  const stack = getListStack(env as RenderEnv);
  const top = stack[stack.length - 1];
  if (!top) return "";
  top.index += 1;
  const indent = " ".repeat(Math.max(0, stack.length - 1));
  const prefix = top.type === "ordered" ? `${top.index}. ` : "• ";
  return `${indent}${prefix}`;
};

// Every item ends its line. If the item's last child was a nested list, that
// list's final item has already emitted the newline, so adding another would
// leave a blank line in the middle of the outer list.
md.renderer.rules.list_item_close = (tokens, idx) => {
  const previous = tokens[idx - 1]?.type;
  const endedWithNestedList =
    previous === "bullet_list_close" || previous === "ordered_list_close";
  return endedWithNestedList ? "" : "\n";
};
// Inline emphasis: each markdown-it rule family maps onto one HTML tag.
const inlineTagByRule = [
  ["em", "i"],
  ["strong", "b"],
  ["s", "s"],
] as const;
for (const [rule, tag] of inlineTagByRule) {
  md.renderer.rules[`${rule}_open`] = () => `<${tag}>`;
  md.renderer.rules[`${rule}_close`] = () => `</${tag}>`;
}

// Inline code: escaped content wrapped in <code>.
md.renderer.rules.code_inline = (tokens, idx) => {
  const source = tokens[idx]?.content ?? "";
  return "<code>" + escapeHtml(source) + "</code>";
};

// Indented and fenced code blocks render identically: escaped content inside
// <pre><code>, followed by a newline. The fence info string is not used.
md.renderer.rules.code_block = (tokens, idx) => {
  const source = tokens[idx]?.content ?? "";
  return "<pre><code>" + escapeHtml(source) + "</code></pre>\n";
};
md.renderer.rules.fence = (tokens, idx) => {
  const source = tokens[idx]?.content ?? "";
  return "<pre><code>" + escapeHtml(source) + "</code></pre>\n";
};
// Opens an anchor only when the link has a non-empty href. The decision is
// pushed on the link stack so the matching close knows whether to emit </a>.
md.renderer.rules.link_open = (tokens, idx, _opts, env) => {
  const escapedHref = escapeHtml(tokens[idx]?.attrGet("href") ?? "");
  const emitAnchor = escapedHref.length > 0;
  getLinkStack(env as RenderEnv).push(emitAnchor);
  if (!emitAnchor) {
    return "";
  }
  return `<a href="${escapedHref}">`;
};

// Closes the anchor only if the matching open actually emitted one.
md.renderer.rules.link_close = (_tokens, _idx, _opts, env) => {
  const opened = getLinkStack(env as RenderEnv).pop();
  return opened === true ? "</a>" : "";
};
// Images are reduced to their escaped token content (the alt text); no tag is emitted.
md.renderer.rules.image = (tokens, idx) =>
  escapeHtml(tokens[idx]?.content ?? "");

// Raw HTML tokens are emitted as escaped text rather than markup.
// NOTE(review): the parser is built with `html: false`, so these two rules are
// presumably only a safety net — confirm before relying on them.
md.renderer.rules.html_block = (tokens, idx) => {
  const raw = tokens[idx]?.content ?? "";
  return escapeHtml(raw);
};
md.renderer.rules.html_inline = (tokens, idx) => {
  const raw = tokens[idx]?.content ?? "";
  return escapeHtml(raw);
};
// Tables are flattened to text: structural tokens emit nothing, each cell is
// followed by a tab and each row by a newline.
const silentTableRules = [
  "table_open",
  "table_close",
  "thead_open",
  "thead_close",
  "tbody_open",
  "tbody_close",
  "tr_open",
  "th_open",
  "td_open",
];
for (const rule of silentTableRules) {
  md.renderer.rules[rule] = () => "";
}
md.renderer.rules.th_close = () => "\t";
md.renderer.rules.td_close = () => "\t";
md.renderer.rules.tr_close = () => "\n";

// A horizontal rule becomes a single newline.
md.renderer.rules.hr = () => "\n";
/**
 * Renders Markdown-ish text to HTML using the renderer rules configured on
 * the module-level `md` instance, then tidies the whitespace of the result.
 *
 * Outside of code blocks, spaces/tabs before a newline are removed and runs
 * of three or more newlines are collapsed to one blank line. The contents of
 * `<pre><code>…</code></pre>` blocks are left exactly as rendered so that
 * blank lines and trailing whitespace inside code survive.
 *
 * @param markdown - Source text; a nullish value is treated as empty.
 * @returns The rendered HTML string with trailing whitespace trimmed.
 */
export function markdownToTelegramHtml(markdown: string): string {
  const env: RenderEnv = {};
  const rendered = md.render(markdown ?? "", env);
  // Splitting on a capturing group keeps the matched <pre> blocks in the
  // result at odd indices. Code content is HTML-escaped by the renderer, so
  // a literal "</code></pre>" can only be the real end of a block.
  return rendered
    .split(/(<pre><code>[\s\S]*?<\/code><\/pre>)/)
    .map((segment, position) =>
      position % 2 === 1
        ? segment
        : segment.replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n"),
    )
    .join("")
    .trimEnd();
}

View File

@@ -51,7 +51,7 @@ vi.mock("./bot.js", () => ({
const text = ctx.message.text ?? ctx.message.caption ?? "";
if (isGroup && !text.includes("@mybot")) return;
if (!text.trim()) return;
await api.sendMessage(chatId, `echo:${text}`, { parse_mode: "Markdown" });
await api.sendMessage(chatId, `echo:${text}`, { parse_mode: "HTML" });
};
return {
on: vi.fn(),
@@ -102,7 +102,7 @@ describe("monitorTelegramProvider (grammY)", () => {
getFile: vi.fn(async () => ({})),
});
expect(api.sendMessage).toHaveBeenCalledWith(123, "echo:hi", {
parse_mode: "Markdown",
parse_mode: "HTML",
});
});

View File

@@ -15,7 +15,7 @@ describe("sendMessageTelegram", () => {
loadWebMedia.mockReset();
});
it("falls back to plain text when Telegram rejects Markdown", async () => {
it("falls back to plain text when Telegram rejects HTML", async () => {
const chatId = "123";
const parseErr = new Error(
"400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9",
@@ -37,8 +37,8 @@ describe("sendMessageTelegram", () => {
verbose: true,
});
expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "_oops_", {
parse_mode: "Markdown",
expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "<i>oops</i>", {
parse_mode: "HTML",
});
expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_oops_");
expect(res.chatId).toBe(chatId);
@@ -60,7 +60,7 @@ describe("sendMessageTelegram", () => {
});
expect(sendMessage).toHaveBeenCalledWith("123", "hi", {
parse_mode: "Markdown",
parse_mode: "HTML",
});
});
@@ -175,7 +175,7 @@ describe("sendMessageTelegram", () => {
});
expect(sendMessage).toHaveBeenCalledWith(chatId, "hello forum", {
parse_mode: "Markdown",
parse_mode: "HTML",
message_thread_id: 271,
});
});
@@ -197,7 +197,7 @@ describe("sendMessageTelegram", () => {
});
expect(sendMessage).toHaveBeenCalledWith(chatId, "reply text", {
parse_mode: "Markdown",
parse_mode: "HTML",
reply_to_message_id: 100,
});
});
@@ -220,7 +220,7 @@ describe("sendMessageTelegram", () => {
});
expect(sendMessage).toHaveBeenCalledWith(chatId, "forum reply", {
parse_mode: "Markdown",
parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 500,
});
@@ -249,12 +249,17 @@ describe("sendMessageTelegram", () => {
replyToMessageId: 100,
});
// First call: with Markdown + thread params
expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "_bad markdown_", {
parse_mode: "Markdown",
message_thread_id: 271,
reply_to_message_id: 100,
});
// First call: with HTML + thread params
expect(sendMessage).toHaveBeenNthCalledWith(
1,
chatId,
"<i>bad markdown</i>",
{
parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 100,
},
);
// Second call: plain text BUT still with thread params (critical!)
expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_bad markdown_", {
message_thread_id: 271,

View File

@@ -8,6 +8,7 @@ import { mediaKindFromMime } from "../media/constants.js";
import { isGifMedia } from "../media/mime.js";
import { loadWebMedia } from "../web/media.js";
import { resolveTelegramAccount } from "./accounts.js";
import { markdownToTelegramHtml } from "./format.js";
type TelegramSendOpts = {
token?: string;
@@ -204,20 +205,21 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends");
}
const htmlText = markdownToTelegramHtml(text);
const textParams = hasThreadParams
? { parse_mode: "Markdown" as const, ...threadParams }
: { parse_mode: "Markdown" as const };
? { parse_mode: "HTML" as const, ...threadParams }
: { parse_mode: "HTML" as const };
const res = await request(
() => api.sendMessage(chatId, text, textParams),
() => api.sendMessage(chatId, htmlText, textParams),
"message",
).catch(async (err) => {
// Telegram rejects malformed Markdown (e.g., unbalanced '_' or '*').
// Telegram rejects malformed HTML (e.g., unsupported tags or entities).
// When that happens, fall back to plain text so the message still delivers.
const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) {
if (opts.verbose) {
console.warn(
`telegram markdown parse failed, retrying as plain text: ${errText}`,
`telegram HTML parse failed, retrying as plain text: ${errText}`,
);
}
return await request(