fix: telegram html formatting (#435, thanks @RandyVentures)

This commit is contained in:
Peter Steinberger
2026-01-08 02:34:32 +01:00
parent 61f5ed8bb7
commit 2140caaf67
8 changed files with 230 additions and 23 deletions

View File

@@ -85,6 +85,7 @@
- Telegram: add draft streaming via `sendMessageDraft` with `telegram.streamMode`, plus `/reasoning stream` for draft-only reasoning. - Telegram: add draft streaming via `sendMessageDraft` with `telegram.streamMode`, plus `/reasoning stream` for draft-only reasoning.
- Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377. - Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377.
- Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407. - Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407.
- Telegram: render Telegram-safe HTML for outbound formatting and fall back to plain text on parse errors. Thanks @RandyVentures for PR #435.
- iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359. - iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359.
- Messages: stop defaulting ack reactions to 👀 when identity emoji is missing. - Messages: stop defaulting ack reactions to 👀 when identity emoji is missing.
- Auto-reply: require slash for control commands to avoid false triggers in normal text. - Auto-reply: require slash for control commands to avoid false triggers in normal text.

View File

@@ -41,6 +41,12 @@ Multi-account support: use `telegram.accounts` with per-account tokens and optio
- Replies always route back to the same Telegram chat. - Replies always route back to the same Telegram chat.
- Long-polling uses grammY runner with per-chat sequencing; overall concurrency is capped by `agent.maxConcurrent`. - Long-polling uses grammY runner with per-chat sequencing; overall concurrency is capped by `agent.maxConcurrent`.
## Formatting (Telegram HTML)
- Outbound Telegram text uses `parse_mode: "HTML"` (Telegram's supported tag subset).
- Markdown-ish input is rendered into **Telegram-safe HTML** (bold/italic/strike/code/links); block elements are flattened to text with newlines/bullets.
- Raw HTML from models is escaped to avoid Telegram parse errors.
- If Telegram rejects the HTML payload, Clawdbot retries the same message as plain text.
## Group activation modes ## Group activation modes
By default, the bot only responds to mentions in groups (`@botname` or patterns in `routing.groupChat.mentionPatterns`). To change this behavior: By default, the bot only responds to mentions in groups (`@botname` or patterns in `routing.groupChat.mentionPatterns`). To change this behavior:

View File

@@ -52,6 +52,7 @@ import type { RuntimeEnv } from "../runtime.js";
import { loadWebMedia } from "../web/media.js"; import { loadWebMedia } from "../web/media.js";
import { resolveTelegramAccount } from "./accounts.js"; import { resolveTelegramAccount } from "./accounts.js";
import { createTelegramDraftStream } from "./draft-stream.js"; import { createTelegramDraftStream } from "./draft-stream.js";
import { markdownToTelegramHtml } from "./format.js";
import { import {
readTelegramAllowFromStore, readTelegramAllowFromStore,
upsertTelegramPairingRequest, upsertTelegramPairingRequest,
@@ -1427,9 +1428,10 @@ async function sendTelegramText(
if (threadParams) { if (threadParams) {
baseParams.message_thread_id = threadParams.message_thread_id; baseParams.message_thread_id = threadParams.message_thread_id;
} }
const htmlText = markdownToTelegramHtml(text);
try { try {
const res = await bot.api.sendMessage(chatId, text, { const res = await bot.api.sendMessage(chatId, htmlText, {
parse_mode: "Markdown", parse_mode: "HTML",
...baseParams, ...baseParams,
}); });
return res.message_id; return res.message_id;
@@ -1437,7 +1439,7 @@ async function sendTelegramText(
const errText = formatErrorMessage(err); const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) { if (PARSE_ERR_RE.test(errText)) {
runtime.log?.( runtime.log?.(
`telegram markdown parse failed; retrying without formatting: ${errText}`, `telegram HTML parse failed; retrying without formatting: ${errText}`,
); );
const res = await bot.api.sendMessage(chatId, text, { const res = await bot.api.sendMessage(chatId, text, {
...baseParams, ...baseParams,

View File

@@ -0,0 +1,50 @@
import { describe, expect, it } from "vitest";
import { markdownToTelegramHtml } from "./format.js";
describe("markdownToTelegramHtml", () => {
  // Each row is [test title, markdown input, expected Telegram HTML output].
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    [
      "renders basic inline formatting",
      "hi _there_ **boss** `code`",
      "hi <i>there</i> <b>boss</b> <code>code</code>",
    ],
    [
      "renders links as Telegram-safe HTML",
      "see [docs](https://example.com)",
      'see <a href="https://example.com">docs</a>',
    ],
    ["escapes raw HTML", "<b>nope</b>", "&lt;b&gt;nope&lt;/b&gt;"],
    ["escapes unsafe characters", "a & b < c", "a &amp; b &lt; c"],
    ["renders paragraphs with blank lines", "first\n\nsecond", "first\n\nsecond"],
    ["renders lists without block HTML", "- one\n- two", "• one\n• two"],
    ["renders ordered lists with numbering", "2. two\n3. three", "2. two\n3. three"],
    ["flattens headings and blockquotes", "# Title\n\n> Quote", "Title\n\nQuote"],
    [
      "renders fenced code blocks",
      "```js\nconst x = 1;\n```",
      "<pre><code>const x = 1;\n</code></pre>",
    ],
  ];

  // Register one test per row so failures are still reported under the
  // individual title.
  for (const [title, markdown, expected] of cases) {
    it(title, () => {
      expect(markdownToTelegramHtml(markdown)).toBe(expected);
    });
  }
});

141
src/telegram/format.ts Normal file
View File

@@ -0,0 +1,141 @@
import MarkdownIt from "markdown-it";
// One entry per list that is currently open while rendering; the renderer
// keeps these on a stack so nested lists can be tracked.
type ListState = {
// Which list token opened this entry.
type: "bullet" | "ordered";
// Item counter. The ordered-list open rule seeds it with `start - 1`, and the
// list-item open rule increments it before using it as the printed number.
// Bullet lists seed it with 0; it is incremented but not printed for them.
index: number;
};
// Scratch state carried in the `env` object handed to `md.render`, so renderer
// rules can share state within a single render call. Both fields are created
// lazily by their accessor helpers.
type RenderEnv = {
// Stack of lists that are currently open (innermost last).
telegramListStack?: ListState[];
// One boolean per open link: true when the link-open rule emitted an `<a>`
// tag, so the link-close rule knows whether to emit the matching `</a>`.
telegramLinkStack?: boolean[];
};
// Shared markdown-it parser/renderer for outbound Telegram text. Raw HTML
// parsing is switched off, bare URLs are linkified, and single newlines are
// not converted to hard breaks. `enable` returns the instance, so the
// strikethrough rule is switched on in the same expression.
const md = new MarkdownIt({
  breaks: false,
  html: false,
  linkify: true,
  typographer: false,
}).enable("strikethrough");

// markdown-it's own HTML escaper, reused by the renderer rules.
const escapeHtml = md.utils.escapeHtml;
/**
 * Returns the open-list stack stored on `env`, creating and attaching an
 * empty stack the first time it is requested for that render.
 */
function getListStack(env: RenderEnv): ListState[] {
  const existing = env.telegramListStack;
  if (existing) return existing;
  const created: ListState[] = [];
  env.telegramListStack = created;
  return created;
}
/**
 * Returns the open-link stack stored on `env`, creating and attaching an
 * empty stack the first time it is requested for that render.
 */
function getLinkStack(env: RenderEnv): boolean[] {
  const existing = env.telegramLinkStack;
  if (existing) return existing;
  const created: boolean[] = [];
  env.telegramLinkStack = created;
  return created;
}
// Text nodes are HTML-escaped so characters such as `<` and `&` in the input
// never reach Telegram as markup.
md.renderer.rules.text = (tokens, idx) =>
  escapeHtml(tokens[idx]?.content ?? "");

// Soft and hard breaks are both rendered as a plain newline (no `<br>`).
md.renderer.rules.softbreak = () => "\n";
md.renderer.rules.hardbreak = () => "\n";

// Paragraphs emit no tags. Outside lists they are separated by a blank line.
// Inside a list item the item's own close rule ends the line, so a paragraph
// normally adds nothing; the exception is a paragraph directly followed by
// another paragraph in the same item (loose list items), which would
// otherwise be glued together ("- a\n\n  b" rendered as "• ab").
md.renderer.rules.paragraph_open = () => "";
md.renderer.rules.paragraph_close = (tokens, idx, _opts, env) => {
  const insideList = getListStack(env as RenderEnv).length > 0;
  if (!insideList) return "\n\n";
  return tokens[idx + 1]?.type === "paragraph_open" ? "\n" : "";
};

// Headings and blockquotes are flattened to their text content.
md.renderer.rules.heading_open = () => "";
md.renderer.rules.heading_close = () => "\n\n";
md.renderer.rules.blockquote_open = () => "";
md.renderer.rules.blockquote_close = () => "\n";
/**
 * Line break to emit before a list that opens inside another list item.
 *
 * Paragraphs inside list items render no trailing newline, so a nested list
 * that directly follows the parent item's paragraph would otherwise start on
 * the same line as the parent text ("• a • b"). Other preceding blocks already
 * end their own line, and a list that is the first thing in an item should
 * stay on the bullet's line, so only the paragraph case needs a break.
 *
 * @param tokens block token stream being rendered
 * @param idx index of the list-open token
 * @param depth number of lists already open (0 for a top-level list)
 * @returns "\n" when a break is needed, otherwise ""
 */
function nestedListBreak(
  tokens: ReadonlyArray<{ type: string }>,
  idx: number,
  depth: number,
): string {
  return depth > 0 && tokens[idx - 1]?.type === "paragraph_close" ? "\n" : "";
}

// Lists emit no block tags; opening a list pushes its state so items can be
// prefixed and indented, closing pops it again.
md.renderer.rules.bullet_list_open = (tokens, idx, _opts, env) => {
  const stack = getListStack(env as RenderEnv);
  const lead = nestedListBreak(tokens, idx, stack.length);
  stack.push({ type: "bullet", index: 0 });
  return lead;
};
md.renderer.rules.bullet_list_close = (_tokens, _idx, _opts, env) => {
  getListStack(env as RenderEnv).pop();
  return "";
};
md.renderer.rules.ordered_list_open = (tokens, idx, _opts, env) => {
  const stack = getListStack(env as RenderEnv);
  const lead = nestedListBreak(tokens, idx, stack.length);
  // Honour an explicit start number; the counter is pre-decremented because
  // each item increments it before printing.
  const start = Number(tokens[idx]?.attrGet("start") ?? "1");
  stack.push({ type: "ordered", index: start - 1 });
  return lead;
};
md.renderer.rules.ordered_list_close = (_tokens, _idx, _opts, env) => {
  getListStack(env as RenderEnv).pop();
  return "";
};

// Each item starts with an indent per nesting level plus a bullet or number.
md.renderer.rules.list_item_open = (_tokens, _idx, _opts, env) => {
  const stack = getListStack(env as RenderEnv);
  const top = stack[stack.length - 1];
  if (!top) return "";
  top.index += 1;
  const indent = " ".repeat(Math.max(0, stack.length - 1));
  const prefix = top.type === "ordered" ? `${top.index}. ` : "• ";
  return `${indent}${prefix}`;
};

// An item ends its line, unless it ended with a nested list: that list's last
// item already emitted the newline, and a second one would leave a blank line
// in the middle of the outer list.
md.renderer.rules.list_item_close = (tokens, idx) => {
  const previous = tokens[idx - 1]?.type;
  const endedWithList =
    previous === "bullet_list_close" || previous === "ordered_list_close";
  return endedWithList ? "" : "\n";
};
// Markdown inline token family -> HTML tag name used in the output.
const inlineTagPairs: ReadonlyArray<readonly [string, string]> = [
  ["em", "i"],
  ["strong", "b"],
  ["s", "s"],
];
for (const [family, tag] of inlineTagPairs) {
  md.renderer.rules[`${family}_open`] = () => `<${tag}>`;
  md.renderer.rules[`${family}_close`] = () => `</${tag}>`;
}

// Inline code: escaped content wrapped in <code>.
md.renderer.rules.code_inline = (tokens, idx) => {
  const content = tokens[idx]?.content ?? "";
  return `<code>${escapeHtml(content)}</code>`;
};

// Indented and fenced code blocks share one rendering: escaped content in
// <pre><code>, followed by a newline. The fence's info string is not used.
const renderPreformatted = (content: string): string =>
  `<pre><code>${escapeHtml(content)}</code></pre>\n`;
md.renderer.rules.code_block = (tokens, idx) =>
  renderPreformatted(tokens[idx]?.content ?? "");
md.renderer.rules.fence = (tokens, idx) =>
  renderPreformatted(tokens[idx]?.content ?? "");
// Opening a link emits `<a href="…">` with the href HTML-escaped. A link with
// no href emits nothing; the decision is pushed on the link stack so the
// matching close rule stays in step.
md.renderer.rules.link_open = (tokens, idx, _opts, env) => {
  const escapedHref = escapeHtml(tokens[idx]?.attrGet("href") ?? "");
  const emitsAnchor = escapedHref.length > 0;
  getLinkStack(env as RenderEnv).push(emitsAnchor);
  if (!emitsAnchor) return "";
  return `<a href="${escapedHref}">`;
};

// Closing a link emits `</a>` only when the matching open emitted a tag.
md.renderer.rules.link_close = (_tokens, _idx, _opts, env) => {
  const openedAnchor = getLinkStack(env as RenderEnv).pop();
  return openedAnchor ? "</a>" : "";
};
// Images and raw-HTML tokens are all reduced to their escaped `content`
// (for images that is the alt text); no tag is emitted for any of them.
// NOTE(review): with `html: false` the parser presumably never produces
// html_block/html_inline tokens, so those two rules look like a safety net.
for (const rule of ["image", "html_block", "html_inline"]) {
  md.renderer.rules[rule] = (tokens, idx) => {
    const content = tokens[idx]?.content ?? "";
    return escapeHtml(content);
  };
}
// Tables are flattened to text: structural tokens emit nothing, each cell is
// followed by a tab and each row by a newline.
const silentTableRules = [
  "table_open",
  "table_close",
  "thead_open",
  "thead_close",
  "tbody_open",
  "tbody_close",
  "tr_open",
  "th_open",
  "td_open",
];
for (const rule of silentTableRules) {
  md.renderer.rules[rule] = () => "";
}
for (const cellClose of ["th_close", "td_close"]) {
  md.renderer.rules[cellClose] = () => "\t";
}
md.renderer.rules.tr_close = () => "\n";

// A horizontal rule becomes a bare newline.
md.renderer.rules.hr = () => "\n";
// Matches one rendered code block. Code content is HTML-escaped by the
// renderer, so a literal `</code></pre>` can only be the real closing tags.
// The capture group makes `split` keep the blocks (at odd indices).
const PRE_BLOCK_RE = /(<pre><code>[\s\S]*?<\/code><\/pre>)/;

/**
 * Renders Markdown-ish text to the HTML subset sent with Telegram's
 * `parse_mode: "HTML"`.
 *
 * After rendering, whitespace is tidied: spaces/tabs before a newline are
 * dropped, runs of three or more newlines collapse to one blank line, and
 * trailing whitespace is trimmed. The tidy-up is applied only to text outside
 * `<pre><code>` blocks so code keeps its exact whitespace (the previous
 * implementation also stripped trailing spaces and blank-line runs inside
 * code). The former `/\t+\n/` pass was removed: `/[ \t]+\n/` already covers it.
 *
 * @param markdown source text; null/undefined is treated as an empty string
 * @returns Telegram-safe HTML with no trailing whitespace
 */
export function markdownToTelegramHtml(markdown: string): string {
  const env: RenderEnv = {};
  const rendered = md.render(markdown ?? "", env);
  return rendered
    .split(PRE_BLOCK_RE)
    .map((segment, i) =>
      i % 2 === 1
        ? segment
        : segment.replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n"),
    )
    .join("")
    .trimEnd();
}

View File

@@ -51,7 +51,7 @@ vi.mock("./bot.js", () => ({
const text = ctx.message.text ?? ctx.message.caption ?? ""; const text = ctx.message.text ?? ctx.message.caption ?? "";
if (isGroup && !text.includes("@mybot")) return; if (isGroup && !text.includes("@mybot")) return;
if (!text.trim()) return; if (!text.trim()) return;
await api.sendMessage(chatId, `echo:${text}`, { parse_mode: "Markdown" }); await api.sendMessage(chatId, `echo:${text}`, { parse_mode: "HTML" });
}; };
return { return {
on: vi.fn(), on: vi.fn(),
@@ -102,7 +102,7 @@ describe("monitorTelegramProvider (grammY)", () => {
getFile: vi.fn(async () => ({})), getFile: vi.fn(async () => ({})),
}); });
expect(api.sendMessage).toHaveBeenCalledWith(123, "echo:hi", { expect(api.sendMessage).toHaveBeenCalledWith(123, "echo:hi", {
parse_mode: "Markdown", parse_mode: "HTML",
}); });
}); });

View File

@@ -15,7 +15,7 @@ describe("sendMessageTelegram", () => {
loadWebMedia.mockReset(); loadWebMedia.mockReset();
}); });
it("falls back to plain text when Telegram rejects Markdown", async () => { it("falls back to plain text when Telegram rejects HTML", async () => {
const chatId = "123"; const chatId = "123";
const parseErr = new Error( const parseErr = new Error(
"400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9", "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9",
@@ -37,8 +37,8 @@ describe("sendMessageTelegram", () => {
verbose: true, verbose: true,
}); });
expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "_oops_", { expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "<i>oops</i>", {
parse_mode: "Markdown", parse_mode: "HTML",
}); });
expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_oops_"); expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_oops_");
expect(res.chatId).toBe(chatId); expect(res.chatId).toBe(chatId);
@@ -60,7 +60,7 @@ describe("sendMessageTelegram", () => {
}); });
expect(sendMessage).toHaveBeenCalledWith("123", "hi", { expect(sendMessage).toHaveBeenCalledWith("123", "hi", {
parse_mode: "Markdown", parse_mode: "HTML",
}); });
}); });
@@ -175,7 +175,7 @@ describe("sendMessageTelegram", () => {
}); });
expect(sendMessage).toHaveBeenCalledWith(chatId, "hello forum", { expect(sendMessage).toHaveBeenCalledWith(chatId, "hello forum", {
parse_mode: "Markdown", parse_mode: "HTML",
message_thread_id: 271, message_thread_id: 271,
}); });
}); });
@@ -197,7 +197,7 @@ describe("sendMessageTelegram", () => {
}); });
expect(sendMessage).toHaveBeenCalledWith(chatId, "reply text", { expect(sendMessage).toHaveBeenCalledWith(chatId, "reply text", {
parse_mode: "Markdown", parse_mode: "HTML",
reply_to_message_id: 100, reply_to_message_id: 100,
}); });
}); });
@@ -220,7 +220,7 @@ describe("sendMessageTelegram", () => {
}); });
expect(sendMessage).toHaveBeenCalledWith(chatId, "forum reply", { expect(sendMessage).toHaveBeenCalledWith(chatId, "forum reply", {
parse_mode: "Markdown", parse_mode: "HTML",
message_thread_id: 271, message_thread_id: 271,
reply_to_message_id: 500, reply_to_message_id: 500,
}); });
@@ -249,12 +249,17 @@ describe("sendMessageTelegram", () => {
replyToMessageId: 100, replyToMessageId: 100,
}); });
// First call: with Markdown + thread params // First call: with HTML + thread params
expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "_bad markdown_", { expect(sendMessage).toHaveBeenNthCalledWith(
parse_mode: "Markdown", 1,
message_thread_id: 271, chatId,
reply_to_message_id: 100, "<i>bad markdown</i>",
}); {
parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 100,
},
);
// Second call: plain text BUT still with thread params (critical!) // Second call: plain text BUT still with thread params (critical!)
expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_bad markdown_", { expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_bad markdown_", {
message_thread_id: 271, message_thread_id: 271,

View File

@@ -8,6 +8,7 @@ import { mediaKindFromMime } from "../media/constants.js";
import { isGifMedia } from "../media/mime.js"; import { isGifMedia } from "../media/mime.js";
import { loadWebMedia } from "../web/media.js"; import { loadWebMedia } from "../web/media.js";
import { resolveTelegramAccount } from "./accounts.js"; import { resolveTelegramAccount } from "./accounts.js";
import { markdownToTelegramHtml } from "./format.js";
type TelegramSendOpts = { type TelegramSendOpts = {
token?: string; token?: string;
@@ -204,20 +205,21 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) { if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends"); throw new Error("Message must be non-empty for Telegram sends");
} }
const htmlText = markdownToTelegramHtml(text);
const textParams = hasThreadParams const textParams = hasThreadParams
? { parse_mode: "Markdown" as const, ...threadParams } ? { parse_mode: "HTML" as const, ...threadParams }
: { parse_mode: "Markdown" as const }; : { parse_mode: "HTML" as const };
const res = await request( const res = await request(
() => api.sendMessage(chatId, text, textParams), () => api.sendMessage(chatId, htmlText, textParams),
"message", "message",
).catch(async (err) => { ).catch(async (err) => {
// Telegram rejects malformed Markdown (e.g., unbalanced '_' or '*'). // Telegram rejects malformed HTML (e.g., unsupported tags or entities).
// When that happens, fall back to plain text so the message still delivers. // When that happens, fall back to plain text so the message still delivers.
const errText = formatErrorMessage(err); const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) { if (PARSE_ERR_RE.test(errText)) {
if (opts.verbose) { if (opts.verbose) {
console.warn( console.warn(
`telegram markdown parse failed, retrying as plain text: ${errText}`, `telegram HTML parse failed, retrying as plain text: ${errText}`,
); );
} }
return await request( return await request(