fix: split long Telegram captions (#907) - thanks @jalehman

Co-authored-by: Josh Lehman <josh@martian.engineering>
This commit is contained in:
Peter Steinberger
2026-01-14 15:52:54 +00:00
parent 4e837cfa2d
commit 53465a4d2d
3 changed files with 410 additions and 5 deletions

View File

@@ -9,6 +9,7 @@
- Gateway/Dev: ensure `pnpm gateway:dev` always uses the dev profile config + state (`~/.clawdbot-dev`).
- macOS: fix cron preview/testing payload to use `channel` key. (#867) — thanks @wes-davis.
- Telegram: honor `channels.telegram.timeoutSeconds` for grammY API requests. (#863) — thanks @Snaver.
- Telegram: split long captions into media + follow-up text messages. (#907) — thanks @jalehman.
## 2026.1.13

View File

@@ -0,0 +1,366 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Shared spies for the mocked "grammy" module. They are created through
// vi.hoisted because Vitest hoists vi.mock() calls to the top of the file, so
// anything a mock factory references must be initialised in a hoisted callback.
const { botApi, botCtorSpy } = vi.hoisted(() => ({
// Stub exposed as `api` on every mocked Bot instance.
botApi: {
sendMessage: vi.fn(),
sendPhoto: vi.fn(),
},
// Records the (token, options) pair passed to the mocked Bot constructor.
botCtorSpy: vi.fn(),
}));
// Spy that replaces the media loader; each test queues its own resolved value.
const { loadWebMedia } = vi.hoisted(() => ({
loadWebMedia: vi.fn(),
}));
// Replace the media module so its only export here is the loadWebMedia spy.
vi.mock("../web/media.js", () => ({
loadWebMedia,
}));
// Replace grammY with minimal stand-ins: a Bot whose `api` is the shared stub
// and whose constructor arguments are forwarded to botCtorSpy, plus an empty
// InputFile class.
vi.mock("grammy", () => ({
Bot: class {
api = botApi;
constructor(
public token: string,
public options?: {
client?: { fetch?: typeof fetch; timeoutSeconds?: number };
},
) {
botCtorSpy(token, options);
}
},
InputFile: class {},
}));
// Config loader spy; returns an empty config object by default.
const { loadConfig } = vi.hoisted(() => ({
loadConfig: vi.fn(() => ({})),
}));
// Partial mock: keep every real export of the config module and override only
// loadConfig with the spy above.
vi.mock("../config/config.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../config/config.js")>();
return {
...actual,
loadConfig,
};
});
import { sendMessageTelegram } from "./send.js";
/**
 * Caption-splitting behaviour of sendMessageTelegram when media is attached:
 * text longer than 1024 characters is sent as a follow-up message instead of
 * as the media caption.
 */
describe("sendMessageTelegram caption splitting", () => {
  const MEDIA_URL = "https://example.com/photo.jpg";

  /** Shape of the value the stubbed API methods resolve with. */
  const delivered = (chatId: string, messageId: number) => ({
    message_id: messageId,
    chat: { id: chatId },
  });

  /** Fresh inline-keyboard rows, used both as input and as the expected markup. */
  const clickButtonRows = () => [[{ text: "Click me", callback_data: "action:click" }]];

  /**
   * Builds the stub API for one test and queues a fake JPEG for the next
   * loadWebMedia call.
   *
   * @param chatId - chat id echoed back by the stubbed sendPhoto result
   * @param photoMessageId - message_id the stubbed sendPhoto resolves with
   * @param sendMessage - stub for the follow-up text; a bare spy by default
   */
  const arrangePhotoSend = (chatId: string, photoMessageId: number, sendMessage = vi.fn()) => {
    const sendPhoto = vi.fn().mockResolvedValue(delivered(chatId, photoMessageId));
    const api = { sendPhoto, sendMessage } as unknown as {
      sendPhoto: typeof sendPhoto;
      sendMessage: typeof sendMessage;
    };
    loadWebMedia.mockResolvedValueOnce({
      buffer: Buffer.from("fake-image"),
      contentType: "image/jpeg",
      fileName: "photo.jpg",
    });
    return { api, sendPhoto, sendMessage };
  };

  beforeEach(() => {
    loadConfig.mockReturnValue({});
    loadWebMedia.mockReset();
    botApi.sendMessage.mockReset();
    botApi.sendPhoto.mockReset();
    botCtorSpy.mockReset();
  });

  it("splits long captions into media + text messages when text exceeds 1024 chars", async () => {
    const chatId = "123";
    // 1100 characters: over the 1024 caption limit.
    const longText = "A".repeat(1100);
    const { api, sendPhoto, sendMessage } = arrangePhotoSend(
      chatId,
      70,
      vi.fn().mockResolvedValue(delivered(chatId, 71)),
    );

    const res = await sendMessageTelegram(chatId, longText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
    });

    // The media goes out with no caption...
    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
      caption: undefined,
    });
    // ...and the full text is delivered as its own plain-text message.
    expect(sendMessage).toHaveBeenCalledWith(chatId, longText);
    // The follow-up text message is reported as the "main" message.
    expect(res.messageId).toBe("71");
  });

  it("uses caption when text is within 1024 char limit", async () => {
    const chatId = "123";
    // Exactly at the limit: must still travel as a caption.
    const shortText = "B".repeat(1024);
    const { api, sendPhoto, sendMessage } = arrangePhotoSend(chatId, 72);

    const res = await sendMessageTelegram(chatId, shortText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
    });

    // The text rides along with the media.
    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
      caption: shortText,
    });
    // No follow-up message is needed.
    expect(sendMessage).not.toHaveBeenCalled();
    expect(res.messageId).toBe("72");
  });

  it("preserves thread params when splitting long captions", async () => {
    const chatId = "-1001234567890";
    const longText = "C".repeat(1100);
    const { api, sendPhoto, sendMessage } = arrangePhotoSend(
      chatId,
      73,
      vi.fn().mockResolvedValue(delivered(chatId, 74)),
    );

    await sendMessageTelegram(chatId, longText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
      messageThreadId: 271,
      replyToMessageId: 500,
    });

    // Media keeps the thread params but carries no caption.
    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
      caption: undefined,
      message_thread_id: 271,
      reply_to_message_id: 500,
    });
    // The follow-up text is sent with the same thread params.
    expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
      message_thread_id: 271,
      reply_to_message_id: 500,
    });
  });

  it("puts reply_markup only on follow-up text when splitting", async () => {
    const chatId = "123";
    const longText = "D".repeat(1100);
    const { api, sendPhoto, sendMessage } = arrangePhotoSend(
      chatId,
      75,
      vi.fn().mockResolvedValue(delivered(chatId, 76)),
    );

    await sendMessageTelegram(chatId, longText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
      buttons: clickButtonRows(),
    });

    // The media message must not carry the keyboard.
    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
      caption: undefined,
    });
    // The keyboard is attached to the follow-up text instead.
    expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
      reply_markup: {
        inline_keyboard: clickButtonRows(),
      },
    });
  });

  it("includes thread params and reply_markup on follow-up text when splitting", async () => {
    const chatId = "-1001234567890";
    const longText = "F".repeat(1100);
    const { api, sendPhoto, sendMessage } = arrangePhotoSend(
      chatId,
      78,
      vi.fn().mockResolvedValue(delivered(chatId, 79)),
    );

    await sendMessageTelegram(chatId, longText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
      messageThreadId: 271,
      replyToMessageId: 500,
      buttons: clickButtonRows(),
    });

    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
      caption: undefined,
      message_thread_id: 271,
      reply_to_message_id: 500,
    });
    expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
      message_thread_id: 271,
      reply_to_message_id: 500,
      reply_markup: {
        inline_keyboard: clickButtonRows(),
      },
    });
  });

  it("wraps chat-not-found errors from follow-up message", async () => {
    const chatId = "123";
    const longText = "G".repeat(1100);
    // The media succeeds; only the follow-up text is rejected.
    const { api } = arrangePhotoSend(
      chatId,
      80,
      vi.fn().mockRejectedValue(new Error("400: Bad Request: chat not found")),
    );

    const pending = sendMessageTelegram(chatId, longText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
    });

    await expect(pending).rejects.toThrow(
      /Telegram send failed: chat not found \(chat_id=123\)\./,
    );
  });

  it("does not send follow-up text when caption is empty", async () => {
    const chatId = "123";
    // Whitespace only: trims down to nothing.
    const emptyText = " ";
    const { api, sendPhoto, sendMessage } = arrangePhotoSend(chatId, 81);

    const res = await sendMessageTelegram(chatId, emptyText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
    });

    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
      caption: undefined,
    });
    expect(sendMessage).not.toHaveBeenCalled();
    expect(res.messageId).toBe("81");
  });

  it("keeps reply_markup on media when not splitting", async () => {
    const chatId = "123";
    const shortText = "E".repeat(100);
    const { api, sendPhoto, sendMessage } = arrangePhotoSend(chatId, 77);

    await sendMessageTelegram(chatId, shortText, {
      token: "tok",
      api,
      mediaUrl: MEDIA_URL,
      buttons: clickButtonRows(),
    });

    // With no split, the keyboard stays on the media message.
    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
      caption: shortText,
      reply_markup: {
        inline_keyboard: clickButtonRows(),
      },
    });
    expect(sendMessage).not.toHaveBeenCalled();
  });
});

View File

@@ -54,6 +54,10 @@ type TelegramReactionOpts = {
// Case-insensitive match for error text that mentions an entity-parsing failure.
// NOTE(review): presumably tested against Telegram API error messages to detect
// rejected formatting — the usage is outside this view; confirm.
const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
// Telegram limits media captions to 1024 characters.
// Text beyond this must be sent as a separate follow-up message.
// The check is `trimmedText.length > TELEGRAM_MAX_CAPTION_LENGTH`, so text of
// exactly 1024 characters is still sent as a caption.
const TELEGRAM_MAX_CAPTION_LENGTH = 1024;
function resolveToken(explicit: string | undefined, params: { accountId: string; token: string }) {
if (explicit?.trim()) return explicit.trim();
if (!params.token) {
@@ -195,16 +199,22 @@ export async function sendMessageTelegram(
});
const fileName = media.fileName ?? (isGif ? "animation.gif" : inferFilename(kind)) ?? "file";
const file = new InputFile(media.buffer, fileName);
const caption = text?.trim() || undefined;
const trimmedText = text?.trim() || "";
// If text exceeds Telegram's caption limit, send media without caption
// then send text as a separate follow-up message.
const needsSeparateText = trimmedText.length > TELEGRAM_MAX_CAPTION_LENGTH;
const caption = needsSeparateText ? undefined : trimmedText || undefined;
// When splitting, put reply_markup only on the follow-up text (the "main" content),
// not on the media message.
const mediaParams = hasThreadParams
? {
caption,
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: {
caption,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
};
let result:
| Awaited<ReturnType<typeof api.sendPhoto>>
@@ -258,13 +268,41 @@ export async function sendMessageTelegram(
},
);
}
const messageId = String(result?.message_id ?? "unknown");
const mediaMessageId = String(result?.message_id ?? "unknown");
const resolvedChatId = String(result?.chat?.id ?? chatId);
recordChannelActivity({
channel: "telegram",
accountId: account.accountId,
direction: "outbound",
});
return { messageId, chatId: String(result?.chat?.id ?? chatId) };
// If text was too long for a caption, send it as a separate follow-up message.
// Use plain text to match caption behavior (captions don't use HTML conversion).
if (needsSeparateText && trimmedText) {
const textParams =
hasThreadParams || replyMarkup
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const textRes = await request(
() =>
textParams
? api.sendMessage(chatId, trimmedText, textParams)
: api.sendMessage(chatId, trimmedText),
"message",
).catch((err) => {
throw wrapChatNotFound(err);
});
// Return the text message ID as the "main" message (it's the actual content).
return {
messageId: String(textRes?.message_id ?? mediaMessageId),
chatId: resolvedChatId,
};
}
return { messageId: mediaMessageId, chatId: resolvedChatId };
}
if (!text || !text.trim()) {