diff --git a/CHANGELOG.md b/CHANGELOG.md index 136ef397a..a1028f42b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Gateway/Dev: ensure `pnpm gateway:dev` always uses the dev profile config + state (`~/.clawdbot-dev`). - macOS: fix cron preview/testing payload to use `channel` key. (#867) — thanks @wes-davis. - Telegram: honor `channels.telegram.timeoutSeconds` for grammY API requests. (#863) — thanks @Snaver. +- Telegram: split long captions into media + follow-up text messages. (#907) — thanks @jalehman. ## 2026.1.13 diff --git a/src/telegram/send.caption-split.test.ts b/src/telegram/send.caption-split.test.ts new file mode 100644 index 000000000..64a144c76 --- /dev/null +++ b/src/telegram/send.caption-split.test.ts @@ -0,0 +1,366 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const { botApi, botCtorSpy } = vi.hoisted(() => ({ + botApi: { + sendMessage: vi.fn(), + sendPhoto: vi.fn(), + }, + botCtorSpy: vi.fn(), +})); + +const { loadWebMedia } = vi.hoisted(() => ({ + loadWebMedia: vi.fn(), +})); + +vi.mock("../web/media.js", () => ({ + loadWebMedia, +})); + +vi.mock("grammy", () => ({ + Bot: class { + api = botApi; + constructor( + public token: string, + public options?: { + client?: { fetch?: typeof fetch; timeoutSeconds?: number }; + }, + ) { + botCtorSpy(token, options); + } + }, + InputFile: class {}, +})); + +const { loadConfig } = vi.hoisted(() => ({ + loadConfig: vi.fn(() => ({})), +})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig, + }; +}); + +import { sendMessageTelegram } from "./send.js"; + +describe("sendMessageTelegram caption splitting", () => { + beforeEach(() => { + loadConfig.mockReturnValue({}); + loadWebMedia.mockReset(); + botApi.sendMessage.mockReset(); + botApi.sendPhoto.mockReset(); + botCtorSpy.mockReset(); + }); + + it("splits long captions into media + text messages when text exceeds 1024 chars", async () => 
{ + const chatId = "123"; + // Generate text longer than 1024 characters + const longText = "A".repeat(1100); + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 70, + chat: { id: chatId }, + }); + const sendMessage = vi.fn().mockResolvedValue({ + message_id: 71, + chat: { id: chatId }, + }); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + const res = await sendMessageTelegram(chatId, longText, { + token: "tok", + api, + mediaUrl: "https://example.com/photo.jpg", + }); + + // Media should be sent first without caption + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: undefined, + }); + // Then text sent as separate message (plain text, matching caption behavior) + expect(sendMessage).toHaveBeenCalledWith(chatId, longText); + // Returns the text message ID (the "main" content) + expect(res.messageId).toBe("71"); + }); + + it("uses caption when text is within 1024 char limit", async () => { + const chatId = "123"; + // Text exactly at 1024 characters should still use caption + const shortText = "B".repeat(1024); + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 72, + chat: { id: chatId }, + }); + const sendMessage = vi.fn(); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + const res = await sendMessageTelegram(chatId, shortText, { + token: "tok", + api, + mediaUrl: "https://example.com/photo.jpg", + }); + + // Caption should be included with media + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: shortText, + }); + // No separate text message 
needed + expect(sendMessage).not.toHaveBeenCalled(); + expect(res.messageId).toBe("72"); + }); + + it("preserves thread params when splitting long captions", async () => { + const chatId = "-1001234567890"; + const longText = "C".repeat(1100); + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 73, + chat: { id: chatId }, + }); + const sendMessage = vi.fn().mockResolvedValue({ + message_id: 74, + chat: { id: chatId }, + }); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + await sendMessageTelegram(chatId, longText, { + token: "tok", + api, + mediaUrl: "https://example.com/photo.jpg", + messageThreadId: 271, + replyToMessageId: 500, + }); + + // Media sent with thread params but no caption + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: undefined, + message_thread_id: 271, + reply_to_message_id: 500, + }); + // Text message also includes thread params (plain text, matching caption behavior) + expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { + message_thread_id: 271, + reply_to_message_id: 500, + }); + }); + + it("puts reply_markup only on follow-up text when splitting", async () => { + const chatId = "123"; + const longText = "D".repeat(1100); + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 75, + chat: { id: chatId }, + }); + const sendMessage = vi.fn().mockResolvedValue({ + message_id: 76, + chat: { id: chatId }, + }); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + await sendMessageTelegram(chatId, longText, { + token: "tok", + api, + mediaUrl: 
"https://example.com/photo.jpg", + buttons: [[{ text: "Click me", callback_data: "action:click" }]], + }); + + // Media sent WITHOUT reply_markup + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: undefined, + }); + // Follow-up text has the reply_markup + expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { + reply_markup: { + inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]], + }, + }); + }); + + it("includes thread params and reply_markup on follow-up text when splitting", async () => { + const chatId = "-1001234567890"; + const longText = "F".repeat(1100); + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 78, + chat: { id: chatId }, + }); + const sendMessage = vi.fn().mockResolvedValue({ + message_id: 79, + chat: { id: chatId }, + }); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + await sendMessageTelegram(chatId, longText, { + token: "tok", + api, + mediaUrl: "https://example.com/photo.jpg", + messageThreadId: 271, + replyToMessageId: 500, + buttons: [[{ text: "Click me", callback_data: "action:click" }]], + }); + + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: undefined, + message_thread_id: 271, + reply_to_message_id: 500, + }); + expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { + message_thread_id: 271, + reply_to_message_id: 500, + reply_markup: { + inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]], + }, + }); + }); + + it("wraps chat-not-found errors from follow-up message", async () => { + const chatId = "123"; + const longText = "G".repeat(1100); + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 80, + chat: { id: chatId }, + }); + const sendMessage = vi + .fn() + 
.mockRejectedValue(new Error("400: Bad Request: chat not found")); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + await expect( + sendMessageTelegram(chatId, longText, { + token: "tok", + api, + mediaUrl: "https://example.com/photo.jpg", + }), + ).rejects.toThrow( + /Telegram send failed: chat not found \(chat_id=123\)\./, + ); + }); + + it("does not send follow-up text when caption is empty", async () => { + const chatId = "123"; + const emptyText = " "; + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 81, + chat: { id: chatId }, + }); + const sendMessage = vi.fn(); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + const res = await sendMessageTelegram(chatId, emptyText, { + token: "tok", + api, + mediaUrl: "https://example.com/photo.jpg", + }); + + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: undefined, + }); + expect(sendMessage).not.toHaveBeenCalled(); + expect(res.messageId).toBe("81"); + }); + + it("keeps reply_markup on media when not splitting", async () => { + const chatId = "123"; + const shortText = "E".repeat(100); + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 77, + chat: { id: chatId }, + }); + const sendMessage = vi.fn(); + const api = { sendPhoto, sendMessage } as unknown as { + sendPhoto: typeof sendPhoto; + sendMessage: typeof sendMessage; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + await sendMessageTelegram(chatId, shortText, { + token: "tok", + 
api, + mediaUrl: "https://example.com/photo.jpg", + buttons: [[{ text: "Click me", callback_data: "action:click" }]], + }); + + // Media sent WITH reply_markup when not splitting + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: shortText, + reply_markup: { + inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]], + }, + }); + expect(sendMessage).not.toHaveBeenCalled(); + }); +}); diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 2e5096004..291c73017 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -54,6 +54,10 @@ type TelegramReactionOpts = { const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i; +// Telegram limits media captions to 1024 characters. +// Text longer than this is sent in full as a separate follow-up message (the media goes out without a caption). +const TELEGRAM_MAX_CAPTION_LENGTH = 1024; + function resolveToken(explicit: string | undefined, params: { accountId: string; token: string }) { if (explicit?.trim()) return explicit.trim(); if (!params.token) { @@ -195,16 +199,22 @@ export async function sendMessageTelegram( }); const fileName = media.fileName ?? (isGif ? "animation.gif" : inferFilename(kind)) ?? "file"; const file = new InputFile(media.buffer, fileName); - const caption = text?.trim() || undefined; + const trimmedText = text?.trim() || ""; + // If text exceeds Telegram's caption limit, send media without caption + // then send text as a separate follow-up message. + const needsSeparateText = trimmedText.length > TELEGRAM_MAX_CAPTION_LENGTH; + const caption = needsSeparateText ? undefined : trimmedText || undefined; + // When splitting, put reply_markup only on the follow-up text (the "main" content), + // not on the media message. const mediaParams = hasThreadParams ? { caption, ...threadParams, - ...(replyMarkup ? { reply_markup: replyMarkup } : {}), + ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), } : { caption, - ...(replyMarkup ? 
{ reply_markup: replyMarkup } : {}), + ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), }; let result: | Awaited> @@ -258,13 +268,41 @@ export async function sendMessageTelegram( }, ); } - const messageId = String(result?.message_id ?? "unknown"); + const mediaMessageId = String(result?.message_id ?? "unknown"); + const resolvedChatId = String(result?.chat?.id ?? chatId); recordChannelActivity({ channel: "telegram", accountId: account.accountId, direction: "outbound", }); - return { messageId, chatId: String(result?.chat?.id ?? chatId) }; + + // If text was too long for a caption, send it as a separate follow-up message. + // Use plain text to match caption behavior (captions don't use HTML conversion). + if (needsSeparateText && trimmedText) { + const textParams = + hasThreadParams || replyMarkup + ? { + ...threadParams, + ...(replyMarkup ? { reply_markup: replyMarkup } : {}), + } + : undefined; + const textRes = await request( + () => + textParams + ? api.sendMessage(chatId, trimmedText, textParams) + : api.sendMessage(chatId, trimmedText), + "message", + ).catch((err) => { + throw wrapChatNotFound(err); + }); + // Return the text message ID as the "main" message (it's the actual content). + return { + messageId: String(textRes?.message_id ?? mediaMessageId), + chatId: resolvedChatId, + }; + } + + return { messageId: mediaMessageId, chatId: resolvedChatId }; } if (!text || !text.trim()) {