feat(telegram): support media groups (multi-image messages) (#220)
This commit is contained in:
@@ -10,6 +10,9 @@ export type MsgContext = {
|
||||
MediaPath?: string;
|
||||
MediaUrl?: string;
|
||||
MediaType?: string;
|
||||
MediaPaths?: string[];
|
||||
MediaUrls?: string[];
|
||||
MediaTypes?: string[];
|
||||
Transcript?: string;
|
||||
ChatType?: string;
|
||||
GroupSubject?: string;
|
||||
|
||||
@@ -209,3 +209,135 @@ describe("telegram inbound media", () => {
|
||||
fetchSpy.mockRestore();
|
||||
});
|
||||
});
|
||||
|
||||
describe("telegram media groups", () => {
|
||||
const waitForMediaGroupProcessing = () =>
|
||||
new Promise((resolve) => setTimeout(resolve, 600));
|
||||
|
||||
it("buffers messages with same media_group_id and processes them together", async () => {
|
||||
const { createTelegramBot } = await import("./bot.js");
|
||||
const replyModule = await import("../auto-reply/reply.js");
|
||||
const replySpy = replyModule.__replySpy as unknown as ReturnType<
|
||||
typeof vi.fn
|
||||
>;
|
||||
|
||||
onSpy.mockReset();
|
||||
replySpy.mockReset();
|
||||
|
||||
const runtimeError = vi.fn();
|
||||
const fetchSpy = vi.spyOn(globalThis, "fetch" as never).mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
statusText: "OK",
|
||||
headers: { get: () => "image/png" },
|
||||
arrayBuffer: async () => new Uint8Array([0x89, 0x50, 0x4e, 0x47]).buffer,
|
||||
} as Response);
|
||||
|
||||
createTelegramBot({
|
||||
token: "tok",
|
||||
runtime: {
|
||||
log: vi.fn(),
|
||||
error: runtimeError,
|
||||
exit: () => {
|
||||
throw new Error("exit");
|
||||
},
|
||||
},
|
||||
});
|
||||
const handler = onSpy.mock.calls[0][1] as (
|
||||
ctx: Record<string, unknown>,
|
||||
) => Promise<void>;
|
||||
|
||||
await handler({
|
||||
message: {
|
||||
chat: { id: 42, type: "private" },
|
||||
message_id: 1,
|
||||
caption: "Here are my photos",
|
||||
date: 1736380800,
|
||||
media_group_id: "album123",
|
||||
photo: [{ file_id: "photo1" }],
|
||||
},
|
||||
me: { username: "clawdbot_bot" },
|
||||
getFile: async () => ({ file_path: "photos/photo1.jpg" }),
|
||||
});
|
||||
|
||||
await handler({
|
||||
message: {
|
||||
chat: { id: 42, type: "private" },
|
||||
message_id: 2,
|
||||
date: 1736380801,
|
||||
media_group_id: "album123",
|
||||
photo: [{ file_id: "photo2" }],
|
||||
},
|
||||
me: { username: "clawdbot_bot" },
|
||||
getFile: async () => ({ file_path: "photos/photo2.jpg" }),
|
||||
});
|
||||
|
||||
expect(replySpy).not.toHaveBeenCalled();
|
||||
await waitForMediaGroupProcessing();
|
||||
|
||||
expect(runtimeError).not.toHaveBeenCalled();
|
||||
expect(replySpy).toHaveBeenCalledTimes(1);
|
||||
const payload = replySpy.mock.calls[0][0];
|
||||
expect(payload.Body).toContain("Here are my photos");
|
||||
expect(payload.MediaPaths).toHaveLength(2);
|
||||
|
||||
fetchSpy.mockRestore();
|
||||
}, 2000);
|
||||
|
||||
it("processes separate media groups independently", async () => {
|
||||
const { createTelegramBot } = await import("./bot.js");
|
||||
const replyModule = await import("../auto-reply/reply.js");
|
||||
const replySpy = replyModule.__replySpy as unknown as ReturnType<
|
||||
typeof vi.fn
|
||||
>;
|
||||
|
||||
onSpy.mockReset();
|
||||
replySpy.mockReset();
|
||||
|
||||
const fetchSpy = vi.spyOn(globalThis, "fetch" as never).mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
statusText: "OK",
|
||||
headers: { get: () => "image/png" },
|
||||
arrayBuffer: async () => new Uint8Array([0x89, 0x50, 0x4e, 0x47]).buffer,
|
||||
} as Response);
|
||||
|
||||
createTelegramBot({ token: "tok" });
|
||||
const handler = onSpy.mock.calls[0][1] as (
|
||||
ctx: Record<string, unknown>,
|
||||
) => Promise<void>;
|
||||
|
||||
await handler({
|
||||
message: {
|
||||
chat: { id: 42, type: "private" },
|
||||
message_id: 1,
|
||||
caption: "Album A",
|
||||
date: 1736380800,
|
||||
media_group_id: "albumA",
|
||||
photo: [{ file_id: "photoA1" }],
|
||||
},
|
||||
me: { username: "clawdbot_bot" },
|
||||
getFile: async () => ({ file_path: "photos/photoA1.jpg" }),
|
||||
});
|
||||
|
||||
await handler({
|
||||
message: {
|
||||
chat: { id: 42, type: "private" },
|
||||
message_id: 2,
|
||||
caption: "Album B",
|
||||
date: 1736380801,
|
||||
media_group_id: "albumB",
|
||||
photo: [{ file_id: "photoB1" }],
|
||||
},
|
||||
me: { username: "clawdbot_bot" },
|
||||
getFile: async () => ({ file_path: "photos/photoB1.jpg" }),
|
||||
});
|
||||
|
||||
expect(replySpy).not.toHaveBeenCalled();
|
||||
await waitForMediaGroupProcessing();
|
||||
|
||||
expect(replySpy).toHaveBeenCalledTimes(2);
|
||||
|
||||
fetchSpy.mockRestore();
|
||||
}, 2000);
|
||||
});
|
||||
|
||||
@@ -34,8 +34,20 @@ import { loadWebMedia } from "../web/media.js";
|
||||
const PARSE_ERR_RE =
|
||||
/can't parse entities|parse entities|find end of the entity/i;
|
||||
|
||||
// Media group aggregation - Telegram sends multi-image messages as separate updates
|
||||
// with a shared media_group_id. We buffer them and process as a single message after a short delay.
|
||||
const MEDIA_GROUP_TIMEOUT_MS = 500;
|
||||
|
||||
type TelegramMessage = Message.CommonMessage;
|
||||
|
||||
type MediaGroupEntry = {
|
||||
messages: Array<{
|
||||
msg: TelegramMessage;
|
||||
ctx: TelegramContext;
|
||||
}>;
|
||||
timer: ReturnType<typeof setTimeout>;
|
||||
};
|
||||
|
||||
type TelegramContext = {
|
||||
message: TelegramMessage;
|
||||
me?: { username?: string };
|
||||
@@ -69,6 +81,8 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
const bot = new Bot(opts.token, { client });
|
||||
bot.api.config.use(apiThrottler());
|
||||
|
||||
const mediaGroupBuffer = new Map<string, MediaGroupEntry>();
|
||||
|
||||
const cfg = loadConfig();
|
||||
const textLimit = resolveTextChunkLimit(cfg, "telegram");
|
||||
const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom;
|
||||
@@ -94,24 +108,13 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
overrideOrder: "after-config",
|
||||
});
|
||||
|
||||
bot.on("message", async (ctx) => {
|
||||
try {
|
||||
const msg = ctx.message;
|
||||
if (!msg) return;
|
||||
const processMessage = async (
|
||||
primaryCtx: TelegramContext,
|
||||
allMedia: Array<{ path: string; contentType?: string }>,
|
||||
) => {
|
||||
const msg = primaryCtx.message;
|
||||
const chatId = msg.chat.id;
|
||||
const isGroup =
|
||||
msg.chat.type === "group" || msg.chat.type === "supergroup";
|
||||
|
||||
if (isGroup) {
|
||||
const groupPolicy = resolveGroupPolicy(chatId);
|
||||
if (groupPolicy.allowlistEnabled && !groupPolicy.allowed) {
|
||||
logger.info(
|
||||
{ chatId, title: msg.chat.title, reason: "not-allowed" },
|
||||
"skipping group message",
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
const isGroup = msg.chat.type === "group" || msg.chat.type === "supergroup";
|
||||
|
||||
const sendTyping = async () => {
|
||||
try {
|
||||
@@ -140,7 +143,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
}
|
||||
}
|
||||
|
||||
const botUsername = ctx.me?.username?.toLowerCase();
|
||||
const botUsername = primaryCtx.me?.username?.toLowerCase();
|
||||
const allowFromList = Array.isArray(allowFrom)
|
||||
? allowFrom.map((entry) => String(entry).trim()).filter(Boolean)
|
||||
: [];
|
||||
@@ -171,32 +174,15 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
!hasAnyMention &&
|
||||
commandAuthorized &&
|
||||
hasControlCommand(msg.text ?? msg.caption ?? "");
|
||||
const canDetectMention =
|
||||
Boolean(botUsername) || mentionRegexes.length > 0;
|
||||
const canDetectMention = Boolean(botUsername) || mentionRegexes.length > 0;
|
||||
if (isGroup && requireMention && canDetectMention) {
|
||||
if (!wasMentioned && !shouldBypassMention) {
|
||||
logger.info(
|
||||
{ chatId, reason: "no-mention" },
|
||||
"skipping group message",
|
||||
);
|
||||
logger.info({ chatId, reason: "no-mention" }, "skipping group message");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const media = await resolveMedia(
|
||||
ctx,
|
||||
mediaMaxBytes,
|
||||
opts.token,
|
||||
opts.proxyFetch,
|
||||
);
|
||||
const replyTarget = describeReplyTarget(msg);
|
||||
const rawBody = (
|
||||
msg.text ??
|
||||
msg.caption ??
|
||||
media?.placeholder ??
|
||||
""
|
||||
).trim();
|
||||
if (!rawBody) return;
|
||||
// ACK reactions
|
||||
const shouldAckReaction = () => {
|
||||
if (!ackReaction) return false;
|
||||
if (ackReactionScope === "all") return true;
|
||||
@@ -204,7 +190,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
if (ackReactionScope === "group-all") return isGroup;
|
||||
if (ackReactionScope === "group-mentions") {
|
||||
if (!isGroup) return false;
|
||||
if (!resolveGroupRequireMention(chatId)) return false;
|
||||
if (!requireMention) return false;
|
||||
if (!canDetectMention) return false;
|
||||
return wasMentioned || shouldBypassMention;
|
||||
}
|
||||
@@ -230,8 +216,26 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let placeholder = "";
|
||||
if (msg.photo) placeholder = "<media:image>";
|
||||
else if (msg.video) placeholder = "<media:video>";
|
||||
else if (msg.audio || msg.voice) placeholder = "<media:audio>";
|
||||
else if (msg.document) placeholder = "<media:document>";
|
||||
|
||||
const replyTarget = describeReplyTarget(msg);
|
||||
const rawBody = (msg.text ?? msg.caption ?? placeholder).trim();
|
||||
if (!rawBody && allMedia.length === 0) return;
|
||||
|
||||
let bodyText = rawBody;
|
||||
if (!bodyText && allMedia.length > 0) {
|
||||
bodyText = `<media:image>${allMedia.length > 1 ? ` (${allMedia.length} images)` : ""}`;
|
||||
}
|
||||
|
||||
const replySuffix = replyTarget
|
||||
? `\n\n[Replying to ${replyTarget.sender}${replyTarget.id ? ` id:${replyTarget.id}` : ""}]\n${replyTarget.body}\n[/Replying]`
|
||||
? `\n\n[Replying to ${replyTarget.sender}${
|
||||
replyTarget.id ? ` id:${replyTarget.id}` : ""
|
||||
}]\n${replyTarget.body}\n[/Replying]`
|
||||
: "";
|
||||
const body = formatAgentEnvelope({
|
||||
surface: "Telegram",
|
||||
@@ -239,7 +243,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
? buildGroupLabel(msg, chatId)
|
||||
: buildSenderLabel(msg, chatId),
|
||||
timestamp: msg.date ? msg.date * 1000 : undefined,
|
||||
body: `${rawBody}${replySuffix}`,
|
||||
body: `${bodyText}${replySuffix}`,
|
||||
});
|
||||
|
||||
const ctxPayload = {
|
||||
@@ -258,9 +262,15 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
ReplyToSender: replyTarget?.sender,
|
||||
Timestamp: msg.date ? msg.date * 1000 : undefined,
|
||||
WasMentioned: isGroup ? wasMentioned : undefined,
|
||||
MediaPath: media?.path,
|
||||
MediaType: media?.contentType,
|
||||
MediaUrl: media?.path,
|
||||
MediaPath: allMedia[0]?.path,
|
||||
MediaType: allMedia[0]?.contentType,
|
||||
MediaUrl: allMedia[0]?.path,
|
||||
MediaPaths: allMedia.length > 0 ? allMedia.map((m) => m.path) : undefined,
|
||||
MediaUrls: allMedia.length > 0 ? allMedia.map((m) => m.path) : undefined,
|
||||
MediaTypes:
|
||||
allMedia.length > 0
|
||||
? (allMedia.map((m) => m.contentType).filter(Boolean) as string[])
|
||||
: undefined,
|
||||
CommandAuthorized: commandAuthorized,
|
||||
};
|
||||
|
||||
@@ -285,8 +295,10 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
|
||||
if (shouldLogVerbose()) {
|
||||
const preview = body.slice(0, 200).replace(/\n/g, "\\n");
|
||||
const mediaInfo =
|
||||
allMedia.length > 1 ? ` mediaCount=${allMedia.length}` : "";
|
||||
logVerbose(
|
||||
`telegram inbound: chatId=${chatId} from=${ctxPayload.From} len=${body.length} preview="${preview}"`,
|
||||
`telegram inbound: chatId=${chatId} from=${ctxPayload.From} len=${body.length}${mediaInfo} preview="${preview}"`,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -327,11 +339,96 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
});
|
||||
typingController?.markDispatchIdle();
|
||||
if (!queuedFinal) return;
|
||||
};
|
||||
|
||||
bot.on("message", async (ctx) => {
|
||||
try {
|
||||
const msg = ctx.message;
|
||||
if (!msg) return;
|
||||
|
||||
const chatId = msg.chat.id;
|
||||
const isGroup =
|
||||
msg.chat.type === "group" || msg.chat.type === "supergroup";
|
||||
|
||||
// Group policy check - skip disallowed groups early
|
||||
if (isGroup) {
|
||||
const groupPolicy = resolveGroupPolicy(chatId);
|
||||
if (groupPolicy.allowlistEnabled && !groupPolicy.allowed) {
|
||||
logger.info(
|
||||
{ chatId, title: msg.chat.title, reason: "not-allowed" },
|
||||
"skipping group message",
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Media group handling - buffer multi-image messages
|
||||
const mediaGroupId = (msg as { media_group_id?: string }).media_group_id;
|
||||
if (mediaGroupId) {
|
||||
const existing = mediaGroupBuffer.get(mediaGroupId);
|
||||
if (existing) {
|
||||
clearTimeout(existing.timer);
|
||||
existing.messages.push({ msg, ctx });
|
||||
existing.timer = setTimeout(async () => {
|
||||
mediaGroupBuffer.delete(mediaGroupId);
|
||||
await processMediaGroup(existing);
|
||||
}, MEDIA_GROUP_TIMEOUT_MS);
|
||||
} else {
|
||||
const entry: MediaGroupEntry = {
|
||||
messages: [{ msg, ctx }],
|
||||
timer: setTimeout(async () => {
|
||||
mediaGroupBuffer.delete(mediaGroupId);
|
||||
await processMediaGroup(entry);
|
||||
}, MEDIA_GROUP_TIMEOUT_MS),
|
||||
};
|
||||
mediaGroupBuffer.set(mediaGroupId, entry);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const media = await resolveMedia(
|
||||
ctx,
|
||||
mediaMaxBytes,
|
||||
opts.token,
|
||||
opts.proxyFetch,
|
||||
);
|
||||
const allMedia = media
|
||||
? [{ path: media.path, contentType: media.contentType }]
|
||||
: [];
|
||||
await processMessage(ctx, allMedia);
|
||||
} catch (err) {
|
||||
runtime.error?.(danger(`handler failed: ${String(err)}`));
|
||||
}
|
||||
});
|
||||
|
||||
const processMediaGroup = async (entry: MediaGroupEntry) => {
|
||||
try {
|
||||
entry.messages.sort((a, b) => a.msg.message_id - b.msg.message_id);
|
||||
|
||||
const captionMsg = entry.messages.find(
|
||||
(m) => m.msg.caption || m.msg.text,
|
||||
);
|
||||
const primaryEntry = captionMsg ?? entry.messages[0];
|
||||
|
||||
const allMedia: Array<{ path: string; contentType?: string }> = [];
|
||||
for (const { ctx } of entry.messages) {
|
||||
const media = await resolveMedia(
|
||||
ctx,
|
||||
mediaMaxBytes,
|
||||
opts.token,
|
||||
opts.proxyFetch,
|
||||
);
|
||||
if (media) {
|
||||
allMedia.push({ path: media.path, contentType: media.contentType });
|
||||
}
|
||||
}
|
||||
|
||||
await processMessage(primaryEntry.ctx, allMedia);
|
||||
} catch (err) {
|
||||
runtime.error?.(danger(`media group handler failed: ${String(err)}`));
|
||||
}
|
||||
};
|
||||
|
||||
return bot;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user