feat(telegram): support media groups (multi-image messages) (#220)
This commit is contained in:
@@ -10,6 +10,9 @@ export type MsgContext = {
|
|||||||
MediaPath?: string;
|
MediaPath?: string;
|
||||||
MediaUrl?: string;
|
MediaUrl?: string;
|
||||||
MediaType?: string;
|
MediaType?: string;
|
||||||
|
MediaPaths?: string[];
|
||||||
|
MediaUrls?: string[];
|
||||||
|
MediaTypes?: string[];
|
||||||
Transcript?: string;
|
Transcript?: string;
|
||||||
ChatType?: string;
|
ChatType?: string;
|
||||||
GroupSubject?: string;
|
GroupSubject?: string;
|
||||||
|
|||||||
@@ -209,3 +209,135 @@ describe("telegram inbound media", () => {
|
|||||||
fetchSpy.mockRestore();
|
fetchSpy.mockRestore();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("telegram media groups", () => {
|
||||||
|
const waitForMediaGroupProcessing = () =>
|
||||||
|
new Promise((resolve) => setTimeout(resolve, 600));
|
||||||
|
|
||||||
|
it("buffers messages with same media_group_id and processes them together", async () => {
|
||||||
|
const { createTelegramBot } = await import("./bot.js");
|
||||||
|
const replyModule = await import("../auto-reply/reply.js");
|
||||||
|
const replySpy = replyModule.__replySpy as unknown as ReturnType<
|
||||||
|
typeof vi.fn
|
||||||
|
>;
|
||||||
|
|
||||||
|
onSpy.mockReset();
|
||||||
|
replySpy.mockReset();
|
||||||
|
|
||||||
|
const runtimeError = vi.fn();
|
||||||
|
const fetchSpy = vi.spyOn(globalThis, "fetch" as never).mockResolvedValue({
|
||||||
|
ok: true,
|
||||||
|
status: 200,
|
||||||
|
statusText: "OK",
|
||||||
|
headers: { get: () => "image/png" },
|
||||||
|
arrayBuffer: async () => new Uint8Array([0x89, 0x50, 0x4e, 0x47]).buffer,
|
||||||
|
} as Response);
|
||||||
|
|
||||||
|
createTelegramBot({
|
||||||
|
token: "tok",
|
||||||
|
runtime: {
|
||||||
|
log: vi.fn(),
|
||||||
|
error: runtimeError,
|
||||||
|
exit: () => {
|
||||||
|
throw new Error("exit");
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const handler = onSpy.mock.calls[0][1] as (
|
||||||
|
ctx: Record<string, unknown>,
|
||||||
|
) => Promise<void>;
|
||||||
|
|
||||||
|
await handler({
|
||||||
|
message: {
|
||||||
|
chat: { id: 42, type: "private" },
|
||||||
|
message_id: 1,
|
||||||
|
caption: "Here are my photos",
|
||||||
|
date: 1736380800,
|
||||||
|
media_group_id: "album123",
|
||||||
|
photo: [{ file_id: "photo1" }],
|
||||||
|
},
|
||||||
|
me: { username: "clawdbot_bot" },
|
||||||
|
getFile: async () => ({ file_path: "photos/photo1.jpg" }),
|
||||||
|
});
|
||||||
|
|
||||||
|
await handler({
|
||||||
|
message: {
|
||||||
|
chat: { id: 42, type: "private" },
|
||||||
|
message_id: 2,
|
||||||
|
date: 1736380801,
|
||||||
|
media_group_id: "album123",
|
||||||
|
photo: [{ file_id: "photo2" }],
|
||||||
|
},
|
||||||
|
me: { username: "clawdbot_bot" },
|
||||||
|
getFile: async () => ({ file_path: "photos/photo2.jpg" }),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(replySpy).not.toHaveBeenCalled();
|
||||||
|
await waitForMediaGroupProcessing();
|
||||||
|
|
||||||
|
expect(runtimeError).not.toHaveBeenCalled();
|
||||||
|
expect(replySpy).toHaveBeenCalledTimes(1);
|
||||||
|
const payload = replySpy.mock.calls[0][0];
|
||||||
|
expect(payload.Body).toContain("Here are my photos");
|
||||||
|
expect(payload.MediaPaths).toHaveLength(2);
|
||||||
|
|
||||||
|
fetchSpy.mockRestore();
|
||||||
|
}, 2000);
|
||||||
|
|
||||||
|
it("processes separate media groups independently", async () => {
|
||||||
|
const { createTelegramBot } = await import("./bot.js");
|
||||||
|
const replyModule = await import("../auto-reply/reply.js");
|
||||||
|
const replySpy = replyModule.__replySpy as unknown as ReturnType<
|
||||||
|
typeof vi.fn
|
||||||
|
>;
|
||||||
|
|
||||||
|
onSpy.mockReset();
|
||||||
|
replySpy.mockReset();
|
||||||
|
|
||||||
|
const fetchSpy = vi.spyOn(globalThis, "fetch" as never).mockResolvedValue({
|
||||||
|
ok: true,
|
||||||
|
status: 200,
|
||||||
|
statusText: "OK",
|
||||||
|
headers: { get: () => "image/png" },
|
||||||
|
arrayBuffer: async () => new Uint8Array([0x89, 0x50, 0x4e, 0x47]).buffer,
|
||||||
|
} as Response);
|
||||||
|
|
||||||
|
createTelegramBot({ token: "tok" });
|
||||||
|
const handler = onSpy.mock.calls[0][1] as (
|
||||||
|
ctx: Record<string, unknown>,
|
||||||
|
) => Promise<void>;
|
||||||
|
|
||||||
|
await handler({
|
||||||
|
message: {
|
||||||
|
chat: { id: 42, type: "private" },
|
||||||
|
message_id: 1,
|
||||||
|
caption: "Album A",
|
||||||
|
date: 1736380800,
|
||||||
|
media_group_id: "albumA",
|
||||||
|
photo: [{ file_id: "photoA1" }],
|
||||||
|
},
|
||||||
|
me: { username: "clawdbot_bot" },
|
||||||
|
getFile: async () => ({ file_path: "photos/photoA1.jpg" }),
|
||||||
|
});
|
||||||
|
|
||||||
|
await handler({
|
||||||
|
message: {
|
||||||
|
chat: { id: 42, type: "private" },
|
||||||
|
message_id: 2,
|
||||||
|
caption: "Album B",
|
||||||
|
date: 1736380801,
|
||||||
|
media_group_id: "albumB",
|
||||||
|
photo: [{ file_id: "photoB1" }],
|
||||||
|
},
|
||||||
|
me: { username: "clawdbot_bot" },
|
||||||
|
getFile: async () => ({ file_path: "photos/photoB1.jpg" }),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(replySpy).not.toHaveBeenCalled();
|
||||||
|
await waitForMediaGroupProcessing();
|
||||||
|
|
||||||
|
expect(replySpy).toHaveBeenCalledTimes(2);
|
||||||
|
|
||||||
|
fetchSpy.mockRestore();
|
||||||
|
}, 2000);
|
||||||
|
});
|
||||||
|
|||||||
@@ -34,8 +34,20 @@ import { loadWebMedia } from "../web/media.js";
|
|||||||
const PARSE_ERR_RE =
|
const PARSE_ERR_RE =
|
||||||
/can't parse entities|parse entities|find end of the entity/i;
|
/can't parse entities|parse entities|find end of the entity/i;
|
||||||
|
|
||||||
|
// Media group aggregation - Telegram sends multi-image messages as separate updates
|
||||||
|
// with a shared media_group_id. We buffer them and process as a single message after a short delay.
|
||||||
|
const MEDIA_GROUP_TIMEOUT_MS = 500;
|
||||||
|
|
||||||
type TelegramMessage = Message.CommonMessage;
|
type TelegramMessage = Message.CommonMessage;
|
||||||
|
|
||||||
|
type MediaGroupEntry = {
|
||||||
|
messages: Array<{
|
||||||
|
msg: TelegramMessage;
|
||||||
|
ctx: TelegramContext;
|
||||||
|
}>;
|
||||||
|
timer: ReturnType<typeof setTimeout>;
|
||||||
|
};
|
||||||
|
|
||||||
type TelegramContext = {
|
type TelegramContext = {
|
||||||
message: TelegramMessage;
|
message: TelegramMessage;
|
||||||
me?: { username?: string };
|
me?: { username?: string };
|
||||||
@@ -69,6 +81,8 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
const bot = new Bot(opts.token, { client });
|
const bot = new Bot(opts.token, { client });
|
||||||
bot.api.config.use(apiThrottler());
|
bot.api.config.use(apiThrottler());
|
||||||
|
|
||||||
|
const mediaGroupBuffer = new Map<string, MediaGroupEntry>();
|
||||||
|
|
||||||
const cfg = loadConfig();
|
const cfg = loadConfig();
|
||||||
const textLimit = resolveTextChunkLimit(cfg, "telegram");
|
const textLimit = resolveTextChunkLimit(cfg, "telegram");
|
||||||
const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom;
|
const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom;
|
||||||
@@ -94,24 +108,13 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
overrideOrder: "after-config",
|
overrideOrder: "after-config",
|
||||||
});
|
});
|
||||||
|
|
||||||
bot.on("message", async (ctx) => {
|
const processMessage = async (
|
||||||
try {
|
primaryCtx: TelegramContext,
|
||||||
const msg = ctx.message;
|
allMedia: Array<{ path: string; contentType?: string }>,
|
||||||
if (!msg) return;
|
) => {
|
||||||
|
const msg = primaryCtx.message;
|
||||||
const chatId = msg.chat.id;
|
const chatId = msg.chat.id;
|
||||||
const isGroup =
|
const isGroup = msg.chat.type === "group" || msg.chat.type === "supergroup";
|
||||||
msg.chat.type === "group" || msg.chat.type === "supergroup";
|
|
||||||
|
|
||||||
if (isGroup) {
|
|
||||||
const groupPolicy = resolveGroupPolicy(chatId);
|
|
||||||
if (groupPolicy.allowlistEnabled && !groupPolicy.allowed) {
|
|
||||||
logger.info(
|
|
||||||
{ chatId, title: msg.chat.title, reason: "not-allowed" },
|
|
||||||
"skipping group message",
|
|
||||||
);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const sendTyping = async () => {
|
const sendTyping = async () => {
|
||||||
try {
|
try {
|
||||||
@@ -140,7 +143,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const botUsername = ctx.me?.username?.toLowerCase();
|
const botUsername = primaryCtx.me?.username?.toLowerCase();
|
||||||
const allowFromList = Array.isArray(allowFrom)
|
const allowFromList = Array.isArray(allowFrom)
|
||||||
? allowFrom.map((entry) => String(entry).trim()).filter(Boolean)
|
? allowFrom.map((entry) => String(entry).trim()).filter(Boolean)
|
||||||
: [];
|
: [];
|
||||||
@@ -171,32 +174,15 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
!hasAnyMention &&
|
!hasAnyMention &&
|
||||||
commandAuthorized &&
|
commandAuthorized &&
|
||||||
hasControlCommand(msg.text ?? msg.caption ?? "");
|
hasControlCommand(msg.text ?? msg.caption ?? "");
|
||||||
const canDetectMention =
|
const canDetectMention = Boolean(botUsername) || mentionRegexes.length > 0;
|
||||||
Boolean(botUsername) || mentionRegexes.length > 0;
|
|
||||||
if (isGroup && requireMention && canDetectMention) {
|
if (isGroup && requireMention && canDetectMention) {
|
||||||
if (!wasMentioned && !shouldBypassMention) {
|
if (!wasMentioned && !shouldBypassMention) {
|
||||||
logger.info(
|
logger.info({ chatId, reason: "no-mention" }, "skipping group message");
|
||||||
{ chatId, reason: "no-mention" },
|
|
||||||
"skipping group message",
|
|
||||||
);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const media = await resolveMedia(
|
// ACK reactions
|
||||||
ctx,
|
|
||||||
mediaMaxBytes,
|
|
||||||
opts.token,
|
|
||||||
opts.proxyFetch,
|
|
||||||
);
|
|
||||||
const replyTarget = describeReplyTarget(msg);
|
|
||||||
const rawBody = (
|
|
||||||
msg.text ??
|
|
||||||
msg.caption ??
|
|
||||||
media?.placeholder ??
|
|
||||||
""
|
|
||||||
).trim();
|
|
||||||
if (!rawBody) return;
|
|
||||||
const shouldAckReaction = () => {
|
const shouldAckReaction = () => {
|
||||||
if (!ackReaction) return false;
|
if (!ackReaction) return false;
|
||||||
if (ackReactionScope === "all") return true;
|
if (ackReactionScope === "all") return true;
|
||||||
@@ -204,7 +190,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
if (ackReactionScope === "group-all") return isGroup;
|
if (ackReactionScope === "group-all") return isGroup;
|
||||||
if (ackReactionScope === "group-mentions") {
|
if (ackReactionScope === "group-mentions") {
|
||||||
if (!isGroup) return false;
|
if (!isGroup) return false;
|
||||||
if (!resolveGroupRequireMention(chatId)) return false;
|
if (!requireMention) return false;
|
||||||
if (!canDetectMention) return false;
|
if (!canDetectMention) return false;
|
||||||
return wasMentioned || shouldBypassMention;
|
return wasMentioned || shouldBypassMention;
|
||||||
}
|
}
|
||||||
@@ -230,8 +216,26 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let placeholder = "";
|
||||||
|
if (msg.photo) placeholder = "<media:image>";
|
||||||
|
else if (msg.video) placeholder = "<media:video>";
|
||||||
|
else if (msg.audio || msg.voice) placeholder = "<media:audio>";
|
||||||
|
else if (msg.document) placeholder = "<media:document>";
|
||||||
|
|
||||||
|
const replyTarget = describeReplyTarget(msg);
|
||||||
|
const rawBody = (msg.text ?? msg.caption ?? placeholder).trim();
|
||||||
|
if (!rawBody && allMedia.length === 0) return;
|
||||||
|
|
||||||
|
let bodyText = rawBody;
|
||||||
|
if (!bodyText && allMedia.length > 0) {
|
||||||
|
bodyText = `<media:image>${allMedia.length > 1 ? ` (${allMedia.length} images)` : ""}`;
|
||||||
|
}
|
||||||
|
|
||||||
const replySuffix = replyTarget
|
const replySuffix = replyTarget
|
||||||
? `\n\n[Replying to ${replyTarget.sender}${replyTarget.id ? ` id:${replyTarget.id}` : ""}]\n${replyTarget.body}\n[/Replying]`
|
? `\n\n[Replying to ${replyTarget.sender}${
|
||||||
|
replyTarget.id ? ` id:${replyTarget.id}` : ""
|
||||||
|
}]\n${replyTarget.body}\n[/Replying]`
|
||||||
: "";
|
: "";
|
||||||
const body = formatAgentEnvelope({
|
const body = formatAgentEnvelope({
|
||||||
surface: "Telegram",
|
surface: "Telegram",
|
||||||
@@ -239,7 +243,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
? buildGroupLabel(msg, chatId)
|
? buildGroupLabel(msg, chatId)
|
||||||
: buildSenderLabel(msg, chatId),
|
: buildSenderLabel(msg, chatId),
|
||||||
timestamp: msg.date ? msg.date * 1000 : undefined,
|
timestamp: msg.date ? msg.date * 1000 : undefined,
|
||||||
body: `${rawBody}${replySuffix}`,
|
body: `${bodyText}${replySuffix}`,
|
||||||
});
|
});
|
||||||
|
|
||||||
const ctxPayload = {
|
const ctxPayload = {
|
||||||
@@ -258,9 +262,15 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
ReplyToSender: replyTarget?.sender,
|
ReplyToSender: replyTarget?.sender,
|
||||||
Timestamp: msg.date ? msg.date * 1000 : undefined,
|
Timestamp: msg.date ? msg.date * 1000 : undefined,
|
||||||
WasMentioned: isGroup ? wasMentioned : undefined,
|
WasMentioned: isGroup ? wasMentioned : undefined,
|
||||||
MediaPath: media?.path,
|
MediaPath: allMedia[0]?.path,
|
||||||
MediaType: media?.contentType,
|
MediaType: allMedia[0]?.contentType,
|
||||||
MediaUrl: media?.path,
|
MediaUrl: allMedia[0]?.path,
|
||||||
|
MediaPaths: allMedia.length > 0 ? allMedia.map((m) => m.path) : undefined,
|
||||||
|
MediaUrls: allMedia.length > 0 ? allMedia.map((m) => m.path) : undefined,
|
||||||
|
MediaTypes:
|
||||||
|
allMedia.length > 0
|
||||||
|
? (allMedia.map((m) => m.contentType).filter(Boolean) as string[])
|
||||||
|
: undefined,
|
||||||
CommandAuthorized: commandAuthorized,
|
CommandAuthorized: commandAuthorized,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -285,8 +295,10 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
|
|
||||||
if (shouldLogVerbose()) {
|
if (shouldLogVerbose()) {
|
||||||
const preview = body.slice(0, 200).replace(/\n/g, "\\n");
|
const preview = body.slice(0, 200).replace(/\n/g, "\\n");
|
||||||
|
const mediaInfo =
|
||||||
|
allMedia.length > 1 ? ` mediaCount=${allMedia.length}` : "";
|
||||||
logVerbose(
|
logVerbose(
|
||||||
`telegram inbound: chatId=${chatId} from=${ctxPayload.From} len=${body.length} preview="${preview}"`,
|
`telegram inbound: chatId=${chatId} from=${ctxPayload.From} len=${body.length}${mediaInfo} preview="${preview}"`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -327,11 +339,96 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
});
|
});
|
||||||
typingController?.markDispatchIdle();
|
typingController?.markDispatchIdle();
|
||||||
if (!queuedFinal) return;
|
if (!queuedFinal) return;
|
||||||
|
};
|
||||||
|
|
||||||
|
bot.on("message", async (ctx) => {
|
||||||
|
try {
|
||||||
|
const msg = ctx.message;
|
||||||
|
if (!msg) return;
|
||||||
|
|
||||||
|
const chatId = msg.chat.id;
|
||||||
|
const isGroup =
|
||||||
|
msg.chat.type === "group" || msg.chat.type === "supergroup";
|
||||||
|
|
||||||
|
// Group policy check - skip disallowed groups early
|
||||||
|
if (isGroup) {
|
||||||
|
const groupPolicy = resolveGroupPolicy(chatId);
|
||||||
|
if (groupPolicy.allowlistEnabled && !groupPolicy.allowed) {
|
||||||
|
logger.info(
|
||||||
|
{ chatId, title: msg.chat.title, reason: "not-allowed" },
|
||||||
|
"skipping group message",
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Media group handling - buffer multi-image messages
|
||||||
|
const mediaGroupId = (msg as { media_group_id?: string }).media_group_id;
|
||||||
|
if (mediaGroupId) {
|
||||||
|
const existing = mediaGroupBuffer.get(mediaGroupId);
|
||||||
|
if (existing) {
|
||||||
|
clearTimeout(existing.timer);
|
||||||
|
existing.messages.push({ msg, ctx });
|
||||||
|
existing.timer = setTimeout(async () => {
|
||||||
|
mediaGroupBuffer.delete(mediaGroupId);
|
||||||
|
await processMediaGroup(existing);
|
||||||
|
}, MEDIA_GROUP_TIMEOUT_MS);
|
||||||
|
} else {
|
||||||
|
const entry: MediaGroupEntry = {
|
||||||
|
messages: [{ msg, ctx }],
|
||||||
|
timer: setTimeout(async () => {
|
||||||
|
mediaGroupBuffer.delete(mediaGroupId);
|
||||||
|
await processMediaGroup(entry);
|
||||||
|
}, MEDIA_GROUP_TIMEOUT_MS),
|
||||||
|
};
|
||||||
|
mediaGroupBuffer.set(mediaGroupId, entry);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const media = await resolveMedia(
|
||||||
|
ctx,
|
||||||
|
mediaMaxBytes,
|
||||||
|
opts.token,
|
||||||
|
opts.proxyFetch,
|
||||||
|
);
|
||||||
|
const allMedia = media
|
||||||
|
? [{ path: media.path, contentType: media.contentType }]
|
||||||
|
: [];
|
||||||
|
await processMessage(ctx, allMedia);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
runtime.error?.(danger(`handler failed: ${String(err)}`));
|
runtime.error?.(danger(`handler failed: ${String(err)}`));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const processMediaGroup = async (entry: MediaGroupEntry) => {
|
||||||
|
try {
|
||||||
|
entry.messages.sort((a, b) => a.msg.message_id - b.msg.message_id);
|
||||||
|
|
||||||
|
const captionMsg = entry.messages.find(
|
||||||
|
(m) => m.msg.caption || m.msg.text,
|
||||||
|
);
|
||||||
|
const primaryEntry = captionMsg ?? entry.messages[0];
|
||||||
|
|
||||||
|
const allMedia: Array<{ path: string; contentType?: string }> = [];
|
||||||
|
for (const { ctx } of entry.messages) {
|
||||||
|
const media = await resolveMedia(
|
||||||
|
ctx,
|
||||||
|
mediaMaxBytes,
|
||||||
|
opts.token,
|
||||||
|
opts.proxyFetch,
|
||||||
|
);
|
||||||
|
if (media) {
|
||||||
|
allMedia.push({ path: media.path, contentType: media.contentType });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
await processMessage(primaryEntry.ctx, allMedia);
|
||||||
|
} catch (err) {
|
||||||
|
runtime.error?.(danger(`media group handler failed: ${String(err)}`));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
return bot;
|
return bot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user