refactor: consolidate reply/media helpers

This commit is contained in:
Peter Steinberger
2026-01-10 02:40:41 +01:00
parent 9cd2662a86
commit 4075895c4c
17 changed files with 437 additions and 277 deletions

View File

@@ -1,6 +1,4 @@
// @ts-nocheck
import { Buffer } from "node:buffer";
import { sequentialize } from "@grammyjs/runner";
import { apiThrottler } from "@grammyjs/transformer-throttler";
import type { ApiClientOptions, Message } from "grammy";
@@ -22,12 +20,11 @@ import {
} from "../auto-reply/commands-registry.js";
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
import { resolveBlockStreamingChunking } from "../auto-reply/reply/block-streaming.js";
import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js";
import {
buildMentionRegexes,
matchesMentionPatterns,
} from "../auto-reply/reply/mentions.js";
import { createReplyDispatcherWithTyping } from "../auto-reply/reply/reply-dispatcher.js";
import { dispatchReplyWithBufferedBlockDispatcher } from "../auto-reply/reply/provider-dispatcher.js";
import { getReplyFromConfig } from "../auto-reply/reply.js";
import type { ReplyPayload } from "../auto-reply/types.js";
import type { ClawdbotConfig, ReplyToMode } from "../config/config.js";
@@ -46,7 +43,8 @@ import { formatErrorMessage } from "../infra/errors.js";
import { recordProviderActivity } from "../infra/provider-activity.js";
import { getChildLogger } from "../logging.js";
import { mediaKindFromMime } from "../media/constants.js";
import { detectMime, isGifMedia } from "../media/mime.js";
import { fetchRemoteMedia } from "../media/fetch.js";
import { isGifMedia } from "../media/mime.js";
import { saveMediaBuffer } from "../media/store.js";
import {
formatLocationText,
@@ -64,7 +62,7 @@ import {
readTelegramAllowFromStore,
upsertTelegramPairingRequest,
} from "./pairing-store.js";
import { resolveTelegramVoiceDecision } from "./voice.js";
import { resolveTelegramVoiceSend } from "./voice.js";
const PARSE_ERR_RE =
/can't parse entities|parse entities|find end of the entity/i;
@@ -805,8 +803,16 @@ export function createTelegramBot(opts: TelegramBotOptions) {
await draftStream.flush();
};
const { dispatcher, replyOptions, markDispatchIdle } =
createReplyDispatcherWithTyping({
const disableBlockStreaming =
Boolean(draftStream) ||
(typeof telegramCfg.blockStreaming === "boolean"
? !telegramCfg.blockStreaming
: undefined);
const { queuedFinal } = await dispatchReplyWithBufferedBlockDispatcher({
ctx: ctxPayload,
cfg,
dispatcherOptions: {
responsePrefix: resolveEffectiveMessagesConfig(cfg, route.agentId)
.responsePrefix,
deliver: async (payload, info) => {
@@ -831,20 +837,8 @@ export function createTelegramBot(opts: TelegramBotOptions) {
);
},
onReplyStart: sendTyping,
});
const disableBlockStreaming =
Boolean(draftStream) ||
(typeof telegramCfg.blockStreaming === "boolean"
? !telegramCfg.blockStreaming
: undefined);
const { queuedFinal } = await dispatchReplyFromConfig({
ctx: ctxPayload,
cfg,
dispatcher,
},
replyOptions: {
...replyOptions,
skillFilter,
onPartialReply: draftStream
? (payload) => updateDraftFromPartial(payload.text)
@@ -857,7 +851,6 @@ export function createTelegramBot(opts: TelegramBotOptions) {
disableBlockStreaming,
},
});
markDispatchIdle();
draftStream?.stop();
if (!queuedFinal) return;
if (
@@ -1409,16 +1402,12 @@ async function deliverReplies(params: {
...mediaParams,
});
} else if (kind === "audio") {
const { useVoice, reason } = resolveTelegramVoiceDecision({
const { useVoice } = resolveTelegramVoiceSend({
wantsVoice: reply.audioAsVoice === true, // default false (backward compatible)
contentType: media.contentType,
fileName,
logFallback: logVerbose,
});
if (reason) {
logVerbose(
`Telegram voice requested but ${reason}; sending as audio file instead.`,
);
}
if (useVoice) {
// Voice message - displays as round playable bubble (opt-in via [[audio_as_voice]])
await bot.api.sendVoice(chatId, file, {
@@ -1571,19 +1560,17 @@ async function resolveMedia(
throw new Error("fetch is not available; set telegram.proxy in config");
}
const url = `https://api.telegram.org/file/bot${token}/${file.file_path}`;
const res = await fetchImpl(url);
if (!res.ok) {
throw new Error(
`Failed to download telegram file: HTTP ${res.status} ${res.statusText}`,
);
}
const data = Buffer.from(await res.arrayBuffer());
const mime = await detectMime({
buffer: data,
headerMime: res.headers.get("content-type"),
filePath: file.file_path,
const fetched = await fetchRemoteMedia({
url,
fetchImpl,
filePathHint: file.file_path,
});
const saved = await saveMediaBuffer(data, mime, "inbound", maxBytes);
const saved = await saveMediaBuffer(
fetched.buffer,
fetched.contentType,
"inbound",
maxBytes,
);
let placeholder = "<media:document>";
if (msg.photo) placeholder = "<media:image>";
else if (msg.video) placeholder = "<media:video>";

View File

@@ -21,7 +21,7 @@ import {
parseTelegramTarget,
stripTelegramInternalPrefixes,
} from "./targets.js";
import { resolveTelegramVoiceDecision } from "./voice.js";
import { resolveTelegramVoiceSend } from "./voice.js";
type TelegramSendOpts = {
token?: string;
@@ -239,16 +239,12 @@ export async function sendMessageTelegram(
throw wrapChatNotFound(err);
});
} else if (kind === "audio") {
const { useVoice, reason } = resolveTelegramVoiceDecision({
const { useVoice } = resolveTelegramVoiceSend({
wantsVoice: opts.asVoice === true, // default false (backward compatible)
contentType: media.contentType,
fileName,
logFallback: logVerbose,
});
if (reason) {
logVerbose(
`Telegram voice requested but ${reason}; sending as audio file instead.`,
);
}
if (useVoice) {
result = await request(
() => api.sendVoice(chatId, file, mediaParams),

View File

@@ -0,0 +1,43 @@
import { describe, expect, it, vi } from "vitest";
import { resolveTelegramVoiceSend } from "./voice.js";
describe("resolveTelegramVoiceSend", () => {
it("skips voice when wantsVoice is false", () => {
const logFallback = vi.fn();
const result = resolveTelegramVoiceSend({
wantsVoice: false,
contentType: "audio/ogg",
fileName: "voice.ogg",
logFallback,
});
expect(result.useVoice).toBe(false);
expect(logFallback).not.toHaveBeenCalled();
});
it("logs fallback for incompatible media", () => {
const logFallback = vi.fn();
const result = resolveTelegramVoiceSend({
wantsVoice: true,
contentType: "audio/mpeg",
fileName: "track.mp3",
logFallback,
});
expect(result.useVoice).toBe(false);
expect(logFallback).toHaveBeenCalledWith(
"Telegram voice requested but media is audio/mpeg (track.mp3); sending as audio file instead.",
);
});
it("keeps voice when compatible", () => {
const logFallback = vi.fn();
const result = resolveTelegramVoiceSend({
wantsVoice: true,
contentType: "audio/ogg",
fileName: "voice.ogg",
logFallback,
});
expect(result.useVoice).toBe(true);
expect(logFallback).not.toHaveBeenCalled();
});
});

View File

@@ -1,4 +1,4 @@
import path from "node:path";
import { getFileExtension } from "../media/mime.js";
export function isTelegramVoiceCompatible(opts: {
contentType?: string | null;
@@ -10,7 +10,8 @@ export function isTelegramVoiceCompatible(opts: {
}
const fileName = opts.fileName?.trim();
if (!fileName) return false;
const ext = path.extname(fileName).toLowerCase();
const ext = getFileExtension(fileName);
if (!ext) return false;
return ext === ".ogg" || ext === ".opus" || ext === ".oga";
}
@@ -28,3 +29,18 @@ export function resolveTelegramVoiceDecision(opts: {
reason: `media is ${contentType} (${fileName})`,
};
}
export function resolveTelegramVoiceSend(opts: {
wantsVoice: boolean;
contentType?: string | null;
fileName?: string | null;
logFallback?: (message: string) => void;
}): { useVoice: boolean } {
const decision = resolveTelegramVoiceDecision(opts);
if (decision.reason && opts.logFallback) {
opts.logFallback(
`Telegram voice requested but ${decision.reason}; sending as audio file instead.`,
);
}
return { useVoice: decision.useVoice };
}