refactor: consolidate reply/media helpers

2026-01-10 02:40:41 +01:00
parent 9cd2662a86
commit 4075895c4c
17 changed files with 437 additions and 277 deletions
--- a/src/telegram/bot.ts
+++ b/src/telegram/bot.ts
@@ -1,6 +1,4 @@
 // @ts-nocheck
-import { Buffer } from "node:buffer";
-
 import { sequentialize } from "@grammyjs/runner";
 import { apiThrottler } from "@grammyjs/transformer-throttler";
 import type { ApiClientOptions, Message } from "grammy";
@@ -22,12 +20,11 @@ import {
 } from "../auto-reply/commands-registry.js";
 import { formatAgentEnvelope } from "../auto-reply/envelope.js";
 import { resolveBlockStreamingChunking } from "../auto-reply/reply/block-streaming.js";
-import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js";
 import {
  buildMentionRegexes,
  matchesMentionPatterns,
 } from "../auto-reply/reply/mentions.js";
-import { createReplyDispatcherWithTyping } from "../auto-reply/reply/reply-dispatcher.js";
+import { dispatchReplyWithBufferedBlockDispatcher } from "../auto-reply/reply/provider-dispatcher.js";
 import { getReplyFromConfig } from "../auto-reply/reply.js";
 import type { ReplyPayload } from "../auto-reply/types.js";
 import type { ClawdbotConfig, ReplyToMode } from "../config/config.js";
@@ -46,7 +43,8 @@ import { formatErrorMessage } from "../infra/errors.js";
 import { recordProviderActivity } from "../infra/provider-activity.js";
 import { getChildLogger } from "../logging.js";
 import { mediaKindFromMime } from "../media/constants.js";
-import { detectMime, isGifMedia } from "../media/mime.js";
+import { fetchRemoteMedia } from "../media/fetch.js";
+import { isGifMedia } from "../media/mime.js";
 import { saveMediaBuffer } from "../media/store.js";
 import {
  formatLocationText,
@@ -64,7 +62,7 @@ import {
  readTelegramAllowFromStore,
  upsertTelegramPairingRequest,
 } from "./pairing-store.js";
-import { resolveTelegramVoiceDecision } from "./voice.js";
+import { resolveTelegramVoiceSend } from "./voice.js";

 const PARSE_ERR_RE =
  /can't parse entities|parse entities|find end of the entity/i;
@@ -805,8 +803,16 @@ export function createTelegramBot(opts: TelegramBotOptions) {
      await draftStream.flush();
    };

-    const { dispatcher, replyOptions, markDispatchIdle } =
-      createReplyDispatcherWithTyping({
+    const disableBlockStreaming =
+      Boolean(draftStream) ||
+      (typeof telegramCfg.blockStreaming === "boolean"
+        ? !telegramCfg.blockStreaming
+        : undefined);
+
+    const { queuedFinal } = await dispatchReplyWithBufferedBlockDispatcher({
+      ctx: ctxPayload,
+      cfg,
+      dispatcherOptions: {
        responsePrefix: resolveEffectiveMessagesConfig(cfg, route.agentId)
          .responsePrefix,
        deliver: async (payload, info) => {
@@ -831,20 +837,8 @@ export function createTelegramBot(opts: TelegramBotOptions) {
          );
        },
        onReplyStart: sendTyping,
-      });
-
-    const disableBlockStreaming =
-      Boolean(draftStream) ||
-      (typeof telegramCfg.blockStreaming === "boolean"
-        ? !telegramCfg.blockStreaming
-        : undefined);
-
-    const { queuedFinal } = await dispatchReplyFromConfig({
-      ctx: ctxPayload,
-      cfg,
-      dispatcher,
+      },
      replyOptions: {
-        ...replyOptions,
        skillFilter,
        onPartialReply: draftStream
          ? (payload) => updateDraftFromPartial(payload.text)
@@ -857,7 +851,6 @@ export function createTelegramBot(opts: TelegramBotOptions) {
        disableBlockStreaming,
      },
    });
-    markDispatchIdle();
    draftStream?.stop();
    if (!queuedFinal) return;
    if (
@@ -1409,16 +1402,12 @@ async function deliverReplies(params: {
          ...mediaParams,
        });
      } else if (kind === "audio") {
-        const { useVoice, reason } = resolveTelegramVoiceDecision({
+        const { useVoice } = resolveTelegramVoiceSend({
          wantsVoice: reply.audioAsVoice === true, // default false (backward compatible)
          contentType: media.contentType,
          fileName,
+          logFallback: logVerbose,
        });
-        if (reason) {
-          logVerbose(
-            `Telegram voice requested but ${reason}; sending as audio file instead.`,
-          );
-        }
        if (useVoice) {
          // Voice message - displays as round playable bubble (opt-in via [[audio_as_voice]])
          await bot.api.sendVoice(chatId, file, {
@@ -1571,19 +1560,17 @@ async function resolveMedia(
    throw new Error("fetch is not available; set telegram.proxy in config");
  }
  const url = `https://api.telegram.org/file/bot${token}/${file.file_path}`;
-  const res = await fetchImpl(url);
-  if (!res.ok) {
-    throw new Error(
-      `Failed to download telegram file: HTTP ${res.status} ${res.statusText}`,
-    );
-  }
-  const data = Buffer.from(await res.arrayBuffer());
-  const mime = await detectMime({
-    buffer: data,
-    headerMime: res.headers.get("content-type"),
-    filePath: file.file_path,
+  const fetched = await fetchRemoteMedia({
+    url,
+    fetchImpl,
+    filePathHint: file.file_path,
  });
-  const saved = await saveMediaBuffer(data, mime, "inbound", maxBytes);
+  const saved = await saveMediaBuffer(
+    fetched.buffer,
+    fetched.contentType,
+    "inbound",
+    maxBytes,
+  );
  let placeholder = "<media:document>";
  if (msg.photo) placeholder = "<media:image>";
  else if (msg.video) placeholder = "<media:video>";
--- a/src/telegram/send.ts
+++ b/src/telegram/send.ts
@@ -21,7 +21,7 @@ import {
  parseTelegramTarget,
  stripTelegramInternalPrefixes,
 } from "./targets.js";
-import { resolveTelegramVoiceDecision } from "./voice.js";
+import { resolveTelegramVoiceSend } from "./voice.js";

 type TelegramSendOpts = {
  token?: string;
@@ -239,16 +239,12 @@ export async function sendMessageTelegram(
        throw wrapChatNotFound(err);
      });
    } else if (kind === "audio") {
-      const { useVoice, reason } = resolveTelegramVoiceDecision({
+      const { useVoice } = resolveTelegramVoiceSend({
        wantsVoice: opts.asVoice === true, // default false (backward compatible)
        contentType: media.contentType,
        fileName,
+        logFallback: logVerbose,
      });
-      if (reason) {
-        logVerbose(
-          `Telegram voice requested but ${reason}; sending as audio file instead.`,
-        );
-      }
      if (useVoice) {
        result = await request(
          () => api.sendVoice(chatId, file, mediaParams),
--- a/src/telegram/voice.test.ts
+++ b/src/telegram/voice.test.ts
@@ -0,0 +1,43 @@
+import { describe, expect, it, vi } from "vitest";
+
+import { resolveTelegramVoiceSend } from "./voice.js";
+
+describe("resolveTelegramVoiceSend", () => {
+  /**
+   * Invokes the resolver with a fresh spy as `logFallback` and returns both
+   * the decision and the spy so each case can assert on fallback logging.
+   */
+  const run = (wantsVoice: boolean, contentType: string, fileName: string) => {
+    const logFallback = vi.fn();
+    const { useVoice } = resolveTelegramVoiceSend({
+      wantsVoice,
+      contentType,
+      fileName,
+      logFallback,
+    });
+    return { useVoice, logFallback };
+  };
+
+  it("skips voice when wantsVoice is false", () => {
+    // Voice was never requested, so there is nothing to fall back from.
+    const { useVoice, logFallback } = run(false, "audio/ogg", "voice.ogg");
+    expect(useVoice).toBe(false);
+    expect(logFallback).not.toHaveBeenCalled();
+  });
+
+  it("logs fallback for incompatible media", () => {
+    // MP3 is rejected for voice; the exact log wording is asserted verbatim.
+    const { useVoice, logFallback } = run(true, "audio/mpeg", "track.mp3");
+    expect(useVoice).toBe(false);
+    expect(logFallback).toHaveBeenCalledWith(
+      "Telegram voice requested but media is audio/mpeg (track.mp3); sending as audio file instead.",
+    );
+  });
+
+  it("keeps voice when compatible", () => {
+    // OGG content type and extension: voice is honored and nothing is logged.
+    const { useVoice, logFallback } = run(true, "audio/ogg", "voice.ogg");
+    expect(useVoice).toBe(true);
+    expect(logFallback).not.toHaveBeenCalled();
+  });
+});
--- a/src/telegram/voice.ts
+++ b/src/telegram/voice.ts
@@ -1,4 +1,4 @@
-import path from "node:path";
+import { getFileExtension } from "../media/mime.js";

 export function isTelegramVoiceCompatible(opts: {
  contentType?: string | null;
@@ -10,7 +10,8 @@ export function isTelegramVoiceCompatible(opts: {
  }
  const fileName = opts.fileName?.trim();
  if (!fileName) return false;
-  const ext = path.extname(fileName).toLowerCase();
+  const ext = getFileExtension(fileName);
+  if (!ext) return false;
  return ext === ".ogg" || ext === ".opus" || ext === ".oga";
 }

@@ -28,3 +29,18 @@ export function resolveTelegramVoiceDecision(opts: {
    reason: `media is ${contentType} (${fileName})`,
  };
 }
+
+/** Resolves the voice decision and reports any fallback via `logFallback`. */
+export function resolveTelegramVoiceSend(opts: {
+  wantsVoice: boolean;
+  contentType?: string | null;
+  fileName?: string | null;
+  logFallback?: (message: string) => void;
+}): { useVoice: boolean } {
+  const { useVoice, reason } = resolveTelegramVoiceDecision(opts);
+  if (reason) {
+    const notice = `Telegram voice requested but ${reason}; sending as audio file instead.`;
+    opts.logFallback?.(notice);
+  }
+  return { useVoice };
+}