fix(telegram): send sticker pixels to vision models

2026-01-27 13:33:04 +05:30
parent 2ad550abe8
commit cc80495baa
3 changed files with 132 additions and 29 deletions
--- a/src/telegram/bot-message-dispatch.ts
+++ b/src/telegram/bot-message-dispatch.ts
@@ -1,5 +1,11 @@
 // @ts-nocheck
 import { EmbeddedBlockChunker } from "../agents/pi-embedded-block-chunker.js";
+import {
+  findModelInCatalog,
+  loadModelCatalog,
+  modelSupportsVision,
+} from "../agents/model-catalog.js";
+import { resolveDefaultModelForAgent } from "../agents/model-selection.js";
 import { resolveChunkMode } from "../auto-reply/chunk.js";
 import { clearHistoryEntriesIfEnabled } from "../auto-reply/reply/history.js";
 import { dispatchReplyWithBufferedBlockDispatcher } from "../auto-reply/reply/provider-dispatcher.js";
@@ -15,6 +21,18 @@ import { createTelegramDraftStream } from "./draft-stream.js";
 import { cacheSticker, describeStickerImage } from "./sticker-cache.js";
 import { resolveAgentDir } from "../agents/agent-scope.js";

+async function resolveStickerVisionSupport(cfg, agentId) {
+  try {
+    const catalog = await loadModelCatalog({ config: cfg });
+    const defaultModel = resolveDefaultModelForAgent({ cfg, agentId });
+    const entry = findModelInCatalog(catalog, defaultModel.provider, defaultModel.model);
+    if (!entry) return false;
+    return entry.input ? modelSupportsVision(entry) : true;
+  } catch {
+    return false;
+  }
+}
+
 export const dispatchTelegramMessage = async ({
  context,
  bot,
@@ -133,14 +151,18 @@ export const dispatchTelegramMessage = async ({
  // Handle uncached stickers: get a dedicated vision description before dispatch
  // This ensures we cache a raw description rather than a conversational response
  const sticker = ctxPayload.Sticker;
-  if (sticker?.fileUniqueId && !sticker.cachedDescription && ctxPayload.MediaPath) {
+  if (sticker?.fileUniqueId && ctxPayload.MediaPath) {
    const agentDir = resolveAgentDir(cfg, route.agentId);
-    const description = await describeStickerImage({
-      imagePath: ctxPayload.MediaPath,
-      cfg,
-      agentDir,
-      agentId: route.agentId,
-    });
+    const stickerSupportsVision = await resolveStickerVisionSupport(cfg, route.agentId);
+    let description = sticker.cachedDescription ?? null;
+    if (!description) {
+      description = await describeStickerImage({
+        imagePath: ctxPayload.MediaPath,
+        cfg,
+        agentDir,
+        agentId: route.agentId,
+      });
+    }
    if (description) {
      // Format the description with sticker context
      const stickerContext = [sticker.emoji, sticker.setName ? `from "${sticker.setName}"` : null]
@@ -148,17 +170,19 @@ export const dispatchTelegramMessage = async ({
        .join(" ");
      const formattedDesc = `[Sticker${stickerContext ? ` ${stickerContext}` : ""}] ${description}`;

-      // Update context to use description instead of image
      sticker.cachedDescription = description;
-      ctxPayload.Body = formattedDesc;
-      ctxPayload.BodyForAgent = formattedDesc;
-      // Clear media paths so native vision doesn't process the image again
-      ctxPayload.MediaPath = undefined;
-      ctxPayload.MediaType = undefined;
-      ctxPayload.MediaUrl = undefined;
-      ctxPayload.MediaPaths = undefined;
-      ctxPayload.MediaUrls = undefined;
-      ctxPayload.MediaTypes = undefined;
+      if (!stickerSupportsVision) {
+        // Update context to use description instead of image
+        ctxPayload.Body = formattedDesc;
+        ctxPayload.BodyForAgent = formattedDesc;
+        // Clear media paths so native vision doesn't process the image again
+        ctxPayload.MediaPath = undefined;
+        ctxPayload.MediaType = undefined;
+        ctxPayload.MediaUrl = undefined;
+        ctxPayload.MediaPaths = undefined;
+        ctxPayload.MediaUrls = undefined;
+        ctxPayload.MediaTypes = undefined;
+      }

      // Cache the description for future encounters
      cacheSticker({