fix(telegram): send sticker pixels to vision models

This commit is contained in:
Ayaan Zaidi
2026-01-27 13:33:04 +05:30
committed by Ayaan Zaidi
parent 2ad550abe8
commit cc80495baa
3 changed files with 132 additions and 29 deletions

View File

@@ -1,5 +1,11 @@
// @ts-nocheck
import { EmbeddedBlockChunker } from "../agents/pi-embedded-block-chunker.js";
import {
findModelInCatalog,
loadModelCatalog,
modelSupportsVision,
} from "../agents/model-catalog.js";
import { resolveDefaultModelForAgent } from "../agents/model-selection.js";
import { resolveChunkMode } from "../auto-reply/chunk.js";
import { clearHistoryEntriesIfEnabled } from "../auto-reply/reply/history.js";
import { dispatchReplyWithBufferedBlockDispatcher } from "../auto-reply/reply/provider-dispatcher.js";
@@ -15,6 +21,18 @@ import { createTelegramDraftStream } from "./draft-stream.js";
import { cacheSticker, describeStickerImage } from "./sticker-cache.js";
import { resolveAgentDir } from "../agents/agent-scope.js";
async function resolveStickerVisionSupport(cfg, agentId) {
try {
const catalog = await loadModelCatalog({ config: cfg });
const defaultModel = resolveDefaultModelForAgent({ cfg, agentId });
const entry = findModelInCatalog(catalog, defaultModel.provider, defaultModel.model);
if (!entry) return false;
return entry.input ? modelSupportsVision(entry) : true;
} catch {
return false;
}
}
export const dispatchTelegramMessage = async ({
context,
bot,
@@ -133,14 +151,18 @@ export const dispatchTelegramMessage = async ({
// Handle uncached stickers: get a dedicated vision description before dispatch
// This ensures we cache a raw description rather than a conversational response
const sticker = ctxPayload.Sticker;
if (sticker?.fileUniqueId && !sticker.cachedDescription && ctxPayload.MediaPath) {
if (sticker?.fileUniqueId && ctxPayload.MediaPath) {
const agentDir = resolveAgentDir(cfg, route.agentId);
const description = await describeStickerImage({
imagePath: ctxPayload.MediaPath,
cfg,
agentDir,
agentId: route.agentId,
});
const stickerSupportsVision = await resolveStickerVisionSupport(cfg, route.agentId);
let description = sticker.cachedDescription ?? null;
if (!description) {
description = await describeStickerImage({
imagePath: ctxPayload.MediaPath,
cfg,
agentDir,
agentId: route.agentId,
});
}
if (description) {
// Format the description with sticker context
const stickerContext = [sticker.emoji, sticker.setName ? `from "${sticker.setName}"` : null]
@@ -148,17 +170,19 @@ export const dispatchTelegramMessage = async ({
.join(" ");
const formattedDesc = `[Sticker${stickerContext ? ` ${stickerContext}` : ""}] ${description}`;
// Update context to use description instead of image
sticker.cachedDescription = description;
ctxPayload.Body = formattedDesc;
ctxPayload.BodyForAgent = formattedDesc;
// Clear media paths so native vision doesn't process the image again
ctxPayload.MediaPath = undefined;
ctxPayload.MediaType = undefined;
ctxPayload.MediaUrl = undefined;
ctxPayload.MediaPaths = undefined;
ctxPayload.MediaUrls = undefined;
ctxPayload.MediaTypes = undefined;
if (!stickerSupportsVision) {
// Update context to use description instead of image
ctxPayload.Body = formattedDesc;
ctxPayload.BodyForAgent = formattedDesc;
// Clear media paths so native vision doesn't process the image again
ctxPayload.MediaPath = undefined;
ctxPayload.MediaType = undefined;
ctxPayload.MediaUrl = undefined;
ctxPayload.MediaPaths = undefined;
ctxPayload.MediaUrls = undefined;
ctxPayload.MediaTypes = undefined;
}
// Cache the description for future encounters
cacheSticker({