fix(telegram): improve sticker vision + cache (#2548) (thanks @longjos)
This commit is contained in:
@@ -53,6 +53,7 @@ Status: unreleased.
|
|||||||
- Telegram: keep topic IDs in restart sentinel notifications. (#1807) Thanks @hsrvc.
|
- Telegram: keep topic IDs in restart sentinel notifications. (#1807) Thanks @hsrvc.
|
||||||
- Telegram: add optional silent send flag (disable notifications). (#2382) Thanks @Suksham-sharma.
|
- Telegram: add optional silent send flag (disable notifications). (#2382) Thanks @Suksham-sharma.
|
||||||
- Telegram: support editing sent messages via message(action="edit"). (#2394) Thanks @marcelomar21.
|
- Telegram: support editing sent messages via message(action="edit"). (#2394) Thanks @marcelomar21.
|
||||||
|
- Telegram: add sticker receive/send with vision caching. (#2548) Thanks @longjos.
|
||||||
- Config: apply config.env before ${VAR} substitution. (#1813) Thanks @spanishflu-est1918.
|
- Config: apply config.env before ${VAR} substitution. (#1813) Thanks @spanishflu-est1918.
|
||||||
- Slack: clear ack reaction after streamed replies. (#2044) Thanks @fancyboi999.
|
- Slack: clear ack reaction after streamed replies. (#2044) Thanks @fancyboi999.
|
||||||
- macOS: keep custom SSH usernames in remote target. (#2046) Thanks @algal.
|
- macOS: keep custom SSH usernames in remote target. (#2046) Thanks @algal.
|
||||||
|
|||||||
@@ -395,10 +395,13 @@ When a user sends a sticker, Clawdbot handles it based on the sticker type:
|
|||||||
- **Animated stickers (TGS):** Skipped (Lottie format not supported for processing).
|
- **Animated stickers (TGS):** Skipped (Lottie format not supported for processing).
|
||||||
- **Video stickers (WEBM):** Skipped (video format not supported for processing).
|
- **Video stickers (WEBM):** Skipped (video format not supported for processing).
|
||||||
|
|
||||||
Template context fields available when receiving stickers:
|
Template context field available when receiving stickers:
|
||||||
- `StickerEmoji` — the emoji associated with the sticker
|
- `Sticker` — object with:
|
||||||
- `StickerSetName` — the name of the sticker set
|
- `emoji` — emoji associated with the sticker
|
||||||
- `StickerFileId` — the Telegram file ID (used for sending the same sticker back)
|
- `setName` — name of the sticker set
|
||||||
|
- `fileId` — Telegram file ID (send the same sticker back)
|
||||||
|
- `fileUniqueId` — stable ID for cache lookup
|
||||||
|
- `cachedDescription` — cached vision description when available
|
||||||
|
|
||||||
### Sticker cache
|
### Sticker cache
|
||||||
|
|
||||||
@@ -416,10 +419,11 @@ Stickers are processed through the AI's vision capabilities to generate descript
|
|||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"fileId": "CAACAgIAAxkBAAI...",
|
"fileId": "CAACAgIAAxkBAAI...",
|
||||||
|
"fileUniqueId": "AgADBAADb6cxG2Y",
|
||||||
"emoji": "👋",
|
"emoji": "👋",
|
||||||
"setName": "CoolCats",
|
"setName": "CoolCats",
|
||||||
"description": "A cartoon cat waving enthusiastically",
|
"description": "A cartoon cat waving enthusiastically",
|
||||||
"addedAt": "2026-01-15T10:30:00.000Z"
|
"cachedAt": "2026-01-15T10:30:00.000Z"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -458,7 +462,7 @@ The agent can send and search stickers using the `sticker` and `sticker-search`
|
|||||||
```
|
```
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- `fileId` (required) — the Telegram file ID of the sticker. Obtain this from `StickerFileId` when receiving a sticker, or from a `sticker-search` result.
|
- `fileId` (required) — the Telegram file ID of the sticker. Obtain this from `Sticker.fileId` when receiving a sticker, or from a `sticker-search` result.
|
||||||
- `replyTo` (optional) — message ID to reply to.
|
- `replyTo` (optional) — message ID to reply to.
|
||||||
- `threadId` (optional) — message thread ID for forum topics.
|
- `threadId` (optional) — message thread ID for forum topics.
|
||||||
|
|
||||||
@@ -543,7 +547,7 @@ Outbound Telegram API calls retry on transient network/429 errors with exponenti
|
|||||||
- Tool: `telegram` with `react` action (`chatId`, `messageId`, `emoji`).
|
- Tool: `telegram` with `react` action (`chatId`, `messageId`, `emoji`).
|
||||||
- Tool: `telegram` with `deleteMessage` action (`chatId`, `messageId`).
|
- Tool: `telegram` with `deleteMessage` action (`chatId`, `messageId`).
|
||||||
- Reaction removal semantics: see [/tools/reactions](/tools/reactions).
|
- Reaction removal semantics: see [/tools/reactions](/tools/reactions).
|
||||||
- Tool gating: `channels.telegram.actions.reactions`, `channels.telegram.actions.sendMessage`, `channels.telegram.actions.deleteMessage` (default: enabled).
|
- Tool gating: `channels.telegram.actions.reactions`, `channels.telegram.actions.sendMessage`, `channels.telegram.actions.deleteMessage` (default: enabled), and `channels.telegram.actions.sticker` (default: disabled).
|
||||||
|
|
||||||
## Reaction notifications
|
## Reaction notifications
|
||||||
|
|
||||||
|
|||||||
@@ -8,12 +8,17 @@ const sendMessageTelegram = vi.fn(async () => ({
|
|||||||
messageId: "789",
|
messageId: "789",
|
||||||
chatId: "123",
|
chatId: "123",
|
||||||
}));
|
}));
|
||||||
|
const sendStickerTelegram = vi.fn(async () => ({
|
||||||
|
messageId: "456",
|
||||||
|
chatId: "123",
|
||||||
|
}));
|
||||||
const deleteMessageTelegram = vi.fn(async () => ({ ok: true }));
|
const deleteMessageTelegram = vi.fn(async () => ({ ok: true }));
|
||||||
const originalToken = process.env.TELEGRAM_BOT_TOKEN;
|
const originalToken = process.env.TELEGRAM_BOT_TOKEN;
|
||||||
|
|
||||||
vi.mock("../../telegram/send.js", () => ({
|
vi.mock("../../telegram/send.js", () => ({
|
||||||
reactMessageTelegram: (...args: unknown[]) => reactMessageTelegram(...args),
|
reactMessageTelegram: (...args: unknown[]) => reactMessageTelegram(...args),
|
||||||
sendMessageTelegram: (...args: unknown[]) => sendMessageTelegram(...args),
|
sendMessageTelegram: (...args: unknown[]) => sendMessageTelegram(...args),
|
||||||
|
sendStickerTelegram: (...args: unknown[]) => sendStickerTelegram(...args),
|
||||||
deleteMessageTelegram: (...args: unknown[]) => deleteMessageTelegram(...args),
|
deleteMessageTelegram: (...args: unknown[]) => deleteMessageTelegram(...args),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@@ -21,6 +26,7 @@ describe("handleTelegramAction", () => {
|
|||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
reactMessageTelegram.mockClear();
|
reactMessageTelegram.mockClear();
|
||||||
sendMessageTelegram.mockClear();
|
sendMessageTelegram.mockClear();
|
||||||
|
sendStickerTelegram.mockClear();
|
||||||
deleteMessageTelegram.mockClear();
|
deleteMessageTelegram.mockClear();
|
||||||
process.env.TELEGRAM_BOT_TOKEN = "tok";
|
process.env.TELEGRAM_BOT_TOKEN = "tok";
|
||||||
});
|
});
|
||||||
@@ -96,6 +102,40 @@ describe("handleTelegramAction", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("rejects sticker actions when disabled by default", async () => {
|
||||||
|
const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;
|
||||||
|
await expect(
|
||||||
|
handleTelegramAction(
|
||||||
|
{
|
||||||
|
action: "sendSticker",
|
||||||
|
to: "123",
|
||||||
|
fileId: "sticker",
|
||||||
|
},
|
||||||
|
cfg,
|
||||||
|
),
|
||||||
|
).rejects.toThrow(/sticker actions are disabled/i);
|
||||||
|
expect(sendStickerTelegram).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("sends stickers when enabled", async () => {
|
||||||
|
const cfg = {
|
||||||
|
channels: { telegram: { botToken: "tok", actions: { sticker: true } } },
|
||||||
|
} as ClawdbotConfig;
|
||||||
|
await handleTelegramAction(
|
||||||
|
{
|
||||||
|
action: "sendSticker",
|
||||||
|
to: "123",
|
||||||
|
fileId: "sticker",
|
||||||
|
},
|
||||||
|
cfg,
|
||||||
|
);
|
||||||
|
expect(sendStickerTelegram).toHaveBeenCalledWith(
|
||||||
|
"123",
|
||||||
|
"sticker",
|
||||||
|
expect.objectContaining({ token: "tok" }),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it("removes reactions when remove flag set", async () => {
|
it("removes reactions when remove flag set", async () => {
|
||||||
const cfg = {
|
const cfg = {
|
||||||
channels: { telegram: { botToken: "tok", reactionLevel: "extensive" } },
|
channels: { telegram: { botToken: "tok", reactionLevel: "extensive" } },
|
||||||
|
|||||||
@@ -258,7 +258,7 @@ export async function handleTelegramAction(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (action === "sendSticker") {
|
if (action === "sendSticker") {
|
||||||
if (!isActionEnabled("sticker")) {
|
if (!isActionEnabled("sticker", false)) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Telegram sticker actions are disabled. Set channels.telegram.actions.sticker to true.",
|
"Telegram sticker actions are disabled. Set channels.telegram.actions.sticker to true.",
|
||||||
);
|
);
|
||||||
@@ -291,7 +291,7 @@ export async function handleTelegramAction(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (action === "searchSticker") {
|
if (action === "searchSticker") {
|
||||||
if (!isActionEnabled("sticker")) {
|
if (!isActionEnabled("sticker", false)) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Telegram sticker actions are disabled. Set channels.telegram.actions.sticker to true.",
|
"Telegram sticker actions are disabled. Set channels.telegram.actions.sticker to true.",
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -10,6 +10,13 @@ vi.mock("../../../agents/tools/telegram-actions.js", () => ({
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
describe("telegramMessageActions", () => {
|
describe("telegramMessageActions", () => {
|
||||||
|
it("excludes sticker actions when not enabled", () => {
|
||||||
|
const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;
|
||||||
|
const actions = telegramMessageActions.listActions({ cfg });
|
||||||
|
expect(actions).not.toContain("sticker");
|
||||||
|
expect(actions).not.toContain("sticker-search");
|
||||||
|
});
|
||||||
|
|
||||||
it("allows media-only sends and passes asVoice", async () => {
|
it("allows media-only sends and passes asVoice", async () => {
|
||||||
handleTelegramAction.mockClear();
|
handleTelegramAction.mockClear();
|
||||||
const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;
|
const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ export const telegramMessageActions: ChannelMessageActionAdapter = {
|
|||||||
if (gate("reactions")) actions.add("react");
|
if (gate("reactions")) actions.add("react");
|
||||||
if (gate("deleteMessage")) actions.add("delete");
|
if (gate("deleteMessage")) actions.add("delete");
|
||||||
if (gate("editMessage")) actions.add("edit");
|
if (gate("editMessage")) actions.add("edit");
|
||||||
if (gate("sticker")) {
|
if (gate("sticker", false)) {
|
||||||
actions.add("sticker");
|
actions.add("sticker");
|
||||||
actions.add("sticker-search");
|
actions.add("sticker-search");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -412,6 +412,39 @@ async function resolveAutoEntries(params: {
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export async function resolveAutoImageModel(params: {
|
||||||
|
cfg: ClawdbotConfig;
|
||||||
|
agentDir?: string;
|
||||||
|
activeModel?: ActiveMediaModel;
|
||||||
|
}): Promise<ActiveMediaModel | null> {
|
||||||
|
const providerRegistry = buildProviderRegistry();
|
||||||
|
const toActive = (entry: MediaUnderstandingModelConfig | null): ActiveMediaModel | null => {
|
||||||
|
if (!entry || entry.type === "cli") return null;
|
||||||
|
const provider = entry.provider;
|
||||||
|
if (!provider) return null;
|
||||||
|
const model = entry.model ?? DEFAULT_IMAGE_MODELS[provider];
|
||||||
|
if (!model) return null;
|
||||||
|
return { provider, model };
|
||||||
|
};
|
||||||
|
const activeEntry = await resolveActiveModelEntry({
|
||||||
|
cfg: params.cfg,
|
||||||
|
agentDir: params.agentDir,
|
||||||
|
providerRegistry,
|
||||||
|
capability: "image",
|
||||||
|
activeModel: params.activeModel,
|
||||||
|
});
|
||||||
|
const resolvedActive = toActive(activeEntry);
|
||||||
|
if (resolvedActive) return resolvedActive;
|
||||||
|
const keyEntry = await resolveKeyEntry({
|
||||||
|
cfg: params.cfg,
|
||||||
|
agentDir: params.agentDir,
|
||||||
|
providerRegistry,
|
||||||
|
capability: "image",
|
||||||
|
activeModel: params.activeModel,
|
||||||
|
});
|
||||||
|
return toActive(keyEntry);
|
||||||
|
}
|
||||||
|
|
||||||
async function resolveActiveModelEntry(params: {
|
async function resolveActiveModelEntry(params: {
|
||||||
cfg: ClawdbotConfig;
|
cfg: ClawdbotConfig;
|
||||||
agentDir?: string;
|
agentDir?: string;
|
||||||
|
|||||||
@@ -139,6 +139,7 @@ export const dispatchTelegramMessage = async ({
|
|||||||
imagePath: ctxPayload.MediaPath,
|
imagePath: ctxPayload.MediaPath,
|
||||||
cfg,
|
cfg,
|
||||||
agentDir,
|
agentDir,
|
||||||
|
agentId: route.agentId,
|
||||||
});
|
});
|
||||||
if (description) {
|
if (description) {
|
||||||
// Format the description with sticker context
|
// Format the description with sticker context
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ const middlewareUseSpy = vi.fn();
|
|||||||
const onSpy = vi.fn();
|
const onSpy = vi.fn();
|
||||||
const stopSpy = vi.fn();
|
const stopSpy = vi.fn();
|
||||||
const sendChatActionSpy = vi.fn();
|
const sendChatActionSpy = vi.fn();
|
||||||
|
const cacheStickerSpy = vi.fn();
|
||||||
|
const getCachedStickerSpy = vi.fn();
|
||||||
|
const describeStickerImageSpy = vi.fn();
|
||||||
|
|
||||||
type ApiStub = {
|
type ApiStub = {
|
||||||
config: { use: (arg: unknown) => void };
|
config: { use: (arg: unknown) => void };
|
||||||
@@ -79,6 +82,12 @@ vi.mock("../config/sessions.js", async (importOriginal) => {
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
vi.mock("./sticker-cache.js", () => ({
|
||||||
|
cacheSticker: (...args: unknown[]) => cacheStickerSpy(...args),
|
||||||
|
getCachedSticker: (...args: unknown[]) => getCachedStickerSpy(...args),
|
||||||
|
describeStickerImage: (...args: unknown[]) => describeStickerImageSpy(...args),
|
||||||
|
}));
|
||||||
|
|
||||||
vi.mock("./pairing-store.js", () => ({
|
vi.mock("./pairing-store.js", () => ({
|
||||||
readTelegramAllowFromStore: vi.fn(async () => [] as string[]),
|
readTelegramAllowFromStore: vi.fn(async () => [] as string[]),
|
||||||
upsertTelegramPairingRequest: vi.fn(async () => ({
|
upsertTelegramPairingRequest: vi.fn(async () => ({
|
||||||
@@ -408,6 +417,12 @@ describe("telegram media groups", () => {
|
|||||||
describe("telegram stickers", () => {
|
describe("telegram stickers", () => {
|
||||||
const STICKER_TEST_TIMEOUT_MS = process.platform === "win32" ? 30_000 : 20_000;
|
const STICKER_TEST_TIMEOUT_MS = process.platform === "win32" ? 30_000 : 20_000;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
cacheStickerSpy.mockReset();
|
||||||
|
getCachedStickerSpy.mockReset();
|
||||||
|
describeStickerImageSpy.mockReset();
|
||||||
|
});
|
||||||
|
|
||||||
it(
|
it(
|
||||||
"downloads static sticker (WEBP) and includes sticker metadata",
|
"downloads static sticker (WEBP) and includes sticker metadata",
|
||||||
async () => {
|
async () => {
|
||||||
@@ -481,6 +496,88 @@ describe("telegram stickers", () => {
|
|||||||
STICKER_TEST_TIMEOUT_MS,
|
STICKER_TEST_TIMEOUT_MS,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
it(
|
||||||
|
"refreshes cached sticker metadata on cache hit",
|
||||||
|
async () => {
|
||||||
|
const { createTelegramBot } = await import("./bot.js");
|
||||||
|
const replyModule = await import("../auto-reply/reply.js");
|
||||||
|
const replySpy = replyModule.__replySpy as unknown as ReturnType<typeof vi.fn>;
|
||||||
|
|
||||||
|
onSpy.mockReset();
|
||||||
|
replySpy.mockReset();
|
||||||
|
sendChatActionSpy.mockReset();
|
||||||
|
|
||||||
|
getCachedStickerSpy.mockReturnValue({
|
||||||
|
fileId: "old_file_id",
|
||||||
|
fileUniqueId: "sticker_unique_456",
|
||||||
|
emoji: "😴",
|
||||||
|
setName: "OldSet",
|
||||||
|
description: "Cached description",
|
||||||
|
cachedAt: "2026-01-20T10:00:00.000Z",
|
||||||
|
});
|
||||||
|
|
||||||
|
const runtimeError = vi.fn();
|
||||||
|
createTelegramBot({
|
||||||
|
token: "tok",
|
||||||
|
runtime: {
|
||||||
|
log: vi.fn(),
|
||||||
|
error: runtimeError,
|
||||||
|
exit: () => {
|
||||||
|
throw new Error("exit");
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const handler = onSpy.mock.calls.find((call) => call[0] === "message")?.[1] as (
|
||||||
|
ctx: Record<string, unknown>,
|
||||||
|
) => Promise<void>;
|
||||||
|
expect(handler).toBeDefined();
|
||||||
|
|
||||||
|
const fetchSpy = vi.spyOn(globalThis, "fetch" as never).mockResolvedValueOnce({
|
||||||
|
ok: true,
|
||||||
|
status: 200,
|
||||||
|
statusText: "OK",
|
||||||
|
headers: { get: () => "image/webp" },
|
||||||
|
arrayBuffer: async () => new Uint8Array([0x52, 0x49, 0x46, 0x46]).buffer,
|
||||||
|
} as Response);
|
||||||
|
|
||||||
|
await handler({
|
||||||
|
message: {
|
||||||
|
message_id: 103,
|
||||||
|
chat: { id: 1234, type: "private" },
|
||||||
|
sticker: {
|
||||||
|
file_id: "new_file_id",
|
||||||
|
file_unique_id: "sticker_unique_456",
|
||||||
|
type: "regular",
|
||||||
|
width: 512,
|
||||||
|
height: 512,
|
||||||
|
is_animated: false,
|
||||||
|
is_video: false,
|
||||||
|
emoji: "🔥",
|
||||||
|
set_name: "NewSet",
|
||||||
|
},
|
||||||
|
date: 1736380800,
|
||||||
|
},
|
||||||
|
me: { username: "clawdbot_bot" },
|
||||||
|
getFile: async () => ({ file_path: "stickers/sticker.webp" }),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(runtimeError).not.toHaveBeenCalled();
|
||||||
|
expect(cacheStickerSpy).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
fileId: "new_file_id",
|
||||||
|
emoji: "🔥",
|
||||||
|
setName: "NewSet",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
const payload = replySpy.mock.calls[0][0];
|
||||||
|
expect(payload.Sticker?.fileId).toBe("new_file_id");
|
||||||
|
expect(payload.Sticker?.cachedDescription).toBe("Cached description");
|
||||||
|
|
||||||
|
fetchSpy.mockRestore();
|
||||||
|
},
|
||||||
|
STICKER_TEST_TIMEOUT_MS,
|
||||||
|
);
|
||||||
|
|
||||||
it(
|
it(
|
||||||
"skips animated stickers (TGS format)",
|
"skips animated stickers (TGS format)",
|
||||||
async () => {
|
async () => {
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ import { buildInlineKeyboard } from "../send.js";
|
|||||||
import { resolveTelegramVoiceSend } from "../voice.js";
|
import { resolveTelegramVoiceSend } from "../voice.js";
|
||||||
import { buildTelegramThreadParams, resolveTelegramReplyId } from "./helpers.js";
|
import { buildTelegramThreadParams, resolveTelegramReplyId } from "./helpers.js";
|
||||||
import type { StickerMetadata, TelegramContext } from "./types.js";
|
import type { StickerMetadata, TelegramContext } from "./types.js";
|
||||||
import { getCachedSticker } from "../sticker-cache.js";
|
import { cacheSticker, getCachedSticker } from "../sticker-cache.js";
|
||||||
|
|
||||||
const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
|
const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
|
||||||
const VOICE_FORBIDDEN_RE = /VOICE_MESSAGES_FORBIDDEN/;
|
const VOICE_FORBIDDEN_RE = /VOICE_MESSAGES_FORBIDDEN/;
|
||||||
@@ -303,14 +303,26 @@ export async function resolveMedia(
|
|||||||
const cached = sticker.file_unique_id ? getCachedSticker(sticker.file_unique_id) : null;
|
const cached = sticker.file_unique_id ? getCachedSticker(sticker.file_unique_id) : null;
|
||||||
if (cached) {
|
if (cached) {
|
||||||
logVerbose(`telegram: sticker cache hit for ${sticker.file_unique_id}`);
|
logVerbose(`telegram: sticker cache hit for ${sticker.file_unique_id}`);
|
||||||
|
const fileId = sticker.file_id ?? cached.fileId;
|
||||||
|
const emoji = sticker.emoji ?? cached.emoji;
|
||||||
|
const setName = sticker.set_name ?? cached.setName;
|
||||||
|
if (fileId !== cached.fileId || emoji !== cached.emoji || setName !== cached.setName) {
|
||||||
|
// Refresh cached sticker metadata on hits so sends/searches use latest file_id.
|
||||||
|
cacheSticker({
|
||||||
|
...cached,
|
||||||
|
fileId,
|
||||||
|
emoji,
|
||||||
|
setName,
|
||||||
|
});
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
path: saved.path,
|
path: saved.path,
|
||||||
contentType: saved.contentType,
|
contentType: saved.contentType,
|
||||||
placeholder: "<media:sticker>",
|
placeholder: "<media:sticker>",
|
||||||
stickerMetadata: {
|
stickerMetadata: {
|
||||||
emoji: cached.emoji,
|
emoji,
|
||||||
setName: cached.setName,
|
setName,
|
||||||
fileId: cached.fileId,
|
fileId,
|
||||||
fileUniqueId: sticker.file_unique_id,
|
fileUniqueId: sticker.file_unique_id,
|
||||||
cachedDescription: cached.description,
|
cachedDescription: cached.description,
|
||||||
},
|
},
|
||||||
@@ -330,7 +342,7 @@ export async function resolveMedia(
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logVerbose(`telegram: failed to process sticker: ${err}`);
|
logVerbose(`telegram: failed to process sticker: ${String(err)}`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,13 @@ import type { ClawdbotConfig } from "../config/config.js";
|
|||||||
import { STATE_DIR_CLAWDBOT } from "../config/paths.js";
|
import { STATE_DIR_CLAWDBOT } from "../config/paths.js";
|
||||||
import { loadJsonFile, saveJsonFile } from "../infra/json-file.js";
|
import { loadJsonFile, saveJsonFile } from "../infra/json-file.js";
|
||||||
import { logVerbose } from "../globals.js";
|
import { logVerbose } from "../globals.js";
|
||||||
import { resolveApiKeyForProvider } from "../agents/model-auth.js";
|
import {
|
||||||
|
findModelInCatalog,
|
||||||
|
loadModelCatalog,
|
||||||
|
modelSupportsVision,
|
||||||
|
} from "../agents/model-catalog.js";
|
||||||
|
import { resolveDefaultModelForAgent } from "../agents/model-selection.js";
|
||||||
|
import { resolveAutoImageModel } from "../media-understanding/runner.js";
|
||||||
|
|
||||||
const CACHE_FILE = path.join(STATE_DIR_CLAWDBOT, "telegram", "sticker-cache.json");
|
const CACHE_FILE = path.join(STATE_DIR_CLAWDBOT, "telegram", "sticker-cache.json");
|
||||||
const CACHE_VERSION = 1;
|
const CACHE_VERSION = 1;
|
||||||
@@ -135,18 +141,11 @@ export function getCacheStats(): { count: number; oldestAt?: string; newestAt?:
|
|||||||
const STICKER_DESCRIPTION_PROMPT =
|
const STICKER_DESCRIPTION_PROMPT =
|
||||||
"Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective.";
|
"Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective.";
|
||||||
|
|
||||||
const VISION_PROVIDERS = ["anthropic", "openai", "google", "minimax"] as const;
|
|
||||||
const DEFAULT_VISION_MODELS: Record<string, string> = {
|
|
||||||
anthropic: "claude-sonnet-4-20250514",
|
|
||||||
openai: "gpt-4o-mini",
|
|
||||||
google: "gemini-2.0-flash",
|
|
||||||
minimax: "MiniMax-VL-01",
|
|
||||||
};
|
|
||||||
|
|
||||||
export interface DescribeStickerParams {
|
export interface DescribeStickerParams {
|
||||||
imagePath: string;
|
imagePath: string;
|
||||||
cfg: ClawdbotConfig;
|
cfg: ClawdbotConfig;
|
||||||
agentDir?: string;
|
agentDir?: string;
|
||||||
|
agentId?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -155,26 +154,35 @@ export interface DescribeStickerParams {
|
|||||||
* Returns null if no vision provider is available.
|
* Returns null if no vision provider is available.
|
||||||
*/
|
*/
|
||||||
export async function describeStickerImage(params: DescribeStickerParams): Promise<string | null> {
|
export async function describeStickerImage(params: DescribeStickerParams): Promise<string | null> {
|
||||||
const { imagePath, cfg, agentDir } = params;
|
const { imagePath, cfg, agentDir, agentId } = params;
|
||||||
|
|
||||||
// Find a vision provider with available API key
|
const defaultModel = resolveDefaultModelForAgent({ cfg, agentId });
|
||||||
let provider: string | null = null;
|
let activeModel = undefined as { provider: string; model: string } | undefined;
|
||||||
for (const p of VISION_PROVIDERS) {
|
try {
|
||||||
try {
|
const catalog = await loadModelCatalog({ config: cfg });
|
||||||
await resolveApiKeyForProvider({ provider: p, cfg, agentDir });
|
const entry = findModelInCatalog(catalog, defaultModel.provider, defaultModel.model);
|
||||||
provider = p;
|
if (modelSupportsVision(entry)) {
|
||||||
break;
|
activeModel = { provider: defaultModel.provider, model: defaultModel.model };
|
||||||
} catch {
|
|
||||||
// No key for this provider, try next
|
|
||||||
}
|
}
|
||||||
|
} catch {
|
||||||
|
// Ignore catalog failures; fall back to auto selection.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!provider) {
|
const resolved = await resolveAutoImageModel({
|
||||||
|
cfg,
|
||||||
|
agentDir,
|
||||||
|
activeModel,
|
||||||
|
});
|
||||||
|
if (!resolved) {
|
||||||
logVerbose("telegram: no vision provider available for sticker description");
|
logVerbose("telegram: no vision provider available for sticker description");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const model = DEFAULT_VISION_MODELS[provider];
|
const { provider, model } = resolved;
|
||||||
|
if (!model) {
|
||||||
|
logVerbose(`telegram: no vision model available for ${provider}`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
logVerbose(`telegram: describing sticker with ${provider}/${model}`);
|
logVerbose(`telegram: describing sticker with ${provider}/${model}`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -195,7 +203,7 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
|
|||||||
});
|
});
|
||||||
return result.text;
|
return result.text;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logVerbose(`telegram: failed to describe sticker: ${err}`);
|
logVerbose(`telegram: failed to describe sticker: ${String(err)}`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user