refactor: extend media understanding

This commit is contained in:
Peter Steinberger
2026-01-17 07:17:09 +00:00
parent 86a46874da
commit 0c0e1e4226
16 changed files with 674 additions and 550 deletions

View File

@@ -3,5 +3,6 @@ import { describeImageWithModel } from "../image.js";
/**
 * Media-understanding provider definition for the `"anthropic"` id.
 *
 * Declares image as its only capability and handles image description
 * through the shared `describeImageWithModel` helper.
 */
export const anthropicProvider: MediaUnderstandingProvider = {
  id: "anthropic",
  capabilities: ["image"],
  describeImage: describeImageWithModel,
};

View File

@@ -4,6 +4,7 @@ import { describeGeminiVideo } from "./video.js";
/**
 * Media-understanding provider definition for the `"google"` id.
 *
 * Declares image, audio and video capabilities. Image description goes
 * through the shared `describeImageWithModel` helper and video description
 * through `describeGeminiVideo`.
 *
 * NOTE(review): "audio" is declared but this object wires no
 * `transcribeAudio` handler — confirm audio is served by another path.
 */
export const googleProvider: MediaUnderstandingProvider = {
  id: "google",
  capabilities: ["image", "audio", "video"],
  describeImage: describeImageWithModel,
  describeVideo: describeGeminiVideo,
};

View File

@@ -5,6 +5,7 @@ const DEFAULT_GROQ_AUDIO_BASE_URL = "https://api.groq.com/openai/v1";
export const groqProvider: MediaUnderstandingProvider = {
id: "groq",
capabilities: ["audio"],
transcribeAudio: (req) =>
transcribeOpenAiCompatibleAudio({
...req,

View File

@@ -29,7 +29,16 @@ export function buildMediaUnderstandingRegistry(
}
if (overrides) {
for (const [key, provider] of Object.entries(overrides)) {
registry.set(normalizeMediaProviderId(key), provider);
const normalizedKey = normalizeMediaProviderId(key);
const existing = registry.get(normalizedKey);
const merged = existing
? {
...existing,
...provider,
capabilities: provider.capabilities ?? existing.capabilities,
}
: provider;
registry.set(normalizedKey, merged);
}
}
return registry;

View File

@@ -3,5 +3,6 @@ import { describeImageWithModel } from "../image.js";
/**
 * Media-understanding provider definition for the `"minimax"` id.
 *
 * Declares image as its only capability and handles image description
 * through the shared `describeImageWithModel` helper.
 */
export const minimaxProvider: MediaUnderstandingProvider = {
  id: "minimax",
  capabilities: ["image"],
  describeImage: describeImageWithModel,
};

View File

@@ -4,6 +4,7 @@ import { transcribeOpenAiCompatibleAudio } from "./audio.js";
/**
 * Media-understanding provider definition for the `"openai"` id.
 *
 * Image description goes through the shared `describeImageWithModel` helper
 * and audio transcription through `transcribeOpenAiCompatibleAudio`.
 *
 * NOTE(review): a `transcribeAudio` handler is wired here, yet
 * `capabilities` lists only "image" — confirm whether "audio" was left out
 * on purpose or should be declared as well.
 */
export const openaiProvider: MediaUnderstandingProvider = {
  id: "openai",
  capabilities: ["image"],
  describeImage: describeImageWithModel,
  transcribeAudio: transcribeOpenAiCompatibleAudio,
};