refactor: unify media understanding pipeline
This commit is contained in:
@@ -107,14 +107,18 @@ const FIELD_LABELS: Record<string, string> = {
|
||||
"tools.media.image.maxChars": "Image Understanding Max Chars",
|
||||
"tools.media.image.prompt": "Image Understanding Prompt",
|
||||
"tools.media.image.timeoutSeconds": "Image Understanding Timeout (sec)",
|
||||
"tools.media.image.attachments": "Image Understanding Attachment Policy",
|
||||
"tools.media.image.models": "Image Understanding Models",
|
||||
"tools.media.image.scope": "Image Understanding Scope",
|
||||
"tools.media.models": "Media Understanding Shared Models",
|
||||
"tools.media.concurrency": "Media Understanding Concurrency",
|
||||
"tools.media.audio.enabled": "Enable Audio Understanding",
|
||||
"tools.media.audio.maxBytes": "Audio Understanding Max Bytes",
|
||||
"tools.media.audio.maxChars": "Audio Understanding Max Chars",
|
||||
"tools.media.audio.prompt": "Audio Understanding Prompt",
|
||||
"tools.media.audio.timeoutSeconds": "Audio Understanding Timeout (sec)",
|
||||
"tools.media.audio.language": "Audio Understanding Language",
|
||||
"tools.media.audio.attachments": "Audio Understanding Attachment Policy",
|
||||
"tools.media.audio.models": "Audio Understanding Models",
|
||||
"tools.media.audio.scope": "Audio Understanding Scope",
|
||||
"tools.media.video.enabled": "Enable Video Understanding",
|
||||
@@ -122,6 +126,7 @@ const FIELD_LABELS: Record<string, string> = {
|
||||
"tools.media.video.maxChars": "Video Understanding Max Chars",
|
||||
"tools.media.video.prompt": "Video Understanding Prompt",
|
||||
"tools.media.video.timeoutSeconds": "Video Understanding Timeout (sec)",
|
||||
"tools.media.video.attachments": "Video Understanding Attachment Policy",
|
||||
"tools.media.video.models": "Video Understanding Models",
|
||||
"tools.media.video.scope": "Video Understanding Scope",
|
||||
"tools.profile": "Tool Profile",
|
||||
|
||||
@@ -18,6 +18,15 @@ export type MediaUnderstandingScopeConfig = {
|
||||
|
||||
/** Media kinds named by the media understanding configuration: image, audio, or video. */
export type MediaUnderstandingCapability = "image" | "audio" | "video";
|
||||
|
||||
/**
 * Attachment selection policy for media understanding.
 *
 * Every field is optional: `mode` chooses between a single attachment and
 * multiple attachments, `maxAttachments` caps how many are processed, and
 * `prefer` expresses an ordering preference among attachments.
 */
export type MediaUnderstandingAttachmentsConfig = {
|
||||
/** Select the first matching attachment or process multiple. */
|
||||
mode?: "first" | "all";
|
||||
/** Max number of attachments to process (default: 1). */
|
||||
maxAttachments?: number;
|
||||
/** Attachment ordering preference. */
|
||||
prefer?: "first" | "last" | "path" | "url";
|
||||
};
|
||||
|
||||
export type MediaUnderstandingModelConfig = {
|
||||
/** provider API id (e.g. openai, google). */
|
||||
provider?: string;
|
||||
@@ -62,11 +71,17 @@ export type MediaUnderstandingConfig = {
|
||||
timeoutSeconds?: number;
|
||||
/** Default language hint (audio). */
|
||||
language?: string;
|
||||
/** Attachment selection policy. */
|
||||
attachments?: MediaUnderstandingAttachmentsConfig;
|
||||
/** Ordered model list (fallbacks in order). */
|
||||
models?: MediaUnderstandingModelConfig[];
|
||||
};
|
||||
|
||||
export type MediaToolsConfig = {
|
||||
/** Shared model list applied across image/audio/video. */
|
||||
models?: MediaUnderstandingModelConfig[];
|
||||
/** Max concurrent media understanding runs. */
|
||||
concurrency?: number;
|
||||
image?: MediaUnderstandingConfig;
|
||||
audio?: MediaUnderstandingConfig;
|
||||
video?: MediaUnderstandingConfig;
|
||||
|
||||
@@ -271,6 +271,14 @@ export const MediaUnderstandingCapabilitiesSchema = z
|
||||
.array(z.union([z.literal("image"), z.literal("audio"), z.literal("video")]))
|
||||
.optional();
|
||||
|
||||
/**
 * Zod schema for the attachment selection policy.
 *
 * Accepts an object whose fields are all optional:
 * - `mode`: either "first" or "all".
 * - `maxAttachments`: a positive integer.
 * - `prefer`: one of "first", "last", "path", or "url".
 *
 * The schema itself is optional, so `undefined` also validates.
 */
export const MediaUnderstandingAttachmentsSchema = z
|
||||
.object({
|
||||
mode: z.union([z.literal("first"), z.literal("all")]).optional(),
|
||||
maxAttachments: z.number().int().positive().optional(),
|
||||
prefer: z.union([z.literal("first"), z.literal("last"), z.literal("path"), z.literal("url")]).optional(),
|
||||
})
|
||||
.optional();
|
||||
|
||||
export const MediaUnderstandingModelSchema = z
|
||||
.object({
|
||||
provider: z.string().optional(),
|
||||
@@ -298,12 +306,15 @@ export const ToolsMediaUnderstandingSchema = z
|
||||
prompt: z.string().optional(),
|
||||
timeoutSeconds: z.number().int().positive().optional(),
|
||||
language: z.string().optional(),
|
||||
attachments: MediaUnderstandingAttachmentsSchema,
|
||||
models: z.array(MediaUnderstandingModelSchema).optional(),
|
||||
})
|
||||
.optional();
|
||||
|
||||
export const ToolsMediaSchema = z
|
||||
.object({
|
||||
models: z.array(MediaUnderstandingModelSchema).optional(),
|
||||
concurrency: z.number().int().positive().optional(),
|
||||
image: ToolsMediaUnderstandingSchema.optional(),
|
||||
audio: ToolsMediaUnderstandingSchema.optional(),
|
||||
video: ToolsMediaUnderstandingSchema.optional(),
|
||||
|
||||
Reference in New Issue
Block a user