// Files
// clawdbot/src/media-understanding/types.ts
// 2026-01-17 08:53:42 +00:00
//
// 115 lines
// 2.6 KiB
// TypeScript

/**
 * String tags identifying the kind of a media-understanding output.
 *
 * Each tag has the form "media.operation". There is exactly one tag for each
 * of the three media categories listed in `MediaUnderstandingCapability`
 * (image, audio, video).
 */
export type MediaUnderstandingKind =
  | "image.description"
  | "audio.transcription"
  | "video.description";
/**
 * Media categories used to label what a provider or a decision applies to.
 *
 * Referenced by `MediaUnderstandingDecision.capability` and
 * `MediaUnderstandingProvider.capabilities`.
 */
export type MediaUnderstandingCapability =
  | "image"
  | "audio"
  | "video";
/**
 * Describes a single media attachment.
 *
 * Only `index` is required. `path` and `url` are both optional, so the type
 * alone does not guarantee that any locator is present; consumers must check.
 */
export type MediaAttachment = {
  /** Numeric position of the attachment (presumably within its source list; confirm with callers). */
  index: number;
  /** Optional path locator for the attachment. */
  path?: string;
  /** Optional URL locator for the attachment. */
  url?: string;
  /** Optional MIME type string. */
  mime?: string;
};
/**
 * Text produced for one attachment by a media-understanding step.
 *
 * `attachmentIndex` presumably corresponds to `MediaAttachment.index`;
 * confirm against the code that builds these records.
 */
export type MediaUnderstandingOutput = {
  /** Which kind of output this is. */
  kind: MediaUnderstandingKind;
  /** Index of the attachment the text belongs to. */
  attachmentIndex: number;
  /** The produced text. */
  text: string;
  /** Identifier of the provider that produced the text. */
  provider: string;
  /** Model identifier, when known. */
  model?: string;
};
/**
 * Overall outcome recorded on a `MediaUnderstandingDecision`.
 *
 * Note that, unlike the per-model `outcome` on
 * `MediaUnderstandingModelDecision`, this union has no "failed" member.
 */
export type MediaUnderstandingDecisionOutcome =
  | "success"
  | "skipped"
  | "disabled"
  | "no-attachment"
  | "scope-deny";
/**
 * Record of one attempt with a single model entry.
 *
 * `type` and `outcome` are required; the provider/model identifiers and the
 * free-form `reason` are optional.
 */
export type MediaUnderstandingModelDecision = {
  /** Whether the entry is a "provider" or a "cli" entry. */
  type: "provider" | "cli";
  /** Result of this attempt. */
  outcome: "success" | "skipped" | "failed";
  /** Provider identifier, when present. */
  provider?: string;
  /** Model identifier, when present. */
  model?: string;
  /** Optional explanation string (presumably for skipped or failed attempts; confirm with producers). */
  reason?: string;
};
/**
 * Per-attachment decision record: the list of model attempts and, optionally,
 * the attempt that was chosen.
 */
export type MediaUnderstandingAttachmentDecision = {
  /** Index of the attachment these attempts refer to. */
  attachmentIndex: number;
  /** All recorded attempts (the type does not constrain their order). */
  attempts: Array<MediaUnderstandingModelDecision>;
  /** The selected attempt, if any. */
  chosen?: MediaUnderstandingModelDecision;
};
/**
 * Decision record for one capability: the overall outcome plus the
 * per-attachment decisions.
 */
export type MediaUnderstandingDecision = {
  /** Media category this decision applies to. */
  capability: MediaUnderstandingCapability;
  /** Overall outcome for the capability. */
  outcome: MediaUnderstandingDecisionOutcome;
  /** One entry per attachment that was considered. */
  attachments: Array<MediaUnderstandingAttachmentDecision>;
};
/**
 * Argument object passed to `MediaUnderstandingProvider.transcribeAudio`.
 *
 * Required: `buffer`, `fileName`, `apiKey`, `timeoutMs`. Everything else is
 * optional. Compared with `VideoDescriptionRequest`, this type additionally
 * carries `language` and `query`.
 */
export type AudioTranscriptionRequest = {
  // Media payload
  /** Media bytes. */
  buffer: Buffer;
  /** File name associated with the bytes. */
  fileName: string;
  /** Optional MIME type string. */
  mime?: string;

  // Endpoint / credentials
  /** Credential string for the provider. */
  apiKey: string;
  /** Optional base URL override. */
  baseUrl?: string;
  /** Optional string-to-string header map. */
  headers?: Record<string, string>;
  /** Optional map of primitive values (presumably sent as URL query parameters; confirm with implementations). */
  query?: Record<string, string | number | boolean>;

  // Model options
  /** Optional model identifier. */
  model?: string;
  /** Optional language string. */
  language?: string;
  /** Optional prompt string. */
  prompt?: string;

  // Execution
  /** Timeout value; the `Ms` suffix suggests milliseconds. */
  timeoutMs: number;
  /** Optional function with the same type as the global `fetch`. */
  fetchFn?: typeof fetch;
};
/** Value resolved by `MediaUnderstandingProvider.transcribeAudio`. */
export type AudioTranscriptionResult = {
  /** The resulting text. */
  text: string;
  /** Model identifier, when reported. */
  model?: string;
};
/**
 * Argument object passed to `MediaUnderstandingProvider.describeVideo`.
 *
 * Required: `buffer`, `fileName`, `apiKey`, `timeoutMs`. It has the same
 * fields as `AudioTranscriptionRequest` except that `language` and `query`
 * are absent.
 */
export type VideoDescriptionRequest = {
  // Media payload
  /** Media bytes. */
  buffer: Buffer;
  /** File name associated with the bytes. */
  fileName: string;
  /** Optional MIME type string. */
  mime?: string;

  // Endpoint / credentials
  /** Credential string for the provider. */
  apiKey: string;
  /** Optional base URL override. */
  baseUrl?: string;
  /** Optional string-to-string header map. */
  headers?: Record<string, string>;

  // Model options
  /** Optional model identifier. */
  model?: string;
  /** Optional prompt string. */
  prompt?: string;

  // Execution
  /** Timeout value; the `Ms` suffix suggests milliseconds. */
  timeoutMs: number;
  /** Optional function with the same type as the global `fetch`. */
  fetchFn?: typeof fetch;
};
/** Value resolved by `MediaUnderstandingProvider.describeVideo`. */
export type VideoDescriptionResult = {
  /** The resulting text. */
  text: string;
  /** Model identifier, when reported. */
  model?: string;
};
/**
 * Argument object passed to `MediaUnderstandingProvider.describeImage`.
 *
 * Unlike the audio and video request types, this one has no `apiKey`,
 * `baseUrl`, `headers`, or `fetchFn`. Instead it requires `model`,
 * `provider`, `agentDir`, and the full `cfg` object.
 */
export type ImageDescriptionRequest = {
  // Media payload
  /** Media bytes. */
  buffer: Buffer;
  /** File name associated with the bytes. */
  fileName: string;
  /** Optional MIME type string. */
  mime?: string;

  // Model selection
  /** Provider identifier (required here). */
  provider: string;
  /** Model identifier (required here). */
  model: string;
  /** Optional prompt string. */
  prompt?: string;
  /** Optional token limit (presumably caps the generated output; confirm with implementations). */
  maxTokens?: number;

  // Profile selection (semantics are not visible in this file)
  /** Optional profile name. */
  profile?: string;
  /** Optional preferred profile name. */
  preferredProfile?: string;

  // Execution context
  /** Timeout value; the `Ms` suffix suggests milliseconds. */
  timeoutMs: number;
  /** Agent directory string. */
  agentDir: string;
  /** Configuration object, typed from the project's config module. */
  cfg: import("../config/config.js").ClawdbotConfig;
};
/** Value resolved by `MediaUnderstandingProvider.describeImage`. */
export type ImageDescriptionResult = {
  /** The resulting text. */
  text: string;
  /** Model identifier, when reported. */
  model?: string;
};
/**
 * Shape of a media-understanding provider.
 *
 * Only `id` is required. Each operation is an optional async function, and
 * the type does not enforce any relationship between `capabilities` and which
 * operations are defined, so callers must check for a method before invoking it.
 */
export type MediaUnderstandingProvider = {
  /** Provider identifier. */
  id: string;
  /** Optional list of media categories associated with this provider. */
  capabilities?: Array<MediaUnderstandingCapability>;
  /** Optional image operation. */
  describeImage?: (
    req: ImageDescriptionRequest,
  ) => Promise<ImageDescriptionResult>;
  /** Optional audio operation. */
  transcribeAudio?: (
    req: AudioTranscriptionRequest,
  ) => Promise<AudioTranscriptionResult>;
  /** Optional video operation. */
  describeVideo?: (
    req: VideoDescriptionRequest,
  ) => Promise<VideoDescriptionResult>;
};