// Type definitions for media understanding: attachments, outputs, decisions,
// and the request/result shapes used by audio, video, and image providers.
/**
 * Kinds of output a media-understanding step can produce.
 *
 * Each member is written as `<media>.<operation>`.
 */
export type MediaUnderstandingKind =
  | "audio.transcription"
  | "video.description"
  | "image.description";
|
|
|
|
/** Media categories a provider can declare in its `capabilities` list. */
export type MediaUnderstandingCapability =
  | "image"
  | "audio"
  | "video";
|
|
|
|
/**
 * A single media attachment.
 *
 * Only `index` is required; `path`, `url`, and `mime` may each be absent.
 */
export type MediaAttachment = {
  /** Path of the attachment, when known. */
  path?: string;
  /** URL of the attachment, when known. */
  url?: string;
  /** MIME type of the attachment, when known. */
  mime?: string;
  /**
   * Numeric index identifying this attachment.
   * NOTE(review): presumably its position among the message's attachments — confirm against callers.
   */
  index: number;
};
|
|
|
|
/**
 * One piece of text produced for an attachment, together with the kind of
 * understanding and the provider/model it is attributed to.
 */
export type MediaUnderstandingOutput = {
  /** Which kind of understanding `text` represents. */
  kind: MediaUnderstandingKind;
  /**
   * Index of the attachment this output refers to.
   * NOTE(review): presumably matches `MediaAttachment.index` — confirm against callers.
   */
  attachmentIndex: number;
  /** The resulting text. */
  text: string;
  /** Identifier of the provider the output is attributed to. */
  provider: string;
  /** Model identifier, when one is recorded. */
  model?: string;
};
|
|
|
|
/**
 * Overall outcome values used by `MediaUnderstandingDecision.outcome`.
 *
 * NOTE(review): the conditions that lead to each value are decided by code
 * outside this file; only the allowed literals are defined here.
 */
export type MediaUnderstandingDecisionOutcome =
  | "success"
  | "skipped"
  | "disabled"
  | "no-attachment"
  | "scope-deny";
|
|
|
|
/**
 * Record of one model attempt: which entry was tried and how it ended.
 */
export type MediaUnderstandingModelDecision = {
  /** Provider identifier, when recorded. */
  provider?: string;
  /** Model identifier, when recorded. */
  model?: string;
  /** Whether the entry is a "provider" or a "cli" one. */
  type: "provider" | "cli";
  /** How this attempt ended. */
  outcome: "success" | "skipped" | "failed";
  /** Optional free-form explanation accompanying `outcome`. */
  reason?: string;
};
|
|
|
|
/**
 * Per-attachment decision record: every model attempt made for the
 * attachment plus, optionally, the one that was chosen.
 */
export type MediaUnderstandingAttachmentDecision = {
  /**
   * Index of the attachment these attempts belong to.
   * NOTE(review): presumably matches `MediaAttachment.index` — confirm against callers.
   */
  attachmentIndex: number;
  /** All recorded model attempts for this attachment. */
  attempts: MediaUnderstandingModelDecision[];
  /** The selected attempt, when one was chosen. */
  chosen?: MediaUnderstandingModelDecision;
};
|
|
|
|
/**
 * Decision record for one capability: the overall outcome and the
 * per-attachment details behind it.
 */
export type MediaUnderstandingDecision = {
  /** The media capability this decision is about. */
  capability: MediaUnderstandingCapability;
  /** Overall outcome for the capability. */
  outcome: MediaUnderstandingDecisionOutcome;
  /** Per-attachment decision records. */
  attachments: MediaUnderstandingAttachmentDecision[];
};
|
|
|
|
/**
 * Input passed to a provider's `transcribeAudio` handler.
 *
 * `buffer`, `fileName`, `apiKey`, and `timeoutMs` are required; everything
 * else is optional.
 */
export type AudioTranscriptionRequest = {
  /** Raw audio bytes. */
  buffer: Buffer;
  /** File name associated with the audio. */
  fileName: string;
  /** MIME type of the audio, when known. */
  mime?: string;
  /** API key for the provider. */
  apiKey: string;
  /** Optional base URL. NOTE(review): presumably overrides the provider's default endpoint — confirm. */
  baseUrl?: string;
  /** Optional string-to-string header map. NOTE(review): presumably sent as extra HTTP headers — confirm. */
  headers?: Record<string, string>;
  /** Model identifier, when a specific model is requested. */
  model?: string;
  /** Optional language value. NOTE(review): expected format (e.g. language code) is not visible here — confirm. */
  language?: string;
  /** Optional prompt text. */
  prompt?: string;
  /** Optional map of string, number, or boolean values. NOTE(review): presumably URL query parameters — confirm. */
  query?: Record<string, string | number | boolean>;
  /** Timeout value; the name indicates milliseconds. */
  timeoutMs: number;
  /** Optional function with the same type as the global `fetch`. */
  fetchFn?: typeof fetch;
};
|
|
|
|
/** Value resolved by a provider's `transcribeAudio` handler. */
export type AudioTranscriptionResult = {
  /** The resulting text. */
  text: string;
  /** Model identifier, when one is reported. */
  model?: string;
};
|
|
|
|
/**
 * Input passed to a provider's `describeVideo` handler.
 *
 * `buffer`, `fileName`, `apiKey`, and `timeoutMs` are required; everything
 * else is optional.
 */
export type VideoDescriptionRequest = {
  /** Raw video bytes. */
  buffer: Buffer;
  /** File name associated with the video. */
  fileName: string;
  /** MIME type of the video, when known. */
  mime?: string;
  /** API key for the provider. */
  apiKey: string;
  /** Optional base URL. NOTE(review): presumably overrides the provider's default endpoint — confirm. */
  baseUrl?: string;
  /** Optional string-to-string header map. NOTE(review): presumably sent as extra HTTP headers — confirm. */
  headers?: Record<string, string>;
  /** Model identifier, when a specific model is requested. */
  model?: string;
  /** Optional prompt text. */
  prompt?: string;
  /** Timeout value; the name indicates milliseconds. */
  timeoutMs: number;
  /** Optional function with the same type as the global `fetch`. */
  fetchFn?: typeof fetch;
};
|
|
|
|
/** Value resolved by a provider's `describeVideo` handler. */
export type VideoDescriptionResult = {
  /** The resulting text. */
  text: string;
  /** Model identifier, when one is reported. */
  model?: string;
};
|
|
|
|
/**
 * Input passed to a provider's `describeImage` handler.
 *
 * Unlike the audio and video request shapes in this file, this one carries no
 * API key or base URL; it carries `provider`, `agentDir`, and the
 * application config instead.
 */
export type ImageDescriptionRequest = {
  /** Raw image bytes. */
  buffer: Buffer;
  /** File name associated with the image. */
  fileName: string;
  /** MIME type of the image, when known. */
  mime?: string;
  /** Model identifier (required). */
  model: string;
  /** Provider identifier (required). */
  provider: string;
  /** Optional prompt text. */
  prompt?: string;
  /** Optional numeric token limit. NOTE(review): presumably caps the generated output — confirm. */
  maxTokens?: number;
  /** Timeout value; the name indicates milliseconds. */
  timeoutMs: number;
  /** Optional profile name. NOTE(review): how this differs from `preferredProfile` is not visible here — confirm. */
  profile?: string;
  /** Optional preferred profile name. NOTE(review): see `profile` — semantics to be confirmed. */
  preferredProfile?: string;
  /** Agent directory string. NOTE(review): presumably a filesystem path — confirm. */
  agentDir: string;
  /** Application configuration object, typed by the config module. */
  cfg: import("../config/config.js").ClawdbotConfig;
};
|
|
|
|
/** Value resolved by a provider's `describeImage` handler. */
export type ImageDescriptionResult = {
  /** The resulting text. */
  text: string;
  /** Model identifier, when one is reported. */
  model?: string;
};
|
|
|
|
/**
 * A media-understanding provider.
 *
 * Only `id` is required. Each handler is optional, so callers must check that
 * a handler exists before invoking it.
 */
export type MediaUnderstandingProvider = {
  /** Provider identifier. */
  id: string;
  /** Media categories the provider declares, when listed. */
  capabilities?: MediaUnderstandingCapability[];
  /** Optional audio handler; resolves to an `AudioTranscriptionResult`. */
  transcribeAudio?: (req: AudioTranscriptionRequest) => Promise<AudioTranscriptionResult>;
  /** Optional video handler; resolves to a `VideoDescriptionResult`. */
  describeVideo?: (req: VideoDescriptionRequest) => Promise<VideoDescriptionResult>;
  /** Optional image handler; resolves to an `ImageDescriptionResult`. */
  describeImage?: (req: ImageDescriptionRequest) => Promise<ImageDescriptionResult>;
};
|