feat: auto-enable audio understanding when keys exist

This commit is contained in:
Peter Steinberger
2026-01-18 14:49:11 +00:00
parent 173bce34b0
commit c69947dff8
4 changed files with 177 additions and 10 deletions

View File

@@ -39,6 +39,8 @@ import {
import { describeImageWithModel } from "./providers/image.js";
import { estimateBase64Size, resolveVideoMaxBase64Bytes } from "./video.js";
// Provider ids probed when no audio model entries are configured. They are
// checked in this order, and the resulting entries keep the same order.
const AUTO_AUDIO_PROVIDERS = ["openai", "groq", "deepgram"] as const;
export type ActiveMediaModel = {
provider: string;
model?: string;
@@ -65,6 +67,29 @@ export function createMediaAttachmentCache(attachments: MediaAttachment[]): Medi
return new MediaAttachmentCache(attachments);
}
/**
 * Builds the implicit list of audio model entries from the providers named in
 * `AUTO_AUDIO_PROVIDERS`.
 *
 * A provider is included only when the registry returns an implementation
 * exposing `transcribeAudio` and `resolveApiKeyForProvider` completes without
 * throwing for it. Providers are probed one at a time, so the result keeps
 * the order of `AUTO_AUDIO_PROVIDERS`.
 *
 * @param params.cfg - Configuration forwarded to the API key lookup.
 * @param params.agentDir - Optional agent directory forwarded to the API key lookup.
 * @param params.providerRegistry - Registry used to look up each provider implementation.
 * @returns One `{ type: "provider", provider }` entry per usable provider; empty when none qualify.
 */
async function resolveAutoAudioEntries(params: {
  cfg: ClawdbotConfig;
  agentDir?: string;
  providerRegistry: ProviderRegistry;
}): Promise<MediaUnderstandingModelConfig[]> {
  const { cfg, agentDir, providerRegistry } = params;
  const usable: MediaUnderstandingModelConfig[] = [];

  for (const candidateId of AUTO_AUDIO_PROVIDERS) {
    const candidate = getMediaUnderstandingProvider(candidateId, providerRegistry);
    if (candidate?.transcribeAudio) {
      // The resolved key itself is not needed here; the lookup only serves as
      // an availability probe, and any failure means "skip this provider".
      let keyAvailable = true;
      try {
        await resolveApiKeyForProvider({ provider: candidateId, cfg, agentDir });
      } catch {
        keyAvailable = false;
      }
      if (keyAvailable) {
        usable.push({ type: "provider", provider: candidateId });
      }
    }
  }

  return usable;
}
function trimOutput(text: string, maxChars?: number): string {
const trimmed = text.trim();
if (!maxChars || trimmed.length <= maxChars) return trimmed;
@@ -561,7 +586,15 @@ export async function runCapability(params: {
providerRegistry: params.providerRegistry,
activeModel: params.activeModel,
});
if (entries.length === 0) {
let resolvedEntries = entries;
if (resolvedEntries.length === 0 && capability === "audio" && config?.enabled !== false) {
resolvedEntries = await resolveAutoAudioEntries({
cfg,
agentDir: params.agentDir,
providerRegistry: params.providerRegistry,
});
}
if (resolvedEntries.length === 0) {
return {
outputs: [],
decision: {
@@ -583,7 +616,7 @@ export async function runCapability(params: {
agentDir: params.agentDir,
providerRegistry: params.providerRegistry,
cache: params.attachments,
entries,
entries: resolvedEntries,
config,
});
if (output) outputs.push(output);