refactor: unify media provider options
This commit is contained in:
@@ -51,7 +51,9 @@ export type MediaUnderstandingModelConfig = {
|
||||
timeoutSeconds?: number;
|
||||
/** Optional language hint for audio transcription. */
|
||||
language?: string;
|
||||
/** Optional Deepgram transcription options (audio only). */
|
||||
/** Optional provider-specific query params (merged into requests). */
|
||||
providerOptions?: Record<string, Record<string, string | number | boolean>>;
|
||||
/** @deprecated Use providerOptions.deepgram instead. */
|
||||
deepgram?: {
|
||||
detectLanguage?: boolean;
|
||||
punctuate?: boolean;
|
||||
@@ -82,7 +84,9 @@ export type MediaUnderstandingConfig = {
|
||||
timeoutSeconds?: number;
|
||||
/** Default language hint (audio). */
|
||||
language?: string;
|
||||
/** Optional Deepgram transcription options (audio only). */
|
||||
/** Optional provider-specific query params (merged into requests). */
|
||||
providerOptions?: Record<string, Record<string, string | number | boolean>>;
|
||||
/** @deprecated Use providerOptions.deepgram instead. */
|
||||
deepgram?: {
|
||||
detectLanguage?: boolean;
|
||||
punctuate?: boolean;
|
||||
|
||||
@@ -284,6 +284,11 @@ const DeepgramAudioSchema = z
|
||||
})
|
||||
.optional();
|
||||
|
||||
/** A single provider option value: a string, a number, or a boolean. */
const ProviderOptionValueSchema = z.union([z.string(), z.number(), z.boolean()]);
|
||||
/**
 * Optional two-level record: the outer key selects a provider (the runner
 * indexes it by provider id), the inner record maps option names to scalar
 * values accepted by ProviderOptionValueSchema.
 */
const ProviderOptionsSchema = z
|
||||
.record(z.string(), z.record(z.string(), ProviderOptionValueSchema))
|
||||
.optional();
|
||||
|
||||
export const MediaUnderstandingModelSchema = z
|
||||
.object({
|
||||
provider: z.string().optional(),
|
||||
@@ -297,6 +302,7 @@ export const MediaUnderstandingModelSchema = z
|
||||
maxBytes: z.number().int().positive().optional(),
|
||||
timeoutSeconds: z.number().int().positive().optional(),
|
||||
language: z.string().optional(),
|
||||
providerOptions: ProviderOptionsSchema,
|
||||
deepgram: DeepgramAudioSchema,
|
||||
baseUrl: z.string().optional(),
|
||||
headers: z.record(z.string(), z.string()).optional(),
|
||||
@@ -314,6 +320,7 @@ export const ToolsMediaUnderstandingSchema = z
|
||||
prompt: z.string().optional(),
|
||||
timeoutSeconds: z.number().int().positive().optional(),
|
||||
language: z.string().optional(),
|
||||
providerOptions: ProviderOptionsSchema,
|
||||
deepgram: DeepgramAudioSchema,
|
||||
baseUrl: z.string().optional(),
|
||||
headers: z.record(z.string(), z.string()).optional(),
|
||||
|
||||
112
src/media-understanding/runner.deepgram.test.ts
Normal file
112
src/media-understanding/runner.deepgram.test.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
} from "./runner.js";
|
||||
|
||||
describe("runCapability deepgram provider options", () => {
  /**
   * Runs the audio capability against a stub "deepgram" provider and checks
   * what the provider receives:
   *  - baseUrl: the model entry's value is the one passed through,
   *  - headers: provider, tools.media.audio and entry headers are all present,
   *  - query: entry-level providerOptions override config-level ones, and
   *    camelCase aliases arrive as snake_case keys.
   */
  it("merges provider options, headers, and baseUrl overrides", async () => {
    // A private mkdtemp directory gives a collision-free path even when tests
    // run in parallel (a Date.now()-based file name can repeat), and lets the
    // cleanup below remove everything in one call.
    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-deepgram-"));
    const tmpPath = path.join(tmpDir, "sample.wav");

    try {
      await fs.writeFile(tmpPath, Buffer.from("RIFF"));
      const ctx: MsgContext = { MediaPath: tmpPath, MediaType: "audio/wav" };
      const media = normalizeMediaAttachments(ctx);
      const cache = createMediaAttachmentCache(media);

      // Captured from the stub provider so the assertions can inspect the request.
      let seenQuery: Record<string, string | number | boolean> | undefined;
      let seenBaseUrl: string | undefined;
      let seenHeaders: Record<string, string> | undefined;

      const providerRegistry = buildProviderRegistry({
        deepgram: {
          id: "deepgram",
          capabilities: ["audio"],
          transcribeAudio: async (req) => {
            seenQuery = req.query;
            seenBaseUrl = req.baseUrl;
            seenHeaders = req.headers;
            return { text: "ok", model: req.model };
          },
        },
      });

      // Three configuration layers: provider defaults, tools.media.audio, and
      // the individual model entry.
      const cfg = {
        models: {
          providers: {
            deepgram: {
              baseUrl: "https://provider.example",
              apiKey: "test-key",
              headers: { "X-Provider": "1" },
              models: [],
            },
          },
        },
        tools: {
          media: {
            audio: {
              enabled: true,
              baseUrl: "https://config.example",
              headers: { "X-Config": "2" },
              providerOptions: {
                deepgram: {
                  detect_language: true,
                  punctuate: true,
                },
              },
              // Deprecated compat block; only fills keys providerOptions left unset.
              deepgram: { smartFormat: true },
              models: [
                {
                  provider: "deepgram",
                  model: "nova-3",
                  baseUrl: "https://entry.example",
                  headers: { "X-Entry": "3" },
                  providerOptions: {
                    deepgram: {
                      detectLanguage: false,
                      punctuate: false,
                      smart_format: true,
                    },
                  },
                },
              ],
            },
          },
        },
      } as unknown as ClawdbotConfig;

      try {
        const result = await runCapability({
          capability: "audio",
          cfg,
          ctx,
          attachments: cache,
          media,
          providerRegistry,
        });
        expect(result.outputs[0]?.text).toBe("ok");
        expect(seenBaseUrl).toBe("https://entry.example");
        expect(seenHeaders).toMatchObject({
          "X-Provider": "1",
          "X-Config": "2",
          "X-Entry": "3",
        });
        expect(seenQuery).toMatchObject({
          detect_language: false,
          punctuate: false,
          smart_format: true,
        });
        expect((seenQuery as Record<string, unknown>)["detectLanguage"]).toBeUndefined();
      } finally {
        await cache.cleanup();
      }
    } finally {
      // Best-effort removal: runs even when setup fails before the cache exists.
      await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
    }
  });
});
|
||||
@@ -71,21 +71,73 @@ function trimOutput(text: string, maxChars?: number): string {
|
||||
return trimmed.slice(0, maxChars).trim();
|
||||
}
|
||||
|
||||
function buildDeepgramQuery(options?: {
|
||||
/** Flat map of query parameter names to scalar values sent with a provider request. */
type ProviderQuery = Record<string, string | number | boolean>;

/**
 * Copy provider options into a fresh query object, dropping unset entries.
 *
 * The input type admits `undefined` values so the runtime guard below is
 * reflected in the signature; callers passing fully-populated records are
 * unaffected.
 *
 * @param options - Raw option map; may be absent.
 * @returns A new object holding every entry whose value is not `undefined`,
 *   or `undefined` when `options` is missing or nothing remains.
 */
function normalizeProviderQuery(
  options?: Record<string, string | number | boolean | undefined>,
): ProviderQuery | undefined {
  if (!options) return undefined;
  const query: ProviderQuery = {};
  for (const [key, value] of Object.entries(options)) {
    // Only `undefined` is dropped; falsy values such as false, 0 and "" are kept.
    if (value === undefined) continue;
    query[key] = value;
  }
  return Object.keys(query).length > 0 ? query : undefined;
}
|
||||
|
||||
function buildDeepgramCompatQuery(options?: {
|
||||
detectLanguage?: boolean;
|
||||
punctuate?: boolean;
|
||||
smartFormat?: boolean;
|
||||
}): Record<string, string | number | boolean> | undefined {
|
||||
}): ProviderQuery | undefined {
|
||||
if (!options) return undefined;
|
||||
const query: Record<string, string | number | boolean> = {};
|
||||
if (typeof options.detectLanguage === "boolean") {
|
||||
query.detect_language = options.detectLanguage;
|
||||
const query: ProviderQuery = {};
|
||||
if (typeof options.detectLanguage === "boolean") query.detect_language = options.detectLanguage;
|
||||
if (typeof options.punctuate === "boolean") query.punctuate = options.punctuate;
|
||||
if (typeof options.smartFormat === "boolean") query.smart_format = options.smartFormat;
|
||||
return Object.keys(query).length > 0 ? query : undefined;
|
||||
}
|
||||
|
||||
/**
 * Shallow-merge two provider query maps; keys in `incoming` win over `base`.
 *
 * Neither argument is mutated. An empty merged result is reported as
 * `undefined`, matching the other query builders in this file, which never
 * hand back an empty object.
 *
 * @param base - Lower-priority query parameters.
 * @param incoming - Higher-priority query parameters.
 * @returns The merged map, or `undefined` when there is nothing to send.
 */
function mergeProviderQuery(
  base: ProviderQuery | undefined,
  incoming: ProviderQuery | undefined,
): ProviderQuery | undefined {
  if (!base && !incoming) return undefined;
  const merged: ProviderQuery = { ...(base ?? {}), ...(incoming ?? {}) };
  return Object.keys(merged).length > 0 ? merged : undefined;
}
|
||||
|
||||
function normalizeDeepgramQueryKeys(query: ProviderQuery): ProviderQuery {
|
||||
const normalized = { ...query };
|
||||
if ("detectLanguage" in normalized) {
|
||||
normalized.detect_language = normalized.detectLanguage as boolean;
|
||||
delete normalized.detectLanguage;
|
||||
}
|
||||
if (typeof options.punctuate === "boolean") {
|
||||
query.punctuate = options.punctuate;
|
||||
if ("smartFormat" in normalized) {
|
||||
normalized.smart_format = normalized.smartFormat as boolean;
|
||||
delete normalized.smartFormat;
|
||||
}
|
||||
if (typeof options.smartFormat === "boolean") {
|
||||
query.smart_format = options.smartFormat;
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
 * Build the query parameters passed to a provider for one model entry.
 *
 * Options come from `providerOptions[providerId]` on the capability config and
 * on the model entry; entry values override config values. For the "deepgram"
 * provider two extra steps apply:
 *  - camelCase aliases (`detectLanguage`, `smartFormat`) are rewritten to their
 *    snake_case keys, and
 *  - the deprecated `deepgram` blocks (config, then entry) fill in any key that
 *    `providerOptions` left unset.
 *
 * @param params.providerId - Provider whose options are looked up.
 * @param params.config - Capability-level config; may be absent.
 * @param params.entry - Model entry being run.
 * @returns The resolved query, or `undefined` when no option is set.
 */
function resolveProviderQuery(params: {
  providerId: string;
  config?: MediaUnderstandingConfig;
  entry: MediaUnderstandingModelConfig;
}): ProviderQuery | undefined {
  const { providerId, config, entry } = params;
  const configOptions = config?.providerOptions?.[providerId];
  const entryOptions = entry.providerOptions?.[providerId];

  if (providerId !== "deepgram") {
    return normalizeProviderQuery({ ...(configOptions ?? {}), ...(entryOptions ?? {}) });
  }

  // Canonicalize each layer BEFORE merging. Merging first and renaming
  // afterwards lets a config-level `detectLanguage` overwrite an entry-level
  // `detect_language`, because the alias is applied on top of the merged
  // snake_case value regardless of which layer it came from.
  const query: ProviderQuery =
    normalizeProviderQuery({
      ...normalizeDeepgramQueryKeys(configOptions ?? {}),
      ...normalizeDeepgramQueryKeys(entryOptions ?? {}),
    }) ?? {};

  // Deprecated `deepgram` settings are a fallback only: they never override a
  // key that providerOptions already supplied.
  const compat = buildDeepgramCompatQuery({ ...config?.deepgram, ...entry.deepgram });
  for (const [key, value] of Object.entries(compat ?? {})) {
    if (query[key] === undefined) {
      query[key] = value;
    }
  }

  return Object.keys(query).length > 0 ? query : undefined;
}
|
||||
@@ -246,13 +298,11 @@ async function runProviderEntry(params: {
|
||||
...(entry.headers ?? {}),
|
||||
};
|
||||
const headers = Object.keys(mergedHeaders).length > 0 ? mergedHeaders : undefined;
|
||||
const deepgramQuery =
|
||||
providerId === "deepgram"
|
||||
? buildDeepgramQuery({
|
||||
...params.config?.deepgram,
|
||||
...entry.deepgram,
|
||||
})
|
||||
: undefined;
|
||||
const providerQuery = resolveProviderQuery({
|
||||
providerId,
|
||||
config: params.config,
|
||||
entry,
|
||||
});
|
||||
const model = entry.model?.trim() || DEFAULT_AUDIO_MODELS[providerId] || entry.model;
|
||||
const result = await provider.transcribeAudio({
|
||||
buffer: media.buffer,
|
||||
@@ -264,7 +314,7 @@ async function runProviderEntry(params: {
|
||||
model,
|
||||
language: entry.language ?? params.config?.language ?? cfg.tools?.media?.audio?.language,
|
||||
prompt,
|
||||
query: deepgramQuery,
|
||||
query: providerQuery,
|
||||
timeoutMs,
|
||||
});
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user