feat: add Deepgram audio transcription

Co-authored-by: Safzan Pirani <safzanpirani@users.noreply.github.com>
2026-01-17 08:46:40 +00:00
parent 869ef0c5ba
commit e637bbdfb5
12 changed files with 303 additions and 2 deletions
--- a/src/media-understanding/providers/deepgram/audio.ts
+++ b/src/media-understanding/providers/deepgram/audio.ts
@@ -0,0 +1,64 @@
+import type { AudioTranscriptionRequest, AudioTranscriptionResult } from "../../types.js";
+import { fetchWithTimeout, normalizeBaseUrl, readErrorResponse } from "../shared.js";
+
+export const DEFAULT_DEEPGRAM_AUDIO_BASE_URL = "https://api.deepgram.com/v1"; // fallback handed to normalizeBaseUrl
+export const DEFAULT_DEEPGRAM_AUDIO_MODEL = "nova-3"; // used when no model, or a blank one, is given
+
+// Returns the trimmed model name, or the default model when it is missing or blank.
+function resolveModel(model?: string): string {
+  return model?.trim() || DEFAULT_DEEPGRAM_AUDIO_MODEL;
+}
+
+// Subset of the transcription response body that this module reads.
+// Every level is optional, so lookups go through optional chaining.
+type DeepgramTranscriptResponse = {
+  results?: {
+    channels?: Array<{
+      alternatives?: Array<{ transcript?: string }>;
+    }>;
+  };
+};
+
+/**
+ * Posts the audio buffer to the `/listen` endpoint and returns the first transcript found.
+ *
+ * Caller-supplied headers win: `authorization` and `content-type` are only set when absent.
+ * @throws Error on a non-OK response, or when no non-empty transcript is present.
+ */
+export async function transcribeDeepgramAudio(
+  params: AudioTranscriptionRequest,
+): Promise<AudioTranscriptionResult> {
+  const doFetch = params.fetchFn ?? fetch;
+  const root = normalizeBaseUrl(params.baseUrl, DEFAULT_DEEPGRAM_AUDIO_BASE_URL);
+  const model = resolveModel(params.model);
+
+  const endpoint = new URL(`${root}/listen`);
+  endpoint.searchParams.set("model", model);
+  const language = params.language?.trim();
+  if (language) endpoint.searchParams.set("language", language);
+
+  const headers = new Headers(params.headers);
+  if (!headers.has("authorization")) headers.set("authorization", `Token ${params.apiKey}`);
+  if (!headers.has("content-type")) {
+    headers.set("content-type", params.mime ?? "application/octet-stream");
+  }
+
+  const response = await fetchWithTimeout(
+    endpoint.toString(),
+    { method: "POST", headers, body: params.buffer },
+    params.timeoutMs,
+    doFetch,
+  );
+
+  if (!response.ok) {
+    const detail = await readErrorResponse(response);
+    const tail = detail ? `: ${detail}` : "";
+    throw new Error(`Audio transcription failed (HTTP ${response.status})${tail}`);
+  }
+
+  // Only the first alternative of the first channel is consulted.
+  const payload = (await response.json()) as DeepgramTranscriptResponse;
+  const text = payload.results?.channels?.[0]?.alternatives?.[0]?.transcript?.trim();
+  if (!text) throw new Error("Audio transcription response missing transcript");
+  return { text, model };
+}