From 97cfa0846c278eb0670e4ff578f1d818bddfb22e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 17 Jan 2026 03:53:56 +0000 Subject: [PATCH] chore: remove legacy transcription helpers --- src/auto-reply/transcription.test.ts | 67 ----------------------- src/auto-reply/transcription.ts | 82 ---------------------------- 2 files changed, 149 deletions(-) delete mode 100644 src/auto-reply/transcription.test.ts delete mode 100644 src/auto-reply/transcription.ts diff --git a/src/auto-reply/transcription.test.ts b/src/auto-reply/transcription.test.ts deleted file mode 100644 index bdd97abe4..000000000 --- a/src/auto-reply/transcription.test.ts +++ /dev/null @@ -1,67 +0,0 @@ -import fs from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; - -import { afterEach, describe, expect, it, vi } from "vitest"; - -vi.mock("../globals.js", () => ({ - isVerbose: () => false, - logVerbose: vi.fn(), - shouldLogVerbose: () => false, -})); - -vi.mock("../process/exec.js", () => ({ - runExec: vi.fn(), -})); - -const runtime = { - error: vi.fn(), -}; - -describe("transcribeInboundAudio", () => { - afterEach(() => { - vi.resetAllMocks(); - vi.unstubAllGlobals(); - }); - - it("downloads mediaUrl to temp file and returns transcript", async () => { - const tmpBuf = Buffer.from("audio-bytes"); - const tmpFile = path.join(os.tmpdir(), `clawdbot-audio-${Date.now()}.ogg`); - await fs.writeFile(tmpFile, tmpBuf); - - const fetchMock = vi.fn(async () => ({ - ok: true, - status: 200, - arrayBuffer: async () => tmpBuf, - })) as unknown as typeof fetch; - vi.stubGlobal("fetch", fetchMock); - - const cfg = { - tools: { - audio: { - transcription: { - args: ["echo", "{{MediaPath}}"], - timeoutSeconds: 5, - }, - }, - }, - }; - const ctx = { MediaUrl: "https://example.com/audio.ogg" }; - - const execModule = await import("../process/exec.js"); - vi.mocked(execModule.runExec).mockResolvedValue({ - stdout: "transcribed text\n", - stderr: "", - }); - const { transcribeInboundAudio } = await import("./transcription.js"); - const result = await transcribeInboundAudio(cfg as never, ctx as never, runtime as never); - expect(result?.text).toBe("transcribed text"); - expect(fetchMock).toHaveBeenCalled(); - }); - - it("returns undefined when no transcription command", async () => { - const { transcribeInboundAudio } = await import("./transcription.js"); - const res = await transcribeInboundAudio({ audio: {} } as never, {} as never, runtime as never); - expect(res).toBeUndefined(); - }); -}); diff --git a/src/auto-reply/transcription.ts b/src/auto-reply/transcription.ts deleted file mode 100644 index e63e79a68..000000000 --- a/src/auto-reply/transcription.ts +++ /dev/null @@ -1,82 +0,0 @@ -import crypto from "node:crypto"; -import fs from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; - -import type { ClawdbotConfig } from "../config/config.js"; -import { logVerbose, shouldLogVerbose } from "../globals.js"; -import { runExec } from "../process/exec.js"; -import type { RuntimeEnv } from "../runtime.js"; -import { applyTemplate, type MsgContext } from "./templating.js"; - -const AUDIO_TRANSCRIPTION_BINARY = "whisper"; - -export function isAudio(mediaType?: string | null) { - return Boolean(mediaType?.startsWith("audio")); -} - -export function hasAudioTranscriptionConfig(cfg: ClawdbotConfig): boolean { - if (cfg.tools?.audio?.transcription?.args?.length) return true; - return Boolean(cfg.audio?.transcription?.command?.length); -} - -export async function transcribeInboundAudio( - cfg: ClawdbotConfig, - ctx: MsgContext, - runtime: RuntimeEnv, -): Promise<{ text: string } | undefined> { - const toolTranscriber = cfg.tools?.audio?.transcription; - const legacyTranscriber = cfg.audio?.transcription; - const hasToolTranscriber = Boolean(toolTranscriber?.args?.length); - if (!hasToolTranscriber && !legacyTranscriber?.command?.length) { - return undefined; - } - - const timeoutMs = Math.max( - (toolTranscriber?.timeoutSeconds ?? legacyTranscriber?.timeoutSeconds ?? 45) * 1000, - 1_000, - ); - let tmpPath: string | undefined; - let mediaPath = ctx.MediaPath; - try { - if (!mediaPath && ctx.MediaUrl) { - const res = await fetch(ctx.MediaUrl); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const arrayBuf = await res.arrayBuffer(); - const buffer = Buffer.from(arrayBuf); - tmpPath = path.join(os.tmpdir(), `clawdbot-audio-${crypto.randomUUID()}.ogg`); - await fs.writeFile(tmpPath, buffer); - mediaPath = tmpPath; - if (shouldLogVerbose()) { - logVerbose( - `Downloaded audio for transcription (${(buffer.length / (1024 * 1024)).toFixed(2)}MB) -> ${tmpPath}`, - ); - } - } - if (!mediaPath) return undefined; - - const templCtx: MsgContext = { ...ctx, MediaPath: mediaPath }; - const argv = hasToolTranscriber - ? [AUDIO_TRANSCRIPTION_BINARY, ...(toolTranscriber?.args ?? [])].map((part, index) => - index === 0 ? part : applyTemplate(part, templCtx), - ) - : (legacyTranscriber?.command ?? []).map((part) => applyTemplate(part, templCtx)); - if (shouldLogVerbose()) { - logVerbose(`Transcribing audio via command: ${argv.join(" ")}`); - } - const { stdout } = await runExec(argv[0], argv.slice(1), { - timeoutMs, - maxBuffer: 5 * 1024 * 1024, - }); - const text = stdout.trim(); - if (!text) return undefined; - return { text }; - } catch (err) { - runtime.error?.(`Audio transcription failed: ${String(err)}`); - return undefined; - } finally { - if (tmpPath) { - void fs.unlink(tmpPath).catch(() => {}); - } - } -}