import crypto from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";

import type { ClawdbotConfig } from "../config/config.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import { runExec } from "../process/exec.js";
import type { RuntimeEnv } from "../runtime.js";
import { applyTemplate, type MsgContext } from "./templating.js";

// Executable run for the `cfg.tools.audio.transcription` config shape: it is
// always placed at argv[0] and, unlike the configured args, is never passed
// through template expansion.
const AUDIO_TRANSCRIPTION_BINARY = "whisper";

/**
 * Reports whether a media type string denotes audio content.
 *
 * The check is a case-sensitive prefix match on `"audio"`; a missing
 * (`null`/`undefined`) or empty media type is treated as not audio.
 *
 * @param mediaType - Media (MIME) type of the attachment, if known.
 * @returns `true` when `mediaType` starts with `"audio"`, otherwise `false`.
 */
export function isAudio(mediaType?: string | null) {
  if (mediaType == null) {
    return false;
  }
  return mediaType.startsWith("audio");
}

/**
 * Tells whether the config defines any audio transcriber.
 *
 * A transcriber counts as configured when either
 * `cfg.tools.audio.transcription.args` or the legacy
 * `cfg.audio.transcription.command` is present and non-empty.
 *
 * @param cfg - Configuration to inspect.
 * @returns `true` if at least one of the two transcriber settings is non-empty.
 */
export function hasAudioTranscriptionConfig(cfg: ClawdbotConfig): boolean {
  const toolArgs = cfg.tools?.audio?.transcription?.args;
  if (toolArgs?.length) {
    return true;
  }
  const legacyCommand = cfg.audio?.transcription?.command;
  return legacyCommand?.length ? true : false;
}

/**
 * Transcribes the audio attached to an inbound message by running an external
 * command and returning its trimmed stdout.
 *
 * Two config shapes are recognised. A non-empty
 * `cfg.tools.audio.transcription.args` takes precedence and is run as
 * arguments to the fixed `AUDIO_TRANSCRIPTION_BINARY`; otherwise the legacy
 * `cfg.audio.transcription.command` is used as the full argv. Every argv part
 * except the fixed binary name is expanded with `applyTemplate` against the
 * message context (with `MediaPath` pointing at the local audio file).
 *
 * When the context has no `MediaPath` but has a `MediaUrl`, the audio is
 * downloaded to a temporary file first; that file is removed (best effort)
 * once the function finishes. The download and the command are each bounded
 * by the configured timeout (`timeoutSeconds`, default 45 s, minimum 1 s).
 *
 * @param cfg - Configuration holding the transcriber settings.
 * @param ctx - Message context supplying `MediaPath` and/or `MediaUrl`.
 * @param runtime - Runtime whose optional `error` hook receives failure messages.
 * @returns `{ text }` with the transcription, or `undefined` when no
 *   transcriber is configured, no media is available, the command printed
 *   nothing, or the download/command failed (failures are reported through
 *   `runtime.error` rather than thrown).
 */
export async function transcribeInboundAudio(
  cfg: ClawdbotConfig,
  ctx: MsgContext,
  runtime: RuntimeEnv,
): Promise<{ text: string } | undefined> {
  const toolTranscriber = cfg.tools?.audio?.transcription;
  const legacyTranscriber = cfg.audio?.transcription;
  const hasToolTranscriber = Boolean(toolTranscriber?.args?.length);
  if (!hasToolTranscriber && !legacyTranscriber?.command?.length) {
    return undefined;
  }

  // Tool-style config wins over legacy; never allow a timeout below 1 second.
  const timeoutMs = Math.max(
    (toolTranscriber?.timeoutSeconds ??
      legacyTranscriber?.timeoutSeconds ??
      45) * 1000,
    1_000,
  );
  let tmpPath: string | undefined;
  let mediaPath = ctx.MediaPath;
  try {
    if (!mediaPath && ctx.MediaUrl) {
      // Bound the whole download (headers and body) so a stalled server cannot
      // hang transcription forever; the abort surfaces as a rejection that the
      // catch below reports like any other failure.
      const res = await fetch(ctx.MediaUrl, {
        signal: AbortSignal.timeout(timeoutMs),
      });
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      const arrayBuf = await res.arrayBuffer();
      const buffer = Buffer.from(arrayBuf);
      // Random file name avoids collisions between concurrent transcriptions.
      tmpPath = path.join(
        os.tmpdir(),
        `clawdbot-audio-${crypto.randomUUID()}.ogg`,
      );
      await fs.writeFile(tmpPath, buffer);
      mediaPath = tmpPath;
      if (shouldLogVerbose()) {
        logVerbose(
          `Downloaded audio for transcription (${(buffer.length / (1024 * 1024)).toFixed(2)}MB) -> ${tmpPath}`,
        );
      }
    }
    if (!mediaPath) return undefined;

    // Templates must see the local file path, even when it was just downloaded.
    const templCtx: MsgContext = { ...ctx, MediaPath: mediaPath };
    const argv = hasToolTranscriber
      ? [AUDIO_TRANSCRIPTION_BINARY, ...(toolTranscriber?.args ?? [])].map(
          // The fixed binary name (index 0) is used verbatim, not templated.
          (part, index) => (index === 0 ? part : applyTemplate(part, templCtx)),
        )
      : (legacyTranscriber?.command ?? []).map((part) =>
          applyTemplate(part, templCtx),
        );
    if (shouldLogVerbose()) {
      logVerbose(`Transcribing audio via command: ${argv.join(" ")}`);
    }
    const { stdout } = await runExec(argv[0], argv.slice(1), {
      timeoutMs,
      maxBuffer: 5 * 1024 * 1024,
    });
    const text = stdout.trim();
    if (!text) return undefined;
    return { text };
  } catch (err) {
    runtime.error?.(`Audio transcription failed: ${String(err)}`);
    return undefined;
  } finally {
    if (tmpPath) {
      // Best-effort cleanup: not awaited, and unlink errors are ignored on purpose.
      void fs.unlink(tmpPath).catch(() => {});
    }
  }
}