feat(skills): add media/transcription helpers

2025-12-20 12:53:09 +00:00
parent e0cd5650c5
commit e1a3bab7e5
10 changed files with 579 additions and 31 deletions
--- a/skills/openai-whisper-api/SKILL.md
+++ b/skills/openai-whisper-api/SKILL.md
@@ -0,0 +1,42 @@
+---
+name: openai-whisper-api
+description: Transcribe audio via OpenAI Audio Transcriptions API (Whisper).
+metadata: {"clawdis":{"requires":{"bins":["curl"],"env":["OPENAI_API_KEY"]},"primaryEnv":"OPENAI_API_KEY"}}
+---
+
+# OpenAI Whisper API (curl)
+
+Transcribe an audio file via OpenAI’s `/v1/audio/transcriptions` endpoint.
+
+## Quick start
+
+```bash
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a
+```
+
+Defaults:
+- Model: `whisper-1`
+- Output: the input path with its extension replaced by `.txt` (or `.json` with `--json`), e.g. `audio.m4a` → `audio.txt`
+
+## Useful flags
+
+```bash
+{baseDir}/scripts/transcribe.sh /path/to/audio.ogg --model whisper-1 --out /tmp/transcript.txt
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --language en
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --prompt "Speaker names: Peter, Daniel"
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --json --out /tmp/transcript.json
+```
+
+## API key
+
+Set `OPENAI_API_KEY`, or configure it in `~/.clawdis/clawdis.json`:
+
+```json5
+{
+  skills: {
+    "openai-whisper-api": {
+      apiKey: "OPENAI_KEY_HERE"
+    }
+  }
+}
+```
--- a/skills/openai-whisper-api/scripts/transcribe.sh
+++ b/skills/openai-whisper-api/scripts/transcribe.sh
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+# Transcribe an audio file via OpenAI's /v1/audio/transcriptions endpoint.
+# Needs curl and OPENAI_API_KEY; prints the output path on success.
+set -euo pipefail
+
+# Print the usage text on stderr and exit with status 2.
+usage() {
+  cat >&2 <<'EOF'
+Usage:
+  transcribe.sh <audio-file> [--model whisper-1] [--out /path/to/out.txt] [--language en] [--prompt "hint"] [--json]
+EOF
+  exit 2
+}
+
+if [[ "${1:-}" == "" || "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+  usage
+fi
+
+in="$1"
+shift
+
+model="whisper-1"
+out=""
+language=""
+prompt=""
+response_format="text"
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --model | --out | --language | --prompt)
+      # Without this check, `shift 2` would abort silently under `set -e`.
+      if [[ $# -lt 2 ]]; then
+        echo "Missing value for $1" >&2
+        usage
+      fi
+      case "$1" in
+        --model) model="$2" ;;
+        --out) out="$2" ;;
+        --language) language="$2" ;;
+        --prompt) prompt="$2" ;;
+      esac
+      shift 2
+      ;;
+    --json)
+      response_format="json"
+      shift
+      ;;
+    *)
+      echo "Unknown arg: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+if [[ ! -f "$in" ]]; then
+  echo "File not found: $in" >&2
+  exit 1
+fi
+
+if [[ "${OPENAI_API_KEY:-}" == "" ]]; then
+  echo "Missing OPENAI_API_KEY" >&2
+  exit 1
+fi
+
+if [[ "$out" == "" ]]; then
+  # Swap the extension for .txt/.json, but only strip one that belongs to the file name itself.
+  base="$in"
+  if [[ "${in##*/}" == ?*.* ]]; then base="${in%.*}"; fi
+  if [[ "$response_format" == "json" ]]; then out="${base}.json"; else out="${base}.txt"; fi
+fi
+
+mkdir -p -- "$(dirname -- "$out")"
+
+# --form-string sends values literally; -F would treat a leading @ or < as "read from file".
+fields=(--form-string "model=${model}" --form-string "response_format=${response_format}")
+if [[ -n "$language" ]]; then fields+=(--form-string "language=${language}"); fi
+if [[ -n "$prompt" ]]; then fields+=(--form-string "prompt=${prompt}"); fi
+
+# --fail turns HTTP 4xx/5xx into a non-zero exit so an API error is never reported as a transcript.
+curl -sS --fail https://api.openai.com/v1/audio/transcriptions \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -H "Accept: application/json" \
+  -F "file=@${in}" "${fields[@]}" >"$out"
+
+printf '%s\n' "$out"