feat(skills): add media/transcription helpers
This commit is contained in:
42
skills/openai-whisper-api/SKILL.md
Normal file
42
skills/openai-whisper-api/SKILL.md
Normal file
@@ -0,0 +1,42 @@
|
||||
---
|
||||
name: openai-whisper-api
|
||||
description: Transcribe audio via OpenAI Audio Transcriptions API (Whisper).
|
||||
metadata: {"clawdis":{"requires":{"bins":["curl"],"env":["OPENAI_API_KEY"]},"primaryEnv":"OPENAI_API_KEY"}}
|
||||
---
|
||||
|
||||
# OpenAI Whisper API (curl)
|
||||
|
||||
Transcribe an audio file via OpenAI’s `/v1/audio/transcriptions` endpoint.
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a
|
||||
```
|
||||
|
||||
Defaults:
|
||||
- Model: `whisper-1`
|
||||
- Output: the input path with its extension replaced by `.txt` (or `.json` with `--json`), e.g. `audio.m4a` → `audio.txt`
|
||||
|
||||
## Useful flags
|
||||
|
||||
```bash
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.ogg --model whisper-1 --out /tmp/transcript.txt
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --language en
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --prompt "Speaker names: Peter, Daniel"
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --json --out /tmp/transcript.json
|
||||
```
|
||||
|
||||
## API key
|
||||
|
||||
Set `OPENAI_API_KEY`, or configure it in `~/.clawdis/clawdis.json`:
|
||||
|
||||
```json5
|
||||
{
|
||||
skills: {
|
||||
"openai-whisper-api": {
|
||||
apiKey: "OPENAI_KEY_HERE"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
85
skills/openai-whisper-api/scripts/transcribe.sh
Normal file
85
skills/openai-whisper-api/scripts/transcribe.sh
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env bash
|
||||
# Transcribe an audio file with the OpenAI Audio Transcriptions endpoint
# using curl. Requires the OPENAI_API_KEY environment variable and prints the
# path of the written transcript on stdout.
#
# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
|
||||
|
||||
#######################################
# Print the command synopsis and terminate the script.
# Arguments:
#   $1 - optional exit status; defaults to 2 (usage error). Anything that is
#        not a non-negative integer is treated as 2.
# Outputs:
#   The synopsis on stderr, or on stdout when the status is 0 so that help
#   requested on purpose can be piped or paged.
# Returns:
#   Never returns; exits with the selected status.
#######################################
usage() {
  local status="${1:-2}"
  local fd=2

  if [[ ! "$status" =~ ^[0-9]+$ ]]; then
    status=2
  fi
  if [[ "$status" == "0" ]]; then
    fd=1
  fi

  # Quoted delimiter: the text below is emitted literally, no expansion.
  cat >&"$fd" <<'EOF'
Usage:
transcribe.sh <audio-file> [--model whisper-1] [--out /path/to/out.txt] [--language en] [--prompt "hint"] [--json]
EOF
  exit "$status"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Command-line parsing.
# The first positional argument is the audio file; everything after it must
# be one of the long options handled in the loop below.
# ---------------------------------------------------------------------------

# No argument at all, or an explicit help request: show the synopsis and stop.
case "${1:-}" in
  "" | -h | --help)
    usage
    ;;
esac

in="$1"
shift

# Defaults; each one may be overridden by the matching option.
model="whisper-1"
out=""                 # empty: derived from the input path further down
language=""            # empty: the field is not sent
prompt=""              # empty: the field is not sent
response_format="text" # switched to "json" by --json

while [[ $# -gt 0 ]]; do
  case "$1" in
    --model | --out | --language | --prompt)
      # Without this check a trailing option makes `shift 2` fail, and
      # `set -e` then ends the script without any message.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        usage
      fi
      case "$1" in
        --model) model="$2" ;;
        --out) out="$2" ;;
        --language) language="$2" ;;
        --prompt) prompt="$2" ;;
      esac
      shift 2
      ;;
    --json)
      response_format="json"
      shift
      ;;
    -h | --help)
      # Help is honoured anywhere on the command line, not only first.
      usage
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      ;;
  esac
done
|
||||
|
||||
# ---------------------------------------------------------------------------
# Pre-flight checks and output path resolution.
# ---------------------------------------------------------------------------

# The audio must be an existing regular file.
if [[ ! -f "$in" ]]; then
  echo "File not found: $in" >&2
  exit 1
fi

# The request is authenticated with the key from the environment.
if [[ -z "${OPENAI_API_KEY:-}" ]]; then
  echo "Missing OPENAI_API_KEY" >&2
  exit 1
fi

# Default output: the input path with its extension swapped for .txt/.json.
if [[ -z "$out" ]]; then
  # Split directory and file name so that a dot in a directory name
  # (/data/v1.2/recording) is never mistaken for the file's extension.
  in_dir=""
  if [[ "$in" == */* ]]; then
    in_dir="${in%/*}/"
  fi
  in_name="${in##*/}"
  in_stem="${in_name%.*}"
  # A dotfile such as ".audio" would leave an empty stem; keep the full name.
  if [[ -z "$in_stem" ]]; then
    in_stem="$in_name"
  fi

  if [[ "$response_format" == "json" ]]; then
    out="${in_dir}${in_stem}.json"
  else
    out="${in_dir}${in_stem}.txt"
  fi
fi

# Writing the transcript onto the audio file would destroy the input.
if [[ -e "$out" && "$out" -ef "$in" ]]; then
  echo "Output path is the same file as the input: $out" >&2
  exit 1
fi

# `--` keeps a path that starts with '-' from being parsed as an option.
mkdir -p -- "$(dirname -- "$out")"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Upload the audio and store the transcript.
#
# The response body is captured in a temporary file and copied to "$out" only
# after the API answered with a 2xx status, so a failed request no longer
# leaves an error payload behind as if it were a transcript.
# ---------------------------------------------------------------------------

# curl -F treats ',' and ';' in a bare file name as separators. A
# double-quoted name is taken literally, provided embedded backslashes and
# double quotes are escaped with a backslash.
upload_path=""
for ((i = 0; i < ${#in}; i++)); do
  ch="${in:i:1}"
  case "$ch" in
    '\' | '"') upload_path+="\\${ch}" ;;
    *) upload_path+="$ch" ;;
  esac
done

body_file="$(mktemp)" || {
  echo "Could not create a temporary file" >&2
  exit 1
}
trap 'rm -f -- "$body_file"' EXIT

# Text fields use --form-string so the value is sent verbatim; with -F a
# leading '@' or '<' would make curl read a local file and ';type=' would be
# parsed as a content type.
curl_args=(
  -sS
  -o "$body_file"
  -w '%{http_code}'
  -H "Authorization: Bearer ${OPENAI_API_KEY}"
  -H "Accept: application/json"
  -F "file=@\"${upload_path}\""
  --form-string "model=${model}"
  --form-string "response_format=${response_format}"
)
# Optional fields are only sent when a value was supplied.
if [[ -n "$language" ]]; then
  curl_args+=(--form-string "language=${language}")
fi
if [[ -n "$prompt" ]]; then
  curl_args+=(--form-string "prompt=${prompt}")
fi

curl_status=0
http_code="$(curl "${curl_args[@]}" https://api.openai.com/v1/audio/transcriptions)" \
  || curl_status=$?
if ((curl_status != 0)); then
  # Transport-level failure; curl -S has already reported the cause on stderr.
  exit "$curl_status"
fi

if [[ "$http_code" != 2?? ]]; then
  echo "Transcription request failed (HTTP ${http_code}):" >&2
  cat -- "$body_file" >&2
  echo >&2
  exit 1
fi

# Plain redirect (not mv) so an existing output file keeps its mode and links.
cat -- "$body_file" >"$out"

# Report where the transcript was written.
printf '%s\n' "$out"
|
||||
Reference in New Issue
Block a user