Files
huobao-drama/infrastructure/external/ffmpeg/ffmpeg.go
2026-01-15 09:44:09 +08:00

613 lines
16 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package ffmpeg
import (
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/drama-generator/backend/pkg/logger"
)
type FFmpeg struct {
log *logger.Logger
tempDir string
}
func NewFFmpeg(log *logger.Logger) *FFmpeg {
tempDir := filepath.Join(os.TempDir(), "drama-video-merge")
os.MkdirAll(tempDir, 0755)
return &FFmpeg{
log: log,
tempDir: tempDir,
}
}
type VideoClip struct {
URL string
Duration float64
StartTime float64
EndTime float64
Transition map[string]interface{}
}
type MergeOptions struct {
OutputPath string
Clips []VideoClip
}
func (f *FFmpeg) MergeVideos(opts *MergeOptions) (string, error) {
if len(opts.Clips) == 0 {
return "", fmt.Errorf("no video clips to merge")
}
f.log.Infow("Starting video merge with trimming", "clips_count", len(opts.Clips))
// 下载并裁剪所有视频片段
trimmedPaths := make([]string, 0, len(opts.Clips))
downloadedPaths := make([]string, 0, len(opts.Clips))
for i, clip := range opts.Clips {
// 下载原始视频
downloadPath := filepath.Join(f.tempDir, fmt.Sprintf("download_%d_%d.mp4", time.Now().Unix(), i))
localPath, err := f.downloadVideo(clip.URL, downloadPath)
if err != nil {
f.cleanup(downloadedPaths)
f.cleanup(trimmedPaths)
return "", fmt.Errorf("failed to download clip %d: %w", i, err)
}
downloadedPaths = append(downloadedPaths, localPath)
// 裁剪视频片段根据StartTime和EndTime
trimmedPath := filepath.Join(f.tempDir, fmt.Sprintf("trimmed_%d_%d.mp4", time.Now().Unix(), i))
err = f.trimVideo(localPath, trimmedPath, clip.StartTime, clip.EndTime)
if err != nil {
f.cleanup(downloadedPaths)
f.cleanup(trimmedPaths)
return "", fmt.Errorf("failed to trim clip %d: %w", i, err)
}
trimmedPaths = append(trimmedPaths, trimmedPath)
f.log.Infow("Clip trimmed",
"index", i,
"start", clip.StartTime,
"end", clip.EndTime,
"duration", clip.EndTime-clip.StartTime)
}
// 清理下载的原始文件
f.cleanup(downloadedPaths)
// 确保输出目录存在
outputDir := filepath.Dir(opts.OutputPath)
if err := os.MkdirAll(outputDir, 0755); err != nil {
f.cleanup(trimmedPaths)
return "", fmt.Errorf("failed to create output directory: %w", err)
}
// 合并裁剪后的视频片段(支持转场效果)
err := f.concatenateVideosWithTransitions(trimmedPaths, opts.Clips, opts.OutputPath)
// 清理裁剪后的临时文件
f.cleanup(trimmedPaths)
if err != nil {
return "", fmt.Errorf("failed to concatenate videos: %w", err)
}
f.log.Infow("Video merge completed", "output", opts.OutputPath)
return opts.OutputPath, nil
}
func (f *FFmpeg) downloadVideo(url, destPath string) (string, error) {
f.log.Infow("Downloading video", "url", url, "dest", destPath)
resp, err := http.Get(url)
if err != nil {
return "", fmt.Errorf("failed to download: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("bad status: %s", resp.Status)
}
out, err := os.Create(destPath)
if err != nil {
return "", fmt.Errorf("failed to create file: %w", err)
}
defer out.Close()
_, err = io.Copy(out, resp.Body)
if err != nil {
return "", fmt.Errorf("failed to save file: %w", err)
}
return destPath, nil
}
func (f *FFmpeg) trimVideo(inputPath, outputPath string, startTime, endTime float64) error {
f.log.Infow("Trimming video",
"input", inputPath,
"output", outputPath,
"start", startTime,
"end", endTime)
// 如果startTime和endTime都为0或者endTime <= startTime复制整个视频
// 使用重新编码而非-c copy以确保输出文件完整性
if (startTime == 0 && endTime == 0) || endTime <= startTime {
f.log.Infow("No valid trim range, re-encoding entire video")
cmd := exec.Command("ffmpeg",
"-i", inputPath,
"-c:v", "libx264",
"-preset", "fast",
"-crf", "23",
"-c:a", "aac",
"-b:a", "128k",
"-movflags", "+faststart",
"-y",
outputPath,
)
output, err := cmd.CombinedOutput()
if err != nil {
f.log.Errorw("FFmpeg re-encode failed", "error", err, "output", string(output))
return fmt.Errorf("ffmpeg re-encode failed: %w, output: %s", err, string(output))
}
f.log.Infow("Video re-encoded successfully", "output", outputPath)
return nil
}
// 使用FFmpeg裁剪视频
// -ss: 开始时间(秒)
// -to/-t: 结束时间或持续时间
// 使用重新编码而非-c copy以确保输出文件完整性避免Windows环境下流信息丢失
var cmd *exec.Cmd
if endTime > 0 {
// 有明确的结束时间
cmd = exec.Command("ffmpeg",
"-i", inputPath,
"-ss", fmt.Sprintf("%.2f", startTime),
"-to", fmt.Sprintf("%.2f", endTime),
"-c:v", "libx264",
"-preset", "fast",
"-crf", "23",
"-c:a", "aac",
"-b:a", "128k",
"-movflags", "+faststart",
"-y",
outputPath,
)
} else {
// 只有开始时间,裁剪到视频末尾
cmd = exec.Command("ffmpeg",
"-i", inputPath,
"-ss", fmt.Sprintf("%.2f", startTime),
"-c:v", "libx264",
"-preset", "fast",
"-crf", "23",
"-c:a", "aac",
"-b:a", "128k",
"-movflags", "+faststart",
"-y",
outputPath,
)
}
output, err := cmd.CombinedOutput()
if err != nil {
f.log.Errorw("FFmpeg trim failed", "error", err, "output", string(output))
return fmt.Errorf("ffmpeg trim failed: %w, output: %s", err, string(output))
}
f.log.Infow("Video trimmed successfully", "output", outputPath)
return nil
}
func (f *FFmpeg) concatenateVideosWithTransitions(inputPaths []string, clips []VideoClip, outputPath string) error {
if len(inputPaths) == 0 {
return fmt.Errorf("no input paths")
}
// 如果只有一个视频,直接复制
if len(inputPaths) == 1 {
f.log.Infow("Only one clip, copying directly")
return f.copyFile(inputPaths[0], outputPath)
}
// 检查是否有转场效果
hasTransitions := false
for _, clip := range clips {
if clip.Transition != nil && len(clip.Transition) > 0 {
hasTransitions = true
break
}
}
// 如果没有转场效果,使用简单拼接
if !hasTransitions {
f.log.Infow("No transitions, using simple concatenation")
return f.concatenateVideos(inputPaths, outputPath)
}
// 使用xfade滤镜添加转场效果
f.log.Infow("Merging with transitions", "clips_count", len(inputPaths))
return f.mergeWithXfade(inputPaths, clips, outputPath)
}
func (f *FFmpeg) concatenateVideos(inputPaths []string, outputPath string) error {
// 创建文件列表
listFile := filepath.Join(f.tempDir, fmt.Sprintf("filelist_%d.txt", time.Now().Unix()))
defer os.Remove(listFile)
var content strings.Builder
for _, path := range inputPaths {
content.WriteString(fmt.Sprintf("file '%s'\n", path))
}
if err := os.WriteFile(listFile, []byte(content.String()), 0644); err != nil {
return fmt.Errorf("failed to create file list: %w", err)
}
// 使用FFmpeg合并视频
// -f concat: 使用concat demuxer
// -safe 0: 允许不安全的文件路径
// -i: 输入文件列表
// -c copy: 直接复制流,不重新编码(速度快)
cmd := exec.Command("ffmpeg",
"-f", "concat",
"-safe", "0",
"-i", listFile,
"-c", "copy",
"-y", // 覆盖输出文件
outputPath,
)
output, err := cmd.CombinedOutput()
if err != nil {
f.log.Errorw("FFmpeg failed", "error", err, "output", string(output))
return fmt.Errorf("ffmpeg execution failed: %w, output: %s", err, string(output))
}
f.log.Infow("FFmpeg concatenation completed", "output", outputPath)
return nil
}
func (f *FFmpeg) mergeWithXfade(inputPaths []string, clips []VideoClip, outputPath string) error {
// 使用xfade滤镜进行转场
// 构建输入参数
args := []string{}
for _, path := range inputPaths {
args = append(args, "-i", path)
}
// 检测每个视频是否有音频流
audioStreams := make([]bool, len(inputPaths))
hasAnyAudio := false
for i, path := range inputPaths {
audioStreams[i] = f.hasAudioStream(path)
if audioStreams[i] {
hasAnyAudio = true
}
f.log.Infow("Audio stream detection", "index", i, "path", path, "has_audio", audioStreams[i])
}
f.log.Infow("Overall audio detection", "has_any_audio", hasAnyAudio, "audio_streams", audioStreams)
// 检测视频分辨率,找到最大分辨率作为目标分辨率
maxWidth := 0
maxHeight := 0
for i, path := range inputPaths {
width, height := f.getVideoResolution(path)
if width > maxWidth {
maxWidth = width
}
if height > maxHeight {
maxHeight = height
}
f.log.Infow("Video resolution detection", "index", i, "width", width, "height", height)
}
f.log.Infow("Target resolution", "width", maxWidth, "height", maxHeight)
// 为每个视频流添加缩放滤镜,统一分辨率
var scaleFilters []string
for i := 0; i < len(inputPaths); i++ {
// 使用scale滤镜缩放到目标分辨率pad添加黑边保持长宽比
scaleFilters = append(scaleFilters,
fmt.Sprintf("[%d:v]scale=%d:%d:force_original_aspect_ratio=decrease,pad=%d:%d:(ow-iw)/2:(oh-ih)/2[v%d]",
i, maxWidth, maxHeight, maxWidth, maxHeight, i))
}
// 构建filter_complex
// 例如: [0:v][1:v]xfade=transition=fade:duration=1:offset=5[v01];[v01][2:v]xfade=transition=fade:duration=1:offset=10[out]
// 构建转场滤镜,使用缩放后的视频流
var transitionFilters []string
var offset float64 = 0
for i := 0; i < len(inputPaths)-1; i++ {
// 获取当前片段的时长
clipDuration := clips[i].Duration
if clips[i].EndTime > 0 && clips[i].StartTime >= 0 {
clipDuration = clips[i].EndTime - clips[i].StartTime
}
// 获取转场类型和时长
transitionType := "fade" // 默认淡入淡出
transitionDuration := 1.0 // 默认转场时长为1秒
if clips[i].Transition != nil {
// 读取转场类型
if tType, ok := clips[i].Transition["type"].(string); ok && tType != "" {
transitionType = f.mapTransitionType(tType)
f.log.Infow("Using transition type", "type", tType, "mapped", transitionType)
}
// 读取转场时长
if tDuration, ok := clips[i].Transition["duration"].(float64); ok && tDuration > 0 {
transitionDuration = tDuration
}
}
// 计算转场开始的时间点
// 转场在两个片段的交界处,从前一个片段结束前 transitionDuration/2 开始
// 这样转场效果会平均分布在两个片段的交界处
offset += clipDuration - (transitionDuration / 2)
if offset < 0 {
offset = 0
}
f.log.Infow("Transition settings",
"clip_index", i,
"type", transitionType,
"duration", transitionDuration,
"offset", offset,
"clip_duration", clipDuration)
var inputLabel, outputLabel string
if i == 0 {
inputLabel = fmt.Sprintf("[v0][v1]")
} else {
inputLabel = fmt.Sprintf("[vx%02d][v%d]", i-1, i+1)
}
if i == len(inputPaths)-2 {
outputLabel = "[outv]"
} else {
outputLabel = fmt.Sprintf("[vx%02d]", i)
}
filterPart := fmt.Sprintf("%sxfade=transition=%s:duration=%.1f:offset=%.1f%s",
inputLabel, transitionType, transitionDuration, offset, outputLabel)
transitionFilters = append(transitionFilters, filterPart)
}
// 合并缩放和转场滤镜
var videoFilters []string
videoFilters = append(videoFilters, scaleFilters...)
videoFilters = append(videoFilters, transitionFilters...)
filterComplex := strings.Join(videoFilters, ";")
// 音频处理:如果有任何视频包含音频流,则处理音频
var fullFilter string
if hasAnyAudio {
// 为没有音频的视频生成静音轨道,确保所有输入音频流一致
var silenceFilters []string
for i := 0; i < len(inputPaths); i++ {
if !audioStreams[i] {
// 计算该视频的时长
clipDuration := clips[i].Duration
if clips[i].EndTime > 0 && clips[i].StartTime >= 0 {
clipDuration = clips[i].EndTime - clips[i].StartTime
}
// anullsrc是源滤镜不接受输入使用duration参数指定时长
silenceFilters = append(silenceFilters,
fmt.Sprintf("anullsrc=channel_layout=stereo:sample_rate=44100:duration=%.2f[a%d]", clipDuration, i))
}
}
// 拼接所有音频流(包括生成的静音流)
var audioConcat strings.Builder
for i := 0; i < len(inputPaths); i++ {
if audioStreams[i] {
audioConcat.WriteString(fmt.Sprintf("[%d:a]", i))
} else {
audioConcat.WriteString(fmt.Sprintf("[a%d]", i))
}
}
audioConcat.WriteString(fmt.Sprintf("concat=n=%d:v=0:a=1[outa]", len(inputPaths)))
// 构建完整滤镜:先生成静音流,再拼接音频
if len(silenceFilters) > 0 {
fullFilter = filterComplex + ";" + strings.Join(silenceFilters, ";") + ";" + audioConcat.String()
} else {
fullFilter = filterComplex + ";" + audioConcat.String()
}
} else {
// 所有视频都无音频流,只处理视频
fullFilter = filterComplex
}
// 构建完整命令
args = append(args,
"-filter_complex", fullFilter,
"-map", "[outv]",
)
// 仅在有任何音频时映射音频输出
if hasAnyAudio {
args = append(args, "-map", "[outa]")
}
args = append(args,
"-c:v", "libx264",
"-preset", "medium",
"-crf", "23",
)
// 仅在有任何音频时设置音频编码参数
if hasAnyAudio {
args = append(args,
"-c:a", "aac",
"-b:a", "128k",
)
}
args = append(args,
"-y",
outputPath,
)
f.log.Infow("Running FFmpeg with transitions", "filter", fullFilter, "has_any_audio", hasAnyAudio)
cmd := exec.Command("ffmpeg", args...)
output, err := cmd.CombinedOutput()
if err != nil {
f.log.Errorw("FFmpeg xfade failed", "error", err, "output", string(output))
return fmt.Errorf("ffmpeg xfade failed: %w, output: %s", err, string(output))
}
f.log.Infow("Video merged with transitions successfully")
return nil
}
func (f *FFmpeg) mapTransitionType(transType string) string {
// 将前端传入的转场类型映射为FFmpeg xfade支持的类型
// FFmpeg xfade支持的完整转场列表: https://ffmpeg.org/ffmpeg-filters.html#xfade
switch strings.ToLower(transType) {
// 淡入淡出类
case "fade", "fadein", "fadeout":
return "fade"
case "fadeblack":
return "fadeblack"
case "fadewhite":
return "fadewhite"
case "fadegrays":
return "fadegrays"
// 滑动类
case "slideleft":
return "slideleft"
case "slideright":
return "slideright"
case "slideup":
return "slideup"
case "slidedown":
return "slidedown"
// 擦除类
case "wipeleft":
return "wipeleft"
case "wiperight":
return "wiperight"
case "wipeup":
return "wipeup"
case "wipedown":
return "wipedown"
// 圆形类
case "circleopen":
return "circleopen"
case "circleclose":
return "circleclose"
// 矩形打开/关闭类
case "horzopen":
return "horzopen"
case "horzclose":
return "horzclose"
case "vertopen":
return "vertopen"
case "vertclose":
return "vertclose"
// 其他特效
case "dissolve":
return "dissolve"
case "distance":
return "distance"
case "pixelize":
return "pixelize"
default:
return "fade" // 默认淡入淡出
}
}
func (f *FFmpeg) hasAudioStream(videoPath string) bool {
cmd := exec.Command("ffprobe",
"-v", "error",
"-select_streams", "a:0",
"-show_entries", "stream=codec_type",
"-of", "default=noprint_wrappers=1:nokey=1",
videoPath,
)
output, err := cmd.CombinedOutput()
if err != nil {
return false
}
result := strings.TrimSpace(string(output))
return result == "audio"
}
func (f *FFmpeg) getVideoResolution(videoPath string) (int, int) {
cmd := exec.Command("ffprobe",
"-v", "error",
"-select_streams", "v:0",
"-show_entries", "stream=width,height",
"-of", "csv=p=0",
videoPath,
)
output, err := cmd.CombinedOutput()
if err != nil {
f.log.Warnw("Failed to get video resolution", "path", videoPath, "error", err)
return 1920, 1080 // 默认分辨率
}
result := strings.TrimSpace(string(output))
parts := strings.Split(result, ",")
if len(parts) != 2 {
f.log.Warnw("Invalid resolution format", "output", result)
return 1920, 1080
}
var width, height int
fmt.Sscanf(parts[0], "%d", &width)
fmt.Sscanf(parts[1], "%d", &height)
if width <= 0 || height <= 0 {
return 1920, 1080
}
return width, height
}
func (f *FFmpeg) copyFile(src, dst string) error {
cmd := exec.Command("cp", src, dst)
output, err := cmd.CombinedOutput()
if err != nil {
f.log.Errorw("File copy failed", "error", err, "output", string(output))
return fmt.Errorf("copy failed: %w", err)
}
return nil
}
func (f *FFmpeg) cleanup(paths []string) {
for _, path := range paths {
if err := os.Remove(path); err != nil {
f.log.Warnw("Failed to cleanup file", "path", path, "error", err)
}
}
}
func (f *FFmpeg) CleanupTempDir() error {
return os.RemoveAll(f.tempDir)
}