diff --git a/infrastructure/external/ffmpeg/ffmpeg.go b/infrastructure/external/ffmpeg/ffmpeg.go index dca98b6..b5c450a 100644 --- a/infrastructure/external/ffmpeg/ffmpeg.go +++ b/infrastructure/external/ffmpeg/ffmpeg.go @@ -276,9 +276,46 @@ func (f *FFmpeg) mergeWithXfade(inputPaths []string, clips []VideoClip, outputPa args = append(args, "-i", path) } + // 检测每个视频是否有音频流 + audioStreams := make([]bool, len(inputPaths)) + hasAnyAudio := false + for i, path := range inputPaths { + audioStreams[i] = f.hasAudioStream(path) + if audioStreams[i] { + hasAnyAudio = true + } + f.log.Infow("Audio stream detection", "index", i, "path", path, "has_audio", audioStreams[i]) + } + f.log.Infow("Overall audio detection", "has_any_audio", hasAnyAudio, "audio_streams", audioStreams) + + // 检测视频分辨率,找到最大分辨率作为目标分辨率 + maxWidth := 0 + maxHeight := 0 + for i, path := range inputPaths { + width, height := f.getVideoResolution(path) + if width > maxWidth { + maxWidth = width + } + if height > maxHeight { + maxHeight = height + } + f.log.Infow("Video resolution detection", "index", i, "width", width, "height", height) + } + f.log.Infow("Target resolution", "width", maxWidth, "height", maxHeight) + + // 为每个视频流添加缩放滤镜,统一分辨率 + var scaleFilters []string + for i := 0; i < len(inputPaths); i++ { + // 使用scale滤镜缩放到目标分辨率,pad添加黑边保持长宽比 + scaleFilters = append(scaleFilters, + fmt.Sprintf("[%d:v]scale=%d:%d:force_original_aspect_ratio=decrease,pad=%d:%d:(ow-iw)/2:(oh-ih)/2[v%d]", + i, maxWidth, maxHeight, maxWidth, maxHeight, i)) + } + // 构建filter_complex // 例如: [0:v][1:v]xfade=transition=fade:duration=1:offset=5[v01];[v01][2:v]xfade=transition=fade:duration=1:offset=10[out] - var filterParts []string + // 构建转场滤镜,使用缩放后的视频流 + var transitionFilters []string var offset float64 = 0 for i := 0; i < len(inputPaths)-1; i++ { @@ -321,49 +358,99 @@ func (f *FFmpeg) mergeWithXfade(inputPaths []string, clips []VideoClip, outputPa var inputLabel, outputLabel string if i == 0 { - inputLabel = fmt.Sprintf("[0:v][1:v]") + inputLabel = fmt.Sprintf("[v0][v1]") } else { - inputLabel = fmt.Sprintf("[v%02d][%d:v]", i-1, i+1) + inputLabel = fmt.Sprintf("[vx%02d][v%d]", i-1, i+1) } if i == len(inputPaths)-2 { outputLabel = "[outv]" } else { - outputLabel = fmt.Sprintf("[v%02d]", i) + outputLabel = fmt.Sprintf("[vx%02d]", i) } filterPart := fmt.Sprintf("%sxfade=transition=%s:duration=%.1f:offset=%.1f%s", inputLabel, transitionType, transitionDuration, offset, outputLabel) - filterParts = append(filterParts, filterPart) + transitionFilters = append(transitionFilters, filterPart) } - filterComplex := strings.Join(filterParts, ";") + // 合并缩放和转场滤镜 + var videoFilters []string + videoFilters = append(videoFilters, scaleFilters...) + videoFilters = append(videoFilters, transitionFilters...) + filterComplex := strings.Join(videoFilters, ";") - // 音频处理:直接concat连接,不做交叉淡入淡出 - // 这样可以避免音频提前播放的问题 - var audioConcat strings.Builder - for i := 0; i < len(inputPaths); i++ { - audioConcat.WriteString(fmt.Sprintf("[%d:a]", i)) + // 音频处理:如果有任何视频包含音频流,则处理音频 + var fullFilter string + if hasAnyAudio { + // 为没有音频的视频生成静音轨道,确保所有输入音频流一致 + var silenceFilters []string + for i := 0; i < len(inputPaths); i++ { + if !audioStreams[i] { + // 计算该视频的时长 + clipDuration := clips[i].Duration + if clips[i].EndTime > 0 && clips[i].StartTime >= 0 { + clipDuration = clips[i].EndTime - clips[i].StartTime + } + // anullsrc是源滤镜,不接受输入,使用duration参数指定时长 + silenceFilters = append(silenceFilters, + fmt.Sprintf("anullsrc=channel_layout=stereo:sample_rate=44100:duration=%.2f[a%d]", clipDuration, i)) + } + } + + // 拼接所有音频流(包括生成的静音流) + var audioConcat strings.Builder + for i := 0; i < len(inputPaths); i++ { + if audioStreams[i] { + audioConcat.WriteString(fmt.Sprintf("[%d:a]", i)) + } else { + audioConcat.WriteString(fmt.Sprintf("[a%d]", i)) + } + } + audioConcat.WriteString(fmt.Sprintf("concat=n=%d:v=0:a=1[outa]", len(inputPaths))) + + // 构建完整滤镜:先生成静音流,再拼接音频 + if len(silenceFilters) > 0 { + fullFilter = filterComplex + ";" + strings.Join(silenceFilters, ";") + ";" + audioConcat.String() + } else { + fullFilter = filterComplex + ";" + audioConcat.String() + } + } else { + // 所有视频都无音频流,只处理视频 + fullFilter = filterComplex } - audioConcat.WriteString(fmt.Sprintf("concat=n=%d:v=0:a=1[outa]", len(inputPaths))) - - fullFilter := filterComplex + ";" + audioConcat.String() // 构建完整命令 args = append(args, "-filter_complex", fullFilter, "-map", "[outv]", - "-map", "[outa]", + ) + + // 仅在有任何音频时映射音频输出 + if hasAnyAudio { + args = append(args, "-map", "[outa]") + } + + args = append(args, "-c:v", "libx264", "-preset", "medium", "-crf", "23", - "-c:a", "aac", - "-b:a", "128k", + ) + + // 仅在有任何音频时设置音频编码参数 + if hasAnyAudio { + args = append(args, + "-c:a", "aac", + "-b:a", "128k", + ) + } + + args = append(args, "-y", outputPath, ) - f.log.Infow("Running FFmpeg with transitions", "filter", fullFilter) + f.log.Infow("Running FFmpeg with transitions", "filter", fullFilter, "has_any_audio", hasAnyAudio) cmd := exec.Command("ffmpeg", args...) output, err := cmd.CombinedOutput() @@ -439,6 +526,57 @@ func (f *FFmpeg) mapTransitionType(transType string) string { } } +func (f *FFmpeg) hasAudioStream(videoPath string) bool { + cmd := exec.Command("ffprobe", + "-v", "error", + "-select_streams", "a:0", + "-show_entries", "stream=codec_type", + "-of", "default=noprint_wrappers=1:nokey=1", + videoPath, + ) + + output, err := cmd.CombinedOutput() + if err != nil { + return false + } + + result := strings.TrimSpace(string(output)) + return result == "audio" +} + +func (f *FFmpeg) getVideoResolution(videoPath string) (int, int) { + cmd := exec.Command("ffprobe", + "-v", "error", + "-select_streams", "v:0", + "-show_entries", "stream=width,height", + "-of", "csv=p=0", + videoPath, + ) + + output, err := cmd.CombinedOutput() + if err != nil { + f.log.Warnw("Failed to get video resolution", "path", videoPath, "error", err) + return 1920, 1080 // 默认分辨率 + } + + result := strings.TrimSpace(string(output)) + parts := strings.Split(result, ",") + if len(parts) != 2 { + f.log.Warnw("Invalid resolution format", "output", result) + return 1920, 1080 + } + + var width, height int + fmt.Sscanf(parts[0], "%d", &width) + fmt.Sscanf(parts[1], "%d", &height) + + if width <= 0 || height <= 0 { + return 1920, 1080 + } + + return width, height +} + func (f *FFmpeg) copyFile(src, dst string) error { cmd := exec.Command("cp", src, dst) output, err := cmd.CombinedOutput()