分镜支持视频功能

This commit is contained in:
puke
2025-11-11 20:38:31 +08:00
parent cf9321feac
commit 0e2b6b17d0
17 changed files with 1225 additions and 321 deletions

View File

@@ -239,6 +239,51 @@ class VideoService:
logger.error(f"FFmpeg concat filter error: {error_msg}")
raise RuntimeError(f"Failed to concatenate videos: {error_msg}")
def _get_video_duration(self, video: str) -> float:
    """Return the duration of *video* in seconds, or 0.0 if probing fails.

    Args:
        video: Video file path

    Returns:
        Container-level duration in seconds; 0.0 when ffprobe cannot
        read the file (callers treat an unknown duration as zero).
    """
    try:
        # ffprobe reports the container duration as a string; coerce to float.
        return float(ffmpeg.probe(video)['format']['duration'])
    except Exception as e:
        logger.warning(f"Failed to get video duration: {e}")
        return 0.0
def _get_audio_duration(self, audio: str) -> float:
    """Return the duration of *audio* in seconds.

    When ffprobe cannot read the file, falls back to a rough
    file-size-based estimate, clamped to at least one second.

    Args:
        audio: Audio file path

    Returns:
        Duration in seconds (probed, or estimated on failure).
    """
    try:
        probed = ffmpeg.probe(audio)
        return float(probed['format']['duration'])
    except Exception as e:
        logger.warning(f"Failed to get audio duration: {e}, using estimate")
        # Fallback: estimate based on file size (very rough)
        import os

        # Assume ~16kbps for MP3, so 2KB per second
        size_bytes = os.path.getsize(audio)
        return max(1.0, size_bytes / 2000)  # At least 1 second
def has_audio_stream(self, video: str) -> bool:
    """
    Check if video has audio stream

    Args:
        video: Video file path

    Returns:
        True if video has audio stream, False otherwise.
        Probe failures are treated as "no audio" (logged, not raised).
    """
    try:
        probe = ffmpeg.probe(video)
        # Any stream whose codec_type is 'audio' counts.
        has_audio = any(
            s['codec_type'] == 'audio' for s in probe.get('streams', [])
        )
        logger.debug(f"Video {video} has_audio={has_audio}")
        return has_audio
    except Exception as e:
        logger.warning(f"Failed to probe video audio streams: {e}, assuming no audio")
        return False
def merge_audio_video(
self,
video: str,
@@ -247,9 +292,18 @@ class VideoService:
replace_audio: bool = True,
audio_volume: float = 1.0,
video_volume: float = 0.0,
pad_strategy: str = "freeze", # "freeze" (freeze last frame) or "black" (black screen)
) -> str:
"""
Merge audio with video
Merge audio with video, using the longer duration
The output video duration will be the maximum of video and audio duration.
If audio is longer than video, the video will be padded using the specified strategy.
Automatically handles videos with or without audio streams.
- If video has no audio: adds the audio track
- If video has audio and replace_audio=True: replaces with new audio
- If video has audio and replace_audio=False: mixes both audio tracks
Args:
video: Video file path
@@ -259,6 +313,9 @@ class VideoService:
audio_volume: Volume of the new audio (0.0 to 1.0+)
video_volume: Volume of original video audio (0.0 to 1.0+)
Only used when replace_audio=False
pad_strategy: Strategy to pad video if audio is longer
- "freeze": Freeze last frame (default)
- "black": Fill with black screen
Returns:
Path to the output video file
@@ -267,28 +324,110 @@ class VideoService:
RuntimeError: If FFmpeg execution fails
Note:
- When replace_audio=True, video's original audio is removed
- When replace_audio=False, original and new audio are mixed
- Audio is trimmed/extended to match video duration
- Uses the longer duration between video and audio
- When audio is longer, video is padded using pad_strategy
- When video is longer, audio is looped or extended
- Automatically detects if video has audio
- When video is silent, audio is added regardless of replace_audio
- When replace_audio=True and video has audio, original audio is removed
- When replace_audio=False and video has audio, original and new audio are mixed
"""
# Get durations of video and audio
video_duration = self._get_video_duration(video)
audio_duration = self._get_audio_duration(audio)
logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s")
# Determine target duration (max of both)
target_duration = max(video_duration, audio_duration)
logger.info(f"Target output duration: {target_duration:.2f}s")
# Check if video has audio stream
video_has_audio = self.has_audio_stream(video)
# Prepare video stream (potentially with padding)
input_video = ffmpeg.input(video)
video_stream = input_video.video
# Pad video if audio is longer
if audio_duration > video_duration:
pad_duration = audio_duration - video_duration
logger.info(f"Audio is longer, padding video by {pad_duration:.2f}s using '{pad_strategy}' strategy")
if pad_strategy == "freeze":
# Freeze last frame: tpad filter
video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
else: # black
# Generate black frames for padding duration
from pixelle_video.utils.os_util import get_temp_path
import os
# Get video properties
probe = ffmpeg.probe(video)
video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
width = int(video_info['width'])
height = int(video_info['height'])
fps_str = video_info['r_frame_rate']
fps_num, fps_den = map(int, fps_str.split('/'))
fps = fps_num / fps_den if fps_den != 0 else 30
# Create black video for padding
black_video_path = get_temp_path(f"black_pad_{os.path.basename(output)}")
black_input = ffmpeg.input(
f'color=c=black:s={width}x{height}:r={fps}',
f='lavfi',
t=pad_duration
)
# Concatenate original video with black padding
video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)
# Prepare audio stream
input_audio = ffmpeg.input(audio)
audio_stream = input_audio.audio.filter('volume', audio_volume)
if not video_has_audio:
logger.info(f"Video has no audio stream, adding audio track")
# Video is silent, just add the audio
try:
(
ffmpeg
.output(
video_stream,
audio_stream,
output,
vcodec='libx264', # Re-encode video if padded
acodec='aac',
audio_bitrate='192k',
t=target_duration # Trim to target duration
)
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
)
logger.success(f"Audio added to silent video: {output}")
return output
except ffmpeg.Error as e:
error_msg = e.stderr.decode() if e.stderr else str(e)
logger.error(f"FFmpeg error adding audio to silent video: {error_msg}")
raise RuntimeError(f"Failed to add audio to video: {error_msg}")
# Video has audio, proceed with merging
logger.info(f"Merging audio with video (replace={replace_audio})")
try:
input_video = ffmpeg.input(video)
input_audio = ffmpeg.input(audio)
if replace_audio:
# Replace audio: use only new audio, ignore original
(
ffmpeg
.output(
input_video.video,
input_audio.audio.filter('volume', audio_volume),
video_stream,
audio_stream,
output,
vcodec='copy',
vcodec='libx264', # Re-encode video if padded
acodec='aac',
audio_bitrate='192k',
shortest=None
t=target_duration # Trim to target duration
)
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
@@ -298,22 +437,23 @@ class VideoService:
mixed_audio = ffmpeg.filter(
[
input_video.audio.filter('volume', video_volume),
input_audio.audio.filter('volume', audio_volume)
audio_stream
],
'amix',
inputs=2,
duration='first'
duration='longest' # Use longest audio
)
(
ffmpeg
.output(
input_video.video,
video_stream,
mixed_audio,
output,
vcodec='copy',
vcodec='libx264', # Re-encode video if padded
acodec='aac',
audio_bitrate='192k'
audio_bitrate='192k',
t=target_duration # Trim to target duration
)
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
@@ -326,6 +466,92 @@ class VideoService:
logger.error(f"FFmpeg merge error: {error_msg}")
raise RuntimeError(f"Failed to merge audio and video: {error_msg}")
def overlay_image_on_video(
    self,
    video: str,
    overlay_image: str,
    output: str,
    scale_mode: str = "contain"
) -> str:
    """
    Composite a transparent image on top of a video.

    The base video is first resized to the overlay image's dimensions
    (according to *scale_mode*), then the overlay is drawn on top, so
    the output frame size always matches the overlay image.

    Args:
        video: Base video file path
        overlay_image: Transparent overlay image path (e.g., rendered HTML with transparent background)
        output: Output video file path
        scale_mode: How to scale the base video to fit the overlay size
            - "contain": fit within overlay dimensions (letterbox/pillarbox)
            - "cover": cover overlay dimensions (may crop)
            - any other value: stretch to exact overlay dimensions

    Returns:
        Path to the output video file

    Raises:
        RuntimeError: If FFmpeg execution fails

    Note:
        - Overlay image should have transparent background
        - Video is re-encoded (libx264, yuv420p) to support the overlay
        - NOTE(review): only the composited video stream is mapped to the
          output; the base video's audio (if any) is not carried over —
          confirm this is intended.
    """
    logger.info(f"Overlaying image on video (scale_mode={scale_mode})")
    try:
        # The overlay image dictates the final frame size.
        probe = ffmpeg.probe(overlay_image)
        info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
        overlay_width = int(info['width'])
        overlay_height = int(info['height'])
        logger.debug(f"Overlay dimensions: {overlay_width}x{overlay_height}")

        base = ffmpeg.input(video)
        top = ffmpeg.input(overlay_image)

        if scale_mode == "cover":
            # Fill the frame completely, cropping any excess.
            scaled = (
                base
                .filter('scale', overlay_width, overlay_height,
                        force_original_aspect_ratio='increase')
                .filter('crop', overlay_width, overlay_height)
            )
        elif scale_mode == "contain":
            # Fit inside the frame, centering with black bars as needed.
            scaled = (
                base
                .filter('scale', overlay_width, overlay_height,
                        force_original_aspect_ratio='decrease')
                .filter('pad', overlay_width, overlay_height,
                        '(ow-iw)/2', '(oh-ih)/2', color='black')
            )
        else:  # stretch
            scaled = base.filter('scale', overlay_width, overlay_height)

        # Draw the transparent overlay on top of the scaled base video.
        composited = ffmpeg.overlay(scaled, top)
        (
            ffmpeg
            .output(composited, output,
                    vcodec='libx264',
                    pix_fmt='yuv420p',
                    preset='medium',
                    crf=23)
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
        logger.success(f"Image overlaid on video: {output}")
        return output
    except ffmpeg.Error as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"FFmpeg overlay error: {error_msg}")
        raise RuntimeError(f"Failed to overlay image on video: {error_msg}")
def create_video_from_image(
self,
image: str,