# reelforge/services/video_generator.py
"""
Video Generator Service
End-to-end service for generating short videos from content.
"""
from datetime import datetime
from pathlib import Path
from typing import Optional, Callable, Literal
from loguru import logger
from reelforge.models.progress import ProgressEvent
from reelforge.models.storyboard import (
Storyboard,
StoryboardFrame,
StoryboardConfig,
ContentMetadata,
VideoGenerationResult
)
class VideoGeneratorService:
"""
Video generation service
Orchestrates the complete pipeline:
1. Generate narrations (LLM)
2. Generate image prompts (LLM)
3. Process each frame (TTS + Image + Compose + Video)
4. Concatenate all segments
5. Add BGM (optional)
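
    Minimal usage sketch (illustrative; `core` is an initialized ReelForgeCore):
        service = VideoGeneratorService(core)
        result = await service(text="...", mode="generate")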
"""
def __init__(self, reelforge_core):
"""
Initialize video generator service
Args:
reelforge_core: ReelForgeCore instance
"""
self.core = reelforge_core
async def __call__(
self,
# === Input ===
text: str,
# === Processing Mode ===
mode: Literal["generate", "fixed"] = "generate",
# === Optional Title ===
title: Optional[str] = None,
# === Basic Config ===
n_scenes: int = 5, # Only used in generate mode; ignored in fixed mode
voice_id: str = "zh-CN-YunjianNeural",
output_path: Optional[str] = None,
# === LLM Parameters ===
min_narration_words: int = 5,
max_narration_words: int = 20,
min_image_prompt_words: int = 30,
max_image_prompt_words: int = 60,
# === Image Parameters ===
image_width: int = 1024,
image_height: int = 1024,
image_preset: Optional[str] = None,
# === Video Parameters ===
video_width: int = 1080,
video_height: int = 1920,
video_fps: int = 30,
# === Frame Template ===
frame_template: Optional[str] = None,
# === Image Style ===
prompt_prefix: Optional[str] = None,
# === BGM Parameters ===
bgm_path: Optional[str] = None,
bgm_volume: float = 0.2,
bgm_mode: Literal["once", "loop"] = "loop",
# === Advanced Options ===
content_metadata: Optional[ContentMetadata] = None,
progress_callback: Optional[Callable[[ProgressEvent], None]] = None,
) -> VideoGenerationResult:
"""
Generate short video from text input
Args:
text: Text input (required)
- For generate mode: topic/theme (e.g., "如何提高学习效率")
- For fixed mode: complete narration script (will be split into frames)
mode: Processing mode (default "generate")
- "generate": LLM generates narrations from topic/theme, creates n_scenes
- "fixed": Split existing script into frames, preserves original text
Note: In fixed mode, n_scenes is ignored (uses actual split count)
title: Video title (optional)
- If provided, use it as the video title
- If not provided:
* generate mode → use text as title
* fixed mode → LLM generates title from script
n_scenes: Number of storyboard scenes (default 5)
Only effective in generate mode; ignored in fixed mode
voice_id: TTS voice ID (default "zh-CN-YunjianNeural")
output_path: Output video path (auto-generated if None)
min_narration_words: Min narration length (generate mode only)
max_narration_words: Max narration length (generate mode only)
min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length
image_width: Generated image width (default 1024)
image_height: Generated image height (default 1024)
image_preset: Image workflow preset (e.g., "flux", "sdxl", None = use default)
video_width: Final video width (default 1080)
video_height: Final video height (default 1920)
video_fps: Video frame rate (default 30)
frame_template: HTML template filename or path (None = use PIL)
e.g., "default.html", "modern.html", "neon.html", or custom path
prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
e.g., "anime style, vibrant colors" or "" for no prefix
bgm_path: BGM path (filename like "default.mp3", custom path, or None)
bgm_volume: BGM volume 0.0-1.0 (default 0.2)
bgm_mode: BGM mode "once" or "loop" (default "loop")
content_metadata: Content metadata (optional, for display)
            progress_callback: Progress callback function(event: ProgressEvent)
Returns:
VideoGenerationResult with video path and metadata
Examples:
# Generate mode: LLM creates narrations from topic
>>> result = await reelforge.generate_video(
... text="如何在信息爆炸时代保持深度思考",
... mode="generate",
... n_scenes=5,
... bgm_path="default"
... )
# Fixed mode: Use existing script (split by paragraphs)
>>> script = '''大家好,今天跟你分享三个学习技巧
...
            ... 第一个技巧是专注力训练，每天冥想10分钟
...
... 第二个技巧是主动回忆,学完立即复述'''
>>> result = await reelforge.generate_video(
... text=script,
... mode="fixed",
... title="三个学习技巧"
... )
# Fixed mode: Use existing script (split by sentences)
>>> result = await reelforge.generate_video(
... text="第一点是专注。第二点是复述。第三点是重复。",
... mode="fixed"
... )
>>> print(result.video_path)
"""
# ========== Step 0: Process text and determine title ==========
logger.info(f"🚀 Starting video generation in '{mode}' mode")
logger.info(f" Text length: {len(text)} chars")
# Determine final title (priority: user-specified > auto-generated)
if title:
# User specified title, use it directly
final_title = title
logger.info(f" Title: '{title}' (user-specified)")
else:
# Auto-generate title based on mode
if mode == "generate":
# Use text as title (it's a topic/theme)
final_title = text[:20] if len(text) > 20 else text
logger.info(f" Title: '{final_title}' (from text)")
else: # fixed
# Generate title from script using LLM
self._report_progress(progress_callback, "generating_title", 0.01)
final_title = await self._generate_title_from_content(text)
logger.info(f" Title: '{final_title}' (LLM-generated)")
# Auto-generate output path if not provided
if output_path is None:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
# Use first 10 chars of final_title for filename
safe_name = final_title[:10].replace('/', '_').replace(' ', '_')
output_path = f"output/{timestamp}_{safe_name}.mp4"
# Ensure output directory exists
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
# Create storyboard config
config = StoryboardConfig(
n_storyboard=n_scenes,
min_narration_words=min_narration_words,
max_narration_words=max_narration_words,
min_image_prompt_words=min_image_prompt_words,
max_image_prompt_words=max_image_prompt_words,
video_width=video_width,
video_height=video_height,
video_fps=video_fps,
voice_id=voice_id,
image_width=image_width,
image_height=image_height,
image_preset=image_preset,
frame_template=frame_template
)
# Create storyboard
storyboard = Storyboard(
topic=final_title, # Use final_title as video title
config=config,
content_metadata=content_metadata,
created_at=datetime.now()
)
# Store storyboard in core for access in storyboard processor
self.core._current_storyboard = storyboard
try:
# ========== Step 1: Generate/Split narrations ==========
if mode == "generate":
# Generate narrations using LLM
self._report_progress(progress_callback, "generating_narrations", 0.05)
narrations = await self.core.narration_generator.generate_narrations(
config=config,
source_type="topic",
content_metadata=None,
topic=text,
content=None
)
logger.info(f"✅ Generated {len(narrations)} narrations")
else: # fixed
# Split fixed script using LLM (preserves original text)
self._report_progress(progress_callback, "splitting_script", 0.05)
narrations = await self._split_narration_script(text, config)
logger.info(f"✅ Split script into {len(narrations)} segments")
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
# Step 2: Generate image prompts
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
# Override prompt_prefix if provided (temporarily modify config)
original_prefix = None
if prompt_prefix is not None:
image_config = self.core.config.get("image", {})
original_prefix = image_config.get("prompt_prefix")
image_config["prompt_prefix"] = prompt_prefix
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
try:
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
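                    # e.g. completed=2, total=4 -> 0.15 + 0.5 * 0.15 = 0.225 (22.5% overall)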
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
progress_callback=image_prompt_progress
)
            finally:
                # Restore original prompt_prefix (remove the override if no prefix existed before)
                if prompt_prefix is not None:
                    if original_prefix is not None:
                        image_config["prompt_prefix"] = original_prefix
                    else:
                        image_config.pop("prompt_prefix", None)
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
# Step 3: Create frames
for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
frame = StoryboardFrame(
index=i,
narration=narration,
image_prompt=image_prompt,
created_at=datetime.now()
)
storyboard.frames.append(frame)
# Step 4: Process each frame
for i, frame in enumerate(storyboard.frames):
# Calculate fine-grained progress for this frame
base_progress = 0.2 # Frames processing starts at 20%
frame_range = 0.6 # Frames processing takes 60% (20%-80%)
per_frame_progress = frame_range / len(storyboard.frames)
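                # e.g. with 5 frames, each frame spans 0.6 / 5 = 0.12 of overall progress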
# Create frame-specific progress callback
def frame_progress_callback(event: ProgressEvent):
"""Report sub-step progress within current frame"""
# Calculate overall progress: base + previous frames + current frame progress
overall_progress = base_progress + (per_frame_progress * i) + (per_frame_progress * event.progress)
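                    # e.g. frame i=1 of 5 at 50% sub-progress -> 0.2 + 0.12 + 0.06 = 0.38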
# Forward the event with adjusted overall progress
if progress_callback:
adjusted_event = ProgressEvent(
event_type=event.event_type,
progress=overall_progress,
frame_current=event.frame_current,
frame_total=event.frame_total,
step=event.step,
action=event.action
)
progress_callback(adjusted_event)
# Report frame start
self._report_progress(
progress_callback,
"processing_frame",
base_progress + (per_frame_progress * i),
frame_current=i+1,
frame_total=len(storyboard.frames)
)
processed_frame = await self.core.storyboard_processor.process_frame(
frame=frame,
config=config,
total_frames=len(storyboard.frames),
progress_callback=frame_progress_callback
)
storyboard.total_duration += processed_frame.duration
logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)")
# Step 5: Concatenate videos
self._report_progress(progress_callback, "concatenating", 0.85)
segment_paths = [frame.video_segment_path for frame in storyboard.frames]
from reelforge.services.video import VideoService
video_service = VideoService()
final_video_path = video_service.concat_videos(
videos=segment_paths,
output=output_path,
bgm_path=bgm_path,
bgm_volume=bgm_volume,
bgm_mode=bgm_mode
)
storyboard.final_video_path = final_video_path
storyboard.completed_at = datetime.now()
logger.success(f"🎬 Video generation completed: {final_video_path}")
# Step 6: Create result
self._report_progress(progress_callback, "finalizing", 1.0)
video_path_obj = Path(final_video_path)
file_size = video_path_obj.stat().st_size
result = VideoGenerationResult(
video_path=final_video_path,
storyboard=storyboard,
duration=storyboard.total_duration,
file_size=file_size
)
logger.info(f"✅ Generated video: {final_video_path}")
logger.info(f" Duration: {storyboard.total_duration:.2f}s")
logger.info(f" Size: {file_size / (1024*1024):.2f} MB")
logger.info(f" Frames: {len(storyboard.frames)}")
return result
except Exception as e:
logger.error(f"❌ Video generation failed: {e}")
raise
def _report_progress(
self,
callback: Optional[Callable[[ProgressEvent], None]],
event_type: str,
progress: float,
**kwargs
):
"""
Report progress via callback
Args:
callback: Progress callback function
event_type: Type of progress event
progress: Progress value (0.0-1.0)
**kwargs: Additional event-specific parameters (frame_current, frame_total, etc.)
"""
        if callback:
            event = ProgressEvent(event_type=event_type, progress=progress, **kwargs)
            callback(event)
        logger.debug(f"Progress: {progress*100:.0f}% - {event_type}")
def _parse_json(self, text: str) -> dict:
"""
Parse JSON from text, with fallback to extract JSON from markdown code blocks
Args:
text: Text containing JSON
Returns:
Parsed JSON dict
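        Example (illustrative):
            >>> self._parse_json('```json\n{"narrations": ["第一点"]}\n```')
            {'narrations': ['第一点']}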
"""
import json
import re
# Try direct parsing first
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Try to extract JSON from markdown code block
json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(1))
except json.JSONDecodeError:
pass
# Try to find any JSON object in the text (flexible pattern for narrations)
json_pattern = r'\{[^{}]*"narrations"\s*:\s*\[[^\]]*\][^{}]*\}'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(0))
except json.JSONDecodeError:
pass
# If all fails, raise error
raise json.JSONDecodeError("No valid JSON found", text, 0)
async def _split_narration_script(self, script: str, config: StoryboardConfig) -> list[str]:
"""
Split user-provided narration script into segments (programmatic splitting).
Priority:
        1. Split by major punctuation (newline, 。！？；)
        2. If a segment exceeds max_len, split by comma (，)
3. If still > max_len, keep original (no force split)
4. Merge segments < min_len with next segment
Args:
script: Fixed narration script
config: Storyboard configuration (for length guidelines)
Returns:
List of narration segments
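        Example (illustrative, with default length settings):
            "第一点是专注。第二点是复述。" -> ["第一点是专注", "第二点是复述"]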
"""
import re
min_len = config.min_narration_words
max_len = config.max_narration_words
logger.info(f"Splitting script (length: {len(script)} chars) with target: {min_len}-{max_len} chars")
# Step 1: Split by major punctuation (newline, period, exclamation, question mark, semicolon)
        major_delimiters = r'[\n。！？；]'
parts = re.split(f'({major_delimiters})', script)
# Reconstruct sentences (text only, remove trailing punctuation)
sentences = []
for i in range(0, len(parts)-1, 2):
text = parts[i].strip()
if text:
sentences.append(text)
# Handle last part if no delimiter
if len(parts) % 2 == 1 and parts[-1].strip():
sentences.append(parts[-1].strip())
logger.debug(f"After major split: {len(sentences)} sentences")
# Step 2: For segments > max_len, try splitting by comma
final_segments = []
for sentence in sentences:
sent_len = len(sentence)
# If within range or short, keep as is
if sent_len <= max_len:
final_segments.append(sentence)
continue
            # Too long: try splitting by comma (，)
            comma_parts = re.split(r'([，,])', sentence)  # capture commas so they stay attached to the text
sub_segments = []
current = ""
for part in comma_parts:
if part == '':
continue
if not current:
current = part
elif len(current + part) <= max_len:
current += part
else:
# Current segment is ready
if current:
sub_segments.append(current.strip())
current = part
# Add last segment
if current:
sub_segments.append(current.strip())
# If comma splitting worked (resulted in multiple segments), use it
if sub_segments and len(sub_segments) > 1:
final_segments.extend(sub_segments)
else:
# Keep original sentence even if > max_len
logger.debug(f"Keeping long segment ({sent_len} chars): {sentence[:30]}...")
final_segments.append(sentence)
# Step 3: Merge segments that are too short
merged_segments = []
i = 0
while i < len(final_segments):
segment = final_segments[i]
# If too short and not the last one, try merging with next
if len(segment) < min_len and i < len(final_segments) - 1:
next_segment = final_segments[i + 1]
merged = segment + "" + next_segment
# If merged result is within max_len, use it
if len(merged) <= max_len:
merged_segments.append(merged)
i += 2 # Skip next segment
continue
# Otherwise keep as is
merged_segments.append(segment)
i += 1
# Clean up
result = [s.strip() for s in merged_segments if s.strip()]
# Log statistics
lengths = [len(s) for s in result]
logger.info(f"Script split into {len(result)} segments")
if lengths:
logger.info(f" Min: {min(lengths)} chars, Max: {max(lengths)} chars, Avg: {sum(lengths)//len(lengths)} chars")
in_range = sum(1 for l in lengths if min_len <= l <= max_len)
too_short = sum(1 for l in lengths if l < min_len)
too_long = sum(1 for l in lengths if l > max_len)
logger.info(f" In range ({min_len}-{max_len}): {in_range}/{len(result)} ({in_range*100//len(result)}%)")
if too_short:
logger.info(f" Too short (< {min_len}): {too_short}/{len(result)} ({too_short*100//len(result)}%)")
if too_long:
logger.info(f" Too long (> {max_len}): {too_long}/{len(result)} ({too_long*100//len(result)}%)")
return result
async def _generate_title_from_content(self, content: str) -> str:
"""
Generate a short, attractive title from user content using LLM
Args:
content: User-provided content
Returns:
            Generated title (target is 10 characters or less; hard-capped at 20)
"""
from reelforge.prompts import build_title_generation_prompt
# Build prompt using template
prompt = build_title_generation_prompt(content, max_length=500)
# Call LLM to generate title
response = await self.core.llm(
prompt=prompt,
temperature=0.7,
max_tokens=50
)
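        # max_tokens=50 keeps the completion short, since only a brief title is expected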
# Clean up response
title = response.strip()
# Remove quotes if present
if title.startswith('"') and title.endswith('"'):
title = title[1:-1]
if title.startswith("'") and title.endswith("'"):
title = title[1:-1]
# Limit to 20 chars max (safety)
if len(title) > 20:
title = title[:20]
return title