Optimize the generation logic

This commit is contained in:
puke
2025-10-26 01:52:49 +08:00
committed by puke
parent 198094fe5f
commit f832424dab
17 changed files with 869 additions and 417 deletions

View File

@@ -43,23 +43,25 @@ class VideoGeneratorService:
async def __call__(
self,
# === Content Source (Choose ONE, mutually exclusive) ===
topic: Optional[str] = None,
content: Optional[str] = None,
# === Input ===
text: str,
# === Optional Title (works with any source) ===
# === Processing Mode ===
mode: Literal["generate", "fixed"] = "generate",
# === Optional Title ===
title: Optional[str] = None,
# === Basic Config ===
n_frames: int = 5,
n_scenes: int = 5, # Only used in generate mode; ignored in fixed mode
voice_id: str = "zh-CN-YunjianNeural",
output_path: Optional[str] = None,
# === LLM Parameters ===
min_narration_words: int = 20,
max_narration_words: int = 40,
min_image_prompt_words: int = 50,
max_image_prompt_words: int = 100,
min_narration_words: int = 5,
max_narration_words: int = 20,
min_image_prompt_words: int = 30,
max_image_prompt_words: int = 60,
# === Image Parameters ===
image_width: int = 1024,
@@ -85,26 +87,33 @@ class VideoGeneratorService:
progress_callback: Optional[Callable[[ProgressEvent], None]] = None,
) -> VideoGenerationResult:
"""
Generate short video from different content sources
Generate short video from text input
Args:
topic: Topic/theme (e.g., "如何提高学习效率")
content: User-provided content (any length)
text: Text input (required)
- For generate mode: topic/theme (e.g., "如何提高学习效率")
- For fixed mode: complete narration script (will be split into frames)
Note: Must provide exactly ONE of: topic or content
mode: Processing mode (default "generate")
- "generate": LLM generates narrations from topic/theme, creates n_scenes
- "fixed": Split existing script into frames, preserves original text
Note: In fixed mode, n_scenes is ignored (uses actual split count)
title: Video title (optional)
- If provided, use it as the video title
- If not provided, auto-generate based on source:
* topic → use topic text
* content → LLM extracts title from content
- If not provided:
* generate mode → use text as title
* fixed mode → LLM generates title from script
n_scenes: Number of storyboard scenes (default 5)
Only effective in generate mode; ignored in fixed mode
n_frames: Number of storyboard frames (default 5)
voice_id: TTS voice ID (default "zh-CN-YunjianNeural")
output_path: Output video path (auto-generated if None)
min_narration_words: Min narration length
max_narration_words: Max narration length
min_narration_words: Min narration length (generate mode only)
max_narration_words: Max narration length (generate mode only)
min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length
@@ -131,66 +140,53 @@ class VideoGeneratorService:
VideoGenerationResult with video path and metadata
Examples:
# Generate from topic
# Generate mode: LLM creates narrations from topic
>>> result = await reelforge.generate_video(
... topic="如何在信息爆炸时代保持深度思考",
... n_frames=5,
... text="如何在信息爆炸时代保持深度思考",
... mode="generate",
... n_scenes=5,
... bgm_path="default"
... )
# Generate from user content with auto-generated title
# Fixed mode: Use existing script (split by paragraphs)
>>> script = '''大家好,今天跟你分享三个学习技巧
...
... 第一个技巧是专注力训练每天冥想10分钟
...
... 第二个技巧是主动回忆,学完立即复述'''
>>> result = await reelforge.generate_video(
... content="昨天我读了一本书,讲的是...",
... n_frames=3
... text=script,
... mode="fixed",
... title="三个学习技巧"
... )
# Generate from user content with custom title
# Fixed mode: Use existing script (split by sentences)
>>> result = await reelforge.generate_video(
... content="买房子,第一应该看的是楼盘的整体环境...",
... title="买房风水指南",
... n_frames=5
... text="第一点是专注。第二点是复述。第三点是重复。",
... mode="fixed"
... )
>>> print(result.video_path)
"""
# ========== Step 0: Validate parameters (mutually exclusive) ==========
sources = [topic, content]
source_count = sum(x is not None for x in sources)
if source_count == 0:
raise ValueError(
"Must provide exactly ONE of: topic or content"
)
elif source_count > 1:
raise ValueError(
"Cannot provide multiple sources. Choose ONE of: topic or content"
)
# Determine source type
if topic:
source_type = "topic"
else: # content
source_type = "content"
# ========== Step 0: Process text and determine title ==========
logger.info(f"🚀 Starting video generation in '{mode}' mode")
logger.info(f" Text length: {len(text)} chars")
# Determine final title (priority: user-specified > auto-generated)
if title:
# User specified title, use it directly
final_title = title
logger.info(f"🚀 Starting video generation from {source_type} with title: '{title}'")
logger.info(f" Title: '{title}' (user-specified)")
else:
# Auto-generate title based on source
if source_type == "topic":
final_title = topic
logger.info(f"🚀 Starting video generation from topic: '{final_title}'")
else: # content
# Will generate title from content using LLM
logger.info(f"🚀 Starting video generation from content ({len(content)} chars)")
final_title = None # Will be generated later
# Generate title from content if needed (before creating output path)
if source_type == "content" and final_title is None:
self._report_progress(progress_callback, "generating_title", 0.01)
final_title = await self._generate_title_from_content(content)
logger.info(f"✅ Generated title: {final_title}")
# Auto-generate title based on mode
if mode == "generate":
# Use text as title (it's a topic/theme)
final_title = text[:20] if len(text) > 20 else text
logger.info(f" Title: '{final_title}' (from text)")
else: # fixed
# Generate title from script using LLM
self._report_progress(progress_callback, "generating_title", 0.01)
final_title = await self._generate_title_from_content(text)
logger.info(f" Title: '{final_title}' (LLM-generated)")
# Auto-generate output path if not provided
if output_path is None:
@@ -204,7 +200,7 @@ class VideoGeneratorService:
# Create storyboard config
config = StoryboardConfig(
n_storyboard=n_frames,
n_storyboard=n_scenes,
min_narration_words=min_narration_words,
max_narration_words=max_narration_words,
min_image_prompt_words=min_image_prompt_words,
@@ -230,24 +226,46 @@ class VideoGeneratorService:
self.core._current_storyboard = storyboard
try:
# ========== Step 1: Generate narrations ==========
self._report_progress(progress_callback, "generating_narrations", 0.05)
narrations = await self.core.narration_generator.generate_narrations(
config=config,
source_type=source_type,
content_metadata=None, # No metadata needed for topic/content
topic=topic if source_type == "topic" else None,
content=content if source_type == "content" else None
)
logger.info(f"✅ Generated {len(narrations)} narrations")
# ========== Step 1: Generate/Split narrations ==========
if mode == "generate":
# Generate narrations using LLM
self._report_progress(progress_callback, "generating_narrations", 0.05)
narrations = await self.core.narration_generator.generate_narrations(
config=config,
source_type="topic",
content_metadata=None,
topic=text,
content=None
)
logger.info(f"✅ Generated {len(narrations)} narrations")
else: # fixed
# Split fixed script using LLM (preserves original text)
self._report_progress(progress_callback, "splitting_script", 0.05)
narrations = await self._split_narration_script(text, config)
logger.info(f"✅ Split script into {len(narrations)} segments")
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
# Step 2: Generate image prompts
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
image_style_preset=image_style_preset,
image_style_description=image_style_description
image_style_description=image_style_description,
progress_callback=image_prompt_progress
)
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
@@ -370,6 +388,169 @@ class VideoGeneratorService:
else:
logger.debug(f"Progress: {progress*100:.0f}% - {event_type}")
def _parse_json(self, text: str) -> dict:
"""
Parse JSON from text, with fallback to extract JSON from markdown code blocks
Args:
text: Text containing JSON
Returns:
Parsed JSON dict
"""
import json
import re
# Try direct parsing first
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Try to extract JSON from markdown code block
json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(1))
except json.JSONDecodeError:
pass
# Try to find any JSON object in the text (flexible pattern for narrations)
json_pattern = r'\{[^{}]*"narrations"\s*:\s*\[[^\]]*\][^{}]*\}'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(0))
except json.JSONDecodeError:
pass
# If all fails, raise error
raise json.JSONDecodeError("No valid JSON found", text, 0)
async def _split_narration_script(self, script: str, config: StoryboardConfig) -> list[str]:
"""
Split user-provided narration script into segments (programmatic splitting).
Priority:
1. Split by major punctuation (newline, 。!?;)
2. If segment > max_len, split by comma ()
3. If still > max_len, keep original (no force split)
4. Merge segments < min_len with next segment
Args:
script: Fixed narration script
config: Storyboard configuration (for length guidelines)
Returns:
List of narration segments
"""
import re
min_len = config.min_narration_words
max_len = config.max_narration_words
logger.info(f"Splitting script (length: {len(script)} chars) with target: {min_len}-{max_len} chars")
# Step 1: Split by major punctuation (newline, period, exclamation, question mark, semicolon)
major_delimiters = r'[\n。]'
parts = re.split(f'({major_delimiters})', script)
# Reconstruct sentences (text only, remove trailing punctuation)
sentences = []
for i in range(0, len(parts)-1, 2):
text = parts[i].strip()
if text:
sentences.append(text)
# Handle last part if no delimiter
if len(parts) % 2 == 1 and parts[-1].strip():
sentences.append(parts[-1].strip())
logger.debug(f"After major split: {len(sentences)} sentences")
# Step 2: For segments > max_len, try splitting by comma
final_segments = []
for sentence in sentences:
sent_len = len(sentence)
# If within range or short, keep as is
if sent_len <= max_len:
final_segments.append(sentence)
continue
# Too long: try splitting by comma
comma_parts = re.split(r'()', sentence)
sub_segments = []
current = ""
for part in comma_parts:
if part == '':
continue
if not current:
current = part
elif len(current + part) <= max_len:
current += part
else:
# Current segment is ready
if current:
sub_segments.append(current.strip())
current = part
# Add last segment
if current:
sub_segments.append(current.strip())
# If comma splitting worked (resulted in multiple segments), use it
if sub_segments and len(sub_segments) > 1:
final_segments.extend(sub_segments)
else:
# Keep original sentence even if > max_len
logger.debug(f"Keeping long segment ({sent_len} chars): {sentence[:30]}...")
final_segments.append(sentence)
# Step 3: Merge segments that are too short
merged_segments = []
i = 0
while i < len(final_segments):
segment = final_segments[i]
# If too short and not the last one, try merging with next
if len(segment) < min_len and i < len(final_segments) - 1:
next_segment = final_segments[i + 1]
merged = segment + "" + next_segment
# If merged result is within max_len, use it
if len(merged) <= max_len:
merged_segments.append(merged)
i += 2 # Skip next segment
continue
# Otherwise keep as is
merged_segments.append(segment)
i += 1
# Clean up
result = [s.strip() for s in merged_segments if s.strip()]
# Log statistics
lengths = [len(s) for s in result]
logger.info(f"Script split into {len(result)} segments")
if lengths:
logger.info(f" Min: {min(lengths)} chars, Max: {max(lengths)} chars, Avg: {sum(lengths)//len(lengths)} chars")
in_range = sum(1 for l in lengths if min_len <= l <= max_len)
too_short = sum(1 for l in lengths if l < min_len)
too_long = sum(1 for l in lengths if l > max_len)
logger.info(f" In range ({min_len}-{max_len}): {in_range}/{len(result)} ({in_range*100//len(result)}%)")
if too_short:
logger.info(f" Too short (< {min_len}): {too_short}/{len(result)} ({too_short*100//len(result)}%)")
if too_long:
logger.info(f" Too long (> {max_len}): {too_long}/{len(result)} ({too_long*100//len(result)}%)")
return result
async def _generate_title_from_content(self, content: str) -> str:
"""
Generate a short, attractive title from user content using LLM
@@ -380,21 +561,10 @@ class VideoGeneratorService:
Returns:
Generated title (10 characters or less)
"""
# Take first 500 chars to avoid overly long prompts
content_preview = content[:500]
from reelforge.prompts import build_title_generation_prompt
prompt = f"""请为以下内容生成一个简短、有吸引力的标题10字以内
内容:
{content_preview}
要求:
1. 简短精炼10字以内
2. 准确概括核心内容
3. 有吸引力,适合作为视频标题
4. 只输出标题文本,不要其他内容
标题:"""
# Build prompt using template
prompt = build_title_generation_prompt(content, max_length=500)
# Call LLM to generate title
response = await self.core.llm(