Remove redundant logic

Author: puke
Date: 2025-11-04 11:38:06 +08:00
parent 22c46cf2c5
commit 641efb81c6
7 changed files with 6 additions and 1066 deletions

pixelle_video/pipelines/standard.py

@@ -2,7 +2,7 @@
 Standard Video Generation Pipeline
 Standard workflow for generating short videos from topic or fixed script.
-This is the default pipeline that replicates the original VideoGeneratorService logic.
+This is the default pipeline for general-purpose video generation.
 """
 from datetime import datetime

pixelle_video core module (file path not shown in this extract; defines PixelleVideoCore)

@@ -13,9 +13,6 @@ from pixelle_video.services.llm_service import LLMService
 from pixelle_video.services.tts_service import TTSService
 from pixelle_video.services.image import ImageService
 from pixelle_video.services.video import VideoService
-from pixelle_video.services.narration_generator import NarrationGeneratorService
-from pixelle_video.services.image_prompt_generator import ImagePromptGeneratorService
-from pixelle_video.services.title_generator import TitleGeneratorService
 from pixelle_video.services.frame_processor import FrameProcessor
 from pixelle_video.pipelines.standard import StandardPipeline
 from pixelle_video.pipelines.custom import CustomPipeline
@@ -70,13 +67,6 @@ class PixelleVideoCore:
 self.tts: Optional[TTSService] = None
 self.image: Optional[ImageService] = None
 self.video: Optional[VideoService] = None
-# Content generation services
-self.narration_generator: Optional[NarrationGeneratorService] = None
-self.image_prompt_generator: Optional[ImagePromptGeneratorService] = None
-self.title_generator: Optional[TitleGeneratorService] = None
-# Frame processing services
-self.frame_processor: Optional[FrameProcessor] = None
 # Video generation pipelines (dictionary of pipeline_name -> pipeline_instance)
@@ -105,23 +95,16 @@ class PixelleVideoCore:
 self.tts = TTSService(self.config)
 self.image = ImageService(self.config)
 self.video = VideoService()
-# 2. Initialize content generation services
-self.narration_generator = NarrationGeneratorService(self)
-self.image_prompt_generator = ImagePromptGeneratorService(self)
-self.title_generator = TitleGeneratorService(self)
-# 3. Initialize frame processing services
-self.frame_processor = FrameProcessor(self)
-# 4. Register video generation pipelines
+# 2. Register video generation pipelines
 self.pipelines = {
 "standard": StandardPipeline(self),
 "custom": CustomPipeline(self),
 }
 logger.info(f"📹 Registered pipelines: {', '.join(self.pipelines.keys())}")
-# 5. Set default pipeline callable (for backward compatibility)
+# 3. Set default pipeline callable (for backward compatibility)
 self.generate_video = self._create_generate_video_wrapper()
 self._initialized = True
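
The registry-plus-wrapper pattern above is what replaces the dedicated generator services. A minimal usage sketch follows; the setup call is an assumption, since this diff does not show the initializer's name:

# Sketch only: `initialize()` is assumed, not shown in this diff.
core = PixelleVideoCore()
await core.initialize()

# New style: select a registered pipeline explicitly
standard = core.pipelines["standard"]

# Backward-compatible style: the default callable wraps the standard pipeline
result = await core.generate_video(text="如何提高学习效率", mode="generate", n_scenes=5)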

pixelle_video/services/__init__.py

@@ -3,18 +3,13 @@ Pixelle-Video Services
 Core services providing atomic capabilities.
-Core Services (Active):
+Services:
 - LLMService: LLM text generation
 - TTSService: Text-to-speech
 - ImageService: Image generation
 - VideoService: Video processing
-Legacy Services (Kept for backward compatibility):
-- NarrationGeneratorService: Use pipelines + utils.content_generators instead
-- ImagePromptGeneratorService: Use pipelines + utils.content_generators instead
-- TitleGeneratorService: Use pipelines + utils.content_generators instead
-- FrameProcessor: Use pipelines instead
-- VideoGeneratorService: Use pipelines.StandardPipeline instead
+- FrameProcessor: Frame processing orchestrator
+- ComfyBaseService: Base class for ComfyUI-based services
 """
 from pixelle_video.services.comfy_base_service import ComfyBaseService
@@ -22,13 +17,7 @@ from pixelle_video.services.llm_service import LLMService
 from pixelle_video.services.tts_service import TTSService
 from pixelle_video.services.image import ImageService
 from pixelle_video.services.video import VideoService
-# Legacy services (kept for backward compatibility)
-from pixelle_video.services.narration_generator import NarrationGeneratorService
-from pixelle_video.services.image_prompt_generator import ImagePromptGeneratorService
-from pixelle_video.services.title_generator import TitleGeneratorService
-from pixelle_video.services.frame_processor import FrameProcessor
-from pixelle_video.services.video_generator import VideoGeneratorService
 __all__ = [
 "ComfyBaseService",
@@ -36,11 +25,6 @@ __all__ = [
 "TTSService",
 "ImageService",
 "VideoService",
-# Legacy (backward compatibility)
-"NarrationGeneratorService",
-"ImagePromptGeneratorService",
-"TitleGeneratorService",
-"FrameProcessor",
-"VideoGeneratorService",
 ]
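
After this change only the core services are re-exported from the package; anything legacy must come from the pipelines instead. A quick check of the trimmed surface (sketch):

# Still importable from the package after this commit:
from pixelle_video.services import ComfyBaseService, LLMService, TTSService, ImageService, VideoService

# No longer re-exported; these would now raise ImportError:
# from pixelle_video.services import NarrationGeneratorService, VideoGeneratorService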

pixelle_video/services/image_prompt_generator.py

@@ -1,218 +0,0 @@
"""
Image prompt generation service
"""
import json
import re
from typing import List, Optional, Callable
from loguru import logger
from pixelle_video.models.storyboard import StoryboardConfig
from pixelle_video.prompts import build_image_prompt_prompt
class ImagePromptGeneratorService:
"""Image prompt generation service"""
def __init__(self, pixelle_video_core):
"""
Initialize
Args:
pixelle_video_core: PixelleVideoCore instance
"""
self.core = pixelle_video_core
async def generate_image_prompts(
self,
narrations: List[str],
config: StoryboardConfig,
batch_size: int = 10,
max_retries: int = 3,
progress_callback: Optional[Callable] = None
) -> List[str]:
"""
Generate image prompts based on narrations (with batching and retry)
Args:
narrations: List of narrations
config: Storyboard configuration
batch_size: Max narrations per batch (default: 10)
max_retries: Max retry attempts per batch (default: 3)
progress_callback: Optional callback(completed, total, message) for progress updates
Returns:
List of image prompts with prompt_prefix applied (from config)
Raises:
ValueError: If batch fails after max_retries
json.JSONDecodeError: If unable to parse JSON
"""
logger.info(f"Generating image prompts for {len(narrations)} narrations (batch_size={batch_size}, max_retries={max_retries})")
# Split narrations into batches
batches = [narrations[i:i + batch_size] for i in range(0, len(narrations), batch_size)]
logger.info(f"Split into {len(batches)} batches")
all_base_prompts = []
# Process each batch
for batch_idx, batch_narrations in enumerate(batches, 1):
logger.info(f"Processing batch {batch_idx}/{len(batches)} ({len(batch_narrations)} narrations)")
# Retry logic for this batch
for attempt in range(1, max_retries + 1):
try:
# Generate prompts for this batch
batch_prompts = await self._generate_batch_prompts(
batch_narrations,
config,
batch_idx,
attempt
)
# Validate count
if len(batch_prompts) != len(batch_narrations):
error_msg = (
f"Batch {batch_idx} prompt count mismatch (attempt {attempt}/{max_retries}):\n"
f" Expected: {len(batch_narrations)} prompts\n"
f" Got: {len(batch_prompts)} prompts\n"
f" Difference: {abs(len(batch_prompts) - len(batch_narrations))} "
f"{'missing' if len(batch_prompts) < len(batch_narrations) else 'extra'}"
)
logger.warning(error_msg)
if attempt < max_retries:
logger.info(f"Retrying batch {batch_idx}...")
continue
else:
logger.error(f"Batch {batch_idx} failed after {max_retries} attempts")
raise ValueError(error_msg)
# Success!
logger.info(f"✅ Batch {batch_idx} completed successfully ({len(batch_prompts)} prompts)")
all_base_prompts.extend(batch_prompts)
# Report progress
if progress_callback:
progress_callback(
len(all_base_prompts),
len(narrations),
f"Batch {batch_idx}/{len(batches)} completed"
)
break
except json.JSONDecodeError as e:
logger.error(f"Batch {batch_idx} JSON parse error (attempt {attempt}/{max_retries}): {e}")
if attempt >= max_retries:
raise
logger.info(f"Retrying batch {batch_idx}...")
base_prompts = all_base_prompts
logger.info(f"✅ All batches completed. Total prompts: {len(base_prompts)}")
# 5. Apply prompt prefix to each prompt
from pixelle_video.utils.prompt_helper import build_image_prompt
# Get prompt prefix from config (fix: correct path is comfyui.image.prompt_prefix)
image_config = self.core.config.get("comfyui", {}).get("image", {})
prompt_prefix = image_config.get("prompt_prefix", "")
# Apply prefix to each base prompt
final_prompts = []
for base_prompt in base_prompts:
final_prompt = build_image_prompt(base_prompt, prompt_prefix)
final_prompts.append(final_prompt)
logger.info(f"Generated {len(final_prompts)} final image prompts with prefix applied")
return final_prompts
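
For reference, the batching above splits strictly by position; a worked example (the numbers are illustrative, not from the diff):

# 23 narrations with batch_size=10 -> batches of 10, 10 and 3
narrations = [f"narration {i}" for i in range(23)]
batch_size = 10
batches = [narrations[i:i + batch_size] for i in range(0, len(narrations), batch_size)]
assert [len(b) for b in batches] == [10, 10, 3]
# Each batch is retried up to max_retries times when the LLM returns
# a prompt count that does not match the batch's narration count.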
async def _generate_batch_prompts(
self,
batch_narrations: List[str],
config: StoryboardConfig,
batch_idx: int,
attempt: int
) -> List[str]:
"""
Generate image prompts for a single batch of narrations
Args:
batch_narrations: Batch of narrations
config: Storyboard configuration
batch_idx: Batch index (for logging)
attempt: Attempt number (for logging)
Returns:
List of image prompts for this batch
Raises:
json.JSONDecodeError: If unable to parse JSON
KeyError: If response format is invalid
"""
logger.debug(f"Batch {batch_idx} attempt {attempt}: Generating prompts for {len(batch_narrations)} narrations")
# 1. Build prompt
prompt = build_image_prompt_prompt(
narrations=batch_narrations,
min_words=config.min_image_prompt_words,
max_words=config.max_image_prompt_words
)
# 2. Call LLM
response = await self.core.llm(
prompt=prompt,
temperature=0.7,
max_tokens=8192
)
logger.debug(f"Batch {batch_idx} attempt {attempt}: LLM response length: {len(response)} chars")
# 3. Parse JSON
result = self._parse_json(response)
if "image_prompts" not in result:
logger.error("Response missing 'image_prompts' key")
raise KeyError("Invalid response format: missing 'image_prompts'")
return result["image_prompts"]
def _parse_json(self, text: str) -> dict:
"""
Parse JSON from text, with fallback to extract JSON from markdown code blocks
Args:
text: Text containing JSON
Returns:
Parsed JSON dict
"""
# Try direct parsing first
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Try to extract JSON from markdown code block
json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(1))
except json.JSONDecodeError:
pass
# Try to find any JSON object in the text
json_pattern = r'\{[^{}]*"image_prompts"\s*:\s*\[[^\]]*\][^{}]*\}'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(0))
except json.JSONDecodeError:
pass
# If all fails, raise error
raise json.JSONDecodeError("No valid JSON found", text, 0)
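
The `_parse_json` fallback chain above copes with common LLM formatting drift; a minimal standalone illustration using the same code-block regex:

import json, re

response = '```json\n{"image_prompts": ["a misty forest at dawn"]}\n```'
# Stage 1: direct json.loads fails on the fenced text.
# Stage 2: the code-block regex recovers the payload:
match = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', response, re.DOTALL)
assert json.loads(match.group(1))["image_prompts"] == ["a misty forest at dawn"]
# Stage 3 (not reached here) scans for a bare {"image_prompts": [...]} object.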

pixelle_video/services/narration_generator.py

@@ -1,179 +0,0 @@
"""
Narration generation service
Supports two content sources:
1. Topic: Generate narrations from a topic/theme
2. Content: Extract/refine narrations from user-provided content
"""
import json
import re
from typing import List, Optional, Literal
from loguru import logger
from pixelle_video.models.storyboard import StoryboardConfig, ContentMetadata
from pixelle_video.prompts import (
build_topic_narration_prompt,
build_content_narration_prompt,
)
class NarrationGeneratorService:
"""Narration generation service"""
def __init__(self, pixelle_video_core):
"""
Initialize
Args:
pixelle_video_core: PixelleVideoCore instance (for calling llm)
"""
self.core = pixelle_video_core
async def generate_narrations(
self,
config: StoryboardConfig,
source_type: Literal["topic", "content"],
content_metadata: Optional[ContentMetadata] = None,
topic: Optional[str] = None,
content: Optional[str] = None,
) -> List[str]:
"""
Generate storyboard narrations from different sources
Args:
config: Storyboard configuration
source_type: Type of content source ("topic" or "content")
content_metadata: Content metadata (optional, not currently used)
topic: Topic/theme (required if source_type="topic")
content: User-provided content (required if source_type="content")
Returns:
List of narration texts
Raises:
ValueError: If parameters don't match source_type or narration count mismatch
json.JSONDecodeError: If unable to parse LLM response as JSON
Examples:
# Generate from topic
>>> narrations = await service.generate_narrations(
... config=config,
... source_type="topic",
... topic="如何提高学习效率"
... )
# Generate from user content
>>> narrations = await service.generate_narrations(
... config=config,
... source_type="content",
... content="Today I want to share three useful tips..."
... )
"""
# 1. Build prompt based on source_type
if source_type == "topic":
if topic is None:
raise ValueError("topic is required when source_type='topic'")
logger.info(f"Generating topic narrations for: {topic}")
prompt = build_topic_narration_prompt(
topic=topic,
n_storyboard=config.n_storyboard,
min_words=config.min_narration_words,
max_words=config.max_narration_words
)
else: # content
if content is None:
raise ValueError("content is required when source_type='content'")
logger.info(f"Generating narrations from user content ({len(content)} chars)")
prompt = build_content_narration_prompt(
content=content,
n_storyboard=config.n_storyboard,
min_words=config.min_narration_words,
max_words=config.max_narration_words
)
# 2. Call LLM (using self.core.llm)
response = await self.core.llm(
prompt=prompt,
temperature=0.8, # Higher temperature for more creativity
max_tokens=2000
)
logger.debug(f"LLM response: {response[:200]}...")
# 3. Parse JSON
try:
result = self._parse_json(response)
narrations = result["narrations"]
except json.JSONDecodeError as e:
logger.error(f"Failed to parse LLM response as JSON: {e}")
logger.error(f"Response: {response}")
raise
except KeyError:
logger.error("Response JSON missing 'narrations' key")
logger.error(f"Response: {response}")
raise ValueError("Invalid response format")
# 4. Validate count (take first N if got more)
if len(narrations) > config.n_storyboard:
logger.warning(
f"Got {len(narrations)} narrations, taking first {config.n_storyboard}"
)
narrations = narrations[:config.n_storyboard]
elif len(narrations) < config.n_storyboard:
raise ValueError(
f"Expected at least {config.n_storyboard} narrations, "
f"got only {len(narrations)}"
)
# 5. Validate word count for each narration
for i, text in enumerate(narrations):
word_count = len(text)
if word_count < config.min_narration_words:
logger.warning(
f"Narration {i} too short: {word_count} chars "
f"(min: {config.min_narration_words})"
)
logger.info(f"Generated {len(narrations)} narrations successfully")
return narrations
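
Note that the count validation above is deliberately asymmetric; a short illustration (values are examples only):

# Over-generation is tolerated, under-generation is not:
n_storyboard = 5
narrations = ["n1", "n2", "n3", "n4", "n5", "n6", "n7"]  # LLM returned 7
narrations = narrations[:n_storyboard]  # kept: first 5 (logged as a warning)
# Had only 3 come back, generate_narrations would raise ValueError instead.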
def _parse_json(self, text: str) -> dict:
"""
Parse JSON from text, with fallback to extract JSON from markdown code blocks
Args:
text: Text containing JSON
Returns:
Parsed JSON dict
"""
# Try direct parsing first
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Try to extract JSON from markdown code block
json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(1))
except json.JSONDecodeError:
pass
# Try to find any JSON object in the text
json_pattern = r'\{[^{}]*"narrations"\s*:\s*\[[^\]]*\][^{}]*\}'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(0))
except json.JSONDecodeError:
pass
# If all fails, raise error
raise json.JSONDecodeError("No valid JSON found", text, 0)

pixelle_video/services/title_generator.py

@@ -1,138 +0,0 @@
"""
Title Generator Service
Service for generating video titles from content.
"""
from typing import Literal
from loguru import logger
# Title generation constants
AUTO_LENGTH_THRESHOLD = 15
MAX_TITLE_LENGTH = 15
class TitleGeneratorService:
"""
Title generation service
Generates video titles from content using different strategies:
- auto: Automatically decide based on content length
- direct: Use content directly as title
- llm: Always use LLM to generate title
"""
def __init__(self, pixelle_video_core):
"""
Initialize title generator service
Args:
pixelle_video_core: PixelleVideoCore instance
"""
self.core = pixelle_video_core
async def __call__(
self,
content: str,
strategy: Literal["auto", "direct", "llm"] = "auto",
max_length: int = MAX_TITLE_LENGTH
) -> str:
"""
Generate title from content
Args:
content: Source content (topic or script)
strategy: Generation strategy
- "auto": Auto-decide based on content length (default)
* If content <= AUTO_LENGTH_THRESHOLD chars: use directly
* If content > AUTO_LENGTH_THRESHOLD chars: use LLM
- "direct": Use content directly (truncated to max_length if needed)
- "llm": Always use LLM to generate title
max_length: Maximum title length (default: MAX_TITLE_LENGTH)
Returns:
Generated title
Examples:
# Auto strategy (default)
>>> title = await title_generator("AI技术") # Short, use directly
>>> # Returns: "AI技术"
>>> title = await title_generator("如何在信息爆炸时代保持深度思考") # Long, use LLM
>>> # Returns: "信息时代的深度思考" (LLM generated)
# Direct strategy
>>> title = await title_generator("Very long content...", strategy="direct")
>>> # Returns: "Very long content..." (truncated to max_length)
# LLM strategy
>>> title = await title_generator("AI", strategy="llm") # Force LLM even for short content
>>> # Returns: "人工智能技术" (LLM generated)
"""
if strategy == "direct":
return self._use_directly(content, max_length)
elif strategy == "llm":
return await self._generate_by_llm(content, max_length)
else: # auto
if len(content.strip()) <= AUTO_LENGTH_THRESHOLD:
return content.strip()
return await self._generate_by_llm(content, max_length)
def _use_directly(self, content: str, max_length: int) -> str:
"""
Use content directly as title (with truncation if needed)
Args:
content: Source content
max_length: Maximum title length
Returns:
Truncated or original content
"""
content = content.strip()
if len(content) <= max_length:
return content
return content[:max_length]
async def _generate_by_llm(self, content: str, max_length: int) -> str:
"""
Generate title using LLM
Args:
content: Source content (topic or script)
max_length: Maximum title length
Returns:
LLM-generated title
"""
from pixelle_video.prompts import build_title_generation_prompt
# Build prompt using template
prompt = build_title_generation_prompt(content, max_length=500)
# Call LLM to generate title
response = await self.core.llm(
prompt=prompt,
temperature=0.7,
max_tokens=50
)
# Clean up response
title = response.strip()
# Remove quotes if present
if title.startswith('"') and title.endswith('"'):
title = title[1:-1]
if title.startswith("'") and title.endswith("'"):
title = title[1:-1]
# Limit to max_length (safety)
if len(title) > max_length:
title = title[:max_length]
logger.debug(f"Generated title: '{title}' (length: {len(title)})")
return title
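
The three strategies in practice, as a hedged sketch (assumes `core` is an initialized PixelleVideoCore):

title_generator = TitleGeneratorService(core)

await title_generator("AI技术")                     # 4 chars <= 15: used directly, no LLM call
await title_generator("x" * 40, strategy="direct")  # no LLM call: truncated to 15 chars
await title_generator("AI", strategy="llm")         # forces an LLM call even for short input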

pixelle_video/services/video_generator.py

@@ -1,492 +0,0 @@
"""
Video Generator Service
End-to-end service for generating short videos from content.
"""
from datetime import datetime
from pathlib import Path
from typing import Optional, Callable, Literal
from loguru import logger
from pixelle_video.models.progress import ProgressEvent
from pixelle_video.models.storyboard import (
Storyboard,
StoryboardFrame,
StoryboardConfig,
ContentMetadata,
VideoGenerationResult
)
class VideoGeneratorService:
"""
Video generation service
Orchestrates the complete pipeline:
1. Generate narrations (LLM)
2. Generate image prompts (LLM)
3. Process each frame (TTS + Image + Compose + Video)
4. Concatenate all segments
5. Add BGM (optional)
"""
def __init__(self, pixelle_video_core):
"""
Initialize video generator service
Args:
pixelle_video_core: PixelleVideoCore instance
"""
self.core = pixelle_video_core
async def __call__(
self,
# === Input ===
text: str,
# === Processing Mode ===
mode: Literal["generate", "fixed"] = "generate",
# === Optional Title ===
title: Optional[str] = None,
# === Basic Config ===
n_scenes: int = 5, # Only used in generate mode; ignored in fixed mode
voice_id: str = "[Chinese] zh-CN Yunjian",
tts_workflow: Optional[str] = None,
tts_speed: float = 1.2,
ref_audio: Optional[str] = None, # Reference audio for voice cloning
output_path: Optional[str] = None,
# === LLM Parameters ===
min_narration_words: int = 5,
max_narration_words: int = 20,
min_image_prompt_words: int = 30,
max_image_prompt_words: int = 60,
# === Image Parameters ===
image_width: int = 1024,
image_height: int = 1024,
image_workflow: Optional[str] = None,
# === Video Parameters ===
video_fps: int = 30,
# === Frame Template (determines video size) ===
frame_template: Optional[str] = None,
# === Image Style ===
prompt_prefix: Optional[str] = None,
# === BGM Parameters ===
bgm_path: Optional[str] = None,
bgm_volume: float = 0.2,
bgm_mode: Literal["once", "loop"] = "loop",
# === Advanced Options ===
content_metadata: Optional[ContentMetadata] = None,
progress_callback: Optional[Callable[[ProgressEvent], None]] = None,
) -> VideoGenerationResult:
"""
Generate short video from text input
Args:
text: Text input (required)
- For generate mode: topic/theme (e.g., "如何提高学习效率")
- For fixed mode: complete narration script (each line is a narration)
mode: Processing mode (default "generate")
- "generate": LLM generates narrations from topic/theme, creates n_scenes
- "fixed": Use existing script as-is, each line becomes a narration
Note: In fixed mode, n_scenes is ignored (uses actual line count)
title: Video title (optional)
- If provided, use it as the video title
- If not provided:
* generate mode → use text as title
* fixed mode → LLM generates title from script
n_scenes: Number of storyboard scenes (default 5)
Only effective in generate mode; ignored in fixed mode
voice_id: TTS voice ID (default "[Chinese] zh-CN Yunjian")
tts_workflow: TTS workflow filename (e.g., "tts_edge.json", None = use default)
tts_speed: TTS speed multiplier (1.0 = normal, 1.2 = 20% faster, default 1.2)
output_path: Output video path (auto-generated if None)
min_narration_words: Min narration length (generate mode only)
max_narration_words: Max narration length (generate mode only)
min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length
image_width: Generated image width (default 1024)
image_height: Generated image height (default 1024)
image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
video_fps: Video frame rate (default 30)
frame_template: HTML template path with size (None = use default "1080x1920/default.html")
Format: "SIZExSIZE/template.html" (e.g., "1080x1920/default.html", "1920x1080/modern.html")
Video size is automatically determined from template path
prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
e.g., "anime style, vibrant colors" or "" for no prefix
bgm_path: BGM path (filename like "default.mp3", custom path, or None)
bgm_volume: BGM volume 0.0-1.0 (default 0.2)
bgm_mode: BGM mode "once" or "loop" (default "loop")
content_metadata: Content metadata (optional, for display)
progress_callback: Progress callback function(message, progress)
Returns:
VideoGenerationResult with video path and metadata
Examples:
# Generate mode: LLM creates narrations from topic
>>> result = await pixelle_video.generate_video(
... text="如何在信息爆炸时代保持深度思考",
... mode="generate",
... n_scenes=5,
... bgm_path="default"
... )
# Fixed mode: Use existing script (each line is a narration)
>>> script = '''大家好,今天跟你分享三个学习技巧
... 第一个技巧是专注力训练每天冥想10分钟
... 第二个技巧是主动回忆,学完立即复述
... 第三个技巧是间隔重复,学习后定期复习'''
>>> result = await pixelle_video.generate_video(
... text=script,
... mode="fixed",
... title="三个学习技巧"
... )
>>> print(result.video_path)
"""
# ========== Step 0: Process text and determine title ==========
logger.info(f"🚀 Starting video generation in '{mode}' mode")
logger.info(f" Text length: {len(text)} chars")
# Determine final title (priority: user-specified > auto-generated)
if title:
# User specified title, use it directly
final_title = title
logger.info(f" Title: '{title}' (user-specified)")
else:
# Auto-generate title using title_generator service
self._report_progress(progress_callback, "generating_title", 0.01)
if mode == "generate":
# Auto strategy: decide based on content length
final_title = await self.core.title_generator(text, strategy="auto")
logger.info(f" Title: '{final_title}' (auto-generated)")
else: # fixed
# Force LLM strategy: always use LLM for script
final_title = await self.core.title_generator(text, strategy="llm")
logger.info(f" Title: '{final_title}' (LLM-generated)")
# ========== Step 0.5: Create isolated task directory ==========
from pixelle_video.utils.os_util import (
create_task_output_dir,
get_task_final_video_path
)
# Create isolated task directory for this video generation
task_dir, task_id = create_task_output_dir()
logger.info(f"📁 Task directory created: {task_dir}")
logger.info(f" Task ID: {task_id}")
# Determine final video path
user_specified_output = None
if output_path is None:
# Use standardized path: output/{task_id}/final.mp4
output_path = get_task_final_video_path(task_id)
else:
# User specified custom path: save it and use task path for generation
user_specified_output = output_path
output_path = get_task_final_video_path(task_id)
logger.info(f" Will copy final video to: {user_specified_output}")
# Create storyboard config
config = StoryboardConfig(
task_id=task_id, # Pass task_id for file isolation
n_storyboard=n_scenes,
min_narration_words=min_narration_words,
max_narration_words=max_narration_words,
min_image_prompt_words=min_image_prompt_words,
max_image_prompt_words=max_image_prompt_words,
video_fps=video_fps,
voice_id=voice_id,
tts_workflow=tts_workflow,
tts_speed=tts_speed,
ref_audio=ref_audio,
image_width=image_width,
image_height=image_height,
image_workflow=image_workflow,
frame_template=frame_template or "1080x1920/default.html"
)
# Create storyboard
storyboard = Storyboard(
title=final_title, # Use final_title as video title
config=config,
content_metadata=content_metadata,
created_at=datetime.now()
)
try:
# ========== Step 1: Generate/Split narrations ==========
if mode == "generate":
# Generate narrations using LLM
self._report_progress(progress_callback, "generating_narrations", 0.05)
narrations = await self.core.narration_generator.generate_narrations(
config=config,
source_type="topic",
content_metadata=None,
topic=text,
content=None
)
logger.info(f"✅ Generated {len(narrations)} narrations")
else: # fixed
# Split fixed script by lines (trust user input completely)
self._report_progress(progress_callback, "splitting_script", 0.05)
narrations = await self._split_narration_script(text, config)
logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
# Step 2: Generate image prompts
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
# Override prompt_prefix if provided (temporarily modify config)
original_prefix = None
if prompt_prefix is not None:
# Fix: image config is under comfyui.image, not directly under config
image_config = self.core.config.get("comfyui", {}).get("image", {})
original_prefix = image_config.get("prompt_prefix")
image_config["prompt_prefix"] = prompt_prefix
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
try:
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
progress_callback=image_prompt_progress
)
finally:
# Restore original prompt_prefix
if original_prefix is not None:
image_config["prompt_prefix"] = original_prefix
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
# Step 3: Create frames
for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
frame = StoryboardFrame(
index=i,
narration=narration,
image_prompt=image_prompt,
created_at=datetime.now()
)
storyboard.frames.append(frame)
# Step 4: Process each frame
for i, frame in enumerate(storyboard.frames):
# Calculate fine-grained progress for this frame
base_progress = 0.2 # Frames processing starts at 20%
frame_range = 0.6 # Frames processing takes 60% (20%-80%)
per_frame_progress = frame_range / len(storyboard.frames)
# Create frame-specific progress callback
def frame_progress_callback(event: ProgressEvent):
"""Report sub-step progress within current frame"""
# Calculate overall progress: base + previous frames + current frame progress
overall_progress = base_progress + (per_frame_progress * i) + (per_frame_progress * event.progress)
# Forward the event with adjusted overall progress
if progress_callback:
adjusted_event = ProgressEvent(
event_type=event.event_type,
progress=overall_progress,
frame_current=event.frame_current,
frame_total=event.frame_total,
step=event.step,
action=event.action
)
progress_callback(adjusted_event)
# Report frame start
self._report_progress(
progress_callback,
"processing_frame",
base_progress + (per_frame_progress * i),
frame_current=i+1,
frame_total=len(storyboard.frames)
)
processed_frame = await self.core.frame_processor(
frame=frame,
storyboard=storyboard,
config=config,
total_frames=len(storyboard.frames),
progress_callback=frame_progress_callback
)
storyboard.total_duration += processed_frame.duration
logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)")
# Step 5: Concatenate videos
self._report_progress(progress_callback, "concatenating", 0.85)
segment_paths = [frame.video_segment_path for frame in storyboard.frames]
from pixelle_video.services.video import VideoService
video_service = VideoService()
final_video_path = video_service.concat_videos(
videos=segment_paths,
output=output_path,
bgm_path=bgm_path,
bgm_volume=bgm_volume,
bgm_mode=bgm_mode
)
storyboard.final_video_path = final_video_path
storyboard.completed_at = datetime.now()
# Copy to user-specified path if provided
if user_specified_output:
import shutil
Path(user_specified_output).parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(final_video_path, user_specified_output)
logger.info(f"📹 Final video copied to: {user_specified_output}")
# Use user-specified path in result
final_video_path = user_specified_output
storyboard.final_video_path = user_specified_output
logger.success(f"🎬 Video generation completed: {final_video_path}")
# Step 6: Create result
self._report_progress(progress_callback, "completed", 1.0)
video_path_obj = Path(final_video_path)
file_size = video_path_obj.stat().st_size
result = VideoGenerationResult(
video_path=final_video_path,
storyboard=storyboard,
duration=storyboard.total_duration,
file_size=file_size
)
logger.info(f"✅ Generated video: {final_video_path}")
logger.info(f" Duration: {storyboard.total_duration:.2f}s")
logger.info(f" Size: {file_size / (1024*1024):.2f} MB")
logger.info(f" Frames: {len(storyboard.frames)}")
return result
except Exception as e:
logger.error(f"❌ Video generation failed: {e}")
raise
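
The per-frame progress mapping in Step 4 is easiest to see with numbers; a worked example (5 frames assumed):

# Frames occupy the 20%-80% band, so each of 5 frames gets 12% of the bar.
base_progress = 0.2
per_frame = 0.6 / 5                                # 0.12
# Frame index i=2, halfway through its sub-steps (event.progress=0.5):
overall = base_progress + per_frame * 2 + per_frame * 0.5
assert abs(overall - 0.50) < 1e-9                  # reported as 50% overall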
def _report_progress(
self,
callback: Optional[Callable[[ProgressEvent], None]],
event_type: str,
progress: float,
**kwargs
):
"""
Report progress via callback
Args:
callback: Progress callback function
event_type: Type of progress event
progress: Progress value (0.0-1.0)
**kwargs: Additional event-specific parameters (frame_current, frame_total, etc.)
"""
if callback:
event = ProgressEvent(event_type=event_type, progress=progress, **kwargs)
callback(event)
logger.debug(f"Progress: {progress*100:.0f}% - {event_type}")
else:
logger.debug(f"Progress: {progress*100:.0f}% - {event_type}")
def _parse_json(self, text: str) -> dict:
"""
Parse JSON from text, with fallback to extract JSON from markdown code blocks
Args:
text: Text containing JSON
Returns:
Parsed JSON dict
"""
import json
import re
# Try direct parsing first
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Try to extract JSON from markdown code block
json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(1))
except json.JSONDecodeError:
pass
# Try to find any JSON object in the text (flexible pattern for narrations)
json_pattern = r'\{[^{}]*"narrations"\s*:\s*\[[^\]]*\][^{}]*\}'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(0))
except json.JSONDecodeError:
pass
# If all fails, raise error
raise json.JSONDecodeError("No valid JSON found", text, 0)
async def _split_narration_script(self, script: str, config: StoryboardConfig) -> list[str]:
"""
Split user-provided narration script into segments (trust user input completely).
Simply split by newline, each line becomes a narration segment.
Empty lines are filtered out.
Args:
script: Fixed narration script (each line is a narration)
config: Storyboard configuration (unused, kept for interface compatibility)
Returns:
List of narration segments
"""
logger.info(f"Splitting script by lines (length: {len(script)} chars)")
# Split by newline, filter empty lines
narrations = [line.strip() for line in script.split('\n') if line.strip()]
logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
# Log statistics
if narrations:
lengths = [len(s) for s in narrations]
logger.info(f" Min: {min(lengths)} chars, Max: {max(lengths)} chars, Avg: {sum(lengths)//len(lengths)} chars")
return narrations
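
Fixed-mode splitting is intentionally trivial; a quick demonstration:

# Fixed mode splits strictly on newlines and drops blank lines:
script = "大家好,今天跟你分享三个学习技巧\n\n第一个技巧是专注力训练每天冥想10分钟\n"
narrations = [line.strip() for line in script.split('\n') if line.strip()]
assert len(narrations) == 2  # the blank line is filtered; n_scenes plays no role here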