From 641efb81c6ff1aca927d2ae64f8ebf53c8624296 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Tue, 4 Nov 2025 11:38:06 +0800 Subject: [PATCH] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E5=86=97=E4=BD=99=E7=9A=84?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pixelle_video/pipelines/standard.py | 2 +- pixelle_video/service.py | 21 +- pixelle_video/services/__init__.py | 22 +- .../services/image_prompt_generator.py | 218 -------- pixelle_video/services/narration_generator.py | 179 ------- pixelle_video/services/title_generator.py | 138 ----- pixelle_video/services/video_generator.py | 492 ------------------ 7 files changed, 6 insertions(+), 1066 deletions(-) delete mode 100644 pixelle_video/services/image_prompt_generator.py delete mode 100644 pixelle_video/services/narration_generator.py delete mode 100644 pixelle_video/services/title_generator.py delete mode 100644 pixelle_video/services/video_generator.py diff --git a/pixelle_video/pipelines/standard.py b/pixelle_video/pipelines/standard.py index d28abb7..407e8f3 100644 --- a/pixelle_video/pipelines/standard.py +++ b/pixelle_video/pipelines/standard.py @@ -2,7 +2,7 @@ Standard Video Generation Pipeline Standard workflow for generating short videos from topic or fixed script. -This is the default pipeline that replicates the original VideoGeneratorService logic. +This is the default pipeline for general-purpose video generation. """ from datetime import datetime diff --git a/pixelle_video/service.py b/pixelle_video/service.py index 307af9b..8ccfe17 100644 --- a/pixelle_video/service.py +++ b/pixelle_video/service.py @@ -13,9 +13,6 @@ from pixelle_video.services.llm_service import LLMService from pixelle_video.services.tts_service import TTSService from pixelle_video.services.image import ImageService from pixelle_video.services.video import VideoService -from pixelle_video.services.narration_generator import NarrationGeneratorService -from pixelle_video.services.image_prompt_generator import ImagePromptGeneratorService -from pixelle_video.services.title_generator import TitleGeneratorService from pixelle_video.services.frame_processor import FrameProcessor from pixelle_video.pipelines.standard import StandardPipeline from pixelle_video.pipelines.custom import CustomPipeline @@ -70,13 +67,6 @@ class PixelleVideoCore: self.tts: Optional[TTSService] = None self.image: Optional[ImageService] = None self.video: Optional[VideoService] = None - - # Content generation services - self.narration_generator: Optional[NarrationGeneratorService] = None - self.image_prompt_generator: Optional[ImagePromptGeneratorService] = None - self.title_generator: Optional[TitleGeneratorService] = None - - # Frame processing services self.frame_processor: Optional[FrameProcessor] = None # Video generation pipelines (dictionary of pipeline_name -> pipeline_instance) @@ -105,23 +95,16 @@ class PixelleVideoCore: self.tts = TTSService(self.config) self.image = ImageService(self.config) self.video = VideoService() - - # 2. Initialize content generation services - self.narration_generator = NarrationGeneratorService(self) - self.image_prompt_generator = ImagePromptGeneratorService(self) - self.title_generator = TitleGeneratorService(self) - - # 3. Initialize frame processing services self.frame_processor = FrameProcessor(self) - # 4. Register video generation pipelines + # 2. Register video generation pipelines self.pipelines = { "standard": StandardPipeline(self), "custom": CustomPipeline(self), } logger.info(f"📹 Registered pipelines: {', '.join(self.pipelines.keys())}") - # 5. Set default pipeline callable (for backward compatibility) + # 3. Set default pipeline callable (for backward compatibility) self.generate_video = self._create_generate_video_wrapper() self._initialized = True diff --git a/pixelle_video/services/__init__.py b/pixelle_video/services/__init__.py index d4c29b9..70dbfc7 100644 --- a/pixelle_video/services/__init__.py +++ b/pixelle_video/services/__init__.py @@ -3,18 +3,13 @@ Pixelle-Video Services Core services providing atomic capabilities. -Core Services (Active): +Services: - LLMService: LLM text generation - TTSService: Text-to-speech - ImageService: Image generation - VideoService: Video processing - -Legacy Services (Kept for backward compatibility): -- NarrationGeneratorService: Use pipelines + utils.content_generators instead -- ImagePromptGeneratorService: Use pipelines + utils.content_generators instead -- TitleGeneratorService: Use pipelines + utils.content_generators instead -- FrameProcessor: Use pipelines instead -- VideoGeneratorService: Use pipelines.StandardPipeline instead +- FrameProcessor: Frame processing orchestrator +- ComfyBaseService: Base class for ComfyUI-based services """ from pixelle_video.services.comfy_base_service import ComfyBaseService @@ -22,13 +17,7 @@ from pixelle_video.services.llm_service import LLMService from pixelle_video.services.tts_service import TTSService from pixelle_video.services.image import ImageService from pixelle_video.services.video import VideoService - -# Legacy services (kept for backward compatibility) -from pixelle_video.services.narration_generator import NarrationGeneratorService -from pixelle_video.services.image_prompt_generator import ImagePromptGeneratorService -from pixelle_video.services.title_generator import TitleGeneratorService from pixelle_video.services.frame_processor import FrameProcessor -from pixelle_video.services.video_generator import VideoGeneratorService __all__ = [ "ComfyBaseService", @@ -36,11 +25,6 @@ __all__ = [ "TTSService", "ImageService", "VideoService", - # Legacy (backward compatibility) - "NarrationGeneratorService", - "ImagePromptGeneratorService", - "TitleGeneratorService", "FrameProcessor", - "VideoGeneratorService", ] diff --git a/pixelle_video/services/image_prompt_generator.py b/pixelle_video/services/image_prompt_generator.py deleted file mode 100644 index d8feea6..0000000 --- a/pixelle_video/services/image_prompt_generator.py +++ /dev/null @@ -1,218 +0,0 @@ -""" -Image prompt generation service -""" - -import json -import re -from typing import List, Optional, Callable - -from loguru import logger - -from pixelle_video.models.storyboard import StoryboardConfig -from pixelle_video.prompts import build_image_prompt_prompt - - -class ImagePromptGeneratorService: - """Image prompt generation service""" - - def __init__(self, pixelle_video_core): - """ - Initialize - - Args: - pixelle_video_core: PixelleVideoCore instance - """ - self.core = pixelle_video_core - - async def generate_image_prompts( - self, - narrations: List[str], - config: StoryboardConfig, - batch_size: int = 10, - max_retries: int = 3, - progress_callback: Optional[Callable] = None - ) -> List[str]: - """ - Generate image prompts based on narrations (with batching and retry) - - Args: - narrations: List of narrations - config: Storyboard configuration - batch_size: Max narrations per batch (default: 10) - max_retries: Max retry attempts per batch (default: 3) - progress_callback: Optional callback(completed, total, message) for progress updates - - Returns: - List of image prompts with prompt_prefix applied (from config) - - Raises: - ValueError: If batch fails after max_retries - json.JSONDecodeError: If unable to parse JSON - """ - logger.info(f"Generating image prompts for {len(narrations)} narrations (batch_size={batch_size}, max_retries={max_retries})") - - # Split narrations into batches - batches = [narrations[i:i + batch_size] for i in range(0, len(narrations), batch_size)] - logger.info(f"Split into {len(batches)} batches") - - all_base_prompts = [] - - # Process each batch - for batch_idx, batch_narrations in enumerate(batches, 1): - logger.info(f"Processing batch {batch_idx}/{len(batches)} ({len(batch_narrations)} narrations)") - - # Retry logic for this batch - for attempt in range(1, max_retries + 1): - try: - # Generate prompts for this batch - batch_prompts = await self._generate_batch_prompts( - batch_narrations, - config, - batch_idx, - attempt - ) - - # Validate count - if len(batch_prompts) != len(batch_narrations): - error_msg = ( - f"Batch {batch_idx} prompt count mismatch (attempt {attempt}/{max_retries}):\n" - f" Expected: {len(batch_narrations)} prompts\n" - f" Got: {len(batch_prompts)} prompts\n" - f" Difference: {abs(len(batch_prompts) - len(batch_narrations))} " - f"{'missing' if len(batch_prompts) < len(batch_narrations) else 'extra'}" - ) - logger.warning(error_msg) - - if attempt < max_retries: - logger.info(f"Retrying batch {batch_idx}...") - continue - else: - logger.error(f"Batch {batch_idx} failed after {max_retries} attempts") - raise ValueError(error_msg) - - # Success! - logger.info(f"✅ Batch {batch_idx} completed successfully ({len(batch_prompts)} prompts)") - all_base_prompts.extend(batch_prompts) - - # Report progress - if progress_callback: - progress_callback( - len(all_base_prompts), - len(narrations), - f"Batch {batch_idx}/{len(batches)} completed" - ) - - break - - except json.JSONDecodeError as e: - logger.error(f"Batch {batch_idx} JSON parse error (attempt {attempt}/{max_retries}): {e}") - if attempt >= max_retries: - raise - logger.info(f"Retrying batch {batch_idx}...") - - base_prompts = all_base_prompts - logger.info(f"✅ All batches completed. Total prompts: {len(base_prompts)}") - - # 5. Apply prompt prefix to each prompt - from pixelle_video.utils.prompt_helper import build_image_prompt - - # Get prompt prefix from config (fix: correct path is comfyui.image.prompt_prefix) - image_config = self.core.config.get("comfyui", {}).get("image", {}) - prompt_prefix = image_config.get("prompt_prefix", "") - - # Apply prefix to each base prompt - final_prompts = [] - for base_prompt in base_prompts: - final_prompt = build_image_prompt(base_prompt, prompt_prefix) - final_prompts.append(final_prompt) - - logger.info(f"Generated {len(final_prompts)} final image prompts with prefix applied") - return final_prompts - - async def _generate_batch_prompts( - self, - batch_narrations: List[str], - config: StoryboardConfig, - batch_idx: int, - attempt: int - ) -> List[str]: - """ - Generate image prompts for a single batch of narrations - - Args: - batch_narrations: Batch of narrations - config: Storyboard configuration - batch_idx: Batch index (for logging) - attempt: Attempt number (for logging) - - Returns: - List of image prompts for this batch - - Raises: - json.JSONDecodeError: If unable to parse JSON - KeyError: If response format is invalid - """ - logger.debug(f"Batch {batch_idx} attempt {attempt}: Generating prompts for {len(batch_narrations)} narrations") - - # 1. Build prompt - prompt = build_image_prompt_prompt( - narrations=batch_narrations, - min_words=config.min_image_prompt_words, - max_words=config.max_image_prompt_words - ) - - # 2. Call LLM - response = await self.core.llm( - prompt=prompt, - temperature=0.7, - max_tokens=8192 - ) - - logger.debug(f"Batch {batch_idx} attempt {attempt}: LLM response length: {len(response)} chars") - - # 3. Parse JSON - result = self._parse_json(response) - - if "image_prompts" not in result: - logger.error("Response missing 'image_prompts' key") - raise KeyError("Invalid response format: missing 'image_prompts'") - - return result["image_prompts"] - - def _parse_json(self, text: str) -> dict: - """ - Parse JSON from text, with fallback to extract JSON from markdown code blocks - - Args: - text: Text containing JSON - - Returns: - Parsed JSON dict - """ - # Try direct parsing first - try: - return json.loads(text) - except json.JSONDecodeError: - pass - - # Try to extract JSON from markdown code block - json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```' - match = re.search(json_pattern, text, re.DOTALL) - if match: - try: - return json.loads(match.group(1)) - except json.JSONDecodeError: - pass - - # Try to find any JSON object in the text - json_pattern = r'\{[^{}]*"image_prompts"\s*:\s*\[[^\]]*\][^{}]*\}' - match = re.search(json_pattern, text, re.DOTALL) - if match: - try: - return json.loads(match.group(0)) - except json.JSONDecodeError: - pass - - # If all fails, raise error - raise json.JSONDecodeError("No valid JSON found", text, 0) - diff --git a/pixelle_video/services/narration_generator.py b/pixelle_video/services/narration_generator.py deleted file mode 100644 index 8098109..0000000 --- a/pixelle_video/services/narration_generator.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -Narration generation service - -Supports two content sources: -1. Topic: Generate narrations from a topic/theme -2. Content: Extract/refine narrations from user-provided content -""" - -import json -import re -from typing import List, Optional, Literal - -from loguru import logger - -from pixelle_video.models.storyboard import StoryboardConfig, ContentMetadata -from pixelle_video.prompts import ( - build_topic_narration_prompt, - build_content_narration_prompt, -) - - -class NarrationGeneratorService: - """Narration generation service""" - - def __init__(self, pixelle_video_core): - """ - Initialize - - Args: - pixelle_video_core: PixelleVideoCore instance (for calling llm) - """ - self.core = pixelle_video_core - - async def generate_narrations( - self, - config: StoryboardConfig, - source_type: Literal["topic", "content"], - content_metadata: Optional[ContentMetadata] = None, - topic: Optional[str] = None, - content: Optional[str] = None, - ) -> List[str]: - """ - Generate storyboard narrations from different sources - - Args: - config: Storyboard configuration - source_type: Type of content source ("topic" or "content") - content_metadata: Content metadata (optional, not currently used) - topic: Topic/theme (required if source_type="topic") - content: User-provided content (required if source_type="content") - - Returns: - List of narration texts - - Raises: - ValueError: If parameters don't match source_type or narration count mismatch - json.JSONDecodeError: If unable to parse LLM response as JSON - - Examples: - # Generate from topic - >>> narrations = await service.generate_narrations( - ... config=config, - ... source_type="topic", - ... topic="如何提高学习效率" - ... ) - - # Generate from user content - >>> narrations = await service.generate_narrations( - ... config=config, - ... source_type="content", - ... content="Today I want to share three useful tips..." - ... ) - """ - # 1. Build prompt based on source_type - if source_type == "topic": - if topic is None: - raise ValueError("topic is required when source_type='topic'") - logger.info(f"Generating topic narrations for: {topic}") - prompt = build_topic_narration_prompt( - topic=topic, - n_storyboard=config.n_storyboard, - min_words=config.min_narration_words, - max_words=config.max_narration_words - ) - - else: # content - if content is None: - raise ValueError("content is required when source_type='content'") - logger.info(f"Generating narrations from user content ({len(content)} chars)") - prompt = build_content_narration_prompt( - content=content, - n_storyboard=config.n_storyboard, - min_words=config.min_narration_words, - max_words=config.max_narration_words - ) - - # 2. Call LLM (using self.core.llm) - response = await self.core.llm( - prompt=prompt, - temperature=0.8, # Higher temperature for more creativity - max_tokens=2000 - ) - - logger.debug(f"LLM response: {response[:200]}...") - - # 3. Parse JSON - try: - result = self._parse_json(response) - narrations = result["narrations"] - except json.JSONDecodeError as e: - logger.error(f"Failed to parse LLM response as JSON: {e}") - logger.error(f"Response: {response}") - raise - except KeyError: - logger.error("Response JSON missing 'narrations' key") - logger.error(f"Response: {response}") - raise ValueError("Invalid response format") - - # 4. Validate count (take first N if got more) - if len(narrations) > config.n_storyboard: - logger.warning( - f"Got {len(narrations)} narrations, taking first {config.n_storyboard}" - ) - narrations = narrations[:config.n_storyboard] - elif len(narrations) < config.n_storyboard: - raise ValueError( - f"Expected at least {config.n_storyboard} narrations, " - f"got only {len(narrations)}" - ) - - # 5. Validate word count for each narration - for i, text in enumerate(narrations): - word_count = len(text) - if word_count < config.min_narration_words: - logger.warning( - f"Narration {i} too short: {word_count} chars " - f"(min: {config.min_narration_words})" - ) - - logger.info(f"Generated {len(narrations)} narrations successfully") - return narrations - - def _parse_json(self, text: str) -> dict: - """ - Parse JSON from text, with fallback to extract JSON from markdown code blocks - - Args: - text: Text containing JSON - - Returns: - Parsed JSON dict - """ - # Try direct parsing first - try: - return json.loads(text) - except json.JSONDecodeError: - pass - - # Try to extract JSON from markdown code block - json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```' - match = re.search(json_pattern, text, re.DOTALL) - if match: - try: - return json.loads(match.group(1)) - except json.JSONDecodeError: - pass - - # Try to find any JSON object in the text - json_pattern = r'\{[^{}]*"narrations"\s*:\s*\[[^\]]*\][^{}]*\}' - match = re.search(json_pattern, text, re.DOTALL) - if match: - try: - return json.loads(match.group(0)) - except json.JSONDecodeError: - pass - - # If all fails, raise error - raise json.JSONDecodeError("No valid JSON found", text, 0) - diff --git a/pixelle_video/services/title_generator.py b/pixelle_video/services/title_generator.py deleted file mode 100644 index 50328da..0000000 --- a/pixelle_video/services/title_generator.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -Title Generator Service - -Service for generating video titles from content. -""" - -from typing import Literal - -from loguru import logger - - -# Title generation constants -AUTO_LENGTH_THRESHOLD = 15 -MAX_TITLE_LENGTH = 15 - - -class TitleGeneratorService: - """ - Title generation service - - Generates video titles from content using different strategies: - - auto: Automatically decide based on content length - - direct: Use content directly as title - - llm: Always use LLM to generate title - """ - - def __init__(self, pixelle_video_core): - """ - Initialize title generator service - - Args: - pixelle_video_core: PixelleVideoCore instance - """ - self.core = pixelle_video_core - - async def __call__( - self, - content: str, - strategy: Literal["auto", "direct", "llm"] = "auto", - max_length: int = MAX_TITLE_LENGTH - ) -> str: - """ - Generate title from content - - Args: - content: Source content (topic or script) - strategy: Generation strategy - - "auto": Auto-decide based on content length (default) - * If content <= AUTO_LENGTH_THRESHOLD chars: use directly - * If content > AUTO_LENGTH_THRESHOLD chars: use LLM - - "direct": Use content directly (truncated to max_length if needed) - - "llm": Always use LLM to generate title - max_length: Maximum title length (default: MAX_TITLE_LENGTH) - - Returns: - Generated title - - Examples: - # Auto strategy (default) - >>> title = await title_generator("AI技术") # Short, use directly - >>> # Returns: "AI技术" - - >>> title = await title_generator("如何在信息爆炸时代保持深度思考") # Long, use LLM - >>> # Returns: "信息时代的深度思考" (LLM generated) - - # Direct strategy - >>> title = await title_generator("Very long content...", strategy="direct") - >>> # Returns: "Very long content..." (truncated to max_length) - - # LLM strategy - >>> title = await title_generator("AI", strategy="llm") # Force LLM even for short content - >>> # Returns: "人工智能技术" (LLM generated) - """ - if strategy == "direct": - return self._use_directly(content, max_length) - elif strategy == "llm": - return await self._generate_by_llm(content, max_length) - else: # auto - if len(content.strip()) <= AUTO_LENGTH_THRESHOLD: - return content.strip() - return await self._generate_by_llm(content, max_length) - - def _use_directly(self, content: str, max_length: int) -> str: - """ - Use content directly as title (with truncation if needed) - - Args: - content: Source content - max_length: Maximum title length - - Returns: - Truncated or original content - """ - content = content.strip() - if len(content) <= max_length: - return content - return content[:max_length] - - async def _generate_by_llm(self, content: str, max_length: int) -> str: - """ - Generate title using LLM - - Args: - content: Source content (topic or script) - max_length: Maximum title length - - Returns: - LLM-generated title - """ - from pixelle_video.prompts import build_title_generation_prompt - - # Build prompt using template - prompt = build_title_generation_prompt(content, max_length=500) - - # Call LLM to generate title - response = await self.core.llm( - prompt=prompt, - temperature=0.7, - max_tokens=50 - ) - - # Clean up response - title = response.strip() - - # Remove quotes if present - if title.startswith('"') and title.endswith('"'): - title = title[1:-1] - if title.startswith("'") and title.endswith("'"): - title = title[1:-1] - - # Limit to max_length (safety) - if len(title) > max_length: - title = title[:max_length] - - logger.debug(f"Generated title: '{title}' (length: {len(title)})") - - return title - diff --git a/pixelle_video/services/video_generator.py b/pixelle_video/services/video_generator.py deleted file mode 100644 index f0ee4fb..0000000 --- a/pixelle_video/services/video_generator.py +++ /dev/null @@ -1,492 +0,0 @@ -""" -Video Generator Service - -End-to-end service for generating short videos from content. -""" - -from datetime import datetime -from pathlib import Path -from typing import Optional, Callable, Literal - -from loguru import logger - -from pixelle_video.models.progress import ProgressEvent -from pixelle_video.models.storyboard import ( - Storyboard, - StoryboardFrame, - StoryboardConfig, - ContentMetadata, - VideoGenerationResult -) - - -class VideoGeneratorService: - """ - Video generation service - - Orchestrates the complete pipeline: - 1. Generate narrations (LLM) - 2. Generate image prompts (LLM) - 3. Process each frame (TTS + Image + Compose + Video) - 4. Concatenate all segments - 5. Add BGM (optional) - """ - - def __init__(self, pixelle_video_core): - """ - Initialize video generator service - - Args: - pixelle_video_core: PixelleVideoCore instance - """ - self.core = pixelle_video_core - - async def __call__( - self, - # === Input === - text: str, - - # === Processing Mode === - mode: Literal["generate", "fixed"] = "generate", - - # === Optional Title === - title: Optional[str] = None, - - # === Basic Config === - n_scenes: int = 5, # Only used in generate mode; ignored in fixed mode - voice_id: str = "[Chinese] zh-CN Yunjian", - tts_workflow: Optional[str] = None, - tts_speed: float = 1.2, - ref_audio: Optional[str] = None, # Reference audio for voice cloning - output_path: Optional[str] = None, - - # === LLM Parameters === - min_narration_words: int = 5, - max_narration_words: int = 20, - min_image_prompt_words: int = 30, - max_image_prompt_words: int = 60, - - # === Image Parameters === - image_width: int = 1024, - image_height: int = 1024, - image_workflow: Optional[str] = None, - - # === Video Parameters === - video_fps: int = 30, - - # === Frame Template (determines video size) === - frame_template: Optional[str] = None, - - # === Image Style === - prompt_prefix: Optional[str] = None, - - # === BGM Parameters === - bgm_path: Optional[str] = None, - bgm_volume: float = 0.2, - bgm_mode: Literal["once", "loop"] = "loop", - - # === Advanced Options === - content_metadata: Optional[ContentMetadata] = None, - progress_callback: Optional[Callable[[ProgressEvent], None]] = None, - ) -> VideoGenerationResult: - """ - Generate short video from text input - - Args: - text: Text input (required) - - For generate mode: topic/theme (e.g., "如何提高学习效率") - - For fixed mode: complete narration script (each line is a narration) - - mode: Processing mode (default "generate") - - "generate": LLM generates narrations from topic/theme, creates n_scenes - - "fixed": Use existing script as-is, each line becomes a narration - - Note: In fixed mode, n_scenes is ignored (uses actual line count) - - title: Video title (optional) - - If provided, use it as the video title - - If not provided: - * generate mode → use text as title - * fixed mode → LLM generates title from script - - n_scenes: Number of storyboard scenes (default 5) - Only effective in generate mode; ignored in fixed mode - - voice_id: TTS voice ID (default "[Chinese] zh-CN Yunjian") - tts_workflow: TTS workflow filename (e.g., "tts_edge.json", None = use default) - tts_speed: TTS speed multiplier (1.0 = normal, 1.2 = 20% faster, default 1.2) - output_path: Output video path (auto-generated if None) - - min_narration_words: Min narration length (generate mode only) - max_narration_words: Max narration length (generate mode only) - min_image_prompt_words: Min image prompt length - max_image_prompt_words: Max image prompt length - - image_width: Generated image width (default 1024) - image_height: Generated image height (default 1024) - image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default) - - video_fps: Video frame rate (default 30) - - frame_template: HTML template path with size (None = use default "1080x1920/default.html") - Format: "SIZExSIZE/template.html" (e.g., "1080x1920/default.html", "1920x1080/modern.html") - Video size is automatically determined from template path - - prompt_prefix: Image prompt prefix (overrides config.yaml if provided) - e.g., "anime style, vibrant colors" or "" for no prefix - - bgm_path: BGM path (filename like "default.mp3", custom path, or None) - bgm_volume: BGM volume 0.0-1.0 (default 0.2) - bgm_mode: BGM mode "once" or "loop" (default "loop") - - content_metadata: Content metadata (optional, for display) - progress_callback: Progress callback function(message, progress) - - Returns: - VideoGenerationResult with video path and metadata - - Examples: - # Generate mode: LLM creates narrations from topic - >>> result = await pixelle_video.generate_video( - ... text="如何在信息爆炸时代保持深度思考", - ... mode="generate", - ... n_scenes=5, - ... bgm_path="default" - ... ) - - # Fixed mode: Use existing script (each line is a narration) - >>> script = '''大家好,今天跟你分享三个学习技巧 - ... 第一个技巧是专注力训练,每天冥想10分钟 - ... 第二个技巧是主动回忆,学完立即复述 - ... 第三个技巧是间隔重复,学习后定期复习''' - >>> result = await pixelle_video.generate_video( - ... text=script, - ... mode="fixed", - ... title="三个学习技巧" - ... ) - >>> print(result.video_path) - """ - # ========== Step 0: Process text and determine title ========== - logger.info(f"🚀 Starting video generation in '{mode}' mode") - logger.info(f" Text length: {len(text)} chars") - - # Determine final title (priority: user-specified > auto-generated) - if title: - # User specified title, use it directly - final_title = title - logger.info(f" Title: '{title}' (user-specified)") - else: - # Auto-generate title using title_generator service - self._report_progress(progress_callback, "generating_title", 0.01) - if mode == "generate": - # Auto strategy: decide based on content length - final_title = await self.core.title_generator(text, strategy="auto") - logger.info(f" Title: '{final_title}' (auto-generated)") - else: # fixed - # Force LLM strategy: always use LLM for script - final_title = await self.core.title_generator(text, strategy="llm") - logger.info(f" Title: '{final_title}' (LLM-generated)") - - # ========== Step 0.5: Create isolated task directory ========== - from pixelle_video.utils.os_util import ( - create_task_output_dir, - get_task_final_video_path - ) - - # Create isolated task directory for this video generation - task_dir, task_id = create_task_output_dir() - logger.info(f"📁 Task directory created: {task_dir}") - logger.info(f" Task ID: {task_id}") - - # Determine final video path - user_specified_output = None - if output_path is None: - # Use standardized path: output/{task_id}/final.mp4 - output_path = get_task_final_video_path(task_id) - else: - # User specified custom path: save it and use task path for generation - user_specified_output = output_path - output_path = get_task_final_video_path(task_id) - logger.info(f" Will copy final video to: {user_specified_output}") - - # Create storyboard config - config = StoryboardConfig( - task_id=task_id, # Pass task_id for file isolation - n_storyboard=n_scenes, - min_narration_words=min_narration_words, - max_narration_words=max_narration_words, - min_image_prompt_words=min_image_prompt_words, - max_image_prompt_words=max_image_prompt_words, - video_fps=video_fps, - voice_id=voice_id, - tts_workflow=tts_workflow, - tts_speed=tts_speed, - ref_audio=ref_audio, - image_width=image_width, - image_height=image_height, - image_workflow=image_workflow, - frame_template=frame_template or "1080x1920/default.html" - ) - - # Create storyboard - storyboard = Storyboard( - title=final_title, # Use final_title as video title - config=config, - content_metadata=content_metadata, - created_at=datetime.now() - ) - - try: - # ========== Step 1: Generate/Split narrations ========== - if mode == "generate": - # Generate narrations using LLM - self._report_progress(progress_callback, "generating_narrations", 0.05) - narrations = await self.core.narration_generator.generate_narrations( - config=config, - source_type="topic", - content_metadata=None, - topic=text, - content=None - ) - logger.info(f"✅ Generated {len(narrations)} narrations") - else: # fixed - # Split fixed script by lines (trust user input completely) - self._report_progress(progress_callback, "splitting_script", 0.05) - narrations = await self._split_narration_script(text, config) - logger.info(f"✅ Split script into {len(narrations)} segments (by lines)") - logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode") - - # Step 2: Generate image prompts - self._report_progress(progress_callback, "generating_image_prompts", 0.15) - - # Override prompt_prefix if provided (temporarily modify config) - original_prefix = None - if prompt_prefix is not None: - # Fix: image config is under comfyui.image, not directly under config - image_config = self.core.config.get("comfyui", {}).get("image", {}) - original_prefix = image_config.get("prompt_prefix") - image_config["prompt_prefix"] = prompt_prefix - logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'") - - try: - # Create progress callback wrapper for image prompt generation (15%-30% range) - def image_prompt_progress(completed: int, total: int, message: str): - # Map batch progress to 15%-30% range - batch_progress = completed / total if total > 0 else 0 - overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30% - self._report_progress( - progress_callback, - "generating_image_prompts", - overall_progress, - extra_info=message - ) - - image_prompts = await self.core.image_prompt_generator.generate_image_prompts( - narrations=narrations, - config=config, - progress_callback=image_prompt_progress - ) - finally: - # Restore original prompt_prefix - if original_prefix is not None: - image_config["prompt_prefix"] = original_prefix - logger.info(f"✅ Generated {len(image_prompts)} image prompts") - - # Step 3: Create frames - for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)): - frame = StoryboardFrame( - index=i, - narration=narration, - image_prompt=image_prompt, - created_at=datetime.now() - ) - storyboard.frames.append(frame) - - # Step 4: Process each frame - for i, frame in enumerate(storyboard.frames): - # Calculate fine-grained progress for this frame - base_progress = 0.2 # Frames processing starts at 20% - frame_range = 0.6 # Frames processing takes 60% (20%-80%) - per_frame_progress = frame_range / len(storyboard.frames) - - # Create frame-specific progress callback - def frame_progress_callback(event: ProgressEvent): - """Report sub-step progress within current frame""" - # Calculate overall progress: base + previous frames + current frame progress - overall_progress = base_progress + (per_frame_progress * i) + (per_frame_progress * event.progress) - # Forward the event with adjusted overall progress - if progress_callback: - adjusted_event = ProgressEvent( - event_type=event.event_type, - progress=overall_progress, - frame_current=event.frame_current, - frame_total=event.frame_total, - step=event.step, - action=event.action - ) - progress_callback(adjusted_event) - - # Report frame start - self._report_progress( - progress_callback, - "processing_frame", - base_progress + (per_frame_progress * i), - frame_current=i+1, - frame_total=len(storyboard.frames) - ) - - processed_frame = await self.core.frame_processor( - frame=frame, - storyboard=storyboard, - config=config, - total_frames=len(storyboard.frames), - progress_callback=frame_progress_callback - ) - storyboard.total_duration += processed_frame.duration - logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)") - - # Step 5: Concatenate videos - self._report_progress(progress_callback, "concatenating", 0.85) - segment_paths = [frame.video_segment_path for frame in storyboard.frames] - - from pixelle_video.services.video import VideoService - video_service = VideoService() - - final_video_path = video_service.concat_videos( - videos=segment_paths, - output=output_path, - bgm_path=bgm_path, - bgm_volume=bgm_volume, - bgm_mode=bgm_mode - ) - - storyboard.final_video_path = final_video_path - storyboard.completed_at = datetime.now() - - # Copy to user-specified path if provided - if user_specified_output: - import shutil - Path(user_specified_output).parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(final_video_path, user_specified_output) - logger.info(f"📹 Final video copied to: {user_specified_output}") - # Use user-specified path in result - final_video_path = user_specified_output - storyboard.final_video_path = user_specified_output - - logger.success(f"🎬 Video generation completed: {final_video_path}") - - # Step 6: Create result - self._report_progress(progress_callback, "completed", 1.0) - - video_path_obj = Path(final_video_path) - file_size = video_path_obj.stat().st_size - - result = VideoGenerationResult( - video_path=final_video_path, - storyboard=storyboard, - duration=storyboard.total_duration, - file_size=file_size - ) - - logger.info(f"✅ Generated video: {final_video_path}") - logger.info(f" Duration: {storyboard.total_duration:.2f}s") - logger.info(f" Size: {file_size / (1024*1024):.2f} MB") - logger.info(f" Frames: {len(storyboard.frames)}") - - return result - - except Exception as e: - logger.error(f"❌ Video generation failed: {e}") - raise - - def _report_progress( - self, - callback: Optional[Callable[[ProgressEvent], None]], - event_type: str, - progress: float, - **kwargs - ): - """ - Report progress via callback - - Args: - callback: Progress callback function - event_type: Type of progress event - progress: Progress value (0.0-1.0) - **kwargs: Additional event-specific parameters (frame_current, frame_total, etc.) - """ - if callback: - event = ProgressEvent(event_type=event_type, progress=progress, **kwargs) - callback(event) - logger.debug(f"Progress: {progress*100:.0f}% - {event_type}") - else: - logger.debug(f"Progress: {progress*100:.0f}% - {event_type}") - - def _parse_json(self, text: str) -> dict: - """ - Parse JSON from text, with fallback to extract JSON from markdown code blocks - - Args: - text: Text containing JSON - - Returns: - Parsed JSON dict - """ - import json - import re - - # Try direct parsing first - try: - return json.loads(text) - except json.JSONDecodeError: - pass - - # Try to extract JSON from markdown code block - json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```' - match = re.search(json_pattern, text, re.DOTALL) - if match: - try: - return json.loads(match.group(1)) - except json.JSONDecodeError: - pass - - # Try to find any JSON object in the text (flexible pattern for narrations) - json_pattern = r'\{[^{}]*"narrations"\s*:\s*\[[^\]]*\][^{}]*\}' - match = re.search(json_pattern, text, re.DOTALL) - if match: - try: - return json.loads(match.group(0)) - except json.JSONDecodeError: - pass - - # If all fails, raise error - raise json.JSONDecodeError("No valid JSON found", text, 0) - - async def _split_narration_script(self, script: str, config: StoryboardConfig) -> list[str]: - """ - Split user-provided narration script into segments (trust user input completely). - - Simply split by newline, each line becomes a narration segment. - Empty lines are filtered out. - - Args: - script: Fixed narration script (each line is a narration) - config: Storyboard configuration (unused, kept for interface compatibility) - - Returns: - List of narration segments - """ - logger.info(f"Splitting script by lines (length: {len(script)} chars)") - - # Split by newline, filter empty lines - narrations = [line.strip() for line in script.split('\n') if line.strip()] - - logger.info(f"✅ Split script into {len(narrations)} segments (by lines)") - - # Log statistics - if narrations: - lengths = [len(s) for s in narrations] - logger.info(f" Min: {min(lengths)} chars, Max: {max(lengths)} chars, Avg: {sum(lengths)//len(lengths)} chars") - - return narrations -