抽象pipeline逻辑

2025-11-04 11:23:46 +08:00
parent b475090880
commit 22c46cf2c5
9 changed files with 1345 additions and 13 deletions
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -0,0 +1,388 @@
+"""
+Standard Video Generation Pipeline
+
+Standard workflow for generating short videos from topic or fixed script.
+This is the default pipeline that replicates the original VideoGeneratorService logic.
+"""
+
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, Callable, Literal
+
+from loguru import logger
+
+from pixelle_video.pipelines.base import BasePipeline
+from pixelle_video.models.progress import ProgressEvent
+from pixelle_video.models.storyboard import (
+    Storyboard,
+    StoryboardFrame,
+    StoryboardConfig,
+    ContentMetadata,
+    VideoGenerationResult
+)
+from pixelle_video.utils.content_generators import (
+    generate_title,
+    generate_narrations_from_topic,
+    split_narration_script,
+    generate_image_prompts,
+)
+
+
+class StandardPipeline(BasePipeline):
+    """
+    Standard video generation pipeline
+
+    Workflow:
+    1. Generate/determine title
+    2. Generate narrations (from topic or split fixed script)
+    3. Generate image prompts for each narration
+    4. For each frame:
+       - Generate audio (TTS)
+       - Generate image
+       - Compose frame with template
+       - Create video segment
+    5. Concatenate all segments
+    6. Add BGM (optional)
+
+    Supports two modes:
+    - "generate": LLM generates narrations from topic
+    - "fixed": Use provided script as-is (each line = one narration)
+
+    Note: this class uses ``self.llm``, ``self.core`` and
+    ``self._report_progress``; these are presumably provided by
+    ``BasePipeline`` (defined in another module) - confirm there.
+    """
+
+    async def __call__(
+        self,
+        # === Input ===
+        text: str,
+
+        # === Processing Mode ===
+        mode: Literal["generate", "fixed"] = "generate",
+
+        # === Optional Title ===
+        title: Optional[str] = None,
+
+        # === Basic Config ===
+        n_scenes: int = 5,  # Only used in generate mode; ignored in fixed mode
+        voice_id: str = "[Chinese] zh-CN Yunjian",
+        tts_workflow: Optional[str] = None,
+        tts_speed: float = 1.2,
+        ref_audio: Optional[str] = None,  # Reference audio for voice cloning
+        output_path: Optional[str] = None,
+
+        # === LLM Parameters ===
+        min_narration_words: int = 5,
+        max_narration_words: int = 20,
+        min_image_prompt_words: int = 30,
+        max_image_prompt_words: int = 60,
+
+        # === Image Parameters ===
+        image_width: int = 1024,
+        image_height: int = 1024,
+        image_workflow: Optional[str] = None,
+
+        # === Video Parameters ===
+        video_fps: int = 30,
+
+        # === Frame Template (determines video size) ===
+        frame_template: Optional[str] = None,
+
+        # === Image Style ===
+        prompt_prefix: Optional[str] = None,
+
+        # === BGM Parameters ===
+        bgm_path: Optional[str] = None,
+        bgm_volume: float = 0.2,
+        bgm_mode: Literal["once", "loop"] = "loop",
+
+        # === Advanced Options ===
+        content_metadata: Optional[ContentMetadata] = None,
+        progress_callback: Optional[Callable[[ProgressEvent], None]] = None,
+    ) -> VideoGenerationResult:
+        """
+        Generate short video from text input
+
+        Args:
+            text: Text input (required)
+                  - For generate mode: topic/theme (e.g., "如何提高学习效率")
+                  - For fixed mode: complete narration script (each line is a narration)
+
+            mode: Processing mode (default "generate")
+                  - "generate": LLM generates narrations from topic, creates n_scenes
+                  - "fixed": Use existing script as-is, each line becomes a narration
+
+                  Note: In fixed mode, n_scenes is ignored (uses actual line count)
+
+            title: Video title (optional)
+                   - If provided (non-empty), it is used as-is
+                   - If not provided, generate_title() is awaited with
+                     * strategy="auto" in generate mode
+                     * strategy="llm" in fixed mode
+
+            n_scenes: Number of storyboard scenes (default 5)
+                      Only effective in generate mode; ignored in fixed mode
+
+            voice_id: TTS voice ID (default "[Chinese] zh-CN Yunjian")
+            tts_workflow: TTS workflow filename (e.g., "tts_edge.json", None = use default)
+            tts_speed: TTS speed multiplier (1.0 = normal, 1.2 = 20% faster, default 1.2)
+            ref_audio: Reference audio path for voice cloning (optional)
+            output_path: Optional destination for the final video. The video is
+                         always rendered to the task directory's final video
+                         path; when output_path is given (non-empty) the
+                         finished file is additionally copied there and that
+                         path is the one returned.
+
+            min_narration_words: Min narration length (generate mode only)
+            max_narration_words: Max narration length (generate mode only)
+            min_image_prompt_words: Min image prompt length
+            max_image_prompt_words: Max image prompt length
+
+            image_width: Generated image width (default 1024)
+            image_height: Generated image height (default 1024)
+            image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
+
+            video_fps: Video frame rate (default 30)
+
+            frame_template: HTML template path with size (None = use default "1080x1920/default.html")
+                           Format: "SIZExSIZE/template.html" (e.g., "1080x1920/default.html", "1920x1080/modern.html")
+                           Video size is automatically determined from template path
+
+            prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
+                          e.g., "anime style, vibrant colors" or "" for no prefix.
+                          The override is applied to the shared image config only
+                          for the duration of prompt generation and is always
+                          rolled back afterwards.
+
+            bgm_path: BGM path (filename like "default.mp3", custom path, or None)
+            bgm_volume: BGM volume 0.0-1.0 (default 0.2)
+            bgm_mode: BGM mode "once" or "loop" (default "loop")
+
+            content_metadata: Content metadata (optional, for display)
+            progress_callback: Progress callback function(ProgressEvent).
+                               Reported progress is non-decreasing across stages:
+                               title 0.01, narrations 0.05, image prompts
+                               0.15-0.20, frames 0.20-0.80, concatenation 0.85,
+                               completed 1.0.
+
+        Returns:
+            VideoGenerationResult with video path and metadata
+
+        Raises:
+            ValueError: If no narrations or no storyboard frames were produced.
+            Exception: Any error raised by a pipeline step is logged and re-raised.
+        """
+        # ========== Step 0: Process text and determine title ==========
+        logger.info(f"🚀 Starting StandardPipeline in '{mode}' mode")
+        logger.info(f"   Text length: {len(text)} chars")
+
+        # Determine final title
+        if title:
+            final_title = title
+            logger.info(f"   Title: '{title}' (user-specified)")
+        else:
+            self._report_progress(progress_callback, "generating_title", 0.01)
+            if mode == "generate":
+                final_title = await generate_title(self.llm, text, strategy="auto")
+                logger.info(f"   Title: '{final_title}' (auto-generated)")
+            else:  # fixed
+                final_title = await generate_title(self.llm, text, strategy="llm")
+                logger.info(f"   Title: '{final_title}' (LLM-generated)")
+
+        # ========== Step 0.5: Create isolated task directory ==========
+        from pixelle_video.utils.os_util import (
+            create_task_output_dir,
+            get_task_final_video_path
+        )
+
+        task_dir, task_id = create_task_output_dir()
+        logger.info(f"📁 Task directory created: {task_dir}")
+        logger.info(f"   Task ID: {task_id}")
+
+        # The video is always rendered to the task's own final path; a
+        # caller-supplied output_path only receives a copy at the end.
+        user_specified_output = output_path
+        task_video_path = get_task_final_video_path(task_id)
+        if user_specified_output:
+            logger.info(f"   Will copy final video to: {user_specified_output}")
+
+        # Create storyboard config
+        config = StoryboardConfig(
+            task_id=task_id,
+            n_storyboard=n_scenes,
+            min_narration_words=min_narration_words,
+            max_narration_words=max_narration_words,
+            min_image_prompt_words=min_image_prompt_words,
+            max_image_prompt_words=max_image_prompt_words,
+            video_fps=video_fps,
+            voice_id=voice_id,
+            tts_workflow=tts_workflow,
+            tts_speed=tts_speed,
+            ref_audio=ref_audio,
+            image_width=image_width,
+            image_height=image_height,
+            image_workflow=image_workflow,
+            frame_template=frame_template or "1080x1920/default.html"
+        )
+
+        # Create storyboard
+        storyboard = Storyboard(
+            title=final_title,
+            config=config,
+            content_metadata=content_metadata,
+            created_at=datetime.now()
+        )
+
+        try:
+            # ========== Step 1: Generate/Split narrations ==========
+            if mode == "generate":
+                self._report_progress(progress_callback, "generating_narrations", 0.05)
+                narrations = await generate_narrations_from_topic(
+                    self.llm,
+                    topic=text,
+                    n_scenes=n_scenes,
+                    min_words=min_narration_words,
+                    max_words=max_narration_words
+                )
+                logger.info(f"✅ Generated {len(narrations)} narrations")
+            else:  # fixed
+                self._report_progress(progress_callback, "splitting_script", 0.05)
+                narrations = await split_narration_script(text)
+                logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
+                logger.info(f"   Note: n_scenes={n_scenes} is ignored in fixed mode")
+
+            # Fail early with a clear message instead of running the remaining
+            # stages (and the final concatenation) on nothing.
+            if not narrations:
+                raise ValueError("No narrations were produced from the input text")
+
+            # ========== Step 2: Generate image prompts ==========
+            self._report_progress(progress_callback, "generating_image_prompts", 0.15)
+
+            # Temporarily override prompt_prefix in the shared image config.
+            # Remember whether the key existed so the rollback below is exact
+            # even when there was no previous value (or it was None).
+            image_config = self.core.config.get("comfyui", {}).get("image", {})
+            prefix_overridden = prompt_prefix is not None
+            had_prefix = "prompt_prefix" in image_config
+            original_prefix = image_config.get("prompt_prefix")
+            if prefix_overridden:
+                image_config["prompt_prefix"] = prompt_prefix
+                logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
+
+            # Progress wrapper for image prompt generation
+            def image_prompt_progress(completed: int, total: int, message: str):
+                batch_progress = completed / total if total > 0 else 0
+                # Map this stage onto 0.15-0.20 so it ends exactly where frame
+                # processing starts and overall progress never moves backwards.
+                self._report_progress(
+                    progress_callback,
+                    "generating_image_prompts",
+                    0.15 + (batch_progress * 0.05),
+                    extra_info=message
+                )
+
+            try:
+                # Generate base image prompts
+                base_image_prompts = await generate_image_prompts(
+                    self.llm,
+                    narrations=narrations,
+                    min_words=min_image_prompt_words,
+                    max_words=max_image_prompt_words,
+                    progress_callback=image_prompt_progress
+                )
+
+                # Apply prompt prefix (explicit argument wins over config)
+                from pixelle_video.utils.prompt_helper import build_image_prompt
+                if prefix_overridden:
+                    prompt_prefix_to_use = prompt_prefix
+                else:
+                    prompt_prefix_to_use = image_config.get("prompt_prefix", "")
+
+                image_prompts = [
+                    build_image_prompt(base_prompt, prompt_prefix_to_use)
+                    for base_prompt in base_image_prompts
+                ]
+            finally:
+                # Roll back the override so it cannot leak into later calls
+                if prefix_overridden:
+                    if had_prefix:
+                        image_config["prompt_prefix"] = original_prefix
+                    else:
+                        image_config.pop("prompt_prefix", None)
+
+            logger.info(f"✅ Generated {len(image_prompts)} image prompts")
+
+            # ========== Step 3: Create frames ==========
+            if len(image_prompts) != len(narrations):
+                # zip() below truncates to the shorter list; make that visible
+                logger.warning(
+                    f"Image prompt count ({len(image_prompts)}) does not match "
+                    f"narration count ({len(narrations)}); extra items are dropped"
+                )
+
+            for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
+                storyboard.frames.append(
+                    StoryboardFrame(
+                        index=i,
+                        narration=narration,
+                        image_prompt=image_prompt,
+                        created_at=datetime.now()
+                    )
+                )
+
+            if not storyboard.frames:
+                raise ValueError("No storyboard frames were created; nothing to render")
+
+            # ========== Step 4: Process each frame ==========
+            # Frame processing covers overall progress 0.2-0.8, split evenly.
+            base_progress = 0.2
+            frame_range = 0.6
+            total_frames = len(storyboard.frames)
+            per_frame_progress = frame_range / total_frames
+
+            for i, frame in enumerate(storyboard.frames):
+                frame_start = base_progress + (per_frame_progress * i)
+
+                # Frame-specific progress callback. frame_start is bound as a
+                # default argument so the callback keeps this frame's offset
+                # even if it is invoked after the loop has moved on.
+                def frame_progress_callback(event: ProgressEvent, frame_start: float = frame_start):
+                    if progress_callback:
+                        progress_callback(
+                            ProgressEvent(
+                                event_type=event.event_type,
+                                progress=frame_start + (per_frame_progress * event.progress),
+                                frame_current=event.frame_current,
+                                frame_total=event.frame_total,
+                                step=event.step,
+                                action=event.action
+                            )
+                        )
+
+                # Report frame start
+                self._report_progress(
+                    progress_callback,
+                    "processing_frame",
+                    frame_start,
+                    frame_current=i+1,
+                    frame_total=total_frames
+                )
+
+                processed_frame = await self.core.frame_processor(
+                    frame=frame,
+                    storyboard=storyboard,
+                    config=config,
+                    total_frames=total_frames,
+                    progress_callback=frame_progress_callback
+                )
+                storyboard.total_duration += processed_frame.duration
+                logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)")
+
+            # ========== Step 5: Concatenate videos ==========
+            self._report_progress(progress_callback, "concatenating", 0.85)
+            segment_paths = [frame.video_segment_path for frame in storyboard.frames]
+
+            from pixelle_video.services.video import VideoService
+            video_service = VideoService()
+
+            final_video_path = video_service.concat_videos(
+                videos=segment_paths,
+                output=task_video_path,
+                bgm_path=bgm_path,
+                bgm_volume=bgm_volume,
+                bgm_mode=bgm_mode
+            )
+
+            storyboard.final_video_path = final_video_path
+            storyboard.completed_at = datetime.now()
+
+            # Copy to user-specified path if provided
+            if user_specified_output:
+                import shutil
+                Path(user_specified_output).parent.mkdir(parents=True, exist_ok=True)
+                shutil.copy2(final_video_path, user_specified_output)
+                logger.info(f"📹 Final video copied to: {user_specified_output}")
+                final_video_path = user_specified_output
+                storyboard.final_video_path = user_specified_output
+
+            logger.success(f"🎬 Video generation completed: {final_video_path}")
+
+            # ========== Step 6: Create result ==========
+            self._report_progress(progress_callback, "completed", 1.0)
+
+            file_size = Path(final_video_path).stat().st_size
+
+            result = VideoGenerationResult(
+                video_path=final_video_path,
+                storyboard=storyboard,
+                duration=storyboard.total_duration,
+                file_size=file_size
+            )
+
+            logger.info(f"✅ Generated video: {final_video_path}")
+            logger.info(f"   Duration: {storyboard.total_duration:.2f}s")
+            logger.info(f"   Size: {file_size / (1024*1024):.2f} MB")
+            logger.info(f"   Frames: {len(storyboard.frames)}")
+
+            return result
+
+        except Exception as e:
+            # Log at this boundary, then let the caller decide how to handle it
+            logger.error(f"❌ Video generation failed: {e}")
+            raise
+