分镜支持视频功能

2025-11-11 20:38:31 +08:00
parent cf9321feac
commit 0e2b6b17d0
17 changed files with 1225 additions and 321 deletions
--- a/pixelle_video/pipelines/base.py
+++ b/pixelle_video/pipelines/base.py
@@ -63,8 +63,11 @@ class BasePipeline(ABC):
        # Quick access to services (convenience)
        self.llm = pixelle_video_core.llm
        self.tts = pixelle_video_core.tts
-        self.image = pixelle_video_core.image
+        self.media = pixelle_video_core.media
        self.video = pixelle_video_core.video
+        
+        # Backward compatibility alias
+        self.image = pixelle_video_core.media
    
    @abstractmethod
    async def __call__(
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -269,11 +269,13 @@ class StandardPipeline(BasePipeline):
        )
        
        # ========== Step 0.8: Check template requirements ==========
-        template_requires_image = self._check_template_requires_image(config.frame_template)
-        if template_requires_image:
+        template_media_type = self._check_template_media_type(config.frame_template)
+        if template_media_type == "video":
+            logger.info(f"🎬 Template requires video generation")
+        elif template_media_type == "image":
            logger.info(f"📸 Template requires image generation")
-        else:
-            logger.info(f"⚡ Template does not require images - skipping image generation pipeline")
+        else:  # text
+            logger.info(f"⚡ Template does not require media - skipping media generation pipeline")
            logger.info(f"   💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
        
        try:
@@ -294,8 +296,61 @@ class StandardPipeline(BasePipeline):
                logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
                logger.info(f"   Note: n_scenes={n_scenes} is ignored in fixed mode")
            
-            # ========== Step 2: Generate image prompts (conditional) ==========
-            if template_requires_image:
+            # ========== Step 2: Generate media prompts (conditional) ==========
+            if template_media_type == "video":
+                # Video template: generate video prompts
+                self._report_progress(progress_callback, "generating_video_prompts", 0.15)
+                
+                from pixelle_video.utils.content_generators import generate_video_prompts
+                
+                # Override prompt_prefix if provided
+                original_prefix = None
+                if prompt_prefix is not None:
+                    image_config = self.core.config.get("comfyui", {}).get("image", {})
+                    original_prefix = image_config.get("prompt_prefix")
+                    image_config["prompt_prefix"] = prompt_prefix
+                    logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
+                
+                try:
+                    # Create progress callback wrapper for video prompt generation
+                    def video_prompt_progress(completed: int, total: int, message: str):
+                        batch_progress = completed / total if total > 0 else 0
+                        overall_progress = 0.15 + (batch_progress * 0.15)
+                        self._report_progress(
+                            progress_callback,
+                            "generating_video_prompts",
+                            overall_progress,
+                            extra_info=message
+                        )
+                    
+                    # Generate base video prompts
+                    base_image_prompts = await generate_video_prompts(
+                        self.llm,
+                        narrations=narrations,
+                        min_words=min_image_prompt_words,
+                        max_words=max_image_prompt_words,
+                        progress_callback=video_prompt_progress
+                    )
+                    
+                    # Apply prompt prefix
+                    from pixelle_video.utils.prompt_helper import build_image_prompt
+                    image_config = self.core.config.get("comfyui", {}).get("image", {})
+                    prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
+                    
+                    image_prompts = []
+                    for base_prompt in base_image_prompts:
+                        final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
+                        image_prompts.append(final_prompt)
+                    
+                finally:
+                    # Restore original prompt_prefix
+                    if original_prefix is not None:
+                        image_config["prompt_prefix"] = original_prefix
+                
+                logger.info(f"✅ Generated {len(image_prompts)} video prompts")
+            
+            elif template_media_type == "image":
+                # Image template: generate image prompts
                self._report_progress(progress_callback, "generating_image_prompts", 0.15)
                
                # Override prompt_prefix if provided
@@ -343,12 +398,13 @@ class StandardPipeline(BasePipeline):
                        image_config["prompt_prefix"] = original_prefix
                
                logger.info(f"✅ Generated {len(image_prompts)} image prompts")
-            else:
-                # Skip image prompt generation
+            
+            else:  # text
+                # Text-only template: skip media prompt generation
                image_prompts = [None] * len(narrations)
                self._report_progress(progress_callback, "preparing_frames", 0.15)
-                logger.info(f"⚡ Skipped image prompt generation (template doesn't need images)")
-                logger.info(f"   💡 Savings: {len(narrations)} LLM calls + {len(narrations)} image generations")
+                logger.info(f"⚡ Skipped media prompt generation (text-only template)")
+                logger.info(f"   💡 Savings: {len(narrations)} LLM calls + {len(narrations)} media generations")
            
            # ========== Step 3: Create frames ==========
            for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
@@ -452,29 +508,44 @@ class StandardPipeline(BasePipeline):
            logger.error(f"❌ Video generation failed: {e}")
            raise
    
-    def _check_template_requires_image(self, frame_template: str) -> bool:
+    def _check_template_media_type(self, frame_template: str) -> str:
        """
-        Check if template requires image generation
+        Check template media type requirement
        
        This is checked at pipeline level to avoid unnecessary:
-        - LLM calls (generating image_prompts)
-        - Image generation API calls
+        - LLM calls (generating media prompts)
+        - Media generation API calls
        - ComfyUI dependency
        
+        Template naming rules:
+        - video_*.html: Video template (returns "video")
+        - Other templates with {{image}}: Image template (returns "image")
+        - Other templates without {{image}}: Text-only template (returns "text")
+        
        Args:
-            frame_template: Template path (e.g., "1080x1920/default.html")
+            frame_template: Template path (e.g., "1080x1920/default.html" or "1080x1920/video_default.html")
        
        Returns:
-            True if template contains {{image}}, False otherwise
+            "video", "image", or "text"
        """
        from pixelle_video.services.frame_html import HTMLFrameGenerator
        from pixelle_video.utils.template_util import resolve_template_path
        
+        # Check if template name starts with video_
+        template_name = Path(frame_template).name
+        if template_name.startswith("video_"):
+            logger.debug(f"Template '{frame_template}' is video template (video_ prefix)")
+            return "video"
+        
+        # Check if template contains {{image}}
        template_path = resolve_template_path(frame_template)
        generator = HTMLFrameGenerator(template_path)
        
-        requires = generator.requires_image()
-        logger.debug(f"Template '{frame_template}' requires_image={requires}")
-        
-        return requires
+        requires_image = generator.requires_image()
+        if requires_image:
+            logger.debug(f"Template '{frame_template}' is image template (has {{image}})")
+            return "image"
+        else:
+            logger.debug(f"Template '{frame_template}' is text-only template")
+            return "text"