支持视频生成时插图非必填，大幅提升视频生成速度

2025-11-07 14:09:32 +08:00
parent 514dbfaa1b
commit 8d5c578958
11 changed files with 674 additions and 330 deletions
--- a/pixelle_video/pipelines/custom.py
+++ b/pixelle_video/pipelines/custom.py
@@ -32,10 +32,24 @@ class CustomPipeline(BasePipeline):
    You can customize:
    - Content processing logic
    - Narration generation strategy
-    - Image prompt generation
+    - Image prompt generation (conditional based on template)
    - Frame composition
    - Video assembly
    
+    KEY OPTIMIZATION: Conditional Image Generation
+    -----------------------------------------------
+    This pipeline supports automatic detection of template image requirements.
+    If your template doesn't use {{image}}, the entire image generation pipeline
+    can be skipped, providing:
+      ⚡ Faster generation (no image API calls)
+      💰 Lower cost (no LLM calls for image prompts)
+      🚀 Reduced dependencies (no ComfyUI needed for text-only videos)
+    
+    Usage patterns:
+      1. Text-only videos: Use templates/1080x1920/simple.html
+      2. AI-generated images: Use templates with {{image}} placeholder
+      3. Custom logic: Modify the template, or edit the Step 0.5 template check in your copy of this pipeline
+    
    Example usage:
        # 1. Create your own pipeline by copying this file
        # 2. Modify the __call__ method with your custom logic
@@ -90,6 +104,11 @@ class CustomPipeline(BasePipeline):
        
        Returns:
            VideoGenerationResult
+        
+        Image Generation Logic:
+            - If template has {{image}} → automatically generates images
+            - If template has no {{image}} → skips image generation (faster, cheaper)
+            - To customize: edit the Step 0.5 check that sets template_requires_image (it is inline code, not a separate method)
        """
        logger.info("Starting CustomPipeline")
        logger.info(f"Input text length: {len(text)} chars")
@@ -114,6 +133,22 @@ class CustomPipeline(BasePipeline):
            user_specified_output = output_path
            output_path = get_task_final_video_path(task_id)
        
+        # ========== Step 0.5: Check template requirements ==========
+        # Detect if template requires {{image}} parameter
+        # This allows skipping the entire image generation pipeline for text-only templates
+        from pixelle_video.services.frame_html import HTMLFrameGenerator
+        from pixelle_video.utils.template_util import resolve_template_path
+        
+        template_path = resolve_template_path(frame_template)
+        generator = HTMLFrameGenerator(template_path)
+        template_requires_image = generator.requires_image()
+        
+        if template_requires_image:
+            logger.info(f"📸 Template requires image generation")
+        else:
+            logger.info(f"⚡ Template does not require images - skipping image generation pipeline")
+            logger.info(f"   💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
+        
        # ========== Step 1: Process content (CUSTOMIZE THIS) ==========
        self._report_progress(progress_callback, "processing_content", 0.10)
        
@@ -138,29 +173,37 @@ class CustomPipeline(BasePipeline):
        
        logger.info(f"Generated {len(narrations)} narrations")
        
-        # ========== Step 2: Generate image prompts (CUSTOMIZE THIS) ==========
+        # ========== Step 2: Generate image prompts (CONDITIONAL - CUSTOMIZE THIS) ==========
        self._report_progress(progress_callback, "generating_image_prompts", 0.25)
        
-        # Example: Generate image prompts using LLM
-        from pixelle_video.utils.content_generators import generate_image_prompts
-        
-        image_prompts = await generate_image_prompts(
-            self.llm,
-            narrations=narrations,
-            min_words=30,
-            max_words=60
-        )
-        
-        # Example: Apply custom prompt prefix
-        from pixelle_video.utils.prompt_helper import build_image_prompt
-        custom_prefix = "cinematic style, professional lighting"  # Customize this
-        
-        final_image_prompts = []
-        for base_prompt in image_prompts:
-            final_prompt = build_image_prompt(base_prompt, custom_prefix)
-            final_image_prompts.append(final_prompt)
-        
-        logger.info(f"Generated {len(final_image_prompts)} image prompts")
+        # IMPORTANT: Check if template actually needs images
+        # If your template doesn't use {{image}}, you can skip this entire step!
+        if template_requires_image:
+            # Template requires images - generate image prompts using LLM
+            from pixelle_video.utils.content_generators import generate_image_prompts
+            
+            image_prompts = await generate_image_prompts(
+                self.llm,
+                narrations=narrations,
+                min_words=30,
+                max_words=60
+            )
+            
+            # Example: Apply custom prompt prefix
+            from pixelle_video.utils.prompt_helper import build_image_prompt
+            custom_prefix = "cinematic style, professional lighting"  # Customize this
+            
+            final_image_prompts = []
+            for base_prompt in image_prompts:
+                final_prompt = build_image_prompt(base_prompt, custom_prefix)
+                final_image_prompts.append(final_prompt)
+            
+            logger.info(f"✅ Generated {len(final_image_prompts)} image prompts")
+        else:
+            # Template doesn't need images - skip image generation entirely
+            final_image_prompts = [None] * len(narrations)
+            logger.info(f"⚡ Skipped image prompt generation (template doesn't need images)")
+            logger.info(f"   💡 Savings: {len(narrations)} LLM calls + {len(narrations)} image generations")
        
        # ========== Step 3: Create storyboard ==========
        config = StoryboardConfig(
@@ -317,8 +360,8 @@ class CustomPipeline(BasePipeline):
 # ==================== Usage Examples ====================

 """
-Example 1: Register and use custom pipeline
----------------------------------------
+Example 1: Text-only video (no AI image generation)
+---------------------------------------------------
 from pixelle_video import pixelle_video
 from pixelle_video.pipelines.custom import CustomPipeline

@@ -328,15 +371,27 @@ await pixelle_video.initialize()
 # Register custom pipeline
 pixelle_video.pipelines["my_custom"] = CustomPipeline(pixelle_video)

-# Use it
+# Use text-only template - no image generation!
 result = await pixelle_video.generate_video(
-    text="Your input content here",
+    text="Your content here",
    pipeline="my_custom",
-    custom_param_example="custom_value"
+    frame_template="1080x1920/simple.html"  # Template without {{image}}
 )
+# Benefits: ⚡ Fast, 💰 Cheap, 🚀 No ComfyUI needed


-Example 2: Create your own pipeline class
+Example 2: AI-generated image video
+---------------------------------------------------
+# Use template with {{image}} - automatic image generation
+result = await pixelle_video.generate_video(
+    text="Your content here",
+    pipeline="my_custom",
+    frame_template="1080x1920/default.html"  # Template with {{image}}
+)
+# Will automatically generate images via LLM + ComfyUI
+
+
+Example 3: Create your own pipeline class
 ----------------------------------------
 from pixelle_video.pipelines.custom import CustomPipeline

@@ -351,7 +406,7 @@ class MySpecialPipeline(CustomPipeline):
        return result


-Example 3: Inline custom pipeline
+Example 4: Inline custom pipeline
 ----------------------------------------
 from pixelle_video.pipelines.base import BasePipeline

--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -250,6 +250,14 @@ class StandardPipeline(BasePipeline):
            created_at=datetime.now()
        )
        
+        # ========== Step 0.8: Check template requirements ==========
+        template_requires_image = self._check_template_requires_image(config.frame_template)
+        if template_requires_image:
+            logger.info(f"📸 Template requires image generation")
+        else:
+            logger.info(f"⚡ Template does not require images - skipping image generation pipeline")
+            logger.info(f"   💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
+        
        try:
            # ========== Step 1: Generate/Split narrations ==========
            if mode == "generate":
@@ -268,54 +276,61 @@ class StandardPipeline(BasePipeline):
                logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
                logger.info(f"   Note: n_scenes={n_scenes} is ignored in fixed mode")
            
-            # ========== Step 2: Generate image prompts ==========
-            self._report_progress(progress_callback, "generating_image_prompts", 0.15)
-            
-            # Override prompt_prefix if provided
-            original_prefix = None
-            if prompt_prefix is not None:
-                image_config = self.core.config.get("comfyui", {}).get("image", {})
-                original_prefix = image_config.get("prompt_prefix")
-                image_config["prompt_prefix"] = prompt_prefix
-                logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
-            
-            try:
-                # Create progress callback wrapper for image prompt generation
-                def image_prompt_progress(completed: int, total: int, message: str):
-                    batch_progress = completed / total if total > 0 else 0
-                    overall_progress = 0.15 + (batch_progress * 0.15)
-                    self._report_progress(
-                        progress_callback,
-                        "generating_image_prompts",
-                        overall_progress,
-                        extra_info=message
+            # ========== Step 2: Generate image prompts (conditional) ==========
+            if template_requires_image:
+                self._report_progress(progress_callback, "generating_image_prompts", 0.15)
+                
+                # Override prompt_prefix if provided
+                original_prefix = None
+                if prompt_prefix is not None:
+                    image_config = self.core.config.get("comfyui", {}).get("image", {})
+                    original_prefix = image_config.get("prompt_prefix")
+                    image_config["prompt_prefix"] = prompt_prefix
+                    logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
+                
+                try:
+                    # Create progress callback wrapper for image prompt generation
+                    def image_prompt_progress(completed: int, total: int, message: str):
+                        batch_progress = completed / total if total > 0 else 0
+                        overall_progress = 0.15 + (batch_progress * 0.15)
+                        self._report_progress(
+                            progress_callback,
+                            "generating_image_prompts",
+                            overall_progress,
+                            extra_info=message
+                        )
+                    
+                    # Generate base image prompts
+                    base_image_prompts = await generate_image_prompts(
+                        self.llm,
+                        narrations=narrations,
+                        min_words=min_image_prompt_words,
+                        max_words=max_image_prompt_words,
+                        progress_callback=image_prompt_progress
                    )
+                    
+                    # Apply prompt prefix
+                    from pixelle_video.utils.prompt_helper import build_image_prompt
+                    image_config = self.core.config.get("comfyui", {}).get("image", {})
+                    prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
+                    
+                    image_prompts = []
+                    for base_prompt in base_image_prompts:
+                        final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
+                        image_prompts.append(final_prompt)
+                    
+                finally:
+                    # Restore original prompt_prefix
+                    if original_prefix is not None:
+                        image_config["prompt_prefix"] = original_prefix
                
-                # Generate base image prompts
-                base_image_prompts = await generate_image_prompts(
-                    self.llm,
-                    narrations=narrations,
-                    min_words=min_image_prompt_words,
-                    max_words=max_image_prompt_words,
-                    progress_callback=image_prompt_progress
-                )
-                
-                # Apply prompt prefix
-                from pixelle_video.utils.prompt_helper import build_image_prompt
-                image_config = self.core.config.get("comfyui", {}).get("image", {})
-                prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
-                
-                image_prompts = []
-                for base_prompt in base_image_prompts:
-                    final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
-                    image_prompts.append(final_prompt)
-                
-            finally:
-                # Restore original prompt_prefix
-                if original_prefix is not None:
-                    image_config["prompt_prefix"] = original_prefix
-            
-            logger.info(f"✅ Generated {len(image_prompts)} image prompts")
+                logger.info(f"✅ Generated {len(image_prompts)} image prompts")
+            else:
+                # Skip image prompt generation
+                image_prompts = [None] * len(narrations)
+                self._report_progress(progress_callback, "preparing_frames", 0.15)
+                logger.info(f"⚡ Skipped image prompt generation (template doesn't need images)")
+                logger.info(f"   💡 Savings: {len(narrations)} LLM calls + {len(narrations)} image generations")
            
            # ========== Step 3: Create frames ==========
            for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
@@ -418,4 +433,30 @@ class StandardPipeline(BasePipeline):
        except Exception as e:
            logger.error(f"❌ Video generation failed: {e}")
            raise
+    
+    def _check_template_requires_image(self, frame_template: str) -> bool:
+        """
+        Check whether the given frame template needs generated images.
+        
+        The pipeline calls this before generating narrations so it can skip:
+        - LLM calls (generating image_prompts)
+        - Image generation API calls
+        - ComfyUI dependency
+        
+        Args:
+            frame_template: Template path (e.g., "1080x1920/default.html")
+        
+        Returns:
+            The value of HTMLFrameGenerator.requires_image() for that template
+        """
+        from pixelle_video.services.frame_html import HTMLFrameGenerator
+        from pixelle_video.utils.template_util import resolve_template_path
+        
+        template_path = resolve_template_path(frame_template)
+        generator = HTMLFrameGenerator(template_path)
+        # Detection itself is delegated to HTMLFrameGenerator.requires_image()
+        requires = generator.requires_image()
+        logger.debug(f"Template '{frame_template}' requires_image={requires}")
+        
+        return requires

--- a/pixelle_video/prompts/README.md
+++ b/pixelle_video/prompts/README.md
@@ -1,99 +0,0 @@
-# Prompts Directory
-
-Centralized prompt management for all LLM interactions in Pixelle-Video.
-
-## Structure
-
-Each prompt is in its own file for easy maintenance and modification:
-
-```
-prompts/
-├── __init__.py                  # Exports all builder functions
-├── topic_narration.py           # Generate narrations from topic
-├── content_narration.py         # Extract/refine narrations from content
-├── script_split.py              # Split fixed script into segments
-├── title_generation.py          # Generate video title from content
-├── image_generation.py          # Generate image prompts from narrations
-└── style_conversion.py          # Convert style description to image prompt
-```
-
-## Usage
-
-All builder functions are exported from the package root:
-
-```python
-from pixelle_video.prompts import (
-    build_topic_narration_prompt,
-    build_content_narration_prompt,
-    build_script_split_prompt,
-    build_title_generation_prompt,
-    build_image_prompt_prompt,
-    build_style_conversion_prompt,
-)
-```
-
-## Prompt Files
-
-### Narration Prompts
-
-1. **topic_narration.py**
-   - Purpose: Generate engaging narrations from a topic/theme
-   - Input: topic, n_storyboard, min_words, max_words
-   - Output: JSON with narrations array
-
-2. **content_narration.py**
-   - Purpose: Extract and refine narrations from user content
-   - Input: content, n_storyboard, min_words, max_words
-   - Output: JSON with narrations array
-
-3. **script_split.py**
-   - Purpose: Split fixed script into natural segments (no modification)
-   - Input: script, min_words (reference), max_words (reference)
-   - Output: JSON with narrations array
-
-4. **title_generation.py**
-   - Purpose: Generate short, attractive video title
-   - Input: content, max_length
-   - Output: Plain text title
-
-### Image Prompts
-
-5. **image_generation.py**
-   - Purpose: Generate English image prompts from narrations
-   - Input: narrations, min_words, max_words, style_preset/style_description
-   - Output: JSON with image_prompts array
-   - Contains: IMAGE_STYLE_PRESETS dictionary
-
-6. **style_conversion.py**
-   - Purpose: Convert custom style description to English image prompt
-   - Input: description (any language)
-   - Output: Plain text English image prompt
-
-## Modifying Prompts
-
-To modify a prompt:
-
-1. Locate the relevant file (e.g., `topic_narration.py`)
-2. Edit the prompt constant (e.g., `TOPIC_NARRATION_PROMPT`)
-3. Changes take effect immediately (no need to modify service code)
-
-## Adding New Prompts
-
-To add a new prompt:
-
-1. Create a new file (e.g., `my_new_prompt.py`)
-2. Define the prompt constant and builder function
-3. Export the builder function in `__init__.py`
-4. Use it in service code:
-   ```python
-   from pixelle_video.prompts import build_my_new_prompt
-   ```
-
-## Design Principles
-
- **One File, One Prompt**: Each prompt has its own file for clarity
- **Builder Functions**: Each file exports a `build_*_prompt()` function
- **Centralized Exports**: All builders are exported from `__init__.py`
- **Consistent Format**: All prompts follow similar structure and style
- **Easy Maintenance**: Modify prompts without touching service code
-
--- a/pixelle_video/services/frame_html.py
+++ b/pixelle_video/services/frame_html.py
@@ -57,6 +57,22 @@ class HTMLFrameGenerator:
        self._check_linux_dependencies()
        logger.debug(f"Loaded HTML template: {template_path} (size: {self.width}x{self.height})")
    
+    def requires_image(self) -> bool:
+        """
+        Report whether the loaded template references the {{image}} variable.
+        
+        Callers use the result to skip the image generation pipeline for
+        text-only templates (no image prompt LLM calls, no image generation).
+        
+        The check is an exact substring match on self.template.
+        NOTE(review): spaced or DSL forms such as {{ image }} or
+        {{image:type=default}} would not match - confirm templates never use them.
+        
+        Returns:
+            True if self.template contains the literal text {{image}}, False otherwise
+        """
+        return '{{image}}' in self.template
+    
    def _check_linux_dependencies(self):
        """Check Linux system dependencies and warn if missing"""
        if os.name != 'posix':
@@ -403,7 +419,7 @@ class HTMLFrameGenerator:
        
        # Replace variables in HTML (supports DSL syntax: {{param:type=default}})
        html = self._replace_parameters(self.template, context)
-        logger.info(f"html--->{html}")
+        logger.debug(f"html--->{html}")
        # Use provided output path or auto-generate
        if output_path is None:
            # Fallback: auto-generate (for backward compatibility)
--- a/pixelle_video/services/frame_processor.py
+++ b/pixelle_video/services/frame_processor.py
@@ -56,6 +56,9 @@ class FrameProcessor:
        
        frame_num = frame.index + 1
        
+        # Determine if this frame needs image generation
+        needs_image = frame.image_prompt is not None
+        
        try:
            # Step 1: Generate audio (TTS)
            if progress_callback:
@@ -69,23 +72,27 @@ class FrameProcessor:
                ))
            await self._step_generate_audio(frame, config)
            
-            # Step 2: Generate image (ComfyKit)
-            if progress_callback:
-                progress_callback(ProgressEvent(
-                    event_type="frame_step",
-                    progress=0.25,
-                    frame_current=frame_num,
-                    frame_total=total_frames,
-                    step=2,
-                    action="image"
-                ))
-            await self._step_generate_image(frame, config)
+            # Step 2: Generate image (conditional)
+            if needs_image:
+                if progress_callback:
+                    progress_callback(ProgressEvent(
+                        event_type="frame_step",
+                        progress=0.25,
+                        frame_current=frame_num,
+                        frame_total=total_frames,
+                        step=2,
+                        action="image"
+                    ))
+                await self._step_generate_image(frame, config)
+            else:
+                frame.image_path = None
+                logger.debug(f"  2/4: Skipped image generation (not required by template)")
            
            # Step 3: Compose frame (add subtitle)
            if progress_callback:
                progress_callback(ProgressEvent(
                    event_type="frame_step",
-                    progress=0.50,
+                    progress=0.50 if needs_image else 0.33,
                    frame_current=frame_num,
                    frame_total=total_frames,
                    step=3,
@@ -97,7 +104,7 @@ class FrameProcessor:
            if progress_callback:
                progress_callback(ProgressEvent(
                    event_type="frame_step",
-                    progress=0.75,
+                    progress=0.75 if needs_image else 0.67,
                    frame_current=frame_num,
                    frame_total=total_frames,
                    step=4,