对分镜视频工作流传递duration参数

2025-11-21 00:32:22 +08:00
parent 9ab53d06dc
commit d8e380bdb5
2 changed files with 27 additions and 8 deletions
--- a/pixelle_video/services/frame_processor.py
+++ b/pixelle_video/services/frame_processor.py
@@ -14,6 +14,10 @@
 Frame processor - Process single frame through complete pipeline

 Orchestrates: TTS → Image Generation → Frame Composition → Video Segment
+
+Key Feature:
+- TTS-driven video duration: Audio duration from TTS is passed to video generation workflows
+  to ensure perfect sync between audio and video (no padding, no trimming needed)
 """

 from typing import Callable, Optional
@@ -193,14 +197,23 @@ class FrameProcessor:
        
        logger.debug(f"  → Media type: {media_type} (workflow: {workflow_name})")
        
-        # Call Media generation (with optional preset)
-        media_result = await self.core.media(
-            prompt=frame.image_prompt,
-            workflow=config.media_workflow,  # Pass workflow from config (None = use default)
-            media_type=media_type,
-            width=config.media_width,
-            height=config.media_height
-        )
+        # Build media generation parameters
+        media_params = {
+            "prompt": frame.image_prompt,
+            "workflow": config.media_workflow,  # Pass workflow from config (None = use default)
+            "media_type": media_type,
+            "width": config.media_width,
+            "height": config.media_height
+        }
+        
+        # For video workflows: pass audio duration as target video duration
+        # so the video is generated at the audio's length (skipped when frame.duration is unset or 0)
+        if is_video_workflow and frame.duration:
+            media_params["duration"] = frame.duration
+            logger.info(f"  → Generating video with target duration: {frame.duration:.2f}s (from TTS audio)")
+        
+        # Call Media generation
+        media_result = await self.core.media(**media_params)
        
        # Store media type
        frame.media_type = media_result.media_type
--- a/pixelle_video/services/media.py
+++ b/pixelle_video/services/media.py
@@ -119,6 +119,7 @@ class MediaService(ComfyBaseService):
        # Common workflow parameters
        width: Optional[int] = None,
        height: Optional[int] = None,
+        duration: Optional[float] = None,  # Video duration in seconds (for video workflows)
        negative_prompt: Optional[str] = None,
        steps: Optional[int] = None,
        seed: Optional[int] = None,
@@ -140,6 +141,7 @@ class MediaService(ComfyBaseService):
            runninghub_api_key: RunningHub API key (optional, overrides config)
            width: Media width
            height: Media height
+            duration: Target video duration in seconds; forwarded to the workflow whenever it is not None (intended for video workflows, typically the TTS audio duration)
            negative_prompt: Negative prompt
            steps: Sampling steps
            seed: Random seed
@@ -203,6 +205,10 @@ class MediaService(ComfyBaseService):
            workflow_params["width"] = width
        if height is not None:
            workflow_params["height"] = height
+        if duration is not None:
+            workflow_params["duration"] = duration
+            if media_type == "video":
+                logger.info(f"📏 Target video duration: {duration:.2f}s (from TTS audio)")
        if negative_prompt is not None:
            workflow_params["negative_prompt"] = negative_prompt
        if steps is not None: