对分镜视频工作流传递 duration 参数 (Pass the duration parameter to the storyboard video workflow)

This commit is contained in:
puke
2025-11-21 00:32:22 +08:00
parent 9ab53d06dc
commit d8e380bdb5
2 changed files with 27 additions and 8 deletions

View File

@@ -14,6 +14,10 @@
Frame processor - Process single frame through complete pipeline Frame processor - Process single frame through complete pipeline
Orchestrates: TTS → Image Generation → Frame Composition → Video Segment Orchestrates: TTS → Image Generation → Frame Composition → Video Segment
Key Feature:
- TTS-driven video duration: Audio duration from TTS is passed to video generation workflows
to ensure perfect sync between audio and video (no padding, no trimming needed)
""" """
from typing import Callable, Optional from typing import Callable, Optional
@@ -193,14 +197,23 @@ class FrameProcessor:
logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})") logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})")
# Call Media generation (with optional preset) # Build media generation parameters
media_result = await self.core.media( media_params = {
prompt=frame.image_prompt, "prompt": frame.image_prompt,
workflow=config.media_workflow, # Pass workflow from config (None = use default) "workflow": config.media_workflow, # Pass workflow from config (None = use default)
media_type=media_type, "media_type": media_type,
width=config.media_width, "width": config.media_width,
height=config.media_height "height": config.media_height
) }
# For video workflows: pass the TTS audio duration as the target video duration
# so the generated video is the same length as the frame's narration audio
if is_video_workflow and frame.duration:
media_params["duration"] = frame.duration
logger.info(f" → Generating video with target duration: {frame.duration:.2f}s (from TTS audio)")
# Call Media generation
media_result = await self.core.media(**media_params)
# Store media type # Store media type
frame.media_type = media_result.media_type frame.media_type = media_result.media_type

View File

@@ -119,6 +119,7 @@ class MediaService(ComfyBaseService):
# Common workflow parameters # Common workflow parameters
width: Optional[int] = None, width: Optional[int] = None,
height: Optional[int] = None, height: Optional[int] = None,
duration: Optional[float] = None, # Video duration in seconds (for video workflows)
negative_prompt: Optional[str] = None, negative_prompt: Optional[str] = None,
steps: Optional[int] = None, steps: Optional[int] = None,
seed: Optional[int] = None, seed: Optional[int] = None,
@@ -140,6 +141,7 @@ class MediaService(ComfyBaseService):
runninghub_api_key: RunningHub API key (optional, overrides config) runninghub_api_key: RunningHub API key (optional, overrides config)
width: Media width width: Media width
height: Media height height: Media height
duration: Target video duration in seconds (only for video workflows, typically from TTS audio duration)
negative_prompt: Negative prompt negative_prompt: Negative prompt
steps: Sampling steps steps: Sampling steps
seed: Random seed seed: Random seed
@@ -203,6 +205,10 @@ class MediaService(ComfyBaseService):
workflow_params["width"] = width workflow_params["width"] = width
if height is not None: if height is not None:
workflow_params["height"] = height workflow_params["height"] = height
if duration is not None:
workflow_params["duration"] = duration
if media_type == "video":
logger.info(f"📏 Target video duration: {duration:.2f}s (from TTS audio)")
if negative_prompt is not None: if negative_prompt is not None:
workflow_params["negative_prompt"] = negative_prompt workflow_params["negative_prompt"] = negative_prompt
if steps is not None: if steps is not None: