From d8e380bdb5724570ff241a3291c1e59443e021b7 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 00:32:22 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AF=B9=E5=88=86=E9=95=9C=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E5=B7=A5=E4=BD=9C=E6=B5=81=E4=BC=A0=E9=80=92duration=E5=8F=82?= =?UTF-8?q?=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pixelle_video/services/frame_processor.py | 29 ++++++++++++++++------- pixelle_video/services/media.py | 6 +++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pixelle_video/services/frame_processor.py b/pixelle_video/services/frame_processor.py index 09bd1da..f7b64ae 100644 --- a/pixelle_video/services/frame_processor.py +++ b/pixelle_video/services/frame_processor.py @@ -14,6 +14,10 @@ Frame processor - Process single frame through complete pipeline Orchestrates: TTS → Image Generation → Frame Composition → Video Segment + +Key Feature: +- TTS-driven video duration: Audio duration from TTS is passed to video generation workflows + to ensure perfect sync between audio and video (no padding, no trimming needed) """ from typing import Callable, Optional @@ -193,14 +197,23 @@ class FrameProcessor: logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})") - # Call Media generation (with optional preset) - media_result = await self.core.media( - prompt=frame.image_prompt, - workflow=config.media_workflow, # Pass workflow from config (None = use default) - media_type=media_type, - width=config.media_width, - height=config.media_height - ) + # Build media generation parameters + media_params = { + "prompt": frame.image_prompt, + "workflow": config.media_workflow, # Pass workflow from config (None = use default) + "media_type": media_type, + "width": config.media_width, + "height": config.media_height + } + + # For video workflows: pass audio duration as target video duration + # This ensures video length matches audio length from the source + if is_video_workflow and frame.duration: + media_params["duration"] = frame.duration + logger.info(f" → Generating video with target duration: {frame.duration:.2f}s (from TTS audio)") + + # Call Media generation + media_result = await self.core.media(**media_params) # Store media type frame.media_type = media_result.media_type diff --git a/pixelle_video/services/media.py b/pixelle_video/services/media.py index 75d9e33..d894339 100644 --- a/pixelle_video/services/media.py +++ b/pixelle_video/services/media.py @@ -119,6 +119,7 @@ class MediaService(ComfyBaseService): # Common workflow parameters width: Optional[int] = None, height: Optional[int] = None, + duration: Optional[float] = None, # Video duration in seconds (for video workflows) negative_prompt: Optional[str] = None, steps: Optional[int] = None, seed: Optional[int] = None, @@ -140,6 +141,7 @@ class MediaService(ComfyBaseService): runninghub_api_key: RunningHub API key (optional, overrides config) width: Media width height: Media height + duration: Target video duration in seconds (only for video workflows, typically from TTS audio duration) negative_prompt: Negative prompt steps: Sampling steps seed: Random seed @@ -203,6 +205,10 @@ class MediaService(ComfyBaseService): workflow_params["width"] = width if height is not None: workflow_params["height"] = height + if duration is not None: + workflow_params["duration"] = duration + if media_type == "video": + logger.info(f"📏 Target video duration: {duration:.2f}s (from TTS audio)") if negative_prompt is not None: workflow_params["negative_prompt"] = negative_prompt if steps is not None: