From 7f904f6b191e8b7e36725990ff06832bd783ba16 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Thu, 20 Nov 2025 20:09:43 +0800 Subject: [PATCH 01/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E5=B0=BA=E5=AF=B8=E4=BC=A0=E5=8F=82=E6=9C=AA=E7=94=9F=E6=95=88?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/routers/resources.py | 38 ++++++++++++++++++++--- api/routers/video.py | 32 ++++++++++++++++--- api/schemas/video.py | 6 ++-- pixelle_video/models/storyboard.py | 10 +++--- pixelle_video/pipelines/custom.py | 16 +++++----- pixelle_video/pipelines/standard.py | 28 ++++++++--------- pixelle_video/services/frame_processor.py | 8 ++--- pixelle_video/services/persistence.py | 12 +++---- web/components/output_preview.py | 12 ++++--- web/components/style_config.py | 24 +++++++------- 10 files changed, 123 insertions(+), 63 deletions(-) diff --git a/api/routers/resources.py b/api/routers/resources.py index f247874..3ba14fb 100644 --- a/api/routers/resources.py +++ b/api/routers/resources.py @@ -76,12 +76,12 @@ async def list_tts_workflows(pixelle_video: PixelleVideoDep): raise HTTPException(status_code=500, detail=str(e)) -@router.get("/workflows/image", response_model=WorkflowListResponse) -async def list_image_workflows(pixelle_video: PixelleVideoDep): +@router.get("/workflows/media", response_model=WorkflowListResponse) +async def list_media_workflows(pixelle_video: PixelleVideoDep): """ - List available image generation workflows + List available media workflows (both image and video) - Returns list of image workflows from both RunningHub and self-hosted sources. + Returns list of all media workflows from both RunningHub and self-hosted sources. 
Example response: ```json @@ -94,13 +94,41 @@ async def list_image_workflows(pixelle_video: PixelleVideoDep): "path": "workflows/runninghub/image_flux.json", "key": "runninghub/image_flux.json", "workflow_id": "123456" + }, + { + "name": "video_wan2.1.json", + "display_name": "video_wan2.1.json - Runninghub", + "source": "runninghub", + "path": "workflows/runninghub/video_wan2.1.json", + "key": "runninghub/video_wan2.1.json", + "workflow_id": "123457" } ] } ``` """ try: - # Get all workflows from media service (image generation is handled by media service) + # Get all workflows from media service (includes both image and video) + all_workflows = pixelle_video.media.list_workflows() + + media_workflows = [WorkflowInfo(**wf) for wf in all_workflows] + + return WorkflowListResponse(workflows=media_workflows) + + except Exception as e: + logger.error(f"List media workflows error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# Keep old endpoint for backward compatibility +@router.get("/workflows/image", response_model=WorkflowListResponse) +async def list_image_workflows(pixelle_video: PixelleVideoDep): + """ + List available image workflows (deprecated, use /workflows/media instead) + + This endpoint is kept for backward compatibility but will filter to image_ workflows only. 
+ """ + try: all_workflows = pixelle_video.media.list_workflows() # Filter to image workflows only (filename starts with "image_") diff --git a/api/routers/video.py b/api/routers/video.py index 207e3c2..9f09ccf 100644 --- a/api/routers/video.py +++ b/api/routers/video.py @@ -63,6 +63,17 @@ async def generate_video_sync( try: logger.info(f"Sync video generation: {request_body.text[:50]}...") + # Auto-determine media_width and media_height from template meta tags (required) + if not request_body.frame_template: + raise ValueError("frame_template is required to determine media size") + + from pixelle_video.services.frame_html import HTMLFrameGenerator + from pixelle_video.utils.template_util import resolve_template_path + template_path = resolve_template_path(request_body.frame_template) + generator = HTMLFrameGenerator(template_path) + media_width, media_height = generator.get_media_size() + logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}") + # Build video generation parameters video_params = { "text": request_body.text, @@ -73,8 +84,9 @@ async def generate_video_sync( "max_narration_words": request_body.max_narration_words, "min_image_prompt_words": request_body.min_image_prompt_words, "max_image_prompt_words": request_body.max_image_prompt_words, - # Note: image_width and image_height are now auto-determined from template - "image_workflow": request_body.image_workflow, + "media_width": media_width, + "media_height": media_height, + "media_workflow": request_body.media_workflow, "video_fps": request_body.video_fps, "frame_template": request_body.frame_template, "prompt_prefix": request_body.prompt_prefix, @@ -150,6 +162,17 @@ async def generate_video_async( # Define async execution function async def execute_video_generation(): """Execute video generation in background""" + # Auto-determine media_width and media_height from template meta tags (required) + if not request_body.frame_template: + raise ValueError("frame_template is 
required to determine media size") + + from pixelle_video.services.frame_html import HTMLFrameGenerator + from pixelle_video.utils.template_util import resolve_template_path + template_path = resolve_template_path(request_body.frame_template) + generator = HTMLFrameGenerator(template_path) + media_width, media_height = generator.get_media_size() + logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}") + # Build video generation parameters video_params = { "text": request_body.text, @@ -160,8 +183,9 @@ async def generate_video_async( "max_narration_words": request_body.max_narration_words, "min_image_prompt_words": request_body.min_image_prompt_words, "max_image_prompt_words": request_body.max_image_prompt_words, - # Note: image_width and image_height are now auto-determined from template - "image_workflow": request_body.image_workflow, + "media_width": media_width, + "media_height": media_height, + "media_workflow": request_body.media_workflow, "video_fps": request_body.video_fps, "frame_template": request_body.frame_template, "prompt_prefix": request_body.prompt_prefix, diff --git a/api/schemas/video.py b/api/schemas/video.py index d37dd80..483fd16 100644 --- a/api/schemas/video.py +++ b/api/schemas/video.py @@ -56,9 +56,9 @@ class VideoGenerateRequest(BaseModel): min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words") max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words") - # === Image Parameters === - # Note: image_width and image_height are now auto-determined from template meta tags - image_workflow: Optional[str] = Field(None, description="Custom image workflow") + # === Media Parameters === + # Note: media_width and media_height are auto-determined from template meta tags + media_workflow: Optional[str] = Field(None, description="Custom media workflow (image or video)") # === Video Parameters === video_fps: int = Field(30, ge=15, le=60, 
description="Video FPS") diff --git a/pixelle_video/models/storyboard.py b/pixelle_video/models/storyboard.py index 1204991..750ccb4 100644 --- a/pixelle_video/models/storyboard.py +++ b/pixelle_video/models/storyboard.py @@ -23,6 +23,10 @@ from typing import List, Optional, Dict, Any class StoryboardConfig: """Storyboard configuration parameters""" + # Required parameters (must come first in dataclass) + media_width: int # Media width (image or video, required) + media_height: int # Media height (image or video, required) + # Task isolation task_id: Optional[str] = None # Task ID for file isolation (auto-generated if None) @@ -42,10 +46,8 @@ class StoryboardConfig: tts_speed: Optional[float] = None # TTS speed multiplier (0.5-2.0, 1.0 = normal) ref_audio: Optional[str] = None # Reference audio for voice cloning (ComfyUI mode only) - # Image parameters - image_width: int = 1024 - image_height: int = 1024 - image_workflow: Optional[str] = None # Image workflow filename (None = use default) + # Media workflow + media_workflow: Optional[str] = None # Media workflow filename (image or video, None = use default) # Frame template (includes size information in path) frame_template: str = "1080x1920/default.html" # Template path with size (e.g., "1080x1920/default.html") diff --git a/pixelle_video/pipelines/custom.py b/pixelle_video/pipelines/custom.py index 0030214..749d458 100644 --- a/pixelle_video/pipelines/custom.py +++ b/pixelle_video/pipelines/custom.py @@ -93,8 +93,8 @@ class CustomPipeline(BasePipeline): tts_speed: float = 1.2, ref_audio: Optional[str] = None, - image_workflow: Optional[str] = None, - # Note: image_width and image_height are now auto-determined from template + media_workflow: Optional[str] = None, + # Note: media_width and media_height are auto-determined from template frame_template: Optional[str] = None, video_fps: int = 30, @@ -189,8 +189,8 @@ class CustomPipeline(BasePipeline): # Read media size from template meta tags template_path = 
resolve_template_path(frame_template) generator = HTMLFrameGenerator(template_path) - image_width, image_height = generator.get_media_size() - logger.info(f"πŸ“ Media size from template: {image_width}x{image_height}") + media_width, media_height = generator.get_media_size() + logger.info(f"πŸ“ Media size from template: {media_width}x{media_height}") if template_type == "image": logger.info(f"πŸ“Έ Template requires image generation") @@ -270,9 +270,9 @@ class CustomPipeline(BasePipeline): tts_workflow=final_tts_workflow, # Use processed workflow tts_speed=tts_speed, ref_audio=ref_audio, - image_width=image_width, - image_height=image_height, - image_workflow=image_workflow, + media_width=media_width, + media_height=media_height, + media_workflow=media_workflow, frame_template=frame_template ) @@ -387,7 +387,7 @@ class CustomPipeline(BasePipeline): "tts_workflow": tts_workflow, "tts_speed": tts_speed, "ref_audio": ref_audio, - "image_workflow": image_workflow, + "media_workflow": media_workflow, "frame_template": frame_template, "bgm_path": bgm_path, "bgm_volume": bgm_volume, diff --git a/pixelle_video/pipelines/standard.py b/pixelle_video/pipelines/standard.py index ee3e0a8..fab9684 100644 --- a/pixelle_video/pipelines/standard.py +++ b/pixelle_video/pipelines/standard.py @@ -68,8 +68,10 @@ class StandardPipeline(BasePipeline): async def __call__( self, - # === Input === + # === Input (Required) === text: str, + media_width: int, # Required: Media width (from template) + media_height: int, # Required: Media height (from template) # === Processing Mode === mode: Literal["generate", "fixed"] = "generate", @@ -95,10 +97,8 @@ class StandardPipeline(BasePipeline): min_image_prompt_words: int = 30, max_image_prompt_words: int = 60, - # === Image Parameters === - image_width: int = 1024, - image_height: int = 1024, - image_workflow: Optional[str] = None, + # === Media Workflow === + media_workflow: Optional[str] = None, # === Video Parameters === video_fps: int = 30, @@ 
-155,9 +155,9 @@ class StandardPipeline(BasePipeline): min_image_prompt_words: Min image prompt length max_image_prompt_words: Max image prompt length - image_width: Generated image width (default 1024) - image_height: Generated image height (default 1024) - image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default) + media_width: Media width (image or video, required) + media_height: Media height (image or video, required) + media_workflow: Media workflow filename (image or video, e.g., "image_flux.json", "video_wan.json", None = use default) video_fps: Video frame rate (default 30) @@ -254,9 +254,9 @@ class StandardPipeline(BasePipeline): tts_workflow=final_tts_workflow, # Use processed workflow tts_speed=tts_speed, ref_audio=ref_audio, - image_width=image_width, - image_height=image_height, - image_workflow=image_workflow, + media_width=media_width, + media_height=media_height, + media_workflow=media_workflow, frame_template=frame_template or "1080x1920/default.html", template_params=template_params # Custom template parameters ) @@ -374,13 +374,13 @@ class StandardPipeline(BasePipeline): # Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts) is_runninghub = ( (config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or - (config.image_workflow and config.image_workflow.startswith("runninghub/")) + (config.media_workflow and config.media_workflow.startswith("runninghub/")) ) if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1: logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)") logger.info(f" TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}") - logger.info(f" Image: {'runninghub' if config.image_workflow and config.image_workflow.startswith('runninghub/') else 'local'}") + logger.info(f" Media: {'runninghub' if config.media_workflow and 
config.media_workflow.startswith('runninghub/') else 'local'}") semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT) completed_count = 0 @@ -541,7 +541,7 @@ class StandardPipeline(BasePipeline): "tts_workflow": tts_workflow, "tts_speed": tts_speed, "ref_audio": ref_audio, - "image_workflow": image_workflow, + "media_workflow": media_workflow, "prompt_prefix": prompt_prefix, "frame_template": frame_template, "template_params": template_params, diff --git a/pixelle_video/services/frame_processor.py b/pixelle_video/services/frame_processor.py index 1e5cc71..09bd1da 100644 --- a/pixelle_video/services/frame_processor.py +++ b/pixelle_video/services/frame_processor.py @@ -187,7 +187,7 @@ class FrameProcessor: # Determine media type based on workflow # video_ prefix in workflow name indicates video generation - workflow_name = config.image_workflow or "" + workflow_name = config.media_workflow or "" is_video_workflow = "video_" in workflow_name.lower() media_type = "video" if is_video_workflow else "image" @@ -196,10 +196,10 @@ class FrameProcessor: # Call Media generation (with optional preset) media_result = await self.core.media( prompt=frame.image_prompt, - workflow=config.image_workflow, # Pass workflow from config (None = use default) + workflow=config.media_workflow, # Pass workflow from config (None = use default) media_type=media_type, - width=config.image_width, - height=config.image_height + width=config.media_width, + height=config.media_height ) # Store media type diff --git a/pixelle_video/services/persistence.py b/pixelle_video/services/persistence.py index d739f55..82377d2 100644 --- a/pixelle_video/services/persistence.py +++ b/pixelle_video/services/persistence.py @@ -380,9 +380,9 @@ class PersistenceService: "tts_workflow": config.tts_workflow, "tts_speed": config.tts_speed, "ref_audio": config.ref_audio, - "image_width": config.image_width, - "image_height": config.image_height, - "image_workflow": config.image_workflow, + "media_width": 
config.media_width, + "media_height": config.media_height, + "media_workflow": config.media_workflow, "frame_template": config.frame_template, "template_params": config.template_params, } @@ -402,9 +402,9 @@ class PersistenceService: tts_workflow=data.get("tts_workflow"), tts_speed=data.get("tts_speed"), ref_audio=data.get("ref_audio"), - image_width=data.get("image_width", 1024), - image_height=data.get("image_height", 1024), - image_workflow=data.get("image_workflow"), + media_width=data.get("media_width", data.get("image_width", 1024)), # Backward compatibility + media_height=data.get("media_height", data.get("image_height", 1024)), # Backward compatibility + media_workflow=data.get("media_workflow", data.get("image_workflow")), # Backward compatibility frame_template=data.get("frame_template", "1080x1920/default.html"), template_params=data.get("template_params"), ) diff --git a/web/components/output_preview.py b/web/components/output_preview.py index 6b97bfe..203f291 100644 --- a/web/components/output_preview.py +++ b/web/components/output_preview.py @@ -58,7 +58,7 @@ def render_single_output(pixelle_video, video_params): frame_template = video_params.get("frame_template") custom_values_for_video = video_params.get("template_params", {}) - workflow_key = video_params.get("image_workflow") + workflow_key = video_params.get("media_workflow") prompt_prefix = video_params.get("prompt_prefix", "") with st.container(border=True): @@ -123,18 +123,20 @@ def render_single_output(pixelle_video, video_params): progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete # Generate video (directly pass parameters) - # Note: image_width and image_height are now auto-determined from template + # Note: media_width and media_height are auto-determined from template video_params = { "text": text, "mode": mode, "title": title if title else None, "n_scenes": n_scenes, - "image_workflow": workflow_key, + "media_workflow": workflow_key, "frame_template": 
frame_template, "prompt_prefix": prompt_prefix, "bgm_path": bgm_path, "bgm_volume": bgm_volume if bgm_path else 0.2, "progress_callback": update_progress, + "media_width": st.session_state.get('template_media_width'), + "media_height": st.session_state.get('template_media_height'), } # Add TTS parameters based on mode @@ -245,12 +247,14 @@ def render_batch_output(pixelle_video, video_params): shared_config = { "title_prefix": video_params.get("title_prefix"), "n_scenes": video_params.get("n_scenes") or 5, - "image_workflow": video_params.get("image_workflow"), + "media_workflow": video_params.get("media_workflow"), "frame_template": video_params.get("frame_template"), "prompt_prefix": video_params.get("prompt_prefix") or "", "bgm_path": video_params.get("bgm_path"), "bgm_volume": video_params.get("bgm_volume") or 0.2, "tts_inference_mode": video_params.get("tts_inference_mode") or "local", + "media_width": video_params.get("media_width"), + "media_height": video_params.get("media_height"), } # Add TTS parameters based on mode (only add non-None values) diff --git a/web/components/style_config.py b/web/components/style_config.py index 71d7b54..889998a 100644 --- a/web/components/style_config.py +++ b/web/components/style_config.py @@ -610,7 +610,7 @@ def render_style_config(pixelle_video): workflow_options if workflow_options else ["No workflows found"], index=default_workflow_index, label_visibility="collapsed", - key="image_workflow_select" + key="media_workflow_select" ) # Get the actual workflow key (e.g., "runninghub/image_flux.json") @@ -621,14 +621,14 @@ def render_style_config(pixelle_video): workflow_key = "runninghub/image_flux.json" # fallback # Get media size from template - image_width = st.session_state.get('template_media_width', 1024) - image_height = st.session_state.get('template_media_height', 1024) + media_width = st.session_state.get('template_media_width') + media_height = st.session_state.get('template_media_height') # Display media size info 
(read-only) if template_media_type == "video": - size_info_text = tr('style.video_size_info', width=image_width, height=image_height) + size_info_text = tr('style.video_size_info', width=media_width, height=media_height) else: - size_info_text = tr('style.image_size_info', width=image_width, height=image_height) + size_info_text = tr('style.image_size_info', width=media_width, height=media_height) st.info(f"πŸ“ {size_info_text}") # Prompt prefix input @@ -679,8 +679,8 @@ def render_style_config(pixelle_video): prompt=final_prompt, workflow=workflow_key, media_type=template_media_type, - width=int(image_width), - height=int(image_height) + width=int(media_width), + height=int(media_height) )) preview_media_path = media_result.url @@ -725,8 +725,8 @@ def render_style_config(pixelle_video): st.caption(tr("image.not_required_hint")) # Get media size from template (even though not used, for consistency) - image_width = st.session_state.get('template_media_width', 1024) - image_height = st.session_state.get('template_media_height', 1024) + media_width = st.session_state.get('template_media_width') + media_height = st.session_state.get('template_media_height') # Set default values for later use workflow_key = None @@ -741,6 +741,8 @@ def render_style_config(pixelle_video): "ref_audio": str(ref_audio_path) if ref_audio_path else None, "frame_template": frame_template, "template_params": custom_values_for_video if custom_values_for_video else None, - "image_workflow": workflow_key, - "prompt_prefix": prompt_prefix if prompt_prefix else "" + "media_workflow": workflow_key, + "prompt_prefix": prompt_prefix if prompt_prefix else "", + "media_width": media_width, + "media_height": media_height } From 9ab53d06dcf3579683516077bedf3cbe39b6e452 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Thu, 20 Nov 2025 21:26:07 +0800 Subject: [PATCH 02/12] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5a0a2c7..872d27e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pixelle-video" -version = "0.1.6" +version = "0.1.7" description = "AI-powered video creation platform - Part of Pixelle ecosystem" authors = [ {name = "Pixelle.AI"} diff --git a/uv.lock b/uv.lock index 8cee9a5..96e02c8 100644 --- a/uv.lock +++ b/uv.lock @@ -1664,7 +1664,7 @@ wheels = [ [[package]] name = "pixelle-video" -version = "0.1.6" +version = "0.1.7" source = { editable = "." } dependencies = [ { name = "beautifulsoup4" }, From d8e380bdb5724570ff241a3291c1e59443e021b7 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 00:32:22 +0800 Subject: [PATCH 03/12] =?UTF-8?q?=E5=AF=B9=E5=88=86=E9=95=9C=E8=A7=86?= =?UTF-8?q?=E9=A2=91=E5=B7=A5=E4=BD=9C=E6=B5=81=E4=BC=A0=E9=80=92duration?= =?UTF-8?q?=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pixelle_video/services/frame_processor.py | 29 ++++++++++++++++------- pixelle_video/services/media.py | 6 +++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pixelle_video/services/frame_processor.py b/pixelle_video/services/frame_processor.py index 09bd1da..f7b64ae 100644 --- a/pixelle_video/services/frame_processor.py +++ b/pixelle_video/services/frame_processor.py @@ -14,6 +14,10 @@ Frame processor - Process single frame through complete pipeline Orchestrates: TTS β†’ Image Generation β†’ Frame Composition β†’ Video Segment + +Key Feature: +- TTS-driven video duration: Audio duration from TTS is passed to video generation workflows + to ensure perfect sync between audio and video (no padding, no trimming needed) """ from typing import Callable, Optional @@ -193,14 +197,23 @@ class FrameProcessor: logger.debug(f" β†’ Media type: {media_type} 
(workflow: {workflow_name})") - # Call Media generation (with optional preset) - media_result = await self.core.media( - prompt=frame.image_prompt, - workflow=config.media_workflow, # Pass workflow from config (None = use default) - media_type=media_type, - width=config.media_width, - height=config.media_height - ) + # Build media generation parameters + media_params = { + "prompt": frame.image_prompt, + "workflow": config.media_workflow, # Pass workflow from config (None = use default) + "media_type": media_type, + "width": config.media_width, + "height": config.media_height + } + + # For video workflows: pass audio duration as target video duration + # This ensures video length matches audio length from the source + if is_video_workflow and frame.duration: + media_params["duration"] = frame.duration + logger.info(f" β†’ Generating video with target duration: {frame.duration:.2f}s (from TTS audio)") + + # Call Media generation + media_result = await self.core.media(**media_params) # Store media type frame.media_type = media_result.media_type diff --git a/pixelle_video/services/media.py b/pixelle_video/services/media.py index 75d9e33..d894339 100644 --- a/pixelle_video/services/media.py +++ b/pixelle_video/services/media.py @@ -119,6 +119,7 @@ class MediaService(ComfyBaseService): # Common workflow parameters width: Optional[int] = None, height: Optional[int] = None, + duration: Optional[float] = None, # Video duration in seconds (for video workflows) negative_prompt: Optional[str] = None, steps: Optional[int] = None, seed: Optional[int] = None, @@ -140,6 +141,7 @@ class MediaService(ComfyBaseService): runninghub_api_key: RunningHub API key (optional, overrides config) width: Media width height: Media height + duration: Target video duration in seconds (only for video workflows, typically from TTS audio duration) negative_prompt: Negative prompt steps: Sampling steps seed: Random seed @@ -203,6 +205,10 @@ class MediaService(ComfyBaseService): 
workflow_params["width"] = width if height is not None: workflow_params["height"] = height + if duration is not None: + workflow_params["duration"] = duration + if media_type == "video": + logger.info(f"πŸ“ Target video duration: {duration:.2f}s (from TTS audio)") if negative_prompt is not None: workflow_params["negative_prompt"] = negative_prompt if steps is not None: From 02ef878e3bb2c11c2e2d36416068b54552a3e47c Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 00:56:24 +0800 Subject: [PATCH 04/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E9=9F=B3=E9=A2=91=E5=90=88=E5=B9=B6=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=99=BA=E8=83=BD=E6=97=B6=E9=95=BF=E8=B0=83?= =?UTF-8?q?=E6=95=B4=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pixelle_video/services/video.py | 177 ++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 7 deletions(-) diff --git a/pixelle_video/services/video.py b/pixelle_video/services/video.py index 5cbe31c..fa2a9f2 100644 --- a/pixelle_video/services/video.py +++ b/pixelle_video/services/video.py @@ -27,6 +27,7 @@ Note: Requires FFmpeg to be installed on the system. import os import shutil import tempfile +import uuid from pathlib import Path from typing import List, Literal, Optional @@ -316,12 +317,16 @@ class VideoService: audio_volume: float = 1.0, video_volume: float = 0.0, pad_strategy: str = "freeze", # "freeze" (freeze last frame) or "black" (black screen) + auto_adjust_duration: bool = True, # Automatically adjust video duration to match audio + duration_tolerance: float = 0.3, # Tolerance for video being longer than audio (seconds) ) -> str: """ - Merge audio with video, using the longer duration + Merge audio with video with intelligent duration adjustment - The output video duration will be the maximum of video and audio duration. 
- If audio is longer than video, the video will be padded using the specified strategy. + Automatically handles duration mismatches between video and audio: + - If video < audio: Pad video to match audio (avoid black screen) + - If video > audio (within tolerance): Keep as-is (acceptable) + - If video > audio (exceeds tolerance): Trim video to match audio Automatically handles videos with or without audio streams. - If video has no audio: adds the audio track @@ -339,6 +344,9 @@ class VideoService: pad_strategy: Strategy to pad video if audio is longer - "freeze": Freeze last frame (default) - "black": Fill with black screen + auto_adjust_duration: Enable intelligent duration adjustment (default: True) + duration_tolerance: Tolerance for video being longer than audio in seconds (default: 0.3) + Videos within this tolerance won't be trimmed Returns: Path to the output video file @@ -361,6 +369,28 @@ class VideoService: logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s") + # Intelligent duration adjustment (if enabled) + if auto_adjust_duration: + diff = video_duration - audio_duration + + if diff < 0: + # Video shorter than audio β†’ Must pad to avoid black screen + logger.warning(f"⚠️ Video shorter than audio by {abs(diff):.2f}s, padding required") + video = self._pad_video_to_duration(video, audio_duration, pad_strategy) + video_duration = audio_duration # Update duration after padding + logger.info(f"πŸ“Œ Padded video to {audio_duration:.2f}s") + + elif diff > duration_tolerance: + # Video significantly longer than audio β†’ Trim + logger.info(f"⚠️ Video longer than audio by {diff:.2f}s (tolerance: {duration_tolerance}s)") + video = self._trim_video_to_duration(video, audio_duration) + video_duration = audio_duration # Update duration after trimming + logger.info(f"βœ‚οΈ Trimmed video to {audio_duration:.2f}s") + + else: # 0 <= diff <= duration_tolerance + # Video slightly longer but within tolerance β†’ Keep as-is + 
logger.info(f"βœ… Duration acceptable: video={video_duration:.2f}s, audio={audio_duration:.2f}s (diff={diff:.2f}s)") + # Determine target duration (max of both) target_duration = max(video_duration, audio_duration) logger.info(f"Target output duration: {target_duration:.2f}s") @@ -382,9 +412,6 @@ class VideoService: video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration) else: # black # Generate black frames for padding duration - from pixelle_video.utils.os_util import get_temp_path - import os - # Get video properties probe = ffmpeg.probe(video) video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video') @@ -395,7 +422,7 @@ class VideoService: fps = fps_num / fps_den if fps_den != 0 else 30 # Create black video for padding - black_video_path = get_temp_path(f"black_pad_{os.path.basename(output)}") + black_video_path = self._get_unique_temp_path("black_pad", os.path.basename(output)) black_input = ffmpeg.input( f'color=c=black:s={width}x{height}:r={fps}', f='lavfi', @@ -778,6 +805,26 @@ class VideoService: fade_in=0.0 ) + def _get_unique_temp_path(self, prefix: str, original_filename: str) -> str: + """ + Generate unique temporary file path to avoid concurrent conflicts + + Args: + prefix: Prefix for the temp file (e.g., "trimmed", "padded", "black_pad") + original_filename: Original filename to preserve in temp path + + Returns: + Unique temporary file path with format: temp/{prefix}_{uuid}_{original_filename} + + Example: + >>> self._get_unique_temp_path("trimmed", "video.mp4") + >>> # Returns: "temp/trimmed_a3f2d8c1_video.mp4" + """ + from pixelle_video.utils.os_util import get_temp_path + + unique_id = uuid.uuid4().hex[:8] + return get_temp_path(f"{prefix}_{unique_id}_{original_filename}") + def _resolve_bgm_path(self, bgm_path: str) -> str: """ Resolve BGM path (filename or custom path) with custom override support @@ -841,4 +888,120 @@ class VideoService: except Exception as e: logger.warning(f"Failed to 
list BGM files: {e}") return [] + + def _trim_video_to_duration(self, video: str, target_duration: float) -> str: + """ + Trim video to specified duration + + Args: + video: Input video file path + target_duration: Target duration in seconds + + Returns: + Path to trimmed video (temp file) + + Raises: + RuntimeError: If FFmpeg execution fails + """ + output = self._get_unique_temp_path("trimmed", os.path.basename(video)) + + try: + # Use stream copy when possible for fast trimming + ( + ffmpeg + .input(video, t=target_duration) + .output(output, vcodec='copy', acodec='copy' if self.has_audio_stream(video) else 'copy') + .overwrite_output() + .run(capture_stdout=True, capture_stderr=True, quiet=True) + ) + return output + except ffmpeg.Error as e: + error_msg = e.stderr.decode() if e.stderr else str(e) + logger.error(f"FFmpeg error trimming video: {error_msg}") + raise RuntimeError(f"Failed to trim video: {error_msg}") + + def _pad_video_to_duration(self, video: str, target_duration: float, pad_strategy: str = "freeze") -> str: + """ + Pad video to specified duration by extending the last frame or adding black frames + + Args: + video: Input video file path + target_duration: Target duration in seconds + pad_strategy: Padding strategy - "freeze" (freeze last frame) or "black" (black screen) + + Returns: + Path to padded video (temp file) + + Raises: + RuntimeError: If FFmpeg execution fails + """ + output = self._get_unique_temp_path("padded", os.path.basename(video)) + + video_duration = self._get_video_duration(video) + pad_duration = target_duration - video_duration + + if pad_duration <= 0: + # No padding needed, return original + return video + + try: + input_video = ffmpeg.input(video) + video_stream = input_video.video + + if pad_strategy == "freeze": + # Freeze last frame using tpad filter + video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration) + + # Output with re-encoding (tpad requires it) + ( + ffmpeg + .output( + 
video_stream, + output, + vcodec='libx264', + preset='fast', + crf=23 + ) + .overwrite_output() + .run(capture_stdout=True, capture_stderr=True, quiet=True) + ) + else: # black + # Generate black frames for padding duration + # Get video properties + probe = ffmpeg.probe(video) + video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video') + width = int(video_info['width']) + height = int(video_info['height']) + fps_str = video_info['r_frame_rate'] + fps_num, fps_den = map(int, fps_str.split('/')) + fps = fps_num / fps_den if fps_den != 0 else 30 + + # Create black video for padding + black_input = ffmpeg.input( + f'color=c=black:s={width}x{height}:r={fps}', + f='lavfi', + t=pad_duration + ) + + # Concatenate original video with black padding + video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0) + + ( + ffmpeg + .output( + video_stream, + output, + vcodec='libx264', + preset='fast', + crf=23 + ) + .overwrite_output() + .run(capture_stdout=True, capture_stderr=True, quiet=True) + ) + + return output + except ffmpeg.Error as e: + error_msg = e.stderr.decode() if e.stderr else str(e) + logger.error(f"FFmpeg error padding video: {error_msg}") + raise RuntimeError(f"Failed to pad video: {error_msg}") From 3d4aea3b11549d49cffbdf0e5eb86028091b89ff Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 01:06:11 +0800 Subject: [PATCH 05/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=89=B9=E9=87=8F?= =?UTF-8?q?=E7=94=9F=E6=88=90=E5=90=8E=E6=9F=A5=E7=9C=8B=E5=8E=86=E5=8F=B2?= =?UTF-8?q?=E6=8C=89=E9=92=AE=E8=B7=B3=E8=BD=AC=E5=A4=B1=E6=95=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web/components/output_preview.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/web/components/output_preview.py b/web/components/output_preview.py index 203f291..6b0b8cc 100644 --- a/web/components/output_preview.py +++ 
b/web/components/output_preview.py @@ -372,13 +372,28 @@ def render_batch_output(pixelle_video, video_params): st.success(tr("batch.success_message")) st.info(tr("batch.view_in_history")) - # Button to go to History page - if st.button( - f"📚 {tr('batch.goto_history')}", - type="secondary", - use_container_width=True - ): - st.switch_page("pages/2_📚_History.py") + # Button to go to History page using JavaScript URL navigation + st.markdown( + f""" + + + + """, + unsafe_allow_html=True + ) # Show failed tasks if any if batch_result["errors"]: From d018e24fd32d33fbde5f85df8ab37f09e78c15b6 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 01:08:40 +0800 Subject: [PATCH 06/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BD=E5=86=85docke?= =?UTF-8?q?r=E9=83=A8=E7=BD=B2=E7=9A=84=E9=95=9C=E5=83=8F=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index b7ec2f3..da4785c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,16 +46,14 @@ RUN uv --version COPY pyproject.toml uv.lock README.md ./ COPY pixelle_video ./pixelle_video -# Install Python dependencies using uv with configurable index URL -# Create uv.toml config file to force using the mirror (most reliable method) -# Only create config when USE_CN_MIRROR=true, otherwise use default PyPI -RUN if [ "$USE_CN_MIRROR" = "true" ]; then \ - echo '[[index]]' > uv.toml && \ - echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> uv.toml && \ - echo 'default = true' >> uv.toml; \ - fi && \ - export UV_HTTP_TIMEOUT=300 && \ - uv sync --frozen --no-dev +# Install Python dependencies using uv pip install +# Use -i flag to specify mirror when USE_CN_MIRROR=true +RUN export UV_HTTP_TIMEOUT=300 && \ + if [ "$USE_CN_MIRROR" = "true" ]; then \ + uv pip install -e . 
-i https://pypi.tuna.tsinghua.edu.cn/simple; \ + else \ + uv pip install -e .; \ + fi # Copy rest of application code COPY api ./api From 66d8061d4e3b349a8f688cbfc32fca2b5bcea5ad Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 01:10:23 +0800 Subject: [PATCH 07/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BD=E5=86=85docke?= =?UTF-8?q?r=E9=83=A8=E7=BD=B2=E7=9A=84=E9=95=9C=E5=83=8F=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index da4785c..6d4c470 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,12 +47,12 @@ COPY pyproject.toml uv.lock README.md ./ COPY pixelle_video ./pixelle_video # Install Python dependencies using uv pip install -# Use -i flag to specify mirror when USE_CN_MIRROR=true +# Use --system flag for Docker environment, -i flag to specify mirror when USE_CN_MIRROR=true RUN export UV_HTTP_TIMEOUT=300 && \ if [ "$USE_CN_MIRROR" = "true" ]; then \ - uv pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \ + uv pip install --system -e . 
-i https://pypi.tuna.tsinghua.edu.cn/simple; \ else \ - uv pip install -e .; \ + uv pip install --system -e .; \ fi # Copy rest of application code From a29b8556c8dc71e42e84d40f00aa4cc13474c667 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 01:22:07 +0800 Subject: [PATCH 08/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BD=E5=86=85docke?= =?UTF-8?q?r=E9=83=A8=E7=BD=B2=E7=9A=84=E9=95=9C=E5=83=8F=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6d4c470..fd9ed7c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,13 +46,14 @@ RUN uv --version COPY pyproject.toml uv.lock README.md ./ COPY pixelle_video ./pixelle_video -# Install Python dependencies using uv pip install -# Use --system flag for Docker environment, -i flag to specify mirror when USE_CN_MIRROR=true +# Create virtual environment and install dependencies +# Use -i flag to specify mirror when USE_CN_MIRROR=true RUN export UV_HTTP_TIMEOUT=300 && \ + uv venv && \ if [ "$USE_CN_MIRROR" = "true" ]; then \ - uv pip install --system -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \ + uv pip install -e . 
-i https://pypi.tuna.tsinghua.edu.cn/simple; \ else \ - uv pip install --system -e .; \ + uv pip install -e .; \ fi # Copy rest of application code From bbaa153b9e611d2c9ddad3c0d082a05d53f84116 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 01:28:08 +0800 Subject: [PATCH 09/12] =?UTF-8?q?Docker=E6=B7=BB=E5=8A=A0temp=E7=9B=AE?= =?UTF-8?q?=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index fd9ed7c..976d0fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,8 +64,8 @@ COPY templates ./templates COPY workflows ./workflows COPY resources ./resources -# Create output and data directories -RUN mkdir -p /app/output /app/data +# Create output, data and temp directories +RUN mkdir -p /app/output /app/data /app/temp # Set environment variables for html2image to use chromium ENV BROWSER_EXECUTABLE_PATH=/usr/bin/chromium From 8310183405b8177bbf568b3fa7f09ec7fd9cffbe Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 10:47:02 +0800 Subject: [PATCH 10/12] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=9C=AC=E5=9C=B0Comfy?= =?UTF-8?q?UI=E7=9A=84index-tts2=E5=B7=A5=E4=BD=9C=E6=B5=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- workflows/selfhost/tts_index2.json | 64 ++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 workflows/selfhost/tts_index2.json diff --git a/workflows/selfhost/tts_index2.json b/workflows/selfhost/tts_index2.json new file mode 100644 index 0000000..fa72053 --- /dev/null +++ b/workflows/selfhost/tts_index2.json @@ -0,0 +1,64 @@ +{ + "3": { + "inputs": { + "text": "床前明月光，疑是地上霜。" + }, + "class_type": "Text _O", + "_meta": { + "title": "$text.text!" 
+ } + }, + "5": { + "inputs": { + "text": [ + "3", + 0 + ], + "mode": "Auto", + "do_sample_mode": "on", + "temperature": 0.8, + "top_p": 0.9, + "top_k": 30, + "num_beams": 3, + "repetition_penalty": 10, + "length_penalty": 0, + "max_mel_tokens": 1815, + "max_tokens_per_sentence": 120, + "seed": 4266796044, + "reference_audio": [ + "12", + 0 + ] + }, + "class_type": "IndexTTS2BaseNode", + "_meta": { + "title": "Index TTS 2 - Base" + } + }, + "8": { + "inputs": { + "filename_prefix": "audio/ComfyUI", + "quality": "V0", + "audioUI": "", + "audio": [ + "5", + 0 + ] + }, + "class_type": "SaveAudioMP3", + "_meta": { + "title": "Save Audio (MP3)" + } + }, + "12": { + "inputs": { + "audio": "小裴钱.wav", + "start_time": 0, + "duration": 0 + }, + "class_type": "VHS_LoadAudioUpload", + "_meta": { + "title": "$ref_audio.audio" + } + } +} \ No newline at end of file From 910d0cdf99ec6fbc910cc199fae97e3070a6c124 Mon Sep 17 00:00:00 2001 From: xianshi-yyds <150135158+xianshi-yyds@users.noreply.github.com> Date: Fri, 21 Nov 2025 10:51:49 +0800 Subject: [PATCH 11/12] add wan2.2 workflow --- workflows/runninghub/video_wan2.2.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 workflows/runninghub/video_wan2.2.json diff --git a/workflows/runninghub/video_wan2.2.json b/workflows/runninghub/video_wan2.2.json new file mode 100644 index 0000000..6aff063 --- /dev/null +++ b/workflows/runninghub/video_wan2.2.json @@ -0,0 +1,4 @@ +{ + "source": "runninghub", + "workflow_id": "1991693844100100097" +} \ No newline at end of file From a9e12d539b3ce45e96be520999b32958d4c7cc6c Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Fri, 21 Nov 2025 11:22:06 +0800 Subject: [PATCH 12/12] =?UTF-8?q?=E7=A1=AE=E4=BF=9D=E4=B8=B4=E6=97=B6?= =?UTF-8?q?=E3=80=81=E6=95=B0=E6=8D=AE=E5=92=8C=E8=BE=93=E5=87=BA=E7=9B=AE?= =?UTF-8?q?=E5=BD=95=E5=9C=A8=E8=BF=94=E5=9B=9E=E8=B7=AF=E5=BE=84=E4=B9=8B?= =?UTF-8?q?=E5=89=8D=E5=AD=98=E5=9C=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- pixelle_video/utils/os_util.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pixelle_video/utils/os_util.py b/pixelle_video/utils/os_util.py index 12c26dc..bbec0ce 100644 --- a/pixelle_video/utils/os_util.py +++ b/pixelle_video/utils/os_util.py @@ -83,6 +83,8 @@ def get_temp_path(*paths: str) -> str: """ Get path relative to Pixelle-Video temp folder + Ensures temp directory exists before returning path. + Args: *paths: Path components to join @@ -94,6 +96,10 @@ def get_temp_path(*paths: str) -> str: # Returns: "/path/to/project/temp/audio.mp3" """ temp_path = get_root_path("temp") + + # Ensure temp directory exists + os.makedirs(temp_path, exist_ok=True) + if paths: return os.path.join(temp_path, *paths) return temp_path @@ -102,6 +108,8 @@ def get_temp_path(*paths: str) -> str: def get_data_path(*paths: str) -> str: """ Get path relative to Pixelle-Video data folder + + Ensures data directory exists before returning path. Args: *paths: Path components to join @@ -114,6 +122,10 @@ def get_data_path(*paths: str) -> str: # Returns: "/path/to/project/data/videos/output.mp4" """ data_path = get_root_path("data") + + # Ensure data directory exists + os.makedirs(data_path, exist_ok=True) + if paths: return os.path.join(data_path, *paths) return data_path @@ -122,6 +134,8 @@ def get_data_path(*paths: str) -> str: def get_output_path(*paths: str) -> str: """ Get path relative to Pixelle-Video output folder + + Ensures output directory exists before returning path. Args: *paths: Path components to join @@ -134,6 +148,10 @@ def get_output_path(*paths: str) -> str: # Returns: "/path/to/project/output/video.mp4" """ output_path = get_root_path("output") + + # Ensure output directory exists + os.makedirs(output_path, exist_ok=True) + if paths: return os.path.join(output_path, *paths) return output_path