From 7f904f6b191e8b7e36725990ff06832bd783ba16 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Thu, 20 Nov 2025 20:09:43 +0800
Subject: [PATCH 01/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=A7=86=E9=A2=91?=
 =?UTF-8?q?=E5=B0=BA=E5=AF=B8=E4=BC=A0=E5=8F=82=E6=9C=AA=E7=94=9F=E6=95=88?=
 =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/routers/resources.py                  | 38 ++++++++++++++++++++---
 api/routers/video.py                      | 32 ++++++++++++++++---
 api/schemas/video.py                      |  6 ++--
 pixelle_video/models/storyboard.py        | 10 +++---
 pixelle_video/pipelines/custom.py         | 16 +++++-----
 pixelle_video/pipelines/standard.py       | 28 ++++++++---------
 pixelle_video/services/frame_processor.py |  8 ++---
 pixelle_video/services/persistence.py     | 12 +++----
 web/components/output_preview.py          | 12 ++++---
 web/components/style_config.py            | 24 +++++++-------
 10 files changed, 123 insertions(+), 63 deletions(-)

diff --git a/api/routers/resources.py b/api/routers/resources.py
index f247874..3ba14fb 100644
--- a/api/routers/resources.py
+++ b/api/routers/resources.py
@@ -76,12 +76,12 @@ async def list_tts_workflows(pixelle_video: PixelleVideoDep):
         raise HTTPException(status_code=500, detail=str(e))
 
 
-@router.get("/workflows/image", response_model=WorkflowListResponse)
-async def list_image_workflows(pixelle_video: PixelleVideoDep):
+@router.get("/workflows/media", response_model=WorkflowListResponse)
+async def list_media_workflows(pixelle_video: PixelleVideoDep):
     """
-    List available image generation workflows
+    List available media workflows (both image and video)
     
-    Returns list of image workflows from both RunningHub and self-hosted sources.
+    Returns list of all media workflows from both RunningHub and self-hosted sources.
     
     Example response:
     ```json
@@ -94,13 +94,41 @@ async def list_image_workflows(pixelle_video: PixelleVideoDep):
                 "path": "workflows/runninghub/image_flux.json",
                 "key": "runninghub/image_flux.json",
                 "workflow_id": "123456"
+            },
+            {
+                "name": "video_wan2.1.json",
+                "display_name": "video_wan2.1.json - Runninghub",
+                "source": "runninghub",
+                "path": "workflows/runninghub/video_wan2.1.json",
+                "key": "runninghub/video_wan2.1.json",
+                "workflow_id": "123457"
             }
         ]
     }
     ```
     """
     try:
-        # Get all workflows from media service (image generation is handled by media service)
+        # Get all workflows from media service (includes both image and video)
+        all_workflows = pixelle_video.media.list_workflows()
+        
+        media_workflows = [WorkflowInfo(**wf) for wf in all_workflows]
+        
+        return WorkflowListResponse(workflows=media_workflows)
+        
+    except Exception as e:
+        logger.error(f"List media workflows error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# Keep old endpoint for backward compatibility
+@router.get("/workflows/image", response_model=WorkflowListResponse)
+async def list_image_workflows(pixelle_video: PixelleVideoDep):
+    """
+    List available image workflows (deprecated, use /workflows/media instead)
+    
+    This endpoint is kept for backward compatibility; it returns only workflows whose filename starts with "image_".
+    """
+    try:
         all_workflows = pixelle_video.media.list_workflows()
         
         # Filter to image workflows only (filename starts with "image_")
diff --git a/api/routers/video.py b/api/routers/video.py
index 207e3c2..9f09ccf 100644
--- a/api/routers/video.py
+++ b/api/routers/video.py
@@ -63,6 +63,17 @@ async def generate_video_sync(
     try:
         logger.info(f"Sync video generation: {request_body.text[:50]}...")
         
+        # Derive media_width/media_height from the template's meta tags; frame_template is therefore required
+        if not request_body.frame_template:
+            raise ValueError("frame_template is required to determine media size")
+        
+        from pixelle_video.services.frame_html import HTMLFrameGenerator
+        from pixelle_video.utils.template_util import resolve_template_path
+        template_path = resolve_template_path(request_body.frame_template)
+        generator = HTMLFrameGenerator(template_path)
+        media_width, media_height = generator.get_media_size()
+        logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
+        
         # Build video generation parameters
         video_params = {
             "text": request_body.text,
@@ -73,8 +84,9 @@ async def generate_video_sync(
             "max_narration_words": request_body.max_narration_words,
             "min_image_prompt_words": request_body.min_image_prompt_words,
             "max_image_prompt_words": request_body.max_image_prompt_words,
-            # Note: image_width and image_height are now auto-determined from template
-            "image_workflow": request_body.image_workflow,
+            "media_width": media_width,
+            "media_height": media_height,
+            "media_workflow": request_body.media_workflow,
             "video_fps": request_body.video_fps,
             "frame_template": request_body.frame_template,
             "prompt_prefix": request_body.prompt_prefix,
@@ -150,6 +162,17 @@ async def generate_video_async(
         # Define async execution function
         async def execute_video_generation():
             """Execute video generation in background"""
+            # Derive media_width/media_height from the template's meta tags; frame_template is therefore required
+            if not request_body.frame_template:
+                raise ValueError("frame_template is required to determine media size")
+            
+            from pixelle_video.services.frame_html import HTMLFrameGenerator
+            from pixelle_video.utils.template_util import resolve_template_path
+            template_path = resolve_template_path(request_body.frame_template)
+            generator = HTMLFrameGenerator(template_path)
+            media_width, media_height = generator.get_media_size()
+            logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
+            
             # Build video generation parameters
             video_params = {
                 "text": request_body.text,
@@ -160,8 +183,9 @@ async def generate_video_async(
                 "max_narration_words": request_body.max_narration_words,
                 "min_image_prompt_words": request_body.min_image_prompt_words,
                 "max_image_prompt_words": request_body.max_image_prompt_words,
-                # Note: image_width and image_height are now auto-determined from template
-                "image_workflow": request_body.image_workflow,
+                "media_width": media_width,
+                "media_height": media_height,
+                "media_workflow": request_body.media_workflow,
                 "video_fps": request_body.video_fps,
                 "frame_template": request_body.frame_template,
                 "prompt_prefix": request_body.prompt_prefix,
diff --git a/api/schemas/video.py b/api/schemas/video.py
index d37dd80..483fd16 100644
--- a/api/schemas/video.py
+++ b/api/schemas/video.py
@@ -56,9 +56,9 @@ class VideoGenerateRequest(BaseModel):
     min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words")
     max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
     
-    # === Image Parameters ===
-    # Note: image_width and image_height are now auto-determined from template meta tags
-    image_workflow: Optional[str] = Field(None, description="Custom image workflow")
+    # === Media Parameters ===
+    # Note: media_width and media_height are auto-determined from template meta tags
+    media_workflow: Optional[str] = Field(None, description="Custom media workflow (image or video)")
     
     # === Video Parameters ===
     video_fps: int = Field(30, ge=15, le=60, description="Video FPS")
diff --git a/pixelle_video/models/storyboard.py b/pixelle_video/models/storyboard.py
index 1204991..750ccb4 100644
--- a/pixelle_video/models/storyboard.py
+++ b/pixelle_video/models/storyboard.py
@@ -23,6 +23,10 @@ from typing import List, Optional, Dict, Any
 class StoryboardConfig:
     """Storyboard configuration parameters"""
     
+    # Required fields (no defaults, so in a dataclass they must precede every defaulted field)
+    media_width: int                           # Media width (image or video, required)
+    media_height: int                          # Media height (image or video, required)
+    
     # Task isolation
     task_id: Optional[str] = None              # Task ID for file isolation (auto-generated if None)
     
@@ -42,10 +46,8 @@ class StoryboardConfig:
     tts_speed: Optional[float] = None          # TTS speed multiplier (0.5-2.0, 1.0 = normal)
     ref_audio: Optional[str] = None            # Reference audio for voice cloning (ComfyUI mode only)
     
-    # Image parameters
-    image_width: int = 1024
-    image_height: int = 1024
-    image_workflow: Optional[str] = None       # Image workflow filename (None = use default)
+    # Media workflow
+    media_workflow: Optional[str] = None       # Media workflow filename (image or video, None = use default)
     
     # Frame template (includes size information in path)
     frame_template: str = "1080x1920/default.html"  # Template path with size (e.g., "1080x1920/default.html")
diff --git a/pixelle_video/pipelines/custom.py b/pixelle_video/pipelines/custom.py
index 0030214..749d458 100644
--- a/pixelle_video/pipelines/custom.py
+++ b/pixelle_video/pipelines/custom.py
@@ -93,8 +93,8 @@ class CustomPipeline(BasePipeline):
         tts_speed: float = 1.2,
         ref_audio: Optional[str] = None,
         
-        image_workflow: Optional[str] = None,
-        # Note: image_width and image_height are now auto-determined from template
+        media_workflow: Optional[str] = None,
+        # Note: media_width and media_height are auto-determined from template
         
         frame_template: Optional[str] = None,
         video_fps: int = 30,
@@ -189,8 +189,8 @@ class CustomPipeline(BasePipeline):
         # Read media size from template meta tags
         template_path = resolve_template_path(frame_template)
         generator = HTMLFrameGenerator(template_path)
-        image_width, image_height = generator.get_media_size()
-        logger.info(f"📐 Media size from template: {image_width}x{image_height}")
+        media_width, media_height = generator.get_media_size()
+        logger.info(f"📐 Media size from template: {media_width}x{media_height}")
         
         if template_type == "image":
             logger.info(f"📸 Template requires image generation")
@@ -270,9 +270,9 @@ class CustomPipeline(BasePipeline):
             tts_workflow=final_tts_workflow,  # Use processed workflow
             tts_speed=tts_speed,
             ref_audio=ref_audio,
-            image_width=image_width,
-            image_height=image_height,
-            image_workflow=image_workflow,
+            media_width=media_width,
+            media_height=media_height,
+            media_workflow=media_workflow,
             frame_template=frame_template
         )
         
@@ -387,7 +387,7 @@ class CustomPipeline(BasePipeline):
                     "tts_workflow": tts_workflow,
                     "tts_speed": tts_speed,
                     "ref_audio": ref_audio,
-                    "image_workflow": image_workflow,
+                    "media_workflow": media_workflow,
                     "frame_template": frame_template,
                     "bgm_path": bgm_path,
                     "bgm_volume": bgm_volume,
diff --git a/pixelle_video/pipelines/standard.py b/pixelle_video/pipelines/standard.py
index ee3e0a8..fab9684 100644
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -68,8 +68,10 @@ class StandardPipeline(BasePipeline):
     
     async def __call__(
         self,
-        # === Input ===
+        # === Input (Required) ===
         text: str,
+        media_width: int,  # Required: Media width (from template)
+        media_height: int,  # Required: Media height (from template)
         
         # === Processing Mode ===
         mode: Literal["generate", "fixed"] = "generate",
@@ -95,10 +97,8 @@ class StandardPipeline(BasePipeline):
         min_image_prompt_words: int = 30,
         max_image_prompt_words: int = 60,
         
-        # === Image Parameters ===
-        image_width: int = 1024,
-        image_height: int = 1024,
-        image_workflow: Optional[str] = None,
+        # === Media Workflow ===
+        media_workflow: Optional[str] = None,
         
         # === Video Parameters ===
         video_fps: int = 30,
@@ -155,9 +155,9 @@ class StandardPipeline(BasePipeline):
             min_image_prompt_words: Min image prompt length
             max_image_prompt_words: Max image prompt length
             
-            image_width: Generated image width (default 1024)
-            image_height: Generated image height (default 1024)
-            image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
+            media_width: Media width (image or video, required)
+            media_height: Media height (image or video, required)
+            media_workflow: Media workflow filename (image or video, e.g., "image_flux.json", "video_wan.json", None = use default)
             
             video_fps: Video frame rate (default 30)
             
@@ -254,9 +254,9 @@ class StandardPipeline(BasePipeline):
             tts_workflow=final_tts_workflow,  # Use processed workflow
             tts_speed=tts_speed,
             ref_audio=ref_audio,
-            image_width=image_width,
-            image_height=image_height,
-            image_workflow=image_workflow,
+            media_width=media_width,
+            media_height=media_height,
+            media_workflow=media_workflow,
             frame_template=frame_template or "1080x1920/default.html",
             template_params=template_params  # Custom template parameters
         )
@@ -374,13 +374,13 @@ class StandardPipeline(BasePipeline):
             # Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts)
             is_runninghub = (
                 (config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or
-                (config.image_workflow and config.image_workflow.startswith("runninghub/"))
+                (config.media_workflow and config.media_workflow.startswith("runninghub/"))
             )
             
             if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1:
                 logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)")
                 logger.info(f"   TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}")
-                logger.info(f"   Image: {'runninghub' if config.image_workflow and config.image_workflow.startswith('runninghub/') else 'local'}")
+                logger.info(f"   Media: {'runninghub' if config.media_workflow and config.media_workflow.startswith('runninghub/') else 'local'}")
                 
                 semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT)
                 completed_count = 0
@@ -541,7 +541,7 @@ class StandardPipeline(BasePipeline):
                     "tts_workflow": tts_workflow,
                     "tts_speed": tts_speed,
                     "ref_audio": ref_audio,
-                    "image_workflow": image_workflow,
+                    "media_workflow": media_workflow,
                     "prompt_prefix": prompt_prefix,
                     "frame_template": frame_template,
                     "template_params": template_params,
diff --git a/pixelle_video/services/frame_processor.py b/pixelle_video/services/frame_processor.py
index 1e5cc71..09bd1da 100644
--- a/pixelle_video/services/frame_processor.py
+++ b/pixelle_video/services/frame_processor.py
@@ -187,7 +187,7 @@ class FrameProcessor:
         
         # Determine media type based on workflow
         # video_ prefix in workflow name indicates video generation
-        workflow_name = config.image_workflow or ""
+        workflow_name = config.media_workflow or ""
         is_video_workflow = "video_" in workflow_name.lower()
         media_type = "video" if is_video_workflow else "image"
         
@@ -196,10 +196,10 @@ class FrameProcessor:
         # Call Media generation (with optional preset)
         media_result = await self.core.media(
             prompt=frame.image_prompt,
-            workflow=config.image_workflow,  # Pass workflow from config (None = use default)
+            workflow=config.media_workflow,  # Pass workflow from config (None = use default)
             media_type=media_type,
-            width=config.image_width,
-            height=config.image_height
+            width=config.media_width,
+            height=config.media_height
         )
         
         # Store media type
diff --git a/pixelle_video/services/persistence.py b/pixelle_video/services/persistence.py
index d739f55..82377d2 100644
--- a/pixelle_video/services/persistence.py
+++ b/pixelle_video/services/persistence.py
@@ -380,9 +380,9 @@ class PersistenceService:
             "tts_workflow": config.tts_workflow,
             "tts_speed": config.tts_speed,
             "ref_audio": config.ref_audio,
-            "image_width": config.image_width,
-            "image_height": config.image_height,
-            "image_workflow": config.image_workflow,
+            "media_width": config.media_width,
+            "media_height": config.media_height,
+            "media_workflow": config.media_workflow,
             "frame_template": config.frame_template,
             "template_params": config.template_params,
         }
@@ -402,9 +402,9 @@ class PersistenceService:
             tts_workflow=data.get("tts_workflow"),
             tts_speed=data.get("tts_speed"),
             ref_audio=data.get("ref_audio"),
-            image_width=data.get("image_width", 1024),
-            image_height=data.get("image_height", 1024),
-            image_workflow=data.get("image_workflow"),
+            media_width=data.get("media_width", data.get("image_width", 1024)),  # Backward compatibility
+            media_height=data.get("media_height", data.get("image_height", 1024)),  # Backward compatibility
+            media_workflow=data.get("media_workflow", data.get("image_workflow")),  # Backward compatibility
             frame_template=data.get("frame_template", "1080x1920/default.html"),
             template_params=data.get("template_params"),
         )
diff --git a/web/components/output_preview.py b/web/components/output_preview.py
index 6b97bfe..203f291 100644
--- a/web/components/output_preview.py
+++ b/web/components/output_preview.py
@@ -58,7 +58,7 @@ def render_single_output(pixelle_video, video_params):
     
     frame_template = video_params.get("frame_template")
     custom_values_for_video = video_params.get("template_params", {})
-    workflow_key = video_params.get("image_workflow")
+    workflow_key = video_params.get("media_workflow")
     prompt_prefix = video_params.get("prompt_prefix", "")
     
     with st.container(border=True):
@@ -123,18 +123,20 @@ def render_single_output(pixelle_video, video_params):
                     progress_bar.progress(min(int(event.progress * 100), 99))  # Cap at 99% until complete
                 
                 # Generate video (directly pass parameters)
-                # Note: image_width and image_height are now auto-determined from template
+                # Note: media_width and media_height are auto-determined from template
                 video_params = {
                     "text": text,
                     "mode": mode,
                     "title": title if title else None,
                     "n_scenes": n_scenes,
-                    "image_workflow": workflow_key,
+                    "media_workflow": workflow_key,
                     "frame_template": frame_template,
                     "prompt_prefix": prompt_prefix,
                     "bgm_path": bgm_path,
                     "bgm_volume": bgm_volume if bgm_path else 0.2,
                     "progress_callback": update_progress,
+                    "media_width": st.session_state.get('template_media_width'),
+                    "media_height": st.session_state.get('template_media_height'),
                 }
                 
                 # Add TTS parameters based on mode
@@ -245,12 +247,14 @@ def render_batch_output(pixelle_video, video_params):
             shared_config = {
                 "title_prefix": video_params.get("title_prefix"),
                 "n_scenes": video_params.get("n_scenes") or 5,
-                "image_workflow": video_params.get("image_workflow"),
+                "media_workflow": video_params.get("media_workflow"),
                 "frame_template": video_params.get("frame_template"),
                 "prompt_prefix": video_params.get("prompt_prefix") or "",
                 "bgm_path": video_params.get("bgm_path"),
                 "bgm_volume": video_params.get("bgm_volume") or 0.2,
                 "tts_inference_mode": video_params.get("tts_inference_mode") or "local",
+                "media_width": video_params.get("media_width"),
+                "media_height": video_params.get("media_height"),
             }
             
             # Add TTS parameters based on mode (only add non-None values)
diff --git a/web/components/style_config.py b/web/components/style_config.py
index 71d7b54..889998a 100644
--- a/web/components/style_config.py
+++ b/web/components/style_config.py
@@ -610,7 +610,7 @@ def render_style_config(pixelle_video):
                 workflow_options if workflow_options else ["No workflows found"],
                 index=default_workflow_index,
                 label_visibility="collapsed",
-                key="image_workflow_select"
+                key="media_workflow_select"
             )
         
             # Get the actual workflow key (e.g., "runninghub/image_flux.json")
@@ -621,14 +621,14 @@ def render_style_config(pixelle_video):
                 workflow_key = "runninghub/image_flux.json"  # fallback
         
             # Get media size from template
-            image_width = st.session_state.get('template_media_width', 1024)
-            image_height = st.session_state.get('template_media_height', 1024)
+            media_width = st.session_state.get('template_media_width')
+            media_height = st.session_state.get('template_media_height')
             
             # Display media size info (read-only)
             if template_media_type == "video":
-                size_info_text = tr('style.video_size_info', width=image_width, height=image_height)
+                size_info_text = tr('style.video_size_info', width=media_width, height=media_height)
             else:
-                size_info_text = tr('style.image_size_info', width=image_width, height=image_height)
+                size_info_text = tr('style.image_size_info', width=media_width, height=media_height)
             st.info(f"📐 {size_info_text}")
         
             # Prompt prefix input
@@ -679,8 +679,8 @@ def render_style_config(pixelle_video):
                                 prompt=final_prompt,
                                 workflow=workflow_key,
                                 media_type=template_media_type,
-                                width=int(image_width),
-                                height=int(image_height)
+                                width=int(media_width),
+                                height=int(media_height)
                             ))
                             preview_media_path = media_result.url
                         
@@ -725,8 +725,8 @@ def render_style_config(pixelle_video):
             st.caption(tr("image.not_required_hint"))
             
             # Get media size from template (even though not used, for consistency)
-            image_width = st.session_state.get('template_media_width', 1024)
-            image_height = st.session_state.get('template_media_height', 1024)
+            media_width = st.session_state.get('template_media_width')
+            media_height = st.session_state.get('template_media_height')
             
             # Set default values for later use
             workflow_key = None
@@ -741,6 +741,8 @@ def render_style_config(pixelle_video):
         "ref_audio": str(ref_audio_path) if ref_audio_path else None,
         "frame_template": frame_template,
         "template_params": custom_values_for_video if custom_values_for_video else None,
-        "image_workflow": workflow_key,
-        "prompt_prefix": prompt_prefix if prompt_prefix else ""
+        "media_workflow": workflow_key,
+        "prompt_prefix": prompt_prefix if prompt_prefix else "",
+        "media_width": media_width,
+        "media_height": media_height
     }

From 9ab53d06dcf3579683516077bedf3cbe39b6e452 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Thu, 20 Nov 2025 21:26:07 +0800
Subject: [PATCH 02/12] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E7=89=88=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5a0a2c7..872d27e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pixelle-video"
-version = "0.1.6"
+version = "0.1.7"
 description = "AI-powered video creation platform - Part of Pixelle ecosystem"
 authors = [
     {name = "Pixelle.AI"}
diff --git a/uv.lock b/uv.lock
index 8cee9a5..96e02c8 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1664,7 +1664,7 @@ wheels = [
 
 [[package]]
 name = "pixelle-video"
-version = "0.1.6"
+version = "0.1.7"
 source = { editable = "." }
 dependencies = [
     { name = "beautifulsoup4" },

From d8e380bdb5724570ff241a3291c1e59443e021b7 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 00:32:22 +0800
Subject: [PATCH 03/12] =?UTF-8?q?=E5=AF=B9=E5=88=86=E9=95=9C=E8=A7=86?=
 =?UTF-8?q?=E9=A2=91=E5=B7=A5=E4=BD=9C=E6=B5=81=E4=BC=A0=E9=80=92duration?=
 =?UTF-8?q?=E5=8F=82=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pixelle_video/services/frame_processor.py | 29 ++++++++++++++++-------
 pixelle_video/services/media.py           |  6 +++++
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/pixelle_video/services/frame_processor.py b/pixelle_video/services/frame_processor.py
index 09bd1da..f7b64ae 100644
--- a/pixelle_video/services/frame_processor.py
+++ b/pixelle_video/services/frame_processor.py
@@ -14,6 +14,10 @@
 Frame processor - Process single frame through complete pipeline
 
 Orchestrates: TTS → Image Generation → Frame Composition → Video Segment
+
+Key Feature:
+- TTS-driven video duration: the audio duration from TTS is passed to video generation workflows
+  as the target clip length, reducing the need for padding or trimming when audio and video are merged
 """
 
 from typing import Callable, Optional
@@ -193,14 +197,23 @@ class FrameProcessor:
         
         logger.debug(f"  → Media type: {media_type} (workflow: {workflow_name})")
         
-        # Call Media generation (with optional preset)
-        media_result = await self.core.media(
-            prompt=frame.image_prompt,
-            workflow=config.media_workflow,  # Pass workflow from config (None = use default)
-            media_type=media_type,
-            width=config.media_width,
-            height=config.media_height
-        )
+        # Build media generation parameters
+        media_params = {
+            "prompt": frame.image_prompt,
+            "workflow": config.media_workflow,  # Pass workflow from config (None = use default)
+            "media_type": media_type,
+            "width": config.media_width,
+            "height": config.media_height
+        }
+        
+        # For video workflows: pass the TTS audio duration as the target video duration,
+        # so the workflow can generate a clip that already matches the narration length
+        if is_video_workflow and frame.duration:
+            media_params["duration"] = frame.duration
+            logger.info(f"  → Generating video with target duration: {frame.duration:.2f}s (from TTS audio)")
+        
+        # Call Media generation
+        media_result = await self.core.media(**media_params)
         
         # Store media type
         frame.media_type = media_result.media_type
diff --git a/pixelle_video/services/media.py b/pixelle_video/services/media.py
index 75d9e33..d894339 100644
--- a/pixelle_video/services/media.py
+++ b/pixelle_video/services/media.py
@@ -119,6 +119,7 @@ class MediaService(ComfyBaseService):
         # Common workflow parameters
         width: Optional[int] = None,
         height: Optional[int] = None,
+        duration: Optional[float] = None,  # Video duration in seconds (for video workflows)
         negative_prompt: Optional[str] = None,
         steps: Optional[int] = None,
         seed: Optional[int] = None,
@@ -140,6 +141,7 @@ class MediaService(ComfyBaseService):
             runninghub_api_key: RunningHub API key (optional, overrides config)
             width: Media width
             height: Media height
+            duration: Target video duration in seconds (meant for video workflows, typically the TTS audio duration; forwarded to the workflow whenever it is not None)
             negative_prompt: Negative prompt
             steps: Sampling steps
             seed: Random seed
@@ -203,6 +205,10 @@ class MediaService(ComfyBaseService):
             workflow_params["width"] = width
         if height is not None:
             workflow_params["height"] = height
+        if duration is not None:
+            workflow_params["duration"] = duration
+            if media_type == "video":
+                logger.info(f"📏 Target video duration: {duration:.2f}s (from TTS audio)")
         if negative_prompt is not None:
             workflow_params["negative_prompt"] = negative_prompt
         if steps is not None:

From 02ef878e3bb2c11c2e2d36416068b54552a3e47c Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 00:56:24 +0800
Subject: [PATCH 04/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=A7=86=E9=A2=91?=
 =?UTF-8?q?=E9=9F=B3=E9=A2=91=E5=90=88=E5=B9=B6=E9=80=BB=E8=BE=91=EF=BC=8C?=
 =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=99=BA=E8=83=BD=E6=97=B6=E9=95=BF=E8=B0=83?=
 =?UTF-8?q?=E6=95=B4=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pixelle_video/services/video.py | 177 ++++++++++++++++++++++++++++++--
 1 file changed, 170 insertions(+), 7 deletions(-)

diff --git a/pixelle_video/services/video.py b/pixelle_video/services/video.py
index 5cbe31c..fa2a9f2 100644
--- a/pixelle_video/services/video.py
+++ b/pixelle_video/services/video.py
@@ -27,6 +27,7 @@ Note: Requires FFmpeg to be installed on the system.
 import os
 import shutil
 import tempfile
+import uuid
 from pathlib import Path
 from typing import List, Literal, Optional
 
@@ -316,12 +317,16 @@ class VideoService:
         audio_volume: float = 1.0,
         video_volume: float = 0.0,
         pad_strategy: str = "freeze",  # "freeze" (freeze last frame) or "black" (black screen)
+        auto_adjust_duration: bool = True,  # Automatically adjust video duration to match audio
+        duration_tolerance: float = 0.3,  # Tolerance for video being longer than audio (seconds)
     ) -> str:
         """
-        Merge audio with video, using the longer duration
+        Merge audio with video with intelligent duration adjustment
         
-        The output video duration will be the maximum of video and audio duration.
-        If audio is longer than video, the video will be padded using the specified strategy.
+        Automatically handles duration mismatches between video and audio:
+        - If video < audio: Pad video to match audio (avoid black screen)
+        - If video > audio (within tolerance): Keep as-is (acceptable)
+        - If video > audio (exceeds tolerance): Trim video to match audio
         
         Automatically handles videos with or without audio streams.
         - If video has no audio: adds the audio track
@@ -339,6 +344,9 @@ class VideoService:
             pad_strategy: Strategy to pad video if audio is longer
                          - "freeze": Freeze last frame (default)
                          - "black": Fill with black screen
+            auto_adjust_duration: Enable intelligent duration adjustment (default: True)
+            duration_tolerance: Tolerance for video being longer than audio in seconds (default: 0.3)
+                              Videos within this tolerance won't be trimmed
         
         Returns:
             Path to the output video file
@@ -361,6 +369,28 @@ class VideoService:
         
         logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s")
         
+        # Intelligent duration adjustment (if enabled)
+        if auto_adjust_duration:
+            diff = video_duration - audio_duration
+            
+            if diff < 0:
+                # Video shorter than audio → Must pad to avoid black screen
+                logger.warning(f"⚠️ Video shorter than audio by {abs(diff):.2f}s, padding required")
+                video = self._pad_video_to_duration(video, audio_duration, pad_strategy)
+                video_duration = audio_duration  # Update duration after padding
+                logger.info(f"📌 Padded video to {audio_duration:.2f}s")
+            
+            elif diff > duration_tolerance:
+                # Video significantly longer than audio → Trim
+                logger.info(f"⚠️ Video longer than audio by {diff:.2f}s (tolerance: {duration_tolerance}s)")
+                video = self._trim_video_to_duration(video, audio_duration)
+                video_duration = audio_duration  # Update duration after trimming
+                logger.info(f"✂️ Trimmed video to {audio_duration:.2f}s")
+            
+            else:  # 0 <= diff <= duration_tolerance
+                # Video slightly longer but within tolerance → Keep as-is
+                logger.info(f"✅ Duration acceptable: video={video_duration:.2f}s, audio={audio_duration:.2f}s (diff={diff:.2f}s)")
+        
         # Determine target duration (max of both)
         target_duration = max(video_duration, audio_duration)
         logger.info(f"Target output duration: {target_duration:.2f}s")
@@ -382,9 +412,6 @@ class VideoService:
                 video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
             else:  # black
                 # Generate black frames for padding duration
-                from pixelle_video.utils.os_util import get_temp_path
-                import os
-                
                 # Get video properties
                 probe = ffmpeg.probe(video)
                 video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
@@ -395,7 +422,7 @@ class VideoService:
                 fps = fps_num / fps_den if fps_den != 0 else 30
                 
                 # Create black video for padding
-                black_video_path = get_temp_path(f"black_pad_{os.path.basename(output)}")
+                black_video_path = self._get_unique_temp_path("black_pad", os.path.basename(output))
                 black_input = ffmpeg.input(
                     f'color=c=black:s={width}x{height}:r={fps}',
                     f='lavfi',
@@ -778,6 +805,26 @@ class VideoService:
             fade_in=0.0
         )
     
+    def _get_unique_temp_path(self, prefix: str, original_filename: str) -> str:
+        """
+        Build a collision-resistant file path inside the project temp folder
+
+        A random token in the name keeps concurrent tasks from clobbering each other.
+
+        Args:
+            prefix: Label placed first in the file name (e.g., "trimmed", "padded", "black_pad")
+            original_filename: File name appended after the random token
+
+        Returns:
+            Result of get_temp_path() for "{prefix}_{token}_{original_filename}",
+            where token is the first 8 hex characters of a random UUID4
+        """
+        from pixelle_video.utils.os_util import get_temp_path
+
+        token = uuid.uuid4().hex[:8]
+        temp_name = f"{prefix}_{token}_{original_filename}"
+        return get_temp_path(temp_name)
+    
     def _resolve_bgm_path(self, bgm_path: str) -> str:
         """
         Resolve BGM path (filename or custom path) with custom override support
@@ -841,4 +888,120 @@ class VideoService:
         except Exception as e:
             logger.warning(f"Failed to list BGM files: {e}")
             return []
+    
+    def _trim_video_to_duration(self, video: str, target_duration: float) -> str:
+        """
+        Trim video to the given duration using stream copy (no re-encoding)
+
+        Args:
+            video: Input video file path
+            target_duration: Maximum duration to keep, in seconds
+
+        Returns:
+            Path to trimmed video (temp file)
+
+        Raises:
+            RuntimeError: If FFmpeg execution fails
+        """
+        output = self._get_unique_temp_path("trimmed", os.path.basename(video))
+
+        try:
+            # Stream copy avoids re-encoding; acodec='copy' is harmless when no audio exists
+            (
+                ffmpeg
+                .input(video, t=target_duration)
+                .output(output, vcodec='copy', acodec='copy')
+                .overwrite_output()
+                .run(capture_stdout=True, capture_stderr=True, quiet=True)
+            )
+            return output
+        except ffmpeg.Error as e:
+            error_msg = e.stderr.decode(errors="replace") if e.stderr else str(e)
+            logger.error(f"FFmpeg error trimming video: {error_msg}")
+            raise RuntimeError(f"Failed to trim video: {error_msg}") from e
+    
+    def _pad_video_to_duration(self, video: str, target_duration: float, pad_strategy: str = "freeze") -> str:
+        """
+        Pad video to specified duration by extending the last frame or adding black frames
+
+        The result is re-encoded with libx264 and written to a new temp file.
+
+        Note:
+            Only the video stream is passed to the output, so any audio stream in
+            the input is not carried over into the padded file.
+
+        Args:
+            video: Input video file path
+            target_duration: Target duration in seconds
+            pad_strategy: Padding strategy - "freeze" (freeze last frame); any other
+                          value is handled as "black" (append black frames)
+
+        Returns:
+            Path to padded video (temp file), or the input path unchanged when the
+            video is already at least target_duration long
+
+        Raises:
+            RuntimeError: If FFmpeg execution fails
+        """
+        video_duration = self._get_video_duration(video)
+        # Positive only when the video is shorter than the target
+        pad_duration = target_duration - video_duration
+
+        if pad_duration <= 0:
+            # No padding needed, return original
+            return video
+
+        # Reserve the temp path only once padding is known to be required
+        output = self._get_unique_temp_path("padded", os.path.basename(video))
+
+        try:
+            input_video = ffmpeg.input(video)
+            video_stream = input_video.video
+
+            if pad_strategy == "freeze":
+                # Freeze last frame using tpad filter
+                video_stream = video_stream.filter(
+                    'tpad', stop_mode='clone', stop_duration=pad_duration
+                )
+            else:  # black
+                # Read size and frame rate from the source so the black clip matches it
+                probe = ffmpeg.probe(video)
+                video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
+                width = int(video_info['width'])
+                height = int(video_info['height'])
+                # r_frame_rate is a "num/den" string; fall back to 30 fps when den is 0
+                fps_str = video_info['r_frame_rate']
+                fps_num, fps_den = map(int, fps_str.split('/'))
+                fps = fps_num / fps_den if fps_den != 0 else 30
+
+                # Create black video for padding
+                black_input = ffmpeg.input(
+                    f'color=c=black:s={width}x{height}:r={fps}',
+                    f='lavfi',
+                    t=pad_duration
+                )
+
+                # Concatenate original video with black padding
+                video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)
+
+            # Single encode step shared by both strategies: each one routes the
+            # video through a filter, so stream copy is not possible here
+            (
+                ffmpeg
+                .output(
+                    video_stream,
+                    output,
+                    vcodec='libx264',
+                    preset='fast',
+                    crf=23
+                )
+                .overwrite_output()
+                .run(capture_stdout=True, capture_stderr=True, quiet=True)
+            )
+
+            return output
+        except ffmpeg.Error as e:
+            error_msg = e.stderr.decode(errors="replace") if e.stderr else str(e)
+            logger.error(f"FFmpeg error padding video: {error_msg}")
+            raise RuntimeError(f"Failed to pad video: {error_msg}") from e
 

From 3d4aea3b11549d49cffbdf0e5eb86028091b89ff Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 01:06:11 +0800
Subject: [PATCH 05/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=89=B9=E9=87=8F?=
 =?UTF-8?q?=E7=94=9F=E6=88=90=E5=90=8E=E6=9F=A5=E7=9C=8B=E5=8E=86=E5=8F=B2?=
 =?UTF-8?q?=E6=8C=89=E9=92=AE=E8=B7=B3=E8=BD=AC=E5=A4=B1=E6=95=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 web/components/output_preview.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/web/components/output_preview.py b/web/components/output_preview.py
index 203f291..6b0b8cc 100644
--- a/web/components/output_preview.py
+++ b/web/components/output_preview.py
@@ -372,13 +372,28 @@ def render_batch_output(pixelle_video, video_params):
             st.success(tr("batch.success_message"))
             st.info(tr("batch.view_in_history"))
             
-            # Button to go to History page
-            if st.button(
-                f"📚 {tr('batch.goto_history')}",
-                type="secondary",
-                use_container_width=True
-            ):
-                st.switch_page("pages/2_📚_History.py")
+            # Link styled as a button that opens the History page in a new tab (plain HTML anchor)
+            st.markdown(
+                f"""
+                <a href="/History" target="_blank">
+                    <button style="
+                        width: 100%;
+                        padding: 0.5rem 1rem;
+                        background-color: white;
+                        color: rgb(49, 51, 63);
+                        border: 1px solid rgba(49, 51, 63, 0.2);
+                        border-radius: 0.5rem;
+                        cursor: pointer;
+                        font-size: 1rem;
+                        font-weight: 400;
+                        text-align: center;
+                    ">
+                        📚 {tr('batch.goto_history')}
+                    </button>
+                </a>
+                """,
+                unsafe_allow_html=True
+            )
             
             # Show failed tasks if any
             if batch_result["errors"]:

From d018e24fd32d33fbde5f85df8ab37f09e78c15b6 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 01:08:40 +0800
Subject: [PATCH 06/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BD=E5=86=85docke?=
 =?UTF-8?q?r=E9=83=A8=E7=BD=B2=E7=9A=84=E9=95=9C=E5=83=8F=E9=80=BB?=
 =?UTF-8?q?=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b7ec2f3..da4785c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,16 +46,14 @@ RUN uv --version
 COPY pyproject.toml uv.lock README.md ./
 COPY pixelle_video ./pixelle_video
 
-# Install Python dependencies using uv with configurable index URL
-# Create uv.toml config file to force using the mirror (most reliable method)
-# Only create config when USE_CN_MIRROR=true, otherwise use default PyPI
-RUN if [ "$USE_CN_MIRROR" = "true" ]; then \
-        echo '[[index]]' > uv.toml && \
-        echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> uv.toml && \
-        echo 'default = true' >> uv.toml; \
-    fi && \
-    export UV_HTTP_TIMEOUT=300 && \
-    uv sync --frozen --no-dev
+# Install Python dependencies using uv pip install
+# Use -i flag to specify mirror when USE_CN_MIRROR=true
+RUN export UV_HTTP_TIMEOUT=300 && \
+    if [ "$USE_CN_MIRROR" = "true" ]; then \
+        uv pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \
+    else \
+        uv pip install -e .; \
+    fi
 
 # Copy rest of application code
 COPY api ./api

From 66d8061d4e3b349a8f688cbfc32fca2b5bcea5ad Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 01:10:23 +0800
Subject: [PATCH 07/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BD=E5=86=85docke?=
 =?UTF-8?q?r=E9=83=A8=E7=BD=B2=E7=9A=84=E9=95=9C=E5=83=8F=E9=80=BB?=
 =?UTF-8?q?=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index da4785c..6d4c470 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -47,12 +47,12 @@ COPY pyproject.toml uv.lock README.md ./
 COPY pixelle_video ./pixelle_video
 
 # Install Python dependencies using uv pip install
-# Use -i flag to specify mirror when USE_CN_MIRROR=true
+# Use --system flag for Docker environment, -i flag to specify mirror when USE_CN_MIRROR=true
 RUN export UV_HTTP_TIMEOUT=300 && \
     if [ "$USE_CN_MIRROR" = "true" ]; then \
-        uv pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \
+        uv pip install --system -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \
     else \
-        uv pip install -e .; \
+        uv pip install --system -e .; \
     fi
 
 # Copy rest of application code

From a29b8556c8dc71e42e84d40f00aa4cc13474c667 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 01:22:07 +0800
Subject: [PATCH 08/12] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BD=E5=86=85docke?=
 =?UTF-8?q?r=E9=83=A8=E7=BD=B2=E7=9A=84=E9=95=9C=E5=83=8F=E9=80=BB?=
 =?UTF-8?q?=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 6d4c470..fd9ed7c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,13 +46,14 @@ RUN uv --version
 COPY pyproject.toml uv.lock README.md ./
 COPY pixelle_video ./pixelle_video
 
-# Install Python dependencies using uv pip install
-# Use --system flag for Docker environment, -i flag to specify mirror when USE_CN_MIRROR=true
+# Create virtual environment and install dependencies
+# Use -i flag to specify mirror when USE_CN_MIRROR=true
 RUN export UV_HTTP_TIMEOUT=300 && \
+    uv venv && \
     if [ "$USE_CN_MIRROR" = "true" ]; then \
-        uv pip install --system -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \
+        uv pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \
     else \
-        uv pip install --system -e .; \
+        uv pip install -e .; \
     fi
 
 # Copy rest of application code

From bbaa153b9e611d2c9ddad3c0d082a05d53f84116 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 01:28:08 +0800
Subject: [PATCH 09/12] =?UTF-8?q?Docker=E6=B7=BB=E5=8A=A0temp=E7=9B=AE?=
 =?UTF-8?q?=E5=BD=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index fd9ed7c..976d0fa 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -64,8 +64,8 @@ COPY templates ./templates
 COPY workflows ./workflows
 COPY resources ./resources
 
-# Create output and data directories
-RUN mkdir -p /app/output /app/data
+# Create output, data and temp directories
+RUN mkdir -p /app/output /app/data /app/temp
 
 # Set environment variables for html2image to use chromium
 ENV BROWSER_EXECUTABLE_PATH=/usr/bin/chromium

From 8310183405b8177bbf568b3fa7f09ec7fd9cffbe Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 10:47:02 +0800
Subject: [PATCH 10/12] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=9C=AC=E5=9C=B0Comfy?=
 =?UTF-8?q?UI=E7=9A=84index-tts2=E5=B7=A5=E4=BD=9C=E6=B5=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 workflows/selfhost/tts_index2.json | 64 ++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 workflows/selfhost/tts_index2.json

diff --git a/workflows/selfhost/tts_index2.json b/workflows/selfhost/tts_index2.json
new file mode 100644
index 0000000..fa72053
--- /dev/null
+++ b/workflows/selfhost/tts_index2.json
@@ -0,0 +1,64 @@
+{
+  "3": {
+    "inputs": {
+      "text": "床前明月光，疑是地上霜。"
+    },
+    "class_type": "Text _O",
+    "_meta": {
+      "title": "$text.text!"
+    }
+  },
+  "5": {
+    "inputs": {
+      "text": [
+        "3",
+        0
+      ],
+      "mode": "Auto",
+      "do_sample_mode": "on",
+      "temperature": 0.8,
+      "top_p": 0.9,
+      "top_k": 30,
+      "num_beams": 3,
+      "repetition_penalty": 10,
+      "length_penalty": 0,
+      "max_mel_tokens": 1815,
+      "max_tokens_per_sentence": 120,
+      "seed": 4266796044,
+      "reference_audio": [
+        "12",
+        0
+      ]
+    },
+    "class_type": "IndexTTS2BaseNode",
+    "_meta": {
+      "title": "Index TTS 2 - Base"
+    }
+  },
+  "8": {
+    "inputs": {
+      "filename_prefix": "audio/ComfyUI",
+      "quality": "V0",
+      "audioUI": "",
+      "audio": [
+        "5",
+        0
+      ]
+    },
+    "class_type": "SaveAudioMP3",
+    "_meta": {
+      "title": "Save Audio (MP3)"
+    }
+  },
+  "12": {
+    "inputs": {
+      "audio": "小裴钱.wav",
+      "start_time": 0,
+      "duration": 0
+    },
+    "class_type": "VHS_LoadAudioUpload",
+    "_meta": {
+      "title": "$ref_audio.audio"
+    }
+  }
+}
\ No newline at end of file

From 910d0cdf99ec6fbc910cc199fae97e3070a6c124 Mon Sep 17 00:00:00 2001
From: xianshi-yyds <150135158+xianshi-yyds@users.noreply.github.com>
Date: Fri, 21 Nov 2025 10:51:49 +0800
Subject: [PATCH 11/12] add wan2.2 workflow

---
 workflows/runninghub/video_wan2.2.json | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 workflows/runninghub/video_wan2.2.json

diff --git a/workflows/runninghub/video_wan2.2.json b/workflows/runninghub/video_wan2.2.json
new file mode 100644
index 0000000..6aff063
--- /dev/null
+++ b/workflows/runninghub/video_wan2.2.json
@@ -0,0 +1,4 @@
+{
+  "source": "runninghub",
+  "workflow_id": "1991693844100100097"
+}
\ No newline at end of file

From a9e12d539b3ce45e96be520999b32958d4c7cc6c Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Fri, 21 Nov 2025 11:22:06 +0800
Subject: [PATCH 12/12] =?UTF-8?q?=E7=A1=AE=E4=BF=9D=E4=B8=B4=E6=97=B6?=
 =?UTF-8?q?=E3=80=81=E6=95=B0=E6=8D=AE=E5=92=8C=E8=BE=93=E5=87=BA=E7=9B=AE?=
 =?UTF-8?q?=E5=BD=95=E5=9C=A8=E8=BF=94=E5=9B=9E=E8=B7=AF=E5=BE=84=E4=B9=8B?=
 =?UTF-8?q?=E5=89=8D=E5=AD=98=E5=9C=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pixelle_video/utils/os_util.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pixelle_video/utils/os_util.py b/pixelle_video/utils/os_util.py
index 12c26dc..bbec0ce 100644
--- a/pixelle_video/utils/os_util.py
+++ b/pixelle_video/utils/os_util.py
@@ -83,6 +83,8 @@ def get_temp_path(*paths: str) -> str:
     """
     Get path relative to Pixelle-Video temp folder
     
+    Ensures temp directory exists before returning path.
+    
     Args:
         *paths: Path components to join
     
@@ -94,6 +96,10 @@ def get_temp_path(*paths: str) -> str:
         # Returns: "/path/to/project/temp/audio.mp3"
     """
     temp_path = get_root_path("temp")
+    
+    # Ensure temp directory exists
+    os.makedirs(temp_path, exist_ok=True)
+    
     if paths:
         return os.path.join(temp_path, *paths)
     return temp_path
@@ -102,6 +108,8 @@ def get_temp_path(*paths: str) -> str:
 def get_data_path(*paths: str) -> str:
     """
     Get path relative to Pixelle-Video data folder
+
+    Ensures data directory exists before returning path.
     
     Args:
         *paths: Path components to join
@@ -114,6 +122,10 @@ def get_data_path(*paths: str) -> str:
         # Returns: "/path/to/project/data/videos/output.mp4"
     """
     data_path = get_root_path("data")
+
+    # Ensure data directory exists
+    os.makedirs(data_path, exist_ok=True)
+    
     if paths:
         return os.path.join(data_path, *paths)
     return data_path
@@ -122,6 +134,8 @@ def get_data_path(*paths: str) -> str:
 def get_output_path(*paths: str) -> str:
     """
     Get path relative to Pixelle-Video output folder
+
+    Ensures output directory exists before returning path.
     
     Args:
         *paths: Path components to join
@@ -134,6 +148,10 @@ def get_output_path(*paths: str) -> str:
         # Returns: "/path/to/project/output/video.mp4"
     """
     output_path = get_root_path("output")
+
+    # Ensure output directory exists
+    os.makedirs(output_path, exist_ok=True)
+    
     if paths:
         return os.path.join(output_path, *paths)
     return output_path