From 7443cbf9c2de4fc0db5a4f91984c0f6c5593d724 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Wed, 12 Nov 2025 17:19:06 +0800
Subject: [PATCH] =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E4=B8=AD=E5=AA=92=E4=BD=93?=
 =?UTF-8?q?=E5=B0=BA=E5=AF=B8=E6=94=B9=E4=B8=BA=E9=A2=84=E7=BD=AE=E6=96=B9?=
 =?UTF-8?q?=E6=A1=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/routers/video.py                          |   6 +-
 api/schemas/video.py                          |   3 +-
 pixelle_video/pipelines/custom.py             |   7 +-
 pixelle_video/pipelines/standard.py           |  16 +-
 pixelle_video/services/frame_html.py          |  52 +++++
 pixelle_video/services/video.py               |  59 ++++--
 pyproject.toml                                |   1 +
 templates/1080x1080/minimal_framed.html       |   2 +
 templates/1080x1920/blur_card.html            |   2 +
 templates/1080x1920/cartoon.html              |   2 +
 templates/1080x1920/default.html              |   2 +
 templates/1080x1920/elegant.html              |   2 +
 templates/1080x1920/fashion_vintage.html      |   2 +
 templates/1080x1920/full.html                 |   2 +
 templates/1080x1920/life_insights.html        |   2 +
 templates/1080x1920/modern.html               |   2 +
 templates/1080x1920/neon.html                 |   2 +
 templates/1080x1920/psychology_card.html      |   2 +
 templates/1080x1920/purple.html               |   2 +
 templates/1080x1920/simple.html               |   2 +
 templates/1080x1920/video_simple.html         | 185 +++++++++++++++++
 templates/1920x1080/film.html                 |   2 +
 templates/1920x1080/full.html                 |   2 +
 templates/1920x1080/ultrawide_minimal.html    |   2 +
 templates/1920x1080/wide_darktech.html        |   2 +
 uv.lock                                       |  24 +++
 web/app.py                                    |  61 ++----
 web/i18n/locales/en_US.json                   |  13 +-
 web/i18n/locales/zh_CN.json                   |  13 +-
 .../runninghub/video_wan2.1_fusionx.json      |   5 +
 workflows/selfhost/video_wan2.1_fusionx.json  | 187 ++++++++++++++++++
 31 files changed, 576 insertions(+), 90 deletions(-)
 create mode 100644 templates/1080x1920/video_simple.html
 create mode 100644 workflows/runninghub/video_wan2.1_fusionx.json
 create mode 100644 workflows/selfhost/video_wan2.1_fusionx.json

diff --git a/api/routers/video.py b/api/routers/video.py
index e7a47cd..207e3c2 100644
--- a/api/routers/video.py
+++ b/api/routers/video.py
@@ -73,8 +73,7 @@ async def generate_video_sync(
             "max_narration_words": request_body.max_narration_words,
             "min_image_prompt_words": request_body.min_image_prompt_words,
             "max_image_prompt_words": request_body.max_image_prompt_words,
-            "image_width": request_body.image_width,
-            "image_height": request_body.image_height,
+            # Note: image_width and image_height are now auto-determined from template
             "image_workflow": request_body.image_workflow,
             "video_fps": request_body.video_fps,
             "frame_template": request_body.frame_template,
@@ -161,8 +160,7 @@ async def generate_video_async(
                 "max_narration_words": request_body.max_narration_words,
                 "min_image_prompt_words": request_body.min_image_prompt_words,
                 "max_image_prompt_words": request_body.max_image_prompt_words,
-                "image_width": request_body.image_width,
-                "image_height": request_body.image_height,
+                # Note: image_width and image_height are now auto-determined from template
                 "image_workflow": request_body.image_workflow,
                 "video_fps": request_body.video_fps,
                 "frame_template": request_body.frame_template,
diff --git a/api/schemas/video.py b/api/schemas/video.py
index 93070f9..d37dd80 100644
--- a/api/schemas/video.py
+++ b/api/schemas/video.py
@@ -57,8 +57,7 @@ class VideoGenerateRequest(BaseModel):
     max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
     
     # === Image Parameters ===
-    image_width: int = Field(1024, description="Image width")
-    image_height: int = Field(1024, description="Image height")
+    # Note: image_width and image_height are now auto-determined from template meta tags
     image_workflow: Optional[str] = Field(None, description="Custom image workflow")
     
     # === Video Parameters ===
diff --git a/pixelle_video/pipelines/custom.py b/pixelle_video/pipelines/custom.py
index e1779c4..ce6dbe3 100644
--- a/pixelle_video/pipelines/custom.py
+++ b/pixelle_video/pipelines/custom.py
@@ -92,8 +92,7 @@ class CustomPipeline(BasePipeline):
         ref_audio: Optional[str] = None,
         
         image_workflow: Optional[str] = None,
-        image_width: int = 1024,
-        image_height: int = 1024,
+        # Note: image_width and image_height are now auto-determined from template
         
         frame_template: Optional[str] = None,
         video_fps: int = 30,
@@ -161,6 +160,10 @@ class CustomPipeline(BasePipeline):
         generator = HTMLFrameGenerator(template_path)
         template_requires_image = generator.requires_image()
         
+        # Read media size from template meta tags
+        image_width, image_height = generator.get_media_size()
+        logger.info(f"📐 Media size from template: {image_width}x{image_height}")
+        
         if template_requires_image:
             logger.info(f"📸 Template requires image generation")
         else:
diff --git a/pixelle_video/pipelines/standard.py b/pixelle_video/pipelines/standard.py
index 5864659..f57b9ce 100644
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -94,8 +94,7 @@ class StandardPipeline(BasePipeline):
         max_image_prompt_words: int = 60,
         
         # === Image Parameters ===
-        image_width: int = 1024,
-        image_height: int = 1024,
+        # Note: image_width and image_height are now auto-determined from template meta tags
         image_workflow: Optional[str] = None,
         
         # === Video Parameters ===
@@ -151,9 +150,8 @@ class StandardPipeline(BasePipeline):
             min_image_prompt_words: Min image prompt length
             max_image_prompt_words: Max image prompt length
             
-            image_width: Generated image width (default 1024)
-            image_height: Generated image height (default 1024)
             image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
+                           Note: Image/video size is now auto-determined from template meta tags
             
             video_fps: Video frame rate (default 30)
             
@@ -239,6 +237,16 @@ class StandardPipeline(BasePipeline):
             template_config = self.core.config.get("template", {})
             frame_template = template_config.get("default_template", "1080x1920/default.html")
         
+        # Read media size from template meta tags
+        from pixelle_video.services.frame_html import HTMLFrameGenerator
+        from pixelle_video.utils.template_util import resolve_template_path
+        
+        template_path = resolve_template_path(frame_template)
+        temp_generator = HTMLFrameGenerator(template_path)
+        image_width, image_height = temp_generator.get_media_size()
+        
+        logger.info(f"📐 Media size from template: {image_width}x{image_height}")
+        
         # Create storyboard config
         config = StoryboardConfig(
             task_id=task_id,
diff --git a/pixelle_video/services/frame_html.py b/pixelle_video/services/frame_html.py
index 4efd02d..7629864 100644
--- a/pixelle_video/services/frame_html.py
+++ b/pixelle_video/services/frame_html.py
@@ -141,6 +141,58 @@ class HTMLFrameGenerator:
         logger.debug(f"Template loaded: {len(content)} chars")
         return content
     
+    def _parse_media_size_from_meta(self) -> tuple[Optional[int], Optional[int]]:
+        """
+        Parse media size from meta tags in template
+        
+        Looks for meta tags:
+        - <meta name="template:media-width" content="1024">
+        - <meta name="template:media-height" content="1024">
+        
+        Returns:
+            Tuple of (width, height), or (None, None) if either tag is missing, non-positive, or unparsable
+        """
+        from bs4 import BeautifulSoup
+        
+        try:
+            soup = BeautifulSoup(self.template, 'html.parser')
+            
+            # Find width and height meta tags
+            width_meta = soup.find('meta', attrs={'name': 'template:media-width'})
+            height_meta = soup.find('meta', attrs={'name': 'template:media-height'})
+            
+            if width_meta and height_meta:
+                width = int(width_meta.get('content', 0))
+                height = int(height_meta.get('content', 0))
+                
+                if width > 0 and height > 0:
+                    logger.debug(f"Found media size in meta tags: {width}x{height}")
+                    return width, height
+            
+            return None, None
+            
+        except Exception as e:
+            logger.warning(f"Failed to parse media size from meta tags: {e}")
+            return None, None
+    
+    def get_media_size(self) -> tuple[int, int]:
+        """
+        Get media size for image/video generation
+        
+        Returns the size declared in the template meta tags, or 1024x1024 if they are missing or invalid.
+        
+        Returns:
+            Tuple of (width, height)
+        """
+        media_width, media_height = self._parse_media_size_from_meta()
+        
+        if media_width and media_height:
+            return media_width, media_height
+        
+        # Fallback to default if not specified (should not happen with properly configured templates)
+        logger.warning(f"No media size meta tags found in template {self.template_path}, using fallback 1024x1024")
+        return 1024, 1024
+    
     def parse_template_parameters(self) -> Dict[str, Dict[str, Any]]:
         """
         Parse custom parameters from HTML template
diff --git a/pixelle_video/services/video.py b/pixelle_video/services/video.py
index 35e7a56..5cbe31c 100644
--- a/pixelle_video/services/video.py
+++ b/pixelle_video/services/video.py
@@ -224,20 +224,43 @@ class VideoService:
                    -map "[v]" -map "[a]" output.mp4
         """
         try:
-            inputs = [ffmpeg.input(v) for v in videos]
-            (
-                ffmpeg
-                .concat(*inputs, v=1, a=1)
-                .output(output)
-                .overwrite_output()
-                .run(capture_stdout=True, capture_stderr=True)
+            # Build filter_complex string manually
+            n = len(videos)
+            
+            # Build input stream labels: [0:v][0:a][1:v][1:a]...
+            stream_spec = "".join([f"[{i}:v][{i}:a]" for i in range(n)])
+            filter_complex = f"{stream_spec}concat=n={n}:v=1:a=1[v][a]"
+            
+            # Build ffmpeg command
+            cmd = ['ffmpeg']
+            for video in videos:
+                cmd.extend(['-i', video])
+            cmd.extend([
+                '-filter_complex', filter_complex,
+                '-map', '[v]',
+                '-map', '[a]',
+                '-y',  # Overwrite output
+                output
+            ])
+            
+            # Run command
+            import subprocess
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                check=True
             )
+            
             logger.success(f"Videos concatenated successfully: {output}")
             return output
-        except ffmpeg.Error as e:
-            error_msg = e.stderr.decode() if e.stderr else str(e)
+        except subprocess.CalledProcessError as e:
+            error_msg = e.stderr if e.stderr else str(e)
             logger.error(f"FFmpeg concat filter error: {error_msg}")
             raise RuntimeError(f"Failed to concatenate videos: {error_msg}")
+        except Exception as e:
+            logger.error(f"Concatenation error: {e}")
+            raise RuntimeError(f"Failed to concatenate videos: {e}")
     
     def _get_video_duration(self, video: str) -> float:
         """Get video duration in seconds"""
@@ -382,10 +405,17 @@ class VideoService:
                 # Concatenate original video with black padding
                 video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)
         
-        # Prepare audio stream
+        # Prepare audio stream (pad if needed to match target duration)
         input_audio = ffmpeg.input(audio)
         audio_stream = input_audio.audio.filter('volume', audio_volume)
         
+        # Pad audio with silence if video is longer
+        if video_duration > audio_duration:
+            pad_duration = video_duration - audio_duration
+            logger.info(f"Video is longer, padding audio with {pad_duration:.2f}s silence")
+            # apad's whole_dur is the total output length, so silence is appended until target_duration
+            audio_stream = audio_stream.filter('apad', whole_dur=target_duration)
+        
         if not video_has_audio:
             logger.info(f"Video has no audio stream, adding audio track")
             # Video is silent, just add the audio
@@ -398,8 +428,7 @@ class VideoService:
                         output,
                         vcodec='libx264',  # Re-encode video if padded
                         acodec='aac',
-                        audio_bitrate='192k',
-                        t=target_duration  # Trim to target duration
+                        audio_bitrate='192k'
                     )
                     .overwrite_output()
                     .run(capture_stdout=True, capture_stderr=True)
@@ -426,8 +455,7 @@ class VideoService:
                         output,
                         vcodec='libx264',  # Re-encode video if padded
                         acodec='aac',
-                        audio_bitrate='192k',
-                        t=target_duration  # Trim to target duration
+                        audio_bitrate='192k'
                     )
                     .overwrite_output()
                     .run(capture_stdout=True, capture_stderr=True)
@@ -452,8 +480,7 @@ class VideoService:
                         output,
                         vcodec='libx264',  # Re-encode video if padded
                         acodec='aac',
-                        audio_bitrate='192k',
-                        t=target_duration  # Trim to target duration
+                        audio_bitrate='192k'
                     )
                     .overwrite_output()
                     .run(capture_stdout=True, capture_stderr=True)
diff --git a/pyproject.toml b/pyproject.toml
index 07c7eb7..d98dda3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "uvicorn[standard]>=0.32.0",
     "python-multipart>=0.0.12",
     "comfykit>=0.1.9",
+    "beautifulsoup4>=4.14.2",
 ]
 
 [project.optional-dependencies]
diff --git a/templates/1080x1080/minimal_framed.html b/templates/1080x1080/minimal_framed.html
index 5e8f20a..99a7212 100644
--- a/templates/1080x1080/minimal_framed.html
+++ b/templates/1080x1080/minimal_framed.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1080, height=1080">
     <title>极简边框风格 - 1080x1080</title>
     <style>
diff --git a/templates/1080x1920/blur_card.html b/templates/1080x1920/blur_card.html
index 2986fd5..5848f53 100644
--- a/templates/1080x1920/blur_card.html
+++ b/templates/1080x1920/blur_card.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1080, height=1920">
     <title>模糊背景卡片 - 1080x1920</title>
     <!-- Google Fonts - 中文字体 -->
diff --git a/templates/1080x1920/cartoon.html b/templates/1080x1920/cartoon.html
index 41dbf4a..2d6e742 100644
--- a/templates/1080x1920/cartoon.html
+++ b/templates/1080x1920/cartoon.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>{{title}}</title>
     <style>
diff --git a/templates/1080x1920/default.html b/templates/1080x1920/default.html
index 4fb40c8..2e0b26d 100644
--- a/templates/1080x1920/default.html
+++ b/templates/1080x1920/default.html
@@ -2,6 +2,8 @@
 <html>
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <style>
         html {
             margin: 0;
diff --git a/templates/1080x1920/elegant.html b/templates/1080x1920/elegant.html
index 04dc405..985899f 100644
--- a/templates/1080x1920/elegant.html
+++ b/templates/1080x1920/elegant.html
@@ -2,6 +2,8 @@
 <html>
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <style>
         html {
             margin: 0;
diff --git a/templates/1080x1920/fashion_vintage.html b/templates/1080x1920/fashion_vintage.html
index 7971dde..d78e4d6 100644
--- a/templates/1080x1920/fashion_vintage.html
+++ b/templates/1080x1920/fashion_vintage.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1080, height=1920">
     <title>时尚复古风格 - 1080x1920</title>
     <style>
diff --git a/templates/1080x1920/full.html b/templates/1080x1920/full.html
index 59b1582..2ff1472 100644
--- a/templates/1080x1920/full.html
+++ b/templates/1080x1920/full.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1080, height=1920">
     <title>全屏图片 - 1080x1920</title>
     <!-- Google Fonts - 中文字体 -->
diff --git a/templates/1080x1920/life_insights.html b/templates/1080x1920/life_insights.html
index 98b1aa0..716c003 100644
--- a/templates/1080x1920/life_insights.html
+++ b/templates/1080x1920/life_insights.html
@@ -2,6 +2,8 @@
 <html>
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <link rel="preconnect" href="https://fonts.googleapis.com">
     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
     <link href="https://fonts.googleapis.com/css2?family=Ma+Shan+Zheng&family=ZCOOL+KuaiLe&display=swap" rel="stylesheet">
diff --git a/templates/1080x1920/modern.html b/templates/1080x1920/modern.html
index be16a96..de4217a 100644
--- a/templates/1080x1920/modern.html
+++ b/templates/1080x1920/modern.html
@@ -2,6 +2,8 @@
 <html>
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <style>
         html {
             margin: 0;
diff --git a/templates/1080x1920/neon.html b/templates/1080x1920/neon.html
index 2694607..b1154b5 100644
--- a/templates/1080x1920/neon.html
+++ b/templates/1080x1920/neon.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
   <meta charset="UTF-8" />
+  <meta name="template:media-width" content="1024">
+  <meta name="template:media-height" content="1024">
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <title>{{title}}</title>
   <style>
diff --git a/templates/1080x1920/psychology_card.html b/templates/1080x1920/psychology_card.html
index 91aded2..6c7ee3f 100644
--- a/templates/1080x1920/psychology_card.html
+++ b/templates/1080x1920/psychology_card.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1080, height=1920">
     <title>心理卡片风 - 1080x1920</title>
     <style>
diff --git a/templates/1080x1920/purple.html b/templates/1080x1920/purple.html
index f159034..6d73ebf 100644
--- a/templates/1080x1920/purple.html
+++ b/templates/1080x1920/purple.html
@@ -2,6 +2,8 @@
 <html>
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <style>
         html {
             margin: 0;
diff --git a/templates/1080x1920/simple.html b/templates/1080x1920/simple.html
index 18a05ec..5d8f496 100644
--- a/templates/1080x1920/simple.html
+++ b/templates/1080x1920/simple.html
@@ -2,6 +2,8 @@
 <html>
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <style>
         html {
             margin: 0;
diff --git a/templates/1080x1920/video_simple.html b/templates/1080x1920/video_simple.html
new file mode 100644
index 0000000..d686000
--- /dev/null
+++ b/templates/1080x1920/video_simple.html
@@ -0,0 +1,185 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="template:media-width" content="512">
+    <meta name="template:media-height" content="288">
+    <style>
+        html {
+            margin: 0;
+            padding: 0;
+            height: 100%;
+        }
+        
+        body {
+            margin: 0;
+            padding: 0;
+            width: 100%;
+            height: 100vh;
+            font-family: 'PingFang SC', 'Source Han Sans', 'Microsoft YaHei', sans-serif;
+            overflow: hidden;
+            /* background-color: #000; */
+            display: flex;
+            justify-content: center;
+            align-items: center;
+        }
+        
+        /* 主容器 - 居中并包含所有内容 */
+        .main-container {
+            position: relative;
+            width: 1080px;
+            height: 1920px;
+        }
+        
+        /* Background image layer (customizable using <img> tag) */
+        .background-image {
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            z-index: 0;
+        }
+        
+        /* Video overlay - 相对于main-container居中 */
+        .video-overlay {
+            position: absolute;
+            top: 50%;
+            left: 50%;
+            transform: translate(-50%, -50%);
+            width: 1080px;
+            height: 607px;
+            /* background: #f00; */
+            z-index: 1;
+        }
+        
+        /* Title section - positioned above video */
+        .video-title-wrapper {
+            position: absolute;
+            top: calc(50% - 607px / 2 - 130px);
+            left: 50%;
+            transform: translateX(-50%);
+            max-width: 900px;
+            width: 900px;
+            text-align: center;
+            z-index: 2;
+        }
+        
+        .video-title {
+            font-size: 72px;
+            font-weight: 700;
+            color: #ffffff;
+            line-height: 1.3;
+            letter-spacing: 3px;
+            text-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
+            margin-bottom: 20px;
+        }
+        
+        /* 字幕区域 - 对齐视频底部 */
+        .content {
+            position: absolute;
+            bottom: calc(50% - 607px / 2 + 0px);
+            left: 50%;
+            transform: translateX(-50%);
+            width: 900px;
+            z-index: 4;
+        }
+        
+        .text {
+            font-size: 40px;
+            color: #ffffff;
+            text-align: center;
+            line-height: 1.6;
+            font-weight: 500;
+            text-shadow: 
+                2px 2px 4px rgba(0, 0, 0, 0.9),
+                0 0 8px rgba(0, 0, 0, 0.8),
+                0 0 16px rgba(0, 0, 0, 0.6);
+            padding: 10px 0px;
+            /* background-color: aqua; */
+        }
+        
+        /* Footer - positioned below video */
+        .footer {
+            position: absolute;
+            top: calc(50% + 607px / 2 + 50px);
+            left: 50%;
+            transform: translateX(-50%);
+            width: 900px;
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            padding-top: 40px;
+            border-top: 2px solid rgba(255, 255, 255, 0.3);
+            z-index: 2;
+        }
+        
+        .author-section {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+        }
+        
+        .author {
+            font-size: 32px;
+            font-weight: 600;
+            color: #ffffff;
+            text-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
+        }
+        
+        .author-desc {
+            font-size: 24px;
+            color: rgba(255, 255, 255, 0.9);
+            font-weight: 400;
+        }
+        
+        .logo-section {
+            display: flex;
+            flex-direction: column;
+            align-items: flex-end;
+            gap: 10px;
+        }
+        
+        .logo {
+            font-size: 28px;
+            font-weight: 600;
+            color: #ffffff;
+            letter-spacing: 2px;
+            text-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
+        }
+    </style>
+</head>
+<body>
+    <!-- 主容器 - 所有元素都在这里面，相对于video-overlay定位 -->
+    <div class="main-container">
+        <!-- Background image layer (customizable via background parameter) -->
+        <div class="background-image">
+            
+        </div>
+        
+        <!-- Video overlay - 居中参考点 -->
+        <div class="video-overlay"></div>
+        
+        <!-- Video title - positioned above video -->
+        <div class="video-title-wrapper">
+            <div class="video-title">{{title}}</div>
+        </div>
+        
+        <!-- 字幕区域 - 独立定位在视频底部 -->
+        <div class="content">
+            <div class="text">{{text}}</div>
+        </div>
+        
+        <!-- Footer - positioned below video -->
+        <div class="footer">
+            <div class="author-section">
+                <div class="author">{{author=@Pixelle.AI}}</div>
+                <div class="author-desc">{{describe=Open Source Omnimodal AI Creative Agent}}</div>
+            </div>
+            <div class="logo-section">
+                <div class="logo">{{brand=Pixelle-Video}}</div>
+            </div>
+        </div>
+    </div>
+</body>
+</html>
\ No newline at end of file
diff --git a/templates/1920x1080/film.html b/templates/1920x1080/film.html
index 917fa0c..a16bd41 100644
--- a/templates/1920x1080/film.html
+++ b/templates/1920x1080/film.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1920, height=1080">
     <title>视频模板 - 电影风格</title>
     <style>
diff --git a/templates/1920x1080/full.html b/templates/1920x1080/full.html
index 8ccb57f..9b6e193 100644
--- a/templates/1920x1080/full.html
+++ b/templates/1920x1080/full.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1920, height=1080">
     <title>全屏图片 - 1920x1080</title>
     <!-- Google Fonts - 中文字体 -->
diff --git a/templates/1920x1080/ultrawide_minimal.html b/templates/1920x1080/ultrawide_minimal.html
index 76839f7..7dea2ae 100644
--- a/templates/1920x1080/ultrawide_minimal.html
+++ b/templates/1920x1080/ultrawide_minimal.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1920, height=1080">
     <title>视频模板 - 极简风格</title>
     <style>
diff --git a/templates/1920x1080/wide_darktech.html b/templates/1920x1080/wide_darktech.html
index a6e54af..4098a4e 100644
--- a/templates/1920x1080/wide_darktech.html
+++ b/templates/1920x1080/wide_darktech.html
@@ -2,6 +2,8 @@
 <html lang="zh-CN">
 <head>
     <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
     <meta name="viewport" content="width=1920, height=1080">
     <title>视频模板 - 横屏科技风格</title>
     <style>
diff --git a/uv.lock b/uv.lock
index 98b1625..c7342c8 100644
--- a/uv.lock
+++ b/uv.lock
@@ -226,6 +226,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2f/eb/f25ad1a7726b2fe21005c3580b35fa7bfe09646faf7c8f41867747987a35/beartype-0.22.4-py3-none-any.whl", hash = "sha256:7967a1cee01fee42e47da69c58c92da10ba5bcfb8072686e48487be5201e3d10", size = 1318387 },
 ]
 
+[[package]]
+name = "beautifulsoup4"
+version = "4.14.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "soupsieve" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392 },
+]
+
 [[package]]
 name = "blinker"
 version = "1.9.0"
@@ -1653,6 +1666,7 @@ name = "pixelle-video"
 version = "0.1.2"
 source = { editable = "." }
 dependencies = [
+    { name = "beautifulsoup4" },
     { name = "certifi" },
     { name = "comfykit" },
     { name = "edge-tts" },
@@ -1680,6 +1694,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "beautifulsoup4", specifier = ">=4.14.2" },
     { name = "certifi", specifier = ">=2025.10.5" },
     { name = "comfykit", specifier = ">=0.1.9" },
     { name = "edge-tts", specifier = ">=7.2.3" },
@@ -2461,6 +2476,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
 ]
 
+[[package]]
+name = "soupsieve"
+version = "2.8"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679 },
+]
+
 [[package]]
 name = "sse-starlette"
 version = "3.0.3"
diff --git a/web/app.py b/web/app.py
index 8b26d2e..f39a8b1 100644
--- a/web/app.py
+++ b/web/app.py
@@ -782,6 +782,11 @@ def main():
             generator_for_params = HTMLFrameGenerator(template_path_for_params)
             custom_params_for_video = generator_for_params.parse_template_parameters()
             
+            # Get media size from template (for image/video generation)
+            media_width, media_height = generator_for_params.get_media_size()
+            st.session_state['template_media_width'] = media_width
+            st.session_state['template_media_height'] = media_height
+            
             # Detect template media type
             from pathlib import Path
             template_name = Path(frame_template).name
@@ -1023,43 +1028,18 @@ def main():
                 else:
                     workflow_key = "runninghub/image_flux.json"  # fallback
             
+                # Get media size from template
+                image_width = st.session_state.get('template_media_width', 1024)
+                image_height = st.session_state.get('template_media_height', 1024)
+                
+                # Display media size info (read-only)
+                if template_media_type == "video":
+                    size_info_text = tr('style.video_size_info', width=image_width, height=image_height)
+                else:
+                    size_info_text = tr('style.image_size_info', width=image_width, height=image_height)
+                st.info(f"📐 {size_info_text}")
             
-                # 2. Media size input
-                col1, col2 = st.columns(2)
-                with col1:
-                    if template_media_type == "video":
-                        width_label = tr('style.video_width')
-                        width_help = tr('style.video_width_help')
-                    else:
-                        width_label = tr('style.image_width')
-                        width_help = tr('style.image_width_help')
-                    
-                    image_width = st.number_input(
-                        width_label,
-                        min_value=128,
-                        value=1024,
-                        step=1,
-                        label_visibility="visible",
-                        help=width_help
-                    )
-                with col2:
-                    if template_media_type == "video":
-                        height_label = tr('style.video_height')
-                        height_help = tr('style.video_height_help')
-                    else:
-                        height_label = tr('style.image_height')
-                        height_help = tr('style.image_height_help')
-                    
-                    image_height = st.number_input(
-                        height_label,
-                        min_value=128,
-                        value=1024,
-                        step=1,
-                        label_visibility="visible",
-                        help=height_help
-                    )
-            
-                # 3. Prompt prefix input
+                # Prompt prefix input
                 # Get current prompt_prefix from config
                 current_prefix = comfyui_config["image"]["prompt_prefix"]
             
@@ -1152,10 +1132,12 @@ def main():
                 st.info("ℹ️ " + tr("image.not_required"))
                 st.caption(tr("image.not_required_hint"))
                 
+                # Get media size from template (not used in this branch; kept for consistency)
+                image_width = st.session_state.get('template_media_width', 1024)
+                image_height = st.session_state.get('template_media_height', 1024)
+                
                 # Set default values for later use
                 workflow_key = None
-                image_width = 1024
-                image_height = 1024
                 prompt_prefix = ""
         
 
@@ -1225,14 +1207,13 @@ def main():
                         progress_bar.progress(min(int(event.progress * 100), 99))  # Cap at 99% until complete
                     
                     # Generate video (directly pass parameters)
+                    # Note: image_width/image_height are intentionally omitted; they are now auto-determined from the template
                     video_params = {
                         "text": text,
                         "mode": mode,
                         "title": title if title else None,
                         "n_scenes": n_scenes,
                         "image_workflow": workflow_key,
-                        "image_width": int(image_width),
-                        "image_height": int(image_height),
                         "frame_template": frame_template,
                         "prompt_prefix": prompt_prefix,
                         "bgm_path": bgm_path,
diff --git a/web/i18n/locales/en_US.json b/web/i18n/locales/en_US.json
index 36d75d2..32eef9a 100644
--- a/web/i18n/locales/en_US.json
+++ b/web/i18n/locales/en_US.json
@@ -49,17 +49,8 @@
     "style.workflow_how": "Place the exported image_xxx.json workflow file(API format) into the workflows/selfhost/ folder (for local ComfyUI) or the workflows/runninghub/ folder (for cloud)",
     "style.video_workflow_what": "Determines how each frame's video clip is generated and its effect (e.g., using different video generation models)",
     "style.video_workflow_how": "Place the exported video_xxx.json workflow file(API format) into the workflows/selfhost/ folder (for local ComfyUI) or the workflows/runninghub/ folder (for cloud)",
-    "style.image_size": "Image Size",
-    "style.image_width": "Width",
-    "style.image_height": "Height",
-    "style.image_width_help": "Width of AI-generated images (Note: This is the image size, not the final video size. Video size is determined by the template)",
-    "style.image_height_help": "Height of AI-generated images (Note: This is the image size, not the final video size. Video size is determined by the template)",
-    "style.video_width": "Video Width",
-    "style.video_height": "Video Height",
-    "style.video_width_help": "Width of AI-generated video (Note: This is the video clip size, will auto-adapt to template size)",
-    "style.video_height_help": "Height of AI-generated video (Note: This is the video clip size, will auto-adapt to template size)",
-    "style.image_size_note": "Image size controls the dimensions of AI-generated illustrations, and does not affect the final video size. Video size is determined by the Storyboard Template below.",
-    "style.video_size_note": "Video size will automatically adapt to the template size, no manual adjustment needed.",
+    "style.image_size_info": "Image Size: {width}x{height} (auto-determined by template)",
+    "style.video_size_info": "Video Size: {width}x{height} (auto-determined by template)",
     "style.prompt_prefix": "Prompt Prefix",
     "style.prompt_prefix_what": "Automatically added before all image prompts to control the illustration style uniformly (e.g., cartoon, realistic)",
     "style.prompt_prefix_how": "Enter style description in the input box below. To save permanently, edit the config.yaml file",
diff --git a/web/i18n/locales/zh_CN.json b/web/i18n/locales/zh_CN.json
index 348dac0..ebaff93 100644
--- a/web/i18n/locales/zh_CN.json
+++ b/web/i18n/locales/zh_CN.json
@@ -49,17 +49,8 @@
     "style.workflow_how": "将导出的 image_xxx.json 工作流文件（API格式）放入 workflows/selfhost/（本地 ComfyUI）或 workflows/runninghub/（云端）文件夹",
     "style.video_workflow_what": "决定视频中每帧视频片段的生成方式和效果（如使用不同的视频生成模型）",
     "style.video_workflow_how": "将导出的 video_xxx.json 工作流文件（API格式）放入 workflows/selfhost/（本地 ComfyUI）或 workflows/runninghub/（云端）文件夹",
-    "style.image_size": "图片尺寸",
-    "style.image_width": "宽度",
-    "style.image_height": "高度",
-    "style.image_width_help": "AI 生成插图的宽度（注意：这是插图尺寸，不是最终视频尺寸。视频尺寸由模板决定）",
-    "style.image_height_help": "AI 生成插图的高度（注意：这是插图尺寸，不是最终视频尺寸。视频尺寸由模板决定）",
-    "style.video_width": "视频宽度",
-    "style.video_height": "视频高度",
-    "style.video_width_help": "AI 生成视频的宽度（注意：这是视频片段尺寸，会自适应模板尺寸）",
-    "style.video_height_help": "AI 生成视频的高度（注意：这是视频片段尺寸，会自适应模板尺寸）",
-    "style.image_size_note": "图片尺寸控制 AI 生成的插图大小，不影响最终视频尺寸。视频尺寸由下方的「📐 分镜模板」决定。",
-    "style.video_size_note": "视频尺寸会自动适配模板尺寸，无需手动调整。",
+    "style.image_size_info": "插图尺寸：{width}x{height}（由模板自动决定）",
+    "style.video_size_info": "视频尺寸：{width}x{height}（由模板自动决定）",
     "style.prompt_prefix": "提示词前缀",
     "style.prompt_prefix_what": "自动添加到所有图片提示词前面，统一控制插图风格（如：卡通风格、写实风格等）",
     "style.prompt_prefix_how": "直接在下方输入框填写风格描述。若要永久保存，需编辑 config.yaml 文件",
diff --git a/workflows/runninghub/video_wan2.1_fusionx.json b/workflows/runninghub/video_wan2.1_fusionx.json
new file mode 100644
index 0000000..69c87a4
--- /dev/null
+++ b/workflows/runninghub/video_wan2.1_fusionx.json
@@ -0,0 +1,5 @@
+{
+  "source": "runninghub",
+  "workflow_id": "1985909483975188481"
+}
+
diff --git a/workflows/selfhost/video_wan2.1_fusionx.json b/workflows/selfhost/video_wan2.1_fusionx.json
new file mode 100644
index 0000000..b34a903
--- /dev/null
+++ b/workflows/selfhost/video_wan2.1_fusionx.json
@@ -0,0 +1,187 @@
+{
+  "3": {
+    "inputs": {
+      "seed": 576600626757621,
+      "steps": 10,
+      "cfg": 1,
+      "sampler_name": "uni_pc",
+      "scheduler": "normal",
+      "denoise": 1,
+      "model": [
+        "48",
+        0
+      ],
+      "positive": [
+        "6",
+        0
+      ],
+      "negative": [
+        "7",
+        0
+      ],
+      "latent_image": [
+        "40",
+        0
+      ]
+    },
+    "class_type": "KSampler",
+    "_meta": {
+      "title": "KSampler"
+    }
+  },
+  "6": {
+    "inputs": {
+      "text": [
+        "49",
+        0
+      ],
+      "clip": [
+        "38",
+        0
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "CLIP Text Encode (Positive Prompt)"
+    }
+  },
+  "7": {
+    "inputs": {
+      "text": "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走",
+      "clip": [
+        "38",
+        0
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "CLIP Text Encode (Negative Prompt)"
+    }
+  },
+  "8": {
+    "inputs": {
+      "samples": [
+        "3",
+        0
+      ],
+      "vae": [
+        "39",
+        0
+      ]
+    },
+    "class_type": "VAEDecode",
+    "_meta": {
+      "title": "VAE Decode"
+    }
+  },
+  "30": {
+    "inputs": {
+      "frame_rate": 16,
+      "loop_count": 0,
+      "filename_prefix": "Video",
+      "format": "video/h264-mp4",
+      "pix_fmt": "yuv420p",
+      "crf": 19,
+      "save_metadata": true,
+      "trim_to_audio": false,
+      "pingpong": false,
+      "save_output": true,
+      "images": [
+        "8",
+        0
+      ]
+    },
+    "class_type": "VHS_VideoCombine",
+    "_meta": {
+      "title": "Video Combine 🎥🅥🅗🅢"
+    }
+  },
+  "37": {
+    "inputs": {
+      "unet_name": "wan-fusionx/WanT2V_MasterModel.safetensors",
+      "weight_dtype": "default"
+    },
+    "class_type": "UNETLoader",
+    "_meta": {
+      "title": "Load Diffusion Model"
+    }
+  },
+  "38": {
+    "inputs": {
+      "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+      "type": "wan",
+      "device": "default"
+    },
+    "class_type": "CLIPLoader",
+    "_meta": {
+      "title": "Load CLIP"
+    }
+  },
+  "39": {
+    "inputs": {
+      "vae_name": "wan_2.1_vae.safetensors"
+    },
+    "class_type": "VAELoader",
+    "_meta": {
+      "title": "Load VAE"
+    }
+  },
+  "40": {
+    "inputs": {
+      "width": [
+        "50",
+        0
+      ],
+      "height": [
+        "51",
+        0
+      ],
+      "length": 81,
+      "batch_size": 1
+    },
+    "class_type": "EmptyHunyuanLatentVideo",
+    "_meta": {
+      "title": "EmptyHunyuanLatentVideo"
+    }
+  },
+  "48": {
+    "inputs": {
+      "shift": 1,
+      "model": [
+        "37",
+        0
+      ]
+    },
+    "class_type": "ModelSamplingSD3",
+    "_meta": {
+      "title": "Shift"
+    }
+  },
+  "49": {
+    "inputs": {
+      "value": "草地上有个小狗在奔跑"
+    },
+    "class_type": "PrimitiveStringMultiline",
+    "_meta": {
+      "title": "$prompt.value!"
+    }
+  },
+  "50": {
+    "inputs": {
+      "value": 512
+    },
+    "class_type": "easy int",
+    "_meta": {
+      "title": "$width.value"
+    }
+  },
+  "51": {
+    "inputs": {
+      "value": 288
+    },
+    "class_type": "easy int",
+    "_meta": {
+      "title": "$height.value"
+    }
+  }
+}
\ No newline at end of file