Fix video size parameters not taking effect

puke
2025-11-20 20:09:43 +08:00
parent 04f0754335
commit 7f904f6b19
10 changed files with 123 additions and 63 deletions

View File

@@ -76,12 +76,12 @@ async def list_tts_workflows(pixelle_video: PixelleVideoDep):
raise HTTPException(status_code=500, detail=str(e))
@router.get("/workflows/image", response_model=WorkflowListResponse)
async def list_image_workflows(pixelle_video: PixelleVideoDep):
@router.get("/workflows/media", response_model=WorkflowListResponse)
async def list_media_workflows(pixelle_video: PixelleVideoDep):
"""
-List available image generation workflows
+List available media workflows (both image and video)
-Returns list of image workflows from both RunningHub and self-hosted sources.
+Returns list of all media workflows from both RunningHub and self-hosted sources.
Example response:
```json
@@ -94,13 +94,41 @@ async def list_image_workflows(pixelle_video: PixelleVideoDep):
"path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json",
"workflow_id": "123456"
},
+{
+"name": "video_wan2.1.json",
+"display_name": "video_wan2.1.json - Runninghub",
+"source": "runninghub",
+"path": "workflows/runninghub/video_wan2.1.json",
+"key": "runninghub/video_wan2.1.json",
+"workflow_id": "123457"
+}
]
}
```
"""
try:
-# Get all workflows from media service (image generation is handled by media service)
+# Get all workflows from media service (includes both image and video)
all_workflows = pixelle_video.media.list_workflows()
media_workflows = [WorkflowInfo(**wf) for wf in all_workflows]
return WorkflowListResponse(workflows=media_workflows)
except Exception as e:
logger.error(f"List media workflows error: {e}")
raise HTTPException(status_code=500, detail=str(e))
+# Keep old endpoint for backward compatibility
+@router.get("/workflows/image", response_model=WorkflowListResponse)
+async def list_image_workflows(pixelle_video: PixelleVideoDep):
+"""
+List available image workflows (deprecated, use /workflows/media instead)
+This endpoint is kept for backward compatibility but will filter to image_ workflows only.
+"""
+try:
+all_workflows = pixelle_video.media.list_workflows()
+# Filter to image workflows only (filename starts with "image_")
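For readers skimming the diff, a minimal, illustrative sketch of the filtering the comment above describes, assuming workflow entries shaped like the JSON example in the docstring; the helper name and sample data are invented for this note and are not part of the commit.

```python
# Illustrative sketch only: keep workflows whose filename starts with "image_",
# mirroring the backward-compatibility filter described above.
from typing import Any, Dict, List


def filter_image_workflows(workflows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    return [wf for wf in workflows if wf.get("name", "").startswith("image_")]


sample = [
    {"name": "image_flux.json", "key": "runninghub/image_flux.json"},
    {"name": "video_wan2.1.json", "key": "runninghub/video_wan2.1.json"},
]
print(filter_image_workflows(sample))  # only the image_ entry remains
```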

View File

@@ -63,6 +63,17 @@ async def generate_video_sync(
try:
logger.info(f"Sync video generation: {request_body.text[:50]}...")
+# Auto-determine media_width and media_height from template meta tags (required)
+if not request_body.frame_template:
+raise ValueError("frame_template is required to determine media size")
+from pixelle_video.services.frame_html import HTMLFrameGenerator
+from pixelle_video.utils.template_util import resolve_template_path
+template_path = resolve_template_path(request_body.frame_template)
+generator = HTMLFrameGenerator(template_path)
+media_width, media_height = generator.get_media_size()
+logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
# Build video generation parameters
video_params = {
"text": request_body.text,
@@ -73,8 +84,9 @@ async def generate_video_sync(
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
-# Note: image_width and image_height are now auto-determined from template
-"image_workflow": request_body.image_workflow,
+"media_width": media_width,
+"media_height": media_height,
+"media_workflow": request_body.media_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix,
@@ -150,6 +162,17 @@ async def generate_video_async(
# Define async execution function
async def execute_video_generation():
"""Execute video generation in background"""
+# Auto-determine media_width and media_height from template meta tags (required)
+if not request_body.frame_template:
+raise ValueError("frame_template is required to determine media size")
+from pixelle_video.services.frame_html import HTMLFrameGenerator
+from pixelle_video.utils.template_util import resolve_template_path
+template_path = resolve_template_path(request_body.frame_template)
+generator = HTMLFrameGenerator(template_path)
+media_width, media_height = generator.get_media_size()
+logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
# Build video generation parameters
video_params = {
"text": request_body.text,
@@ -160,8 +183,9 @@ async def generate_video_async(
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
-# Note: image_width and image_height are now auto-determined from template
-"image_workflow": request_body.image_workflow,
+"media_width": media_width,
+"media_height": media_height,
+"media_workflow": request_body.media_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix,

View File

@@ -56,9 +56,9 @@ class VideoGenerateRequest(BaseModel):
min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words")
max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
-# === Image Parameters ===
-# Note: image_width and image_height are now auto-determined from template meta tags
-image_workflow: Optional[str] = Field(None, description="Custom image workflow")
+# === Media Parameters ===
+# Note: media_width and media_height are auto-determined from template meta tags
+media_workflow: Optional[str] = Field(None, description="Custom media workflow (image or video)")
# === Video Parameters ===
video_fps: int = Field(30, ge=15, le=60, description="Video FPS")
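A hedged sketch of the renamed field in use on the client side; the cut-down model below only mirrors the fields visible in this hunk, and the sample workflow key is taken from the docstring example earlier in the commit.

```python
# Illustrative sketch only: a cut-down stand-in for the request model above.
from typing import Optional
from pydantic import BaseModel, Field


class VideoGenerateRequestSketch(BaseModel):
    text: str
    # media_width/media_height are intentionally absent: they come from the template
    media_workflow: Optional[str] = Field(None, description="Custom media workflow (image or video)")
    video_fps: int = Field(30, ge=15, le=60, description="Video FPS")


req = VideoGenerateRequestSketch(
    text="A short story about the sea",
    media_workflow="runninghub/video_wan2.1.json",  # video_ prefix selects video generation
)
print(req)
```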

View File

@@ -23,6 +23,10 @@ from typing import List, Optional, Dict, Any
class StoryboardConfig:
"""Storyboard configuration parameters"""
+# Required parameters (must come first in dataclass)
+media_width: int # Media width (image or video, required)
+media_height: int # Media height (image or video, required)
# Task isolation
task_id: Optional[str] = None # Task ID for file isolation (auto-generated if None)
@@ -42,10 +46,8 @@ class StoryboardConfig:
tts_speed: Optional[float] = None # TTS speed multiplier (0.5-2.0, 1.0 = normal)
ref_audio: Optional[str] = None # Reference audio for voice cloning (ComfyUI mode only)
-# Image parameters
-image_width: int = 1024
-image_height: int = 1024
-image_workflow: Optional[str] = None # Image workflow filename (None = use default)
+# Media workflow
+media_workflow: Optional[str] = None # Media workflow filename (image or video, None = use default)
# Frame template (includes size information in path)
frame_template: str = "1080x1920/default.html" # Template path with size (e.g., "1080x1920/default.html")
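The note "must come first in dataclass" reflects a Python rule rather than a project convention: fields without defaults cannot follow fields with defaults. A minimal sketch with only a few of the fields shown:

```python
# Illustrative sketch only: non-default dataclass fields must precede defaulted ones.
from dataclasses import dataclass
from typing import Optional


@dataclass
class StoryboardConfigSketch:
    media_width: int               # required, no default
    media_height: int              # required, no default
    task_id: Optional[str] = None  # defaulted fields follow
    media_workflow: Optional[str] = None
    frame_template: str = "1080x1920/default.html"


config = StoryboardConfigSketch(media_width=1080, media_height=1920)
```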

View File

@@ -93,8 +93,8 @@ class CustomPipeline(BasePipeline):
tts_speed: float = 1.2,
ref_audio: Optional[str] = None,
-image_workflow: Optional[str] = None,
-# Note: image_width and image_height are now auto-determined from template
+media_workflow: Optional[str] = None,
+# Note: media_width and media_height are auto-determined from template
frame_template: Optional[str] = None,
video_fps: int = 30,
@@ -189,8 +189,8 @@ class CustomPipeline(BasePipeline):
# Read media size from template meta tags
template_path = resolve_template_path(frame_template)
generator = HTMLFrameGenerator(template_path)
-image_width, image_height = generator.get_media_size()
-logger.info(f"📐 Media size from template: {image_width}x{image_height}")
+media_width, media_height = generator.get_media_size()
+logger.info(f"📐 Media size from template: {media_width}x{media_height}")
if template_type == "image":
logger.info(f"📸 Template requires image generation")
@@ -270,9 +270,9 @@ class CustomPipeline(BasePipeline):
tts_workflow=final_tts_workflow, # Use processed workflow
tts_speed=tts_speed,
ref_audio=ref_audio,
-image_width=image_width,
-image_height=image_height,
-image_workflow=image_workflow,
+media_width=media_width,
+media_height=media_height,
+media_workflow=media_workflow,
frame_template=frame_template
)
@@ -387,7 +387,7 @@ class CustomPipeline(BasePipeline):
"tts_workflow": tts_workflow,
"tts_speed": tts_speed,
"ref_audio": ref_audio,
"image_workflow": image_workflow,
"media_workflow": media_workflow,
"frame_template": frame_template,
"bgm_path": bgm_path,
"bgm_volume": bgm_volume,

View File

@@ -68,8 +68,10 @@ class StandardPipeline(BasePipeline):
async def __call__(
self,
-# === Input ===
+# === Input (Required) ===
text: str,
+media_width: int, # Required: Media width (from template)
+media_height: int, # Required: Media height (from template)
# === Processing Mode ===
mode: Literal["generate", "fixed"] = "generate",
@@ -95,10 +97,8 @@ class StandardPipeline(BasePipeline):
min_image_prompt_words: int = 30,
max_image_prompt_words: int = 60,
-# === Image Parameters ===
-image_width: int = 1024,
-image_height: int = 1024,
-image_workflow: Optional[str] = None,
+# === Media Workflow ===
+media_workflow: Optional[str] = None,
# === Video Parameters ===
video_fps: int = 30,
@@ -155,9 +155,9 @@ class StandardPipeline(BasePipeline):
min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length
-image_width: Generated image width (default 1024)
-image_height: Generated image height (default 1024)
-image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
+media_width: Media width (image or video, required)
+media_height: Media height (image or video, required)
+media_workflow: Media workflow filename (image or video, e.g., "image_flux.json", "video_wan.json", None = use default)
video_fps: Video frame rate (default 30)
@@ -254,9 +254,9 @@ class StandardPipeline(BasePipeline):
tts_workflow=final_tts_workflow, # Use processed workflow
tts_speed=tts_speed,
ref_audio=ref_audio,
-image_width=image_width,
-image_height=image_height,
-image_workflow=image_workflow,
+media_width=media_width,
+media_height=media_height,
+media_workflow=media_workflow,
frame_template=frame_template or "1080x1920/default.html",
template_params=template_params # Custom template parameters
)
@@ -374,13 +374,13 @@ class StandardPipeline(BasePipeline):
# Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts)
is_runninghub = (
(config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or
-(config.image_workflow and config.image_workflow.startswith("runninghub/"))
+(config.media_workflow and config.media_workflow.startswith("runninghub/"))
)
if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1:
logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)")
logger.info(f" TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}")
logger.info(f" Image: {'runninghub' if config.image_workflow and config.image_workflow.startswith('runninghub/') else 'local'}")
logger.info(f" Media: {'runninghub' if config.media_workflow and config.media_workflow.startswith('runninghub/') else 'local'}")
semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT)
completed_count = 0
@@ -541,7 +541,7 @@ class StandardPipeline(BasePipeline):
"tts_workflow": tts_workflow,
"tts_speed": tts_speed,
"ref_audio": ref_audio,
"image_workflow": image_workflow,
"media_workflow": media_workflow,
"prompt_prefix": prompt_prefix,
"frame_template": frame_template,
"template_params": template_params,

View File

@@ -187,7 +187,7 @@ class FrameProcessor:
# Determine media type based on workflow
# video_ prefix in workflow name indicates video generation
-workflow_name = config.image_workflow or ""
+workflow_name = config.media_workflow or ""
is_video_workflow = "video_" in workflow_name.lower()
media_type = "video" if is_video_workflow else "image"
@@ -196,10 +196,10 @@ class FrameProcessor:
# Call Media generation (with optional preset)
media_result = await self.core.media(
prompt=frame.image_prompt,
-workflow=config.image_workflow, # Pass workflow from config (None = use default)
+workflow=config.media_workflow, # Pass workflow from config (None = use default)
media_type=media_type,
-width=config.image_width,
-height=config.image_height
+width=config.media_width,
+height=config.media_height
)
# Store media type
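The `video_` naming convention above is the whole switch between image and video generation; a tiny hedged sketch of the same check in isolation:

```python
# Illustrative sketch only: map a workflow filename to a media type via the video_ prefix.
from typing import Optional


def media_type_for(workflow_name: Optional[str]) -> str:
    name = (workflow_name or "").lower()
    return "video" if "video_" in name else "image"


assert media_type_for("runninghub/video_wan2.1.json") == "video"
assert media_type_for("runninghub/image_flux.json") == "image"
assert media_type_for(None) == "image"  # no workflow configured falls back to image
```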

View File

@@ -380,9 +380,9 @@ class PersistenceService:
"tts_workflow": config.tts_workflow,
"tts_speed": config.tts_speed,
"ref_audio": config.ref_audio,
"image_width": config.image_width,
"image_height": config.image_height,
"image_workflow": config.image_workflow,
"media_width": config.media_width,
"media_height": config.media_height,
"media_workflow": config.media_workflow,
"frame_template": config.frame_template,
"template_params": config.template_params,
}
@@ -402,9 +402,9 @@ class PersistenceService:
tts_workflow=data.get("tts_workflow"),
tts_speed=data.get("tts_speed"),
ref_audio=data.get("ref_audio"),
-image_width=data.get("image_width", 1024),
-image_height=data.get("image_height", 1024),
-image_workflow=data.get("image_workflow"),
+media_width=data.get("media_width", data.get("image_width", 1024)), # Backward compatibility
+media_height=data.get("media_height", data.get("image_height", 1024)), # Backward compatibility
+media_workflow=data.get("media_workflow", data.get("image_workflow")), # Backward compatibility
frame_template=data.get("frame_template", "1080x1920/default.html"),
template_params=data.get("template_params"),
)
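The nested `data.get(...)` fallbacks above let tasks persisted before the rename load unchanged. A small sketch with a hypothetical old-format payload:

```python
# Illustrative sketch only: old persisted payloads (image_*) still resolve to media_* values.
old_payload = {
    "image_width": 1024,
    "image_height": 1024,
    "image_workflow": "runninghub/image_flux.json",
}

media_width = old_payload.get("media_width", old_payload.get("image_width", 1024))
media_height = old_payload.get("media_height", old_payload.get("image_height", 1024))
media_workflow = old_payload.get("media_workflow", old_payload.get("image_workflow"))

print(media_width, media_height, media_workflow)  # 1024 1024 runninghub/image_flux.json
```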

View File

@@ -58,7 +58,7 @@ def render_single_output(pixelle_video, video_params):
frame_template = video_params.get("frame_template")
custom_values_for_video = video_params.get("template_params", {})
workflow_key = video_params.get("image_workflow")
workflow_key = video_params.get("media_workflow")
prompt_prefix = video_params.get("prompt_prefix", "")
with st.container(border=True):
@@ -123,18 +123,20 @@ def render_single_output(pixelle_video, video_params):
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
# Generate video (directly pass parameters)
-# Note: image_width and image_height are now auto-determined from template
+# Note: media_width and media_height are auto-determined from template
video_params = {
"text": text,
"mode": mode,
"title": title if title else None,
"n_scenes": n_scenes,
"image_workflow": workflow_key,
"media_workflow": workflow_key,
"frame_template": frame_template,
"prompt_prefix": prompt_prefix,
"bgm_path": bgm_path,
"bgm_volume": bgm_volume if bgm_path else 0.2,
"progress_callback": update_progress,
"media_width": st.session_state.get('template_media_width'),
"media_height": st.session_state.get('template_media_height'),
}
# Add TTS parameters based on mode
@@ -245,12 +247,14 @@ def render_batch_output(pixelle_video, video_params):
shared_config = {
"title_prefix": video_params.get("title_prefix"),
"n_scenes": video_params.get("n_scenes") or 5,
"image_workflow": video_params.get("image_workflow"),
"media_workflow": video_params.get("media_workflow"),
"frame_template": video_params.get("frame_template"),
"prompt_prefix": video_params.get("prompt_prefix") or "",
"bgm_path": video_params.get("bgm_path"),
"bgm_volume": video_params.get("bgm_volume") or 0.2,
"tts_inference_mode": video_params.get("tts_inference_mode") or "local",
"media_width": video_params.get("media_width"),
"media_height": video_params.get("media_height"),
}
# Add TTS parameters based on mode (only add non-None values)

View File

@@ -610,7 +610,7 @@ def render_style_config(pixelle_video):
workflow_options if workflow_options else ["No workflows found"],
index=default_workflow_index,
label_visibility="collapsed",
key="image_workflow_select"
key="media_workflow_select"
)
# Get the actual workflow key (e.g., "runninghub/image_flux.json")
@@ -621,14 +621,14 @@ def render_style_config(pixelle_video):
workflow_key = "runninghub/image_flux.json" # fallback
# Get media size from template
-image_width = st.session_state.get('template_media_width', 1024)
-image_height = st.session_state.get('template_media_height', 1024)
+media_width = st.session_state.get('template_media_width')
+media_height = st.session_state.get('template_media_height')
# Display media size info (read-only)
if template_media_type == "video":
-size_info_text = tr('style.video_size_info', width=image_width, height=image_height)
+size_info_text = tr('style.video_size_info', width=media_width, height=media_height)
else:
-size_info_text = tr('style.image_size_info', width=image_width, height=image_height)
+size_info_text = tr('style.image_size_info', width=media_width, height=media_height)
st.info(f"📐 {size_info_text}")
# Prompt prefix input
@@ -679,8 +679,8 @@ def render_style_config(pixelle_video):
prompt=final_prompt,
workflow=workflow_key,
media_type=template_media_type,
-width=int(image_width),
-height=int(image_height)
+width=int(media_width),
+height=int(media_height)
))
preview_media_path = media_result.url
@@ -725,8 +725,8 @@ def render_style_config(pixelle_video):
st.caption(tr("image.not_required_hint"))
# Get media size from template (even though not used, for consistency)
-image_width = st.session_state.get('template_media_width', 1024)
-image_height = st.session_state.get('template_media_height', 1024)
+media_width = st.session_state.get('template_media_width')
+media_height = st.session_state.get('template_media_height')
# Set default values for later use
workflow_key = None
@@ -741,6 +741,8 @@ def render_style_config(pixelle_video):
"ref_audio": str(ref_audio_path) if ref_audio_path else None,
"frame_template": frame_template,
"template_params": custom_values_for_video if custom_values_for_video else None,
"image_workflow": workflow_key,
"prompt_prefix": prompt_prefix if prompt_prefix else ""
"media_workflow": workflow_key,
"prompt_prefix": prompt_prefix if prompt_prefix else "",
"media_width": media_width,
"media_height": media_height
}