修复视频尺寸传参未生效的问题 (Fix: video size parameters not taking effect)

This commit is contained in:
puke
2025-11-20 20:09:43 +08:00
parent 04f0754335
commit 7f904f6b19
10 changed files with 123 additions and 63 deletions

View File

@@ -76,12 +76,12 @@ async def list_tts_workflows(pixelle_video: PixelleVideoDep):
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@router.get("/workflows/image", response_model=WorkflowListResponse) @router.get("/workflows/media", response_model=WorkflowListResponse)
async def list_image_workflows(pixelle_video: PixelleVideoDep): async def list_media_workflows(pixelle_video: PixelleVideoDep):
""" """
List available image generation workflows List available media workflows (both image and video)
Returns list of image workflows from both RunningHub and self-hosted sources. Returns list of all media workflows from both RunningHub and self-hosted sources.
Example response: Example response:
```json ```json
@@ -94,13 +94,41 @@ async def list_image_workflows(pixelle_video: PixelleVideoDep):
"path": "workflows/runninghub/image_flux.json", "path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json", "key": "runninghub/image_flux.json",
"workflow_id": "123456" "workflow_id": "123456"
},
{
"name": "video_wan2.1.json",
"display_name": "video_wan2.1.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/video_wan2.1.json",
"key": "runninghub/video_wan2.1.json",
"workflow_id": "123457"
} }
] ]
} }
``` ```
""" """
try: try:
# Get all workflows from media service (image generation is handled by media service) # Get all workflows from media service (includes both image and video)
all_workflows = pixelle_video.media.list_workflows()
media_workflows = [WorkflowInfo(**wf) for wf in all_workflows]
return WorkflowListResponse(workflows=media_workflows)
except Exception as e:
logger.error(f"List media workflows error: {e}")
raise HTTPException(status_code=500, detail=str(e))
# Keep old endpoint for backward compatibility
@router.get("/workflows/image", response_model=WorkflowListResponse)
async def list_image_workflows(pixelle_video: PixelleVideoDep):
"""
List available image workflows (deprecated, use /workflows/media instead)
This endpoint is kept for backward compatibility; it returns only workflows whose filenames start with "image_".
"""
try:
all_workflows = pixelle_video.media.list_workflows() all_workflows = pixelle_video.media.list_workflows()
# Filter to image workflows only (filename starts with "image_") # Filter to image workflows only (filename starts with "image_")

View File

@@ -63,6 +63,17 @@ async def generate_video_sync(
try: try:
logger.info(f"Sync video generation: {request_body.text[:50]}...") logger.info(f"Sync video generation: {request_body.text[:50]}...")
# Auto-determine media_width and media_height from template meta tags (required)
if not request_body.frame_template:
raise ValueError("frame_template is required to determine media size")
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import resolve_template_path
template_path = resolve_template_path(request_body.frame_template)
generator = HTMLFrameGenerator(template_path)
media_width, media_height = generator.get_media_size()
logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
# Build video generation parameters # Build video generation parameters
video_params = { video_params = {
"text": request_body.text, "text": request_body.text,
@@ -73,8 +84,9 @@ async def generate_video_sync(
"max_narration_words": request_body.max_narration_words, "max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words, "min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words, "max_image_prompt_words": request_body.max_image_prompt_words,
# Note: image_width and image_height are now auto-determined from template "media_width": media_width,
"image_workflow": request_body.image_workflow, "media_height": media_height,
"media_workflow": request_body.media_workflow,
"video_fps": request_body.video_fps, "video_fps": request_body.video_fps,
"frame_template": request_body.frame_template, "frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix, "prompt_prefix": request_body.prompt_prefix,
@@ -150,6 +162,17 @@ async def generate_video_async(
# Define async execution function # Define async execution function
async def execute_video_generation(): async def execute_video_generation():
"""Execute video generation in background""" """Execute video generation in background"""
# Auto-determine media_width and media_height from template meta tags (required)
if not request_body.frame_template:
raise ValueError("frame_template is required to determine media size")
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import resolve_template_path
template_path = resolve_template_path(request_body.frame_template)
generator = HTMLFrameGenerator(template_path)
media_width, media_height = generator.get_media_size()
logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
# Build video generation parameters # Build video generation parameters
video_params = { video_params = {
"text": request_body.text, "text": request_body.text,
@@ -160,8 +183,9 @@ async def generate_video_async(
"max_narration_words": request_body.max_narration_words, "max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words, "min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words, "max_image_prompt_words": request_body.max_image_prompt_words,
# Note: image_width and image_height are now auto-determined from template "media_width": media_width,
"image_workflow": request_body.image_workflow, "media_height": media_height,
"media_workflow": request_body.media_workflow,
"video_fps": request_body.video_fps, "video_fps": request_body.video_fps,
"frame_template": request_body.frame_template, "frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix, "prompt_prefix": request_body.prompt_prefix,

View File

@@ -56,9 +56,9 @@ class VideoGenerateRequest(BaseModel):
min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words") min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words")
max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words") max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
# === Image Parameters === # === Media Parameters ===
# Note: image_width and image_height are now auto-determined from template meta tags # Note: media_width and media_height are auto-determined from template meta tags
image_workflow: Optional[str] = Field(None, description="Custom image workflow") media_workflow: Optional[str] = Field(None, description="Custom media workflow (image or video)")
# === Video Parameters === # === Video Parameters ===
video_fps: int = Field(30, ge=15, le=60, description="Video FPS") video_fps: int = Field(30, ge=15, le=60, description="Video FPS")

View File

@@ -23,6 +23,10 @@ from typing import List, Optional, Dict, Any
class StoryboardConfig: class StoryboardConfig:
"""Storyboard configuration parameters""" """Storyboard configuration parameters"""
# Required parameters (must come first in dataclass)
media_width: int # Media width (image or video, required)
media_height: int # Media height (image or video, required)
# Task isolation # Task isolation
task_id: Optional[str] = None # Task ID for file isolation (auto-generated if None) task_id: Optional[str] = None # Task ID for file isolation (auto-generated if None)
@@ -42,10 +46,8 @@ class StoryboardConfig:
tts_speed: Optional[float] = None # TTS speed multiplier (0.5-2.0, 1.0 = normal) tts_speed: Optional[float] = None # TTS speed multiplier (0.5-2.0, 1.0 = normal)
ref_audio: Optional[str] = None # Reference audio for voice cloning (ComfyUI mode only) ref_audio: Optional[str] = None # Reference audio for voice cloning (ComfyUI mode only)
# Image parameters # Media workflow
image_width: int = 1024 media_workflow: Optional[str] = None # Media workflow filename (image or video, None = use default)
image_height: int = 1024
image_workflow: Optional[str] = None # Image workflow filename (None = use default)
# Frame template (includes size information in path) # Frame template (includes size information in path)
frame_template: str = "1080x1920/default.html" # Template path with size (e.g., "1080x1920/default.html") frame_template: str = "1080x1920/default.html" # Template path with size (e.g., "1080x1920/default.html")

View File

@@ -93,8 +93,8 @@ class CustomPipeline(BasePipeline):
tts_speed: float = 1.2, tts_speed: float = 1.2,
ref_audio: Optional[str] = None, ref_audio: Optional[str] = None,
image_workflow: Optional[str] = None, media_workflow: Optional[str] = None,
# Note: image_width and image_height are now auto-determined from template # Note: media_width and media_height are auto-determined from template
frame_template: Optional[str] = None, frame_template: Optional[str] = None,
video_fps: int = 30, video_fps: int = 30,
@@ -189,8 +189,8 @@ class CustomPipeline(BasePipeline):
# Read media size from template meta tags # Read media size from template meta tags
template_path = resolve_template_path(frame_template) template_path = resolve_template_path(frame_template)
generator = HTMLFrameGenerator(template_path) generator = HTMLFrameGenerator(template_path)
image_width, image_height = generator.get_media_size() media_width, media_height = generator.get_media_size()
logger.info(f"📐 Media size from template: {image_width}x{image_height}") logger.info(f"📐 Media size from template: {media_width}x{media_height}")
if template_type == "image": if template_type == "image":
logger.info(f"📸 Template requires image generation") logger.info(f"📸 Template requires image generation")
@@ -270,9 +270,9 @@ class CustomPipeline(BasePipeline):
tts_workflow=final_tts_workflow, # Use processed workflow tts_workflow=final_tts_workflow, # Use processed workflow
tts_speed=tts_speed, tts_speed=tts_speed,
ref_audio=ref_audio, ref_audio=ref_audio,
image_width=image_width, media_width=media_width,
image_height=image_height, media_height=media_height,
image_workflow=image_workflow, media_workflow=media_workflow,
frame_template=frame_template frame_template=frame_template
) )
@@ -387,7 +387,7 @@ class CustomPipeline(BasePipeline):
"tts_workflow": tts_workflow, "tts_workflow": tts_workflow,
"tts_speed": tts_speed, "tts_speed": tts_speed,
"ref_audio": ref_audio, "ref_audio": ref_audio,
"image_workflow": image_workflow, "media_workflow": media_workflow,
"frame_template": frame_template, "frame_template": frame_template,
"bgm_path": bgm_path, "bgm_path": bgm_path,
"bgm_volume": bgm_volume, "bgm_volume": bgm_volume,

View File

@@ -68,8 +68,10 @@ class StandardPipeline(BasePipeline):
async def __call__( async def __call__(
self, self,
# === Input === # === Input (Required) ===
text: str, text: str,
media_width: int, # Required: Media width (from template)
media_height: int, # Required: Media height (from template)
# === Processing Mode === # === Processing Mode ===
mode: Literal["generate", "fixed"] = "generate", mode: Literal["generate", "fixed"] = "generate",
@@ -95,10 +97,8 @@ class StandardPipeline(BasePipeline):
min_image_prompt_words: int = 30, min_image_prompt_words: int = 30,
max_image_prompt_words: int = 60, max_image_prompt_words: int = 60,
# === Image Parameters === # === Media Workflow ===
image_width: int = 1024, media_workflow: Optional[str] = None,
image_height: int = 1024,
image_workflow: Optional[str] = None,
# === Video Parameters === # === Video Parameters ===
video_fps: int = 30, video_fps: int = 30,
@@ -155,9 +155,9 @@ class StandardPipeline(BasePipeline):
min_image_prompt_words: Min image prompt length min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length max_image_prompt_words: Max image prompt length
image_width: Generated image width (default 1024) media_width: Media width (image or video, required)
image_height: Generated image height (default 1024) media_height: Media height (image or video, required)
image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default) media_workflow: Media workflow filename (image or video, e.g., "image_flux.json", "video_wan.json", None = use default)
video_fps: Video frame rate (default 30) video_fps: Video frame rate (default 30)
@@ -254,9 +254,9 @@ class StandardPipeline(BasePipeline):
tts_workflow=final_tts_workflow, # Use processed workflow tts_workflow=final_tts_workflow, # Use processed workflow
tts_speed=tts_speed, tts_speed=tts_speed,
ref_audio=ref_audio, ref_audio=ref_audio,
image_width=image_width, media_width=media_width,
image_height=image_height, media_height=media_height,
image_workflow=image_workflow, media_workflow=media_workflow,
frame_template=frame_template or "1080x1920/default.html", frame_template=frame_template or "1080x1920/default.html",
template_params=template_params # Custom template parameters template_params=template_params # Custom template parameters
) )
@@ -374,13 +374,13 @@ class StandardPipeline(BasePipeline):
# Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts) # Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts)
is_runninghub = ( is_runninghub = (
(config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or (config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or
(config.image_workflow and config.image_workflow.startswith("runninghub/")) (config.media_workflow and config.media_workflow.startswith("runninghub/"))
) )
if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1: if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1:
logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)") logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)")
logger.info(f" TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}") logger.info(f" TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}")
logger.info(f" Image: {'runninghub' if config.image_workflow and config.image_workflow.startswith('runninghub/') else 'local'}") logger.info(f" Media: {'runninghub' if config.media_workflow and config.media_workflow.startswith('runninghub/') else 'local'}")
semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT) semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT)
completed_count = 0 completed_count = 0
@@ -541,7 +541,7 @@ class StandardPipeline(BasePipeline):
"tts_workflow": tts_workflow, "tts_workflow": tts_workflow,
"tts_speed": tts_speed, "tts_speed": tts_speed,
"ref_audio": ref_audio, "ref_audio": ref_audio,
"image_workflow": image_workflow, "media_workflow": media_workflow,
"prompt_prefix": prompt_prefix, "prompt_prefix": prompt_prefix,
"frame_template": frame_template, "frame_template": frame_template,
"template_params": template_params, "template_params": template_params,

View File

@@ -187,7 +187,7 @@ class FrameProcessor:
# Determine media type based on workflow # Determine media type based on workflow
# video_ prefix in workflow name indicates video generation # video_ prefix in workflow name indicates video generation
workflow_name = config.image_workflow or "" workflow_name = config.media_workflow or ""
is_video_workflow = "video_" in workflow_name.lower() is_video_workflow = "video_" in workflow_name.lower()
media_type = "video" if is_video_workflow else "image" media_type = "video" if is_video_workflow else "image"
@@ -196,10 +196,10 @@ class FrameProcessor:
# Call Media generation (with optional preset) # Call Media generation (with optional preset)
media_result = await self.core.media( media_result = await self.core.media(
prompt=frame.image_prompt, prompt=frame.image_prompt,
workflow=config.image_workflow, # Pass workflow from config (None = use default) workflow=config.media_workflow, # Pass workflow from config (None = use default)
media_type=media_type, media_type=media_type,
width=config.image_width, width=config.media_width,
height=config.image_height height=config.media_height
) )
# Store media type # Store media type

View File

@@ -380,9 +380,9 @@ class PersistenceService:
"tts_workflow": config.tts_workflow, "tts_workflow": config.tts_workflow,
"tts_speed": config.tts_speed, "tts_speed": config.tts_speed,
"ref_audio": config.ref_audio, "ref_audio": config.ref_audio,
"image_width": config.image_width, "media_width": config.media_width,
"image_height": config.image_height, "media_height": config.media_height,
"image_workflow": config.image_workflow, "media_workflow": config.media_workflow,
"frame_template": config.frame_template, "frame_template": config.frame_template,
"template_params": config.template_params, "template_params": config.template_params,
} }
@@ -402,9 +402,9 @@ class PersistenceService:
tts_workflow=data.get("tts_workflow"), tts_workflow=data.get("tts_workflow"),
tts_speed=data.get("tts_speed"), tts_speed=data.get("tts_speed"),
ref_audio=data.get("ref_audio"), ref_audio=data.get("ref_audio"),
image_width=data.get("image_width", 1024), media_width=data.get("media_width", data.get("image_width", 1024)), # Backward compatibility
image_height=data.get("image_height", 1024), media_height=data.get("media_height", data.get("image_height", 1024)), # Backward compatibility
image_workflow=data.get("image_workflow"), media_workflow=data.get("media_workflow", data.get("image_workflow")), # Backward compatibility
frame_template=data.get("frame_template", "1080x1920/default.html"), frame_template=data.get("frame_template", "1080x1920/default.html"),
template_params=data.get("template_params"), template_params=data.get("template_params"),
) )

View File

@@ -58,7 +58,7 @@ def render_single_output(pixelle_video, video_params):
frame_template = video_params.get("frame_template") frame_template = video_params.get("frame_template")
custom_values_for_video = video_params.get("template_params", {}) custom_values_for_video = video_params.get("template_params", {})
workflow_key = video_params.get("image_workflow") workflow_key = video_params.get("media_workflow")
prompt_prefix = video_params.get("prompt_prefix", "") prompt_prefix = video_params.get("prompt_prefix", "")
with st.container(border=True): with st.container(border=True):
@@ -123,18 +123,20 @@ def render_single_output(pixelle_video, video_params):
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
# Generate video (directly pass parameters) # Generate video (directly pass parameters)
# Note: image_width and image_height are now auto-determined from template # Note: media_width and media_height are auto-determined from template
video_params = { video_params = {
"text": text, "text": text,
"mode": mode, "mode": mode,
"title": title if title else None, "title": title if title else None,
"n_scenes": n_scenes, "n_scenes": n_scenes,
"image_workflow": workflow_key, "media_workflow": workflow_key,
"frame_template": frame_template, "frame_template": frame_template,
"prompt_prefix": prompt_prefix, "prompt_prefix": prompt_prefix,
"bgm_path": bgm_path, "bgm_path": bgm_path,
"bgm_volume": bgm_volume if bgm_path else 0.2, "bgm_volume": bgm_volume if bgm_path else 0.2,
"progress_callback": update_progress, "progress_callback": update_progress,
"media_width": st.session_state.get('template_media_width'),
"media_height": st.session_state.get('template_media_height'),
} }
# Add TTS parameters based on mode # Add TTS parameters based on mode
@@ -245,12 +247,14 @@ def render_batch_output(pixelle_video, video_params):
shared_config = { shared_config = {
"title_prefix": video_params.get("title_prefix"), "title_prefix": video_params.get("title_prefix"),
"n_scenes": video_params.get("n_scenes") or 5, "n_scenes": video_params.get("n_scenes") or 5,
"image_workflow": video_params.get("image_workflow"), "media_workflow": video_params.get("media_workflow"),
"frame_template": video_params.get("frame_template"), "frame_template": video_params.get("frame_template"),
"prompt_prefix": video_params.get("prompt_prefix") or "", "prompt_prefix": video_params.get("prompt_prefix") or "",
"bgm_path": video_params.get("bgm_path"), "bgm_path": video_params.get("bgm_path"),
"bgm_volume": video_params.get("bgm_volume") or 0.2, "bgm_volume": video_params.get("bgm_volume") or 0.2,
"tts_inference_mode": video_params.get("tts_inference_mode") or "local", "tts_inference_mode": video_params.get("tts_inference_mode") or "local",
"media_width": video_params.get("media_width"),
"media_height": video_params.get("media_height"),
} }
# Add TTS parameters based on mode (only add non-None values) # Add TTS parameters based on mode (only add non-None values)

View File

@@ -610,7 +610,7 @@ def render_style_config(pixelle_video):
workflow_options if workflow_options else ["No workflows found"], workflow_options if workflow_options else ["No workflows found"],
index=default_workflow_index, index=default_workflow_index,
label_visibility="collapsed", label_visibility="collapsed",
key="image_workflow_select" key="media_workflow_select"
) )
# Get the actual workflow key (e.g., "runninghub/image_flux.json") # Get the actual workflow key (e.g., "runninghub/image_flux.json")
@@ -621,14 +621,14 @@ def render_style_config(pixelle_video):
workflow_key = "runninghub/image_flux.json" # fallback workflow_key = "runninghub/image_flux.json" # fallback
# Get media size from template # Get media size from template
image_width = st.session_state.get('template_media_width', 1024) media_width = st.session_state.get('template_media_width')
image_height = st.session_state.get('template_media_height', 1024) media_height = st.session_state.get('template_media_height')
# Display media size info (read-only) # Display media size info (read-only)
if template_media_type == "video": if template_media_type == "video":
size_info_text = tr('style.video_size_info', width=image_width, height=image_height) size_info_text = tr('style.video_size_info', width=media_width, height=media_height)
else: else:
size_info_text = tr('style.image_size_info', width=image_width, height=image_height) size_info_text = tr('style.image_size_info', width=media_width, height=media_height)
st.info(f"📐 {size_info_text}") st.info(f"📐 {size_info_text}")
# Prompt prefix input # Prompt prefix input
@@ -679,8 +679,8 @@ def render_style_config(pixelle_video):
prompt=final_prompt, prompt=final_prompt,
workflow=workflow_key, workflow=workflow_key,
media_type=template_media_type, media_type=template_media_type,
width=int(image_width), width=int(media_width),
height=int(image_height) height=int(media_height)
)) ))
preview_media_path = media_result.url preview_media_path = media_result.url
@@ -725,8 +725,8 @@ def render_style_config(pixelle_video):
st.caption(tr("image.not_required_hint")) st.caption(tr("image.not_required_hint"))
# Get media size from template (even though not used, for consistency) # Get media size from template (even though not used, for consistency)
image_width = st.session_state.get('template_media_width', 1024) media_width = st.session_state.get('template_media_width')
image_height = st.session_state.get('template_media_height', 1024) media_height = st.session_state.get('template_media_height')
# Set default values for later use # Set default values for later use
workflow_key = None workflow_key = None
@@ -741,6 +741,8 @@ def render_style_config(pixelle_video):
"ref_audio": str(ref_audio_path) if ref_audio_path else None, "ref_audio": str(ref_audio_path) if ref_audio_path else None,
"frame_template": frame_template, "frame_template": frame_template,
"template_params": custom_values_for_video if custom_values_for_video else None, "template_params": custom_values_for_video if custom_values_for_video else None,
"image_workflow": workflow_key, "media_workflow": workflow_key,
"prompt_prefix": prompt_prefix if prompt_prefix else "" "prompt_prefix": prompt_prefix if prompt_prefix else "",
"media_width": media_width,
"media_height": media_height
} }