修复视频参数设置失效问题 (Fix: video parameter settings were not taking effect)
This commit is contained in:
@@ -86,7 +86,9 @@ class CustomPipeline(BasePipeline):
|
||||
custom_param_example: str = "default_value",
|
||||
|
||||
# === Standard Parameters (keep these for compatibility) ===
|
||||
voice_id: str = "[Chinese] zh-CN Yunjian",
|
||||
tts_inference_mode: Optional[str] = None, # "local" or "comfyui"
|
||||
voice_id: Optional[str] = None, # Deprecated, use tts_voice
|
||||
tts_voice: Optional[str] = None, # Voice ID for local mode
|
||||
tts_workflow: Optional[str] = None,
|
||||
tts_speed: float = 1.2,
|
||||
ref_audio: Optional[str] = None,
|
||||
@@ -126,6 +128,29 @@ class CustomPipeline(BasePipeline):
|
||||
logger.info(f"Input text length: {len(text)} chars")
|
||||
logger.info(f"Custom parameter: {custom_param_example}")
|
||||
|
||||
# === Handle TTS parameter compatibility ===
|
||||
# Support both old API (voice_id) and new API (tts_inference_mode + tts_voice)
|
||||
final_voice_id = None
|
||||
final_tts_workflow = tts_workflow
|
||||
|
||||
if tts_inference_mode:
|
||||
# New API from web UI
|
||||
if tts_inference_mode == "local":
|
||||
# Local Edge TTS mode - use tts_voice
|
||||
final_voice_id = tts_voice or "zh-CN-YunjianNeural"
|
||||
final_tts_workflow = None # Don't use workflow in local mode
|
||||
logger.debug(f"TTS Mode: local (voice={final_voice_id})")
|
||||
elif tts_inference_mode == "comfyui":
|
||||
# ComfyUI workflow mode
|
||||
final_voice_id = None # Don't use voice_id in ComfyUI mode
|
||||
# tts_workflow already set from parameter
|
||||
logger.debug(f"TTS Mode: comfyui (workflow={final_tts_workflow})")
|
||||
else:
|
||||
# Old API (backward compatibility)
|
||||
final_voice_id = voice_id or tts_voice or "zh-CN-YunjianNeural"
|
||||
# tts_workflow already set from parameter
|
||||
logger.debug(f"TTS Mode: legacy (voice_id={final_voice_id}, workflow={final_tts_workflow})")
|
||||
|
||||
# ========== Step 0: Setup ==========
|
||||
self._report_progress(progress_callback, "initializing", 0.05)
|
||||
|
||||
@@ -240,8 +265,9 @@ class CustomPipeline(BasePipeline):
|
||||
min_image_prompt_words=30,
|
||||
max_image_prompt_words=60,
|
||||
video_fps=video_fps,
|
||||
voice_id=voice_id,
|
||||
tts_workflow=tts_workflow,
|
||||
tts_inference_mode=tts_inference_mode or "local", # TTS inference mode (CRITICAL FIX)
|
||||
voice_id=final_voice_id, # Use processed voice_id
|
||||
tts_workflow=final_tts_workflow, # Use processed workflow
|
||||
tts_speed=tts_speed,
|
||||
ref_audio=ref_audio,
|
||||
image_width=image_width,
|
||||
|
||||
@@ -251,6 +251,7 @@ class StandardPipeline(BasePipeline):
|
||||
min_image_prompt_words=min_image_prompt_words,
|
||||
max_image_prompt_words=max_image_prompt_words,
|
||||
video_fps=video_fps,
|
||||
tts_inference_mode=tts_inference_mode or "local", # TTS inference mode (CRITICAL FIX)
|
||||
voice_id=final_voice_id, # Use processed voice_id
|
||||
tts_workflow=final_tts_workflow, # Use processed workflow
|
||||
tts_speed=tts_speed,
|
||||
@@ -288,54 +289,77 @@ class StandardPipeline(BasePipeline):
|
||||
logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
|
||||
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
|
||||
|
||||
# ========== Step 2: Generate image prompts ==========
|
||||
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
|
||||
# ========== Step 2: Check template type and conditionally generate image prompts ==========
|
||||
# Detect template type to determine if media generation is needed
|
||||
from pathlib import Path
|
||||
from pixelle_video.utils.template_util import get_template_type
|
||||
|
||||
# Override prompt_prefix if provided
|
||||
original_prefix = None
|
||||
if prompt_prefix is not None:
|
||||
image_config = self.core.config.get("comfyui", {}).get("image", {})
|
||||
original_prefix = image_config.get("prompt_prefix")
|
||||
image_config["prompt_prefix"] = prompt_prefix
|
||||
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
|
||||
template_name = Path(config.frame_template).name
|
||||
template_type = get_template_type(template_name)
|
||||
template_requires_media = (template_type in ["image", "video"])
|
||||
|
||||
try:
|
||||
# Create progress callback wrapper for image prompt generation
|
||||
def image_prompt_progress(completed: int, total: int, message: str):
|
||||
batch_progress = completed / total if total > 0 else 0
|
||||
overall_progress = 0.15 + (batch_progress * 0.15)
|
||||
self._report_progress(
|
||||
progress_callback,
|
||||
"generating_image_prompts",
|
||||
overall_progress,
|
||||
extra_info=message
|
||||
if template_type == "image":
|
||||
logger.info(f"📸 Template requires image generation")
|
||||
elif template_type == "video":
|
||||
logger.info(f"🎬 Template requires video generation")
|
||||
else: # static
|
||||
logger.info(f"⚡ Static template - skipping media generation pipeline")
|
||||
logger.info(f" 💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
|
||||
|
||||
# Only generate image prompts if template requires media
|
||||
if template_requires_media:
|
||||
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
|
||||
|
||||
# Override prompt_prefix if provided
|
||||
original_prefix = None
|
||||
if prompt_prefix is not None:
|
||||
image_config = self.core.config.get("comfyui", {}).get("image", {})
|
||||
original_prefix = image_config.get("prompt_prefix")
|
||||
image_config["prompt_prefix"] = prompt_prefix
|
||||
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
|
||||
|
||||
try:
|
||||
# Create progress callback wrapper for image prompt generation
|
||||
def image_prompt_progress(completed: int, total: int, message: str):
|
||||
batch_progress = completed / total if total > 0 else 0
|
||||
overall_progress = 0.15 + (batch_progress * 0.15)
|
||||
self._report_progress(
|
||||
progress_callback,
|
||||
"generating_image_prompts",
|
||||
overall_progress,
|
||||
extra_info=message
|
||||
)
|
||||
|
||||
# Generate base image prompts
|
||||
base_image_prompts = await generate_image_prompts(
|
||||
self.llm,
|
||||
narrations=narrations,
|
||||
min_words=min_image_prompt_words,
|
||||
max_words=max_image_prompt_words,
|
||||
progress_callback=image_prompt_progress
|
||||
)
|
||||
|
||||
# Apply prompt prefix
|
||||
from pixelle_video.utils.prompt_helper import build_image_prompt
|
||||
image_config = self.core.config.get("comfyui", {}).get("image", {})
|
||||
prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
|
||||
|
||||
image_prompts = []
|
||||
for base_prompt in base_image_prompts:
|
||||
final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
|
||||
image_prompts.append(final_prompt)
|
||||
|
||||
finally:
|
||||
# Restore original prompt_prefix
|
||||
if original_prefix is not None:
|
||||
image_config["prompt_prefix"] = original_prefix
|
||||
|
||||
# Generate base image prompts
|
||||
base_image_prompts = await generate_image_prompts(
|
||||
self.llm,
|
||||
narrations=narrations,
|
||||
min_words=min_image_prompt_words,
|
||||
max_words=max_image_prompt_words,
|
||||
progress_callback=image_prompt_progress
|
||||
)
|
||||
|
||||
# Apply prompt prefix
|
||||
from pixelle_video.utils.prompt_helper import build_image_prompt
|
||||
image_config = self.core.config.get("comfyui", {}).get("image", {})
|
||||
prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
|
||||
|
||||
image_prompts = []
|
||||
for base_prompt in base_image_prompts:
|
||||
final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
|
||||
image_prompts.append(final_prompt)
|
||||
|
||||
finally:
|
||||
# Restore original prompt_prefix
|
||||
if original_prefix is not None:
|
||||
image_config["prompt_prefix"] = original_prefix
|
||||
|
||||
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
|
||||
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
|
||||
else:
|
||||
# Static template - skip image prompt generation entirely
|
||||
image_prompts = [None] * len(narrations)
|
||||
logger.info(f"⚡ Skipped image prompt generation (static template)")
|
||||
logger.info(f" 💡 Savings: {len(narrations)} LLM calls + {len(narrations)} media generations")
|
||||
|
||||
# ========== Step 3: Create frames ==========
|
||||
for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
|
||||
|
||||
Reference in New Issue
Block a user