Develop a WebUI feature for generating videos from image assets

Author: puke
Date: 2025-12-04 11:14:14 +08:00
parent ea784e0d06
commit 7425b9d23d
8 changed files with 896 additions and 104 deletions

View File

@@ -27,23 +27,27 @@ Example:
     result = await pipeline(
         assets=["/path/img1.jpg", "/path/img2.jpg"],
         video_title="Pet Store Year-End Sale",
-        style="warm and friendly",
+        intent="Promote our pet store's year-end sale with a warm and friendly tone",
         duration=30
     )
 """
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Callable
 from pathlib import Path
 from loguru import logger
 from pydantic import BaseModel, Field
 
 from pixelle_video.pipelines.linear import LinearVideoPipeline, PipelineContext
+from pixelle_video.models.progress import ProgressEvent
 from pixelle_video.utils.os_util import (
     create_task_output_dir,
     get_task_final_video_path
 )
 
+# Type alias for progress callback
+ProgressCallback = Optional[Callable[[ProgressEvent], None]]
+
 # ==================== Structured Output Models ====================
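A minimal usage sketch for the new callback hook (not part of the commit): it assumes only the ProgressEvent fields exercised elsewhere in this diff (event_type, progress, frame_current, frame_total, extra_info) and the pipeline call signature shown in the docstring above.

```python
from pixelle_video.models.progress import ProgressEvent

def on_progress(event: ProgressEvent) -> None:
    # Map the 0.0-1.0 progress value emitted by the pipeline to a percentage.
    pct = int(event.progress * 100)
    detail = f" [{event.frame_current}/{event.frame_total}]" if event.frame_total else ""
    print(f"{pct:3d}% {event.event_type}{detail} {event.extra_info or ''}")

# Hypothetical call site, mirroring the docstring example:
# result = await pipeline(assets=["/path/img1.jpg"], intent="...", progress_callback=on_progress)
```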
@@ -82,12 +86,12 @@ class AssetBasedPipeline(LinearVideoPipeline):
         assets: List[str],
         video_title: str = "",
         intent: Optional[str] = None,
-        style: str = "professional and engaging",
         duration: int = 30,
         source: str = "runninghub",
         bgm_path: Optional[str] = None,
         bgm_volume: float = 0.2,
         bgm_mode: str = "loop",
+        progress_callback: ProgressCallback = None,
         **kwargs
     ) -> PipelineContext:
         """
@@ -97,12 +101,12 @@ class AssetBasedPipeline(LinearVideoPipeline):
             assets: List of asset file paths
             video_title: Video title
             intent: Video intent/purpose (defaults to video_title)
-            style: Video style
             duration: Target duration in seconds
             source: Workflow source ("runninghub" or "selfhost")
             bgm_path: Path to background music file (optional)
             bgm_volume: BGM volume (0.0-1.0, default 0.2)
             bgm_mode: BGM mode ("loop" or "once", default "loop")
+            progress_callback: Optional callback for progress updates
             **kwargs: Additional parameters
 
         Returns:
@@ -110,6 +114,9 @@ class AssetBasedPipeline(LinearVideoPipeline):
         """
         from pixelle_video.pipelines.linear import PipelineContext
 
+        # Store progress callback
+        self._progress_callback = progress_callback
+
         # Create custom context with asset-specific parameters
         ctx = PipelineContext(
             input_text=intent or video_title,  # Use intent or title as input_text
@@ -117,7 +124,6 @@ class AssetBasedPipeline(LinearVideoPipeline):
                 "assets": assets,
                 "video_title": video_title,
                 "intent": intent or video_title,
-                "style": style,
                 "duration": duration,
                 "source": source,
                 "bgm_path": bgm_path,
@@ -147,6 +153,11 @@ class AssetBasedPipeline(LinearVideoPipeline):
             await self.handle_exception(ctx, e)
             raise
 
+    def _emit_progress(self, event: ProgressEvent):
+        """Emit progress event to callback if available"""
+        if self._progress_callback:
+            self._progress_callback(event)
+
     async def setup_environment(self, context: PipelineContext) -> PipelineContext:
         """
         Analyze uploaded assets and build asset index
@@ -172,7 +183,17 @@ class AssetBasedPipeline(LinearVideoPipeline):
         if not assets:
             raise ValueError("No assets provided. Please upload at least one image or video.")
 
-        logger.info(f"Found {len(assets)} assets to analyze")
+        total_assets = len(assets)
+        logger.info(f"Found {total_assets} assets to analyze")
+
+        # Emit initial progress (0-15% for asset analysis)
+        self._emit_progress(ProgressEvent(
+            event_type="analyzing_assets",
+            progress=0.01,
+            frame_current=0,
+            frame_total=total_assets,
+            extra_info="start"
+        ))
 
         self.asset_index = {}
@@ -183,7 +204,17 @@ class AssetBasedPipeline(LinearVideoPipeline):
                 logger.warning(f"Asset not found: {asset_path}")
                 continue
 
-            logger.info(f"Analyzing asset {i}/{len(assets)}: {asset_path_obj.name}")
+            logger.info(f"Analyzing asset {i}/{total_assets}: {asset_path_obj.name}")
+
+            # Emit progress for this asset
+            progress = 0.01 + (i - 1) / total_assets * 0.14  # 1% - 15%
+            self._emit_progress(ProgressEvent(
+                event_type="analyzing_asset",
+                progress=progress,
+                frame_current=i,
+                frame_total=total_assets,
+                extra_info=asset_path_obj.name
+            ))
 
             # Determine asset type
             asset_type = self._get_asset_type(asset_path_obj)
@@ -222,34 +253,35 @@ class AssetBasedPipeline(LinearVideoPipeline):
         # Store asset index in context
         context.asset_index = self.asset_index
 
+        # Emit completion of asset analysis
+        self._emit_progress(ProgressEvent(
+            event_type="analyzing_assets",
+            progress=0.15,
+            frame_current=total_assets,
+            frame_total=total_assets,
+            extra_info="complete"
+        ))
+
         return context
 
     async def determine_title(self, context: PipelineContext) -> PipelineContext:
         """
-        Use user-provided title or generate one via LLM
+        Use user-provided title if available, otherwise leave empty
 
         Args:
             context: Pipeline context
 
         Returns:
-            Updated context with title
+            Updated context with title (may be empty)
         """
-        from pixelle_video.utils.content_generators import generate_title
-
         title = context.request.get("video_title")
 
         if title:
             context.title = title
             logger.info(f"📝 Video title: {title} (user-specified)")
         else:
-            # Generate title from intent using LLM
-            intent = context.request.get("intent", context.input_text)
-            context.title = await generate_title(
-                self.core.llm,
-                content=intent,
-                strategy="llm"
-            )
-            logger.info(f"📝 Video title: {context.title} (LLM-generated)")
+            context.title = ""
+            logger.info(f"📝 No video title specified (will be hidden in template)")
 
         return context
@@ -267,10 +299,16 @@ class AssetBasedPipeline(LinearVideoPipeline):
         """
         logger.info("🤖 Generating video script with LLM...")
 
+        # Emit progress for script generation (15% - 25%)
+        self._emit_progress(ProgressEvent(
+            event_type="generating_script",
+            progress=0.16
+        ))
+
         # Build prompt for LLM
-        intent = context.request.get("intent", context.title)
-        style = context.request.get("style", "professional and engaging")
+        intent = context.request.get("intent", context.input_text)
         duration = context.request.get("duration", 30)
+        title = context.title  # May be empty if user didn't provide one
 
         # Prepare asset descriptions with full paths for LLM to reference
         asset_info = []
@@ -279,11 +317,13 @@ class AssetBasedPipeline(LinearVideoPipeline):
         assets_text = "\n".join(asset_info)
 
+        # Build title section for prompt (only if title is provided)
+        title_section = f"- Video Title: {title}\n" if title else ""
+
         prompt = f"""You are a video script writer. Generate a {duration}-second video script.
 
 ## Requirements
-- Intent: {intent}
-- Style: {style}
+{title_section}- Intent: {intent}
 - Target Duration: {duration} seconds
 
 ## Available Assets (use the exact path in your response)
@@ -295,6 +335,7 @@ class AssetBasedPipeline(LinearVideoPipeline):
 3. Each scene can have 1-5 narration sentences
 4. Try to use all available assets, but it's OK to reuse if needed
 5. Total duration of all scenes should be approximately {duration} seconds
+{f"6. The narrations should align with the video title: {title}" if title else ""}
 
 ## Output Requirements
 For each scene, provide:
@@ -337,6 +378,13 @@ Generate the video script now:"""
 
         logger.success(f"✅ Generated script with {len(context.script)} scenes")
 
+        # Emit progress after script generation
+        self._emit_progress(ProgressEvent(
+            event_type="generating_script",
+            progress=0.25,
+            extra_info="complete"
+        ))
+
         # Log script preview
         for scene in context.script:
             narrations = scene.get("narrations", [])
@@ -413,7 +461,7 @@ Generate the video script now:"""
         context.narrations = all_narrations
 
         # Get template dimensions
-        template_name = context.params.get("frame_template", "1080x1920/image_default.html")
+        template_name = "1080x1920/image_pure.html"
 
         # Extract dimensions from template name (e.g., "1080x1920")
         try:
             dims = template_name.split("/")[0].split("x")
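As a quick illustration of the dimension parsing above, using only the template name introduced in this commit:

```python
template_name = "1080x1920/image_pure.html"
dims = template_name.split("/")[0].split("x")  # -> ["1080", "1920"]
width, height = int(dims[0]), int(dims[1])     # -> 1080 x 1920 (portrait 9:16)
```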
@@ -492,9 +540,25 @@ Generate the video script now:"""
         storyboard = context.storyboard
         config = context.config
 
+        total_frames = len(storyboard.frames)
+
+        # Progress range: 30% - 85% for frame production
+        base_progress = 0.30
+        progress_range = 0.55  # 85% - 30%
+
         for i, frame in enumerate(storyboard.frames, 1):
-            logger.info(f"Producing scene {i}/{len(storyboard.frames)}...")
+            logger.info(f"Producing scene {i}/{total_frames}...")
+
+            # Emit progress for this frame (each frame has 4 steps: audio, combine, duration, compose)
+            frame_progress = base_progress + (i - 1) / total_frames * progress_range
+            self._emit_progress(ProgressEvent(
+                event_type="frame_step",
+                progress=frame_progress,
+                frame_current=i,
+                frame_total=total_frames,
+                step=1,
+                action="audio"
+            ))
 
             # Get scene data with narrations
             scene = frame._scene_data
@@ -524,6 +588,17 @@ Generate the video script now:"""
             if len(narration_audios) > 1:
                 from pixelle_video.utils.os_util import get_task_frame_path
 
+                # Emit progress for combining audio
+                frame_progress = base_progress + ((i - 1) + 0.25) / total_frames * progress_range
+                self._emit_progress(ProgressEvent(
+                    event_type="frame_step",
+                    progress=frame_progress,
+                    frame_current=i,
+                    frame_total=total_frames,
+                    step=2,
+                    action="audio"
+                ))
+
                 combined_audio_path = Path(context.task_dir) / "frames" / f"{i:02d}_audio.mp3"
 
                 # Use FFmpeg to concatenate audio files
@@ -564,6 +639,17 @@ Generate the video script now:"""
             # Since we already have the audio and image, we bypass some steps
             # by manually calling the composition steps
 
+            # Emit progress for duration calculation
+            frame_progress = base_progress + ((i - 1) + 0.5) / total_frames * progress_range
+            self._emit_progress(ProgressEvent(
+                event_type="frame_step",
+                progress=frame_progress,
+                frame_current=i,
+                frame_total=total_frames,
+                step=3,
+                action="compose"
+            ))
+
             # Get audio duration for frame duration
             import subprocess
             duration_cmd = [
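The contents of duration_cmd sit in unchanged context outside this hunk, so the diff does not show them. A typical ffprobe invocation for reading a file's duration in seconds looks like the sketch below; the exact flags the project uses are an assumption, as is the audio_path variable.

```python
audio_path = "frames/01_audio.mp3"  # hypothetical input file
duration_cmd = [
    "ffprobe", "-v", "error",
    "-show_entries", "format=duration",           # print only the duration field
    "-of", "default=noprint_wrappers=1:nokey=1",  # bare value, e.g. "4.128000"
    audio_path,
]
```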
@@ -576,16 +662,35 @@ Generate the video script now:"""
             duration_result = subprocess.run(duration_cmd, capture_output=True, text=True, check=True)
             frame.duration = float(duration_result.stdout.strip())
 
+            # Emit progress for video composition
+            frame_progress = base_progress + ((i - 1) + 0.75) / total_frames * progress_range
+            self._emit_progress(ProgressEvent(
+                event_type="frame_step",
+                progress=frame_progress,
+                frame_current=i,
+                frame_total=total_frames,
+                step=4,
+                action="video"
+            ))
+
             # Use FrameProcessor for proper composition
             processed_frame = await self.core.frame_processor(
                 frame=frame,
                 storyboard=storyboard,
                 config=config,
-                total_frames=len(storyboard.frames)
+                total_frames=total_frames
             )
 
             logger.success(f"✅ Scene {i} complete")
 
+        # Emit completion of frame production
+        self._emit_progress(ProgressEvent(
+            event_type="processing_frame",
+            progress=0.85,
+            frame_current=total_frames,
+            frame_total=total_frames
+        ))
+
         return context
 
     async def post_production(self, context: PipelineContext) -> PipelineContext:
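To make the progress arithmetic concrete, here is a worked example using the same formula as the hunk above (values chosen purely for illustration):

```python
base_progress, progress_range = 0.30, 0.55  # frame production spans 30%-85%
total_frames, i = 5, 3                      # five scenes, currently on scene 3
# Step 4 (video composition) adds a 0.75 intra-frame offset:
frame_progress = base_progress + ((i - 1) + 0.75) / total_frames * progress_range
print(round(frame_progress, 4))  # 0.6025 -> shown as 60% in the progress bar
```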
@@ -600,6 +705,12 @@ Generate the video script now:"""
         """
         logger.info("🎞️ Concatenating scenes...")
 
+        # Emit progress for concatenation (85% - 95%)
+        self._emit_progress(ProgressEvent(
+            event_type="concatenating",
+            progress=0.86
+        ))
+
         # Collect video segments from storyboard frames
         scene_videos = [frame.video_segment_path for frame in context.storyboard.frames]
@@ -626,6 +737,13 @@ Generate the video script now:"""
         logger.success(f"✅ Final video: {final_video_path}")
 
+        # Emit completion of concatenation
+        self._emit_progress(ProgressEvent(
+            event_type="concatenating",
+            progress=0.95,
+            extra_info="complete"
+        ))
+
         return context
 
     async def finalize(self, context: PipelineContext) -> PipelineContext:
@@ -641,8 +759,84 @@ Generate the video script now:"""
         logger.success(f"🎉 Asset-based video generation complete!")
         logger.info(f"Video: {context.final_video_path}")
 
+        # Emit completion
+        self._emit_progress(ProgressEvent(
+            event_type="completed",
+            progress=1.0
+        ))
+
+        # Persist metadata for history tracking
+        await self._persist_task_data(context)
+
         return context
 
+    async def _persist_task_data(self, ctx: PipelineContext):
+        """
+        Persist task metadata and storyboard to filesystem for history tracking
+        """
+        from pathlib import Path
+
+        try:
+            storyboard = ctx.storyboard
+            task_id = ctx.task_id
+
+            if not task_id:
+                logger.warning("No task_id in context, skipping persistence")
+                return
+
+            # Get file size
+            video_path_obj = Path(ctx.final_video_path)
+            file_size = video_path_obj.stat().st_size if video_path_obj.exists() else 0
+
+            # Build metadata
+            input_params = {
+                "text": ctx.input_text,
+                "mode": "asset_based",
+                "title": ctx.title or "",
+                "n_scenes": len(storyboard.frames) if storyboard else 0,
+                "assets": ctx.request.get("assets", []),
+                "intent": ctx.request.get("intent"),
+                "duration": ctx.request.get("duration"),
+                "source": ctx.request.get("source"),
+                "voice_id": ctx.request.get("voice_id"),
+                "tts_speed": ctx.request.get("tts_speed"),
+            }
+
+            metadata = {
+                "task_id": task_id,
+                "created_at": storyboard.created_at.isoformat() if storyboard and storyboard.created_at else None,
+                "completed_at": storyboard.completed_at.isoformat() if storyboard and storyboard.completed_at else None,
+                "status": "completed",
+                "input": input_params,
+                "result": {
+                    "video_path": ctx.final_video_path,
+                    "duration": storyboard.total_duration if storyboard else 0,
+                    "file_size": file_size,
+                    "n_frames": len(storyboard.frames) if storyboard else 0
+                },
+                "config": {
+                    "llm_model": self.core.config.get("llm", {}).get("model", "unknown"),
+                    "llm_base_url": self.core.config.get("llm", {}).get("base_url", "unknown"),
+                    "source": ctx.request.get("source", "runninghub"),
+                }
+            }
+
+            # Save metadata
+            await self.core.persistence.save_task_metadata(task_id, metadata)
+            logger.info(f"💾 Saved task metadata: {task_id}")
+
+            # Save storyboard
+            if storyboard:
+                await self.core.persistence.save_storyboard(task_id, storyboard)
+                logger.info(f"💾 Saved storyboard: {task_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to persist task data: {e}")
+            # Don't raise - persistence failure shouldn't break video generation
+
     # Helper methods
 
     def _get_asset_type(self, path: Path) -> str:

View File

@@ -0,0 +1,145 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<style>
html {
margin: 0;
padding: 0;
}
body {
margin: 0;
padding: 0;
width: 1080px;
height: 1920px;
font-family: 'PingFang SC', 'Source Han Sans', 'Microsoft YaHei', sans-serif;
background: #000;
overflow: hidden;
}
.page-container {
width: 1080px;
height: 1920px;
position: relative;
overflow: hidden;
}
/* 1. Background Image Layer */
.background-layer {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
z-index: 0;
}
.background-layer img {
width: 100%;
height: 100%;
object-fit: contain;
display: block;
}
/* 2. Gradient Overlay
Ensures text readability regardless of image brightness
Top: Darker for Title
Middle: Transparent for Image visibility
Bottom: Darker for Subtitles
*/
.gradient-overlay {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
z-index: 1;
background: linear-gradient(
to bottom,
rgba(0,0,0,0.6) 0%,
rgba(0,0,0,0.1) 25%,
rgba(0,0,0,0.1) 60%,
rgba(0,0,0,0.8) 100%
);
}
/* 3. Content Layer */
.content-layer {
position: relative;
z-index: 2;
width: 100%;
height: 100%;
padding: 120px 80px 0px 80px; /* Top, Right, Bottom, Left */
box-sizing: border-box;
display: flex;
flex-direction: column;
justify-content: flex-start;
color: #ffffff;
}
/* Title Styling */
.video-title {
font-size: 80px;
font-weight: 700;
line-height: 1.2;
text-shadow: 0 4px 12px rgba(0,0,0,0.5);
margin-bottom: 40px;
text-align: center;
}
/* Hide title when empty */
.video-title:empty {
display: none;
}
/* Flex spacer to push subtitle to bottom */
.spacer {
flex-grow: 1;
}
/* Narration/Subtitle Styling */
.subtitle-wrapper {
margin-bottom: 60px;
}
.text {
font-size: 52px;
font-weight: 500;
line-height: 1.6;
text-align: center;
text-shadow: 0 2px 8px rgba(0,0,0,0.6);
backdrop-filter: blur(4px);
}
</style>
</head>
<body>
<div class="page-container">
<!-- Background Image -->
<div class="background-layer">
<img src="{{image}}" alt="Background">
</div>
<!-- Shadow Overlay for Text Readability -->
<div class="gradient-overlay"></div>
<!-- Main Content -->
<div class="content-layer">
<!-- Top Section: Title -->
<div class="video-title">
{{title}}
</div>
<!-- Spacer pushes content apart -->
<div class="spacer"></div>
<!-- Bottom Section: Narration/Text -->
<div class="subtitle-wrapper">
<div class="text">{{text}}</div>
</div>
</div>
</div>
</body>
</html>
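The template references three placeholders: {{image}}, {{title}}, and {{text}}. The renderer that fills them is not part of this diff; below is a minimal sketch, assuming plain string substitution (the function name render_frame_html is hypothetical).

```python
from pathlib import Path

def render_frame_html(template_path: str, image: str, title: str, text: str) -> str:
    # Minimal sketch: substitute the three placeholders used by image_pure.html.
    html = Path(template_path).read_text(encoding="utf-8")
    for key, value in {"image": image, "title": title, "text": text}.items():
        html = html.replace("{{" + key + "}}", value)
    return html
```

When title is empty, the `.video-title:empty` rule above is intended to hide the title block entirely, matching the pipeline's "will be hidden in template" behavior.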

View File

@@ -169,7 +169,7 @@ def render_content_input():
     }
 
-def render_bgm_section():
+def render_bgm_section(key_prefix=""):
     """Render BGM selection section"""
     with st.container(border=True):
         st.markdown(f"**{tr('section.bgm')}**")
 
@@ -204,7 +204,8 @@ def render_bgm_section():
         "BGM",
         bgm_options,
         index=default_index,
-        label_visibility="collapsed"
+        label_visibility="collapsed",
+        key=f"{key_prefix}bgm_selector"
     )
 
     # BGM volume slider (only show when BGM is selected)
 
@@ -216,7 +217,7 @@ def render_bgm_section():
             value=0.2,
             step=0.01,
             format="%.2f",
-            key="bgm_volume_slider",
+            key=f"{key_prefix}bgm_volume_slider",
             help=tr("bgm.volume_help")
         )
     else:
 
@@ -224,7 +225,7 @@ def render_bgm_section():
     # BGM preview button (only if BGM is not "None")
     if bgm_choice != tr("bgm.none"):
-        if st.button(tr("bgm.preview"), key="preview_bgm", use_container_width=True):
+        if st.button(tr("bgm.preview"), key=f"{key_prefix}preview_bgm", use_container_width=True):
             from pixelle_video.utils.os_util import get_resource_path, resource_exists
             try:
                 if resource_exists("bgm", bgm_choice):
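Streamlit errors out when two widgets on the same page share a key, so the new key_prefix parameter lets this component be rendered by more than one pipeline page. Usage, matching the new asset-based UI below:

```python
# In the new asset-based page (see web/pipelines/asset_based.py below):
bgm_params = render_bgm_section(key_prefix="asset_")

# The standard page keeps its original widget keys by omitting the prefix:
bgm_params = render_bgm_section()
```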

View File

@@ -332,7 +332,44 @@
     "batch.error": "Error",
     "batch.error_detail": "View detailed error stack",
     "pipeline.standard.name": "Standard Video",
-    "pipeline.demo.name": "Demo Feature",
-    "pipeline.demo.description": "A demo pipeline with a custom layout"
+    "pipeline.asset_based.name": "Asset-Based Video",
+    "pipeline.asset_based.description": "Generate videos from user-provided assets",
+    "asset_based.section.assets": "📦 Asset Upload",
+    "asset_based.section.video_info": "📝 Video Information",
+    "asset_based.section.source": "⚙️ Service Configuration",
+    "asset_based.assets.what": "Upload your images or video assets, AI will automatically analyze them and generate a video script",
+    "asset_based.assets.how": "Supports JPG/PNG/GIF/WebP images and MP4/MOV/AVI videos. Each asset should be clear and relevant",
+    "asset_based.assets.upload": "Upload Assets",
+    "asset_based.assets.upload_help": "Supports multiple image or video files",
+    "asset_based.assets.count": "✅ Uploaded {count} assets",
+    "asset_based.assets.preview": "📷 Asset Preview",
+    "asset_based.assets.empty_hint": "💡 Please upload at least one image or video asset",
+    "asset_based.video_title": "Video Title (Optional)",
+    "asset_based.video_title_placeholder": "e.g., Pet Store Year-End Sale",
+    "asset_based.video_title_help": "Main title for the video, leave empty to hide title",
+    "asset_based.intent": "Video Intent",
+    "asset_based.intent_placeholder": "e.g., Promote our pet store's year-end special offers to attract more customers, use a warm and friendly tone",
+    "asset_based.intent_help": "Describe the purpose, message, and desired style of this video",
+    "asset_based.duration": "Target Duration (seconds)",
+    "asset_based.duration_help": "Expected video duration, AI will adjust based on asset count",
+    "asset_based.duration_label": "Target Duration: {seconds}s",
+    "asset_based.source.what": "Select the service provider for image analysis",
+    "asset_based.source.how": "RunningHub is a cloud service requiring API Key; SelfHost uses local ComfyUI",
+    "asset_based.source.select": "Select Service",
+    "asset_based.source.runninghub": "☁️ RunningHub (Cloud)",
+    "asset_based.source.selfhost": "🖥️ SelfHost (Local)",
+    "asset_based.source.runninghub_hint": "💡 Using RunningHub cloud service for asset analysis",
+    "asset_based.source.selfhost_hint": "💡 Using local ComfyUI service for asset analysis",
+    "asset_based.source.runninghub_not_configured": "⚠️ RunningHub API Key not configured",
+    "asset_based.source.selfhost_not_configured": "⚠️ Local ComfyUI URL not configured",
+    "asset_based.output.no_assets": "💡 Please upload assets on the left first",
+    "asset_based.output.ready": "📦 {count} assets ready, you can start generating",
+    "asset_based.progress.analyzing": "🔍 Analyzing assets...",
+    "asset_based.progress.analyzing_start": "🔍 Starting to analyze {total} assets...",
+    "asset_based.progress.analyzing_asset": "🔍 Analyzing asset {current}/{total}: {name}",
+    "asset_based.progress.analyzing_complete": "✅ Asset analysis complete ({count} total)",
+    "asset_based.progress.generating_script": "📝 Generating video script...",
+    "asset_based.progress.script_complete": "✅ Script generation complete",
+    "asset_based.progress.concat_complete": "✅ Video concatenation complete"
   }
 }
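These strings use {placeholder} fields that tr() fills from keyword arguments; this assumes str.format-style interpolation, which the call sites in the new UI module below rely on:

```python
from web.i18n import tr

# With count=3, the en locale yields "✅ Uploaded 3 assets"
# and the zh locale "✅ 已上传 3 个素材".
message = tr("asset_based.assets.count", count=3)
```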

View File

@@ -332,7 +332,44 @@
     "batch.error": "错误信息",
     "batch.error_detail": "查看详细错误堆栈",
     "pipeline.standard.name": "标准视频",
-    "pipeline.demo.name": "演示功能",
-    "pipeline.demo.description": "具有自定义布局的演示 Pipeline"
+    "pipeline.asset_based.name": "素材视频",
+    "pipeline.asset_based.description": "基于用户上传的素材生成视频",
+    "asset_based.section.assets": "📦 素材上传",
+    "asset_based.section.video_info": "📝 视频信息",
+    "asset_based.section.source": "⚙️ 服务配置",
+    "asset_based.assets.what": "上传您的图片或视频素材,AI 将自动分析并生成视频脚本",
+    "asset_based.assets.how": "支持 JPG/PNG/GIF/WebP 图片和 MP4/MOV/AVI 等视频格式,建议每个素材清晰且内容相关",
+    "asset_based.assets.upload": "上传素材",
+    "asset_based.assets.upload_help": "支持多个图片或视频文件",
+    "asset_based.assets.count": "✅ 已上传 {count} 个素材",
+    "asset_based.assets.preview": "📷 素材预览",
+    "asset_based.assets.empty_hint": "💡 请上传至少一个图片或视频素材",
+    "asset_based.video_title": "视频标题(选填)",
+    "asset_based.video_title_placeholder": "例如:宠物店年终大促",
+    "asset_based.video_title_help": "视频的主标题,留空则不显示标题",
+    "asset_based.intent": "视频意图",
+    "asset_based.intent_placeholder": "例如:宣传我们的宠物店年终特惠活动,吸引更多客户到店消费,风格要温馨亲切",
+    "asset_based.intent_help": "描述这个视频的目的、想传达的信息以及期望的风格",
+    "asset_based.duration": "目标时长(秒)",
+    "asset_based.duration_help": "视频的预期时长,AI 会根据素材数量和时长进行调整",
+    "asset_based.duration_label": "目标时长:{seconds} 秒",
+    "asset_based.source.what": "选择用于图像分析的服务提供商",
+    "asset_based.source.how": "RunningHub 是云端服务,需配置 API Key;SelfHost 是本地 ComfyUI 服务",
+    "asset_based.source.select": "选择服务",
+    "asset_based.source.runninghub": "☁️ RunningHub(云端)",
+    "asset_based.source.selfhost": "🖥️ SelfHost(本地)",
+    "asset_based.source.runninghub_hint": "💡 使用 RunningHub 云端服务分析素材",
+    "asset_based.source.selfhost_hint": "💡 使用本地 ComfyUI 服务分析素材",
+    "asset_based.source.runninghub_not_configured": "⚠️ 未配置 RunningHub API Key",
+    "asset_based.source.selfhost_not_configured": "⚠️ 未配置本地 ComfyUI 地址",
+    "asset_based.output.no_assets": "💡 请先在左侧上传素材",
+    "asset_based.output.ready": "📦 已准备好 {count} 个素材,可以开始生成",
+    "asset_based.progress.analyzing": "🔍 正在分析素材...",
+    "asset_based.progress.analyzing_start": "🔍 开始分析 {total} 个素材...",
+    "asset_based.progress.analyzing_asset": "🔍 分析素材 {current}/{total}:{name}",
+    "asset_based.progress.analyzing_complete": "✅ 素材分析完成(共 {count} 个)",
+    "asset_based.progress.generating_script": "📝 正在生成视频脚本...",
+    "asset_based.progress.script_complete": "✅ 脚本生成完成",
+    "asset_based.progress.concat_complete": "✅ 视频合成完成"
   }
 }

View File

@@ -25,7 +25,7 @@ from web.pipelines.base import (
 
 # Import all pipeline UI modules to ensure they register themselves
 from web.pipelines import standard
-from web.pipelines import demo
+from web.pipelines import asset_based
 
 __all__ = [
     "PipelineUI",

View File

@@ -0,0 +1,447 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Asset-Based Pipeline UI
Implements the UI for generating videos from user-provided assets.
"""
import os
import time
from pathlib import Path
from typing import Any
import streamlit as st
from loguru import logger
from web.i18n import tr, get_language
from web.pipelines.base import PipelineUI, register_pipeline_ui
from web.components.content_input import render_bgm_section, render_version_info
from web.utils.async_helpers import run_async
from pixelle_video.config import config_manager
from pixelle_video.models.progress import ProgressEvent
class AssetBasedPipelineUI(PipelineUI):
"""
UI for the Asset-Based Video Generation Pipeline.
Generates videos from user-provided assets (images/videos).
"""
name = "asset_based"
icon = "📦"
@property
def display_name(self):
return tr("pipeline.asset_based.name")
@property
def description(self):
return tr("pipeline.asset_based.description")
def render(self, pixelle_video: Any):
# Three-column layout
left_col, middle_col, right_col = st.columns([1, 1, 1])
# ====================================================================
# Left Column: Asset Upload & Video Info
# ====================================================================
with left_col:
asset_params = self._render_asset_input()
bgm_params = render_bgm_section(key_prefix="asset_")
render_version_info()
# ====================================================================
# Middle Column: Video Configuration
# ====================================================================
with middle_col:
config_params = self._render_video_config(pixelle_video)
# ====================================================================
# Right Column: Output Preview
# ====================================================================
with right_col:
# Combine all parameters
video_params = {
"pipeline": self.name,
**asset_params,
**bgm_params,
**config_params
}
self._render_output_preview(pixelle_video, video_params)
def _render_asset_input(self) -> dict:
"""Render asset upload section"""
with st.container(border=True):
st.markdown(f"**{tr('asset_based.section.assets')}**")
with st.expander(tr("help.feature_description"), expanded=False):
st.markdown(f"**{tr('help.what')}**")
st.markdown(tr("asset_based.assets.what"))
st.markdown(f"**{tr('help.how')}**")
st.markdown(tr("asset_based.assets.how"))
# File uploader for multiple files
uploaded_files = st.file_uploader(
tr("asset_based.assets.upload"),
type=["jpg", "jpeg", "png", "gif", "webp", "mp4", "mov", "avi", "mkv", "webm"],
accept_multiple_files=True,
help=tr("asset_based.assets.upload_help"),
key="asset_files"
)
# Save uploaded files to temp directory with unique session ID
asset_paths = []
if uploaded_files:
import uuid
session_id = str(uuid.uuid4()).replace('-', '')[:12]
temp_dir = Path(f"temp/assets_{session_id}")
temp_dir.mkdir(parents=True, exist_ok=True)
for uploaded_file in uploaded_files:
file_path = temp_dir / uploaded_file.name
with open(file_path, "wb") as f:
f.write(uploaded_file.getbuffer())
asset_paths.append(str(file_path.absolute()))
st.success(tr("asset_based.assets.count", count=len(asset_paths)))
# Preview uploaded assets
with st.expander(tr("asset_based.assets.preview"), expanded=True):
# Show in a grid (3 columns)
cols = st.columns(3)
for i, (file, path) in enumerate(zip(uploaded_files, asset_paths)):
with cols[i % 3]:
# Check if image or video
ext = Path(path).suffix.lower()
if ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]:
st.image(file, caption=file.name, use_container_width=True)
elif ext in [".mp4", ".mov", ".avi", ".mkv", ".webm"]:
st.video(file)
st.caption(file.name)
else:
st.info(tr("asset_based.assets.empty_hint"))
# Video title & intent
with st.container(border=True):
st.markdown(f"**{tr('asset_based.section.video_info')}**")
video_title = st.text_input(
tr("asset_based.video_title"),
placeholder=tr("asset_based.video_title_placeholder"),
help=tr("asset_based.video_title_help"),
key="asset_video_title"
)
intent = st.text_area(
tr("asset_based.intent"),
placeholder=tr("asset_based.intent_placeholder"),
help=tr("asset_based.intent_help"),
height=100,
key="asset_intent"
)
return {
"assets": asset_paths,
"video_title": video_title,
"intent": intent if intent else None
}
def _render_video_config(self, pixelle_video: Any) -> dict:
"""Render video configuration section"""
# Duration configuration
with st.container(border=True):
st.markdown(f"**{tr('video.title')}**")
# Duration slider
duration = st.slider(
tr("asset_based.duration"),
min_value=15,
max_value=120,
value=30,
step=5,
help=tr("asset_based.duration_help"),
key="asset_duration"
)
st.caption(tr("asset_based.duration_label", seconds=duration))
# Workflow source selection
with st.container(border=True):
st.markdown(f"**{tr('asset_based.section.source')}**")
with st.expander(tr("help.feature_description"), expanded=False):
st.markdown(f"**{tr('help.what')}**")
st.markdown(tr("asset_based.source.what"))
st.markdown(f"**{tr('help.how')}**")
st.markdown(tr("asset_based.source.how"))
source_options = {
"runninghub": tr("asset_based.source.runninghub"),
"selfhost": tr("asset_based.source.selfhost")
}
# Check if RunningHub API key is configured
comfyui_config = config_manager.get_comfyui_config()
has_runninghub = bool(comfyui_config.get("runninghub_api_key"))
has_selfhost = bool(comfyui_config.get("comfyui_url"))
# Default to available source
if has_runninghub:
default_source_index = 0
elif has_selfhost:
default_source_index = 1
else:
default_source_index = 0
source = st.radio(
tr("asset_based.source.select"),
options=list(source_options.keys()),
format_func=lambda x: source_options[x],
index=default_source_index,
horizontal=True,
key="asset_source",
label_visibility="collapsed"
)
# Show hint based on selection
if source == "runninghub":
if not has_runninghub:
st.warning(tr("asset_based.source.runninghub_not_configured"))
else:
st.info(tr("asset_based.source.runninghub_hint"))
else:
if not has_selfhost:
st.warning(tr("asset_based.source.selfhost_not_configured"))
else:
st.info(tr("asset_based.source.selfhost_hint"))
# TTS configuration
with st.container(border=True):
st.markdown(f"**{tr('section.tts')}**")
# Import voice configuration
from pixelle_video.tts_voices import EDGE_TTS_VOICES, get_voice_display_name
# Get saved voice from config
comfyui_config = config_manager.get_comfyui_config()
tts_config = comfyui_config.get("tts", {})
local_config = tts_config.get("local", {})
saved_voice = local_config.get("voice", "zh-CN-YunjianNeural")
saved_speed = local_config.get("speed", 1.2)
# Build voice options with i18n
voice_options = []
voice_ids = []
default_voice_index = 0
for idx, voice_config in enumerate(EDGE_TTS_VOICES):
voice_id = voice_config["id"]
display_name = get_voice_display_name(voice_id, tr, get_language())
voice_options.append(display_name)
voice_ids.append(voice_id)
if voice_id == saved_voice:
default_voice_index = idx
# Two-column layout
voice_col, speed_col = st.columns([1, 1])
with voice_col:
selected_voice_display = st.selectbox(
tr("tts.voice_selector"),
voice_options,
index=default_voice_index,
key="asset_tts_voice"
)
selected_voice_index = voice_options.index(selected_voice_display)
voice_id = voice_ids[selected_voice_index]
with speed_col:
tts_speed = st.slider(
tr("tts.speed"),
min_value=0.5,
max_value=2.0,
value=saved_speed,
step=0.1,
format="%.1fx",
key="asset_tts_speed"
)
st.caption(tr("tts.speed_label", speed=f"{tts_speed:.1f}"))
return {
"duration": duration,
"source": source,
"voice_id": voice_id,
"tts_speed": tts_speed
}
def _render_output_preview(self, pixelle_video: Any, video_params: dict):
"""Render output preview section"""
with st.container(border=True):
st.markdown(f"**{tr('section.video_generation')}**")
# Check configuration
if not config_manager.validate():
st.warning(tr("settings.not_configured"))
# Check if assets are provided
assets = video_params.get("assets", [])
if not assets:
st.info(tr("asset_based.output.no_assets"))
st.button(
tr("btn.generate"),
type="primary",
use_container_width=True,
disabled=True,
key="asset_generate_disabled"
)
return
# Show asset summary
st.info(tr("asset_based.output.ready", count=len(assets)))
# Generate button
if st.button(tr("btn.generate"), type="primary", use_container_width=True, key="asset_generate"):
# Validate
if not config_manager.validate():
st.error(tr("settings.not_configured"))
st.stop()
# Show progress
progress_bar = st.progress(0)
status_text = st.empty()
start_time = time.time()
try:
# Import pipeline
from pixelle_video.pipelines.asset_based import AssetBasedPipeline
# Create pipeline
pipeline = AssetBasedPipeline(pixelle_video)
# Progress callback
def update_progress(event: ProgressEvent):
if event.event_type == "analyzing_assets":
if event.extra_info == "start":
message = tr("asset_based.progress.analyzing_start", total=event.frame_total)
else:
message = tr("asset_based.progress.analyzing_complete", count=event.frame_total)
elif event.event_type == "analyzing_asset":
message = tr(
"asset_based.progress.analyzing_asset",
current=event.frame_current,
total=event.frame_total,
name=event.extra_info or ""
)
elif event.event_type == "generating_script":
if event.extra_info == "complete":
message = tr("asset_based.progress.script_complete")
else:
message = tr("asset_based.progress.generating_script")
elif event.event_type == "frame_step":
action_key = f"progress.step_{event.action}"
action_text = tr(action_key)
message = tr(
"progress.frame_step",
current=event.frame_current,
total=event.frame_total,
step=event.step,
action=action_text
)
elif event.event_type == "processing_frame":
message = tr(
"progress.frame",
current=event.frame_current,
total=event.frame_total
)
elif event.event_type == "concatenating":
if event.extra_info == "complete":
message = tr("asset_based.progress.concat_complete")
else:
message = tr("progress.concatenating")
elif event.event_type == "completed":
message = tr("progress.completed")
else:
message = tr(f"progress.{event.event_type}")
status_text.text(message)
progress_bar.progress(min(int(event.progress * 100), 99))
# Execute pipeline with progress callback
ctx = run_async(pipeline(
assets=video_params["assets"],
video_title=video_params.get("video_title", ""),
intent=video_params.get("intent"),
duration=video_params.get("duration", 30),
source=video_params.get("source", "runninghub"),
bgm_path=video_params.get("bgm_path"),
bgm_volume=video_params.get("bgm_volume", 0.2),
bgm_mode=video_params.get("bgm_mode", "loop"),
voice_id=video_params.get("voice_id", "zh-CN-YunjianNeural"),
tts_speed=video_params.get("tts_speed", 1.2),
progress_callback=update_progress
))
total_time = time.time() - start_time
progress_bar.progress(100)
status_text.text(tr("status.success"))
# Display result
st.success(tr("status.video_generated", path=ctx.final_video_path))
st.markdown("---")
# Video info
if os.path.exists(ctx.final_video_path):
file_size_mb = os.path.getsize(ctx.final_video_path) / (1024 * 1024)
n_scenes = len(ctx.storyboard.frames) if ctx.storyboard else 0
info_text = (
f"⏱️ {tr('info.generation_time')} {total_time:.1f}s "
f"📦 {file_size_mb:.2f}MB "
f"🎬 {n_scenes}{tr('info.scenes_unit')}"
)
st.caption(info_text)
st.markdown("---")
# Video preview
st.video(ctx.final_video_path)
# Download button
with open(ctx.final_video_path, "rb") as video_file:
video_bytes = video_file.read()
video_filename = os.path.basename(ctx.final_video_path)
st.download_button(
label="⬇️ 下载视频" if get_language() == "zh_CN" else "⬇️ Download Video",
data=video_bytes,
file_name=video_filename,
mime="video/mp4",
use_container_width=True
)
else:
st.error(tr("status.video_not_found", path=ctx.final_video_path))
except Exception as e:
status_text.text("")
progress_bar.empty()
st.error(tr("status.error", error=str(e)))
logger.exception(e)
st.stop()
# Register self
register_pipeline_ui(AssetBasedPipelineUI)

View File

@@ -1,69 +0,0 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Demo Pipeline UI
Implements a custom layout for the Demo Pipeline.
"""
import streamlit as st
from typing import Any
from web.i18n import tr
from web.pipelines.base import PipelineUI, register_pipeline_ui
class DemoPipelineUI(PipelineUI):
"""
Demo UI to verify the full-page plugin system.
Uses a completely different layout (2 columns).
"""
name = "demo"
icon = ""
@property
def display_name(self):
return tr("pipeline.demo.name")
@property
def description(self):
return tr("pipeline.demo.description")
def render(self, pixelle_video: Any):
st.markdown("### ✨ Demo Pipeline Custom Layout")
st.info("This pipeline uses a custom 2-column layout, demonstrating full UI control.")
col1, col2 = st.columns([2, 1])
with col1:
with st.container(border=True):
st.subheader("1. Input")
topic = st.text_input("Enter Topic", placeholder="e.g. AI News")
mood = st.selectbox("Mood", ["Happy", "Serious", "Funny"])
st.markdown("---")
st.subheader("2. Settings")
# Simplified settings for demo
n_scenes = st.slider("Scenes", 3, 10, 5)
with col2:
with st.container(border=True):
st.subheader("3. Generate")
if st.button("🚀 Generate Demo Video", type="primary", use_container_width=True):
# Mock generation logic or call backend
st.success(f"Generating video for '{topic}' ({mood}) with {n_scenes} scenes...")
st.balloons()
# Register self
register_pipeline_ui(DemoPipelineUI)