开发基于图片素材生成视频的webui功能

This commit is contained in:
puke
2025-12-04 11:14:14 +08:00
parent ea784e0d06
commit 7425b9d23d
8 changed files with 896 additions and 104 deletions

View File

@@ -169,7 +169,7 @@ def render_content_input():
}
def render_bgm_section():
def render_bgm_section(key_prefix=""):
"""Render BGM selection section"""
with st.container(border=True):
st.markdown(f"**{tr('section.bgm')}**")
@@ -204,7 +204,8 @@ def render_bgm_section():
"BGM",
bgm_options,
index=default_index,
label_visibility="collapsed"
label_visibility="collapsed",
key=f"{key_prefix}bgm_selector"
)
# BGM volume slider (only show when BGM is selected)
@@ -216,7 +217,7 @@ def render_bgm_section():
value=0.2,
step=0.01,
format="%.2f",
key="bgm_volume_slider",
key=f"{key_prefix}bgm_volume_slider",
help=tr("bgm.volume_help")
)
else:
@@ -224,7 +225,7 @@ def render_bgm_section():
# BGM preview button (only if BGM is not "None")
if bgm_choice != tr("bgm.none"):
if st.button(tr("bgm.preview"), key="preview_bgm", use_container_width=True):
if st.button(tr("bgm.preview"), key=f"{key_prefix}preview_bgm", use_container_width=True):
from pixelle_video.utils.os_util import get_resource_path, resource_exists
try:
if resource_exists("bgm", bgm_choice):

View File

@@ -332,7 +332,44 @@
"batch.error": "Error",
"batch.error_detail": "View detailed error stack",
"pipeline.standard.name": "Standard Video",
"pipeline.demo.name": "Demo Feature",
"pipeline.demo.description": "A demo pipeline with a custom layout",
"pipeline.asset_based.name": "Asset-Based Video",
"pipeline.asset_based.description": "Generate videos from user-provided assets",
"asset_based.section.assets": "📦 Asset Upload",
"asset_based.section.video_info": "📝 Video Information",
"asset_based.section.source": "⚙️ Service Configuration",
"asset_based.assets.what": "Upload your images or video assets, AI will automatically analyze them and generate a video script",
"asset_based.assets.how": "Supports JPG/PNG/GIF/WebP images and MP4/MOV/AVI videos. Each asset should be clear and relevant",
"asset_based.assets.upload": "Upload Assets",
"asset_based.assets.upload_help": "Supports multiple image or video files",
"asset_based.assets.count": "✅ Uploaded {count} assets",
"asset_based.assets.preview": "📷 Asset Preview",
"asset_based.assets.empty_hint": "💡 Please upload at least one image or video asset",
"asset_based.video_title": "Video Title (Optional)",
"asset_based.video_title_placeholder": "e.g., Pet Store Year-End Sale",
"asset_based.video_title_help": "Main title for the video, leave empty to hide title",
"asset_based.intent": "Video Intent",
"asset_based.intent_placeholder": "e.g., Promote our pet store's year-end special offers to attract more customers, use a warm and friendly tone",
"asset_based.intent_help": "Describe the purpose, message, and desired style of this video",
"asset_based.duration": "Target Duration (seconds)",
"asset_based.duration_help": "Expected video duration, AI will adjust based on asset count",
"asset_based.duration_label": "Target Duration: {seconds}s",
"asset_based.source.what": "Select the service provider for image analysis",
"asset_based.source.how": "RunningHub is a cloud service requiring API Key; SelfHost uses local ComfyUI",
"asset_based.source.select": "Select Service",
"asset_based.source.runninghub": "☁️ RunningHub (Cloud)",
"asset_based.source.selfhost": "🖥️ SelfHost (Local)",
"asset_based.source.runninghub_hint": "💡 Using RunningHub cloud service for asset analysis",
"asset_based.source.selfhost_hint": "💡 Using local ComfyUI service for asset analysis",
"asset_based.source.runninghub_not_configured": "⚠️ RunningHub API Key not configured",
"asset_based.source.selfhost_not_configured": "⚠️ Local ComfyUI URL not configured",
"asset_based.output.no_assets": "💡 Please upload assets on the left first",
"asset_based.output.ready": "📦 {count} assets ready, you can start generating",
"asset_based.progress.analyzing": "🔍 Analyzing assets...",
"asset_based.progress.analyzing_start": "🔍 Starting to analyze {total} assets...",
"asset_based.progress.analyzing_asset": "🔍 Analyzing asset {current}/{total}: {name}",
"asset_based.progress.analyzing_complete": "✅ Asset analysis complete ({count} total)",
"asset_based.progress.generating_script": "📝 Generating video script...",
"asset_based.progress.script_complete": "✅ Script generation complete",
"asset_based.progress.concat_complete": "✅ Video concatenation complete"
}
}

View File

@@ -332,7 +332,44 @@
"batch.error": "错误信息",
"batch.error_detail": "查看详细错误堆栈",
"pipeline.standard.name": "标准视频",
"pipeline.demo.name": "演示功能",
"pipeline.demo.description": "具有自定义布局的演示 Pipeline",
"pipeline.asset_based.name": "素材视频",
"pipeline.asset_based.description": "基于用户上传的素材生成视频",
"asset_based.section.assets": "📦 素材上传",
"asset_based.section.video_info": "📝 视频信息",
"asset_based.section.source": "⚙️ 服务配置",
"asset_based.assets.what": "上传您的图片或视频素材,AI 将自动分析并生成视频脚本",
"asset_based.assets.how": "支持 JPG/PNG/GIF/WebP 图片和 MP4/MOV/AVI 等视频格式,建议每个素材清晰且内容相关",
"asset_based.assets.upload": "上传素材",
"asset_based.assets.upload_help": "支持多个图片或视频文件",
"asset_based.assets.count": "✅ 已上传 {count} 个素材",
"asset_based.assets.preview": "📷 素材预览",
"asset_based.assets.empty_hint": "💡 请上传至少一个图片或视频素材",
"asset_based.video_title": "视频标题(选填)",
"asset_based.video_title_placeholder": "例如:宠物店年终大促",
"asset_based.video_title_help": "视频的主标题,留空则不显示标题",
"asset_based.intent": "视频意图",
"asset_based.intent_placeholder": "例如:宣传我们的宠物店年终特惠活动,吸引更多客户到店消费,风格要温馨亲切",
"asset_based.intent_help": "描述这个视频的目的、想传达的信息以及期望的风格",
"asset_based.duration": "目标时长(秒)",
"asset_based.duration_help": "视频的预期时长,AI 会根据素材数量和时长进行调整",
"asset_based.duration_label": "目标时长:{seconds} 秒",
"asset_based.source.what": "选择用于图像分析的服务提供商",
"asset_based.source.how": "RunningHub 是云端服务,需配置 API Key;SelfHost 是本地 ComfyUI 服务",
"asset_based.source.select": "选择服务",
"asset_based.source.runninghub": "☁️ RunningHub(云端)",
"asset_based.source.selfhost": "🖥️ SelfHost(本地)",
"asset_based.source.runninghub_hint": "💡 使用 RunningHub 云端服务分析素材",
"asset_based.source.selfhost_hint": "💡 使用本地 ComfyUI 服务分析素材",
"asset_based.source.runninghub_not_configured": "⚠️ 未配置 RunningHub API Key",
"asset_based.source.selfhost_not_configured": "⚠️ 未配置本地 ComfyUI 地址",
"asset_based.output.no_assets": "💡 请先在左侧上传素材",
"asset_based.output.ready": "📦 已准备好 {count} 个素材,可以开始生成",
"asset_based.progress.analyzing": "🔍 正在分析素材...",
"asset_based.progress.analyzing_start": "🔍 开始分析 {total} 个素材...",
"asset_based.progress.analyzing_asset": "🔍 分析素材 {current}/{total}:{name}",
"asset_based.progress.analyzing_complete": "✅ 素材分析完成(共 {count} 个)",
"asset_based.progress.generating_script": "📝 正在生成视频脚本...",
"asset_based.progress.script_complete": "✅ 脚本生成完成",
"asset_based.progress.concat_complete": "✅ 视频合成完成"
}
}

View File

@@ -25,7 +25,7 @@ from web.pipelines.base import (
# Import all pipeline UI modules to ensure they register themselves
from web.pipelines import standard
from web.pipelines import demo
from web.pipelines import asset_based
__all__ = [
"PipelineUI",

View File

@@ -0,0 +1,447 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Asset-Based Pipeline UI
Implements the UI for generating videos from user-provided assets.
"""
import os
import time
from pathlib import Path
from typing import Any
import streamlit as st
from loguru import logger
from web.i18n import tr, get_language
from web.pipelines.base import PipelineUI, register_pipeline_ui
from web.components.content_input import render_bgm_section, render_version_info
from web.utils.async_helpers import run_async
from pixelle_video.config import config_manager
from pixelle_video.models.progress import ProgressEvent
class AssetBasedPipelineUI(PipelineUI):
"""
UI for the Asset-Based Video Generation Pipeline.
Generates videos from user-provided assets (images/videos).
"""
name = "asset_based"
icon = "📦"
@property
def display_name(self):
return tr("pipeline.asset_based.name")
@property
def description(self):
return tr("pipeline.asset_based.description")
def render(self, pixelle_video: Any):
# Three-column layout
left_col, middle_col, right_col = st.columns([1, 1, 1])
# ====================================================================
# Left Column: Asset Upload & Video Info
# ====================================================================
with left_col:
asset_params = self._render_asset_input()
bgm_params = render_bgm_section(key_prefix="asset_")
render_version_info()
# ====================================================================
# Middle Column: Video Configuration
# ====================================================================
with middle_col:
config_params = self._render_video_config(pixelle_video)
# ====================================================================
# Right Column: Output Preview
# ====================================================================
with right_col:
# Combine all parameters
video_params = {
"pipeline": self.name,
**asset_params,
**bgm_params,
**config_params
}
self._render_output_preview(pixelle_video, video_params)
def _render_asset_input(self) -> dict:
"""Render asset upload section"""
with st.container(border=True):
st.markdown(f"**{tr('asset_based.section.assets')}**")
with st.expander(tr("help.feature_description"), expanded=False):
st.markdown(f"**{tr('help.what')}**")
st.markdown(tr("asset_based.assets.what"))
st.markdown(f"**{tr('help.how')}**")
st.markdown(tr("asset_based.assets.how"))
# File uploader for multiple files
uploaded_files = st.file_uploader(
tr("asset_based.assets.upload"),
type=["jpg", "jpeg", "png", "gif", "webp", "mp4", "mov", "avi", "mkv", "webm"],
accept_multiple_files=True,
help=tr("asset_based.assets.upload_help"),
key="asset_files"
)
# Save uploaded files to temp directory with unique session ID
asset_paths = []
if uploaded_files:
import uuid
session_id = str(uuid.uuid4()).replace('-', '')[:12]
temp_dir = Path(f"temp/assets_{session_id}")
temp_dir.mkdir(parents=True, exist_ok=True)
for uploaded_file in uploaded_files:
file_path = temp_dir / uploaded_file.name
with open(file_path, "wb") as f:
f.write(uploaded_file.getbuffer())
asset_paths.append(str(file_path.absolute()))
st.success(tr("asset_based.assets.count", count=len(asset_paths)))
# Preview uploaded assets
with st.expander(tr("asset_based.assets.preview"), expanded=True):
# Show in a grid (3 columns)
cols = st.columns(3)
for i, (file, path) in enumerate(zip(uploaded_files, asset_paths)):
with cols[i % 3]:
# Check if image or video
ext = Path(path).suffix.lower()
if ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]:
st.image(file, caption=file.name, use_container_width=True)
elif ext in [".mp4", ".mov", ".avi", ".mkv", ".webm"]:
st.video(file)
st.caption(file.name)
else:
st.info(tr("asset_based.assets.empty_hint"))
# Video title & intent
with st.container(border=True):
st.markdown(f"**{tr('asset_based.section.video_info')}**")
video_title = st.text_input(
tr("asset_based.video_title"),
placeholder=tr("asset_based.video_title_placeholder"),
help=tr("asset_based.video_title_help"),
key="asset_video_title"
)
intent = st.text_area(
tr("asset_based.intent"),
placeholder=tr("asset_based.intent_placeholder"),
help=tr("asset_based.intent_help"),
height=100,
key="asset_intent"
)
return {
"assets": asset_paths,
"video_title": video_title,
"intent": intent if intent else None
}
def _render_video_config(self, pixelle_video: Any) -> dict:
"""Render video configuration section"""
# Duration configuration
with st.container(border=True):
st.markdown(f"**{tr('video.title')}**")
# Duration slider
duration = st.slider(
tr("asset_based.duration"),
min_value=15,
max_value=120,
value=30,
step=5,
help=tr("asset_based.duration_help"),
key="asset_duration"
)
st.caption(tr("asset_based.duration_label", seconds=duration))
# Workflow source selection
with st.container(border=True):
st.markdown(f"**{tr('asset_based.section.source')}**")
with st.expander(tr("help.feature_description"), expanded=False):
st.markdown(f"**{tr('help.what')}**")
st.markdown(tr("asset_based.source.what"))
st.markdown(f"**{tr('help.how')}**")
st.markdown(tr("asset_based.source.how"))
source_options = {
"runninghub": tr("asset_based.source.runninghub"),
"selfhost": tr("asset_based.source.selfhost")
}
# Check if RunningHub API key is configured
comfyui_config = config_manager.get_comfyui_config()
has_runninghub = bool(comfyui_config.get("runninghub_api_key"))
has_selfhost = bool(comfyui_config.get("comfyui_url"))
# Default to available source
if has_runninghub:
default_source_index = 0
elif has_selfhost:
default_source_index = 1
else:
default_source_index = 0
source = st.radio(
tr("asset_based.source.select"),
options=list(source_options.keys()),
format_func=lambda x: source_options[x],
index=default_source_index,
horizontal=True,
key="asset_source",
label_visibility="collapsed"
)
# Show hint based on selection
if source == "runninghub":
if not has_runninghub:
st.warning(tr("asset_based.source.runninghub_not_configured"))
else:
st.info(tr("asset_based.source.runninghub_hint"))
else:
if not has_selfhost:
st.warning(tr("asset_based.source.selfhost_not_configured"))
else:
st.info(tr("asset_based.source.selfhost_hint"))
# TTS configuration
with st.container(border=True):
st.markdown(f"**{tr('section.tts')}**")
# Import voice configuration
from pixelle_video.tts_voices import EDGE_TTS_VOICES, get_voice_display_name
# Get saved voice from config
comfyui_config = config_manager.get_comfyui_config()
tts_config = comfyui_config.get("tts", {})
local_config = tts_config.get("local", {})
saved_voice = local_config.get("voice", "zh-CN-YunjianNeural")
saved_speed = local_config.get("speed", 1.2)
# Build voice options with i18n
voice_options = []
voice_ids = []
default_voice_index = 0
for idx, voice_config in enumerate(EDGE_TTS_VOICES):
voice_id = voice_config["id"]
display_name = get_voice_display_name(voice_id, tr, get_language())
voice_options.append(display_name)
voice_ids.append(voice_id)
if voice_id == saved_voice:
default_voice_index = idx
# Two-column layout
voice_col, speed_col = st.columns([1, 1])
with voice_col:
selected_voice_display = st.selectbox(
tr("tts.voice_selector"),
voice_options,
index=default_voice_index,
key="asset_tts_voice"
)
selected_voice_index = voice_options.index(selected_voice_display)
voice_id = voice_ids[selected_voice_index]
with speed_col:
tts_speed = st.slider(
tr("tts.speed"),
min_value=0.5,
max_value=2.0,
value=saved_speed,
step=0.1,
format="%.1fx",
key="asset_tts_speed"
)
st.caption(tr("tts.speed_label", speed=f"{tts_speed:.1f}"))
return {
"duration": duration,
"source": source,
"voice_id": voice_id,
"tts_speed": tts_speed
}
def _render_output_preview(self, pixelle_video: Any, video_params: dict):
"""Render output preview section"""
with st.container(border=True):
st.markdown(f"**{tr('section.video_generation')}**")
# Check configuration
if not config_manager.validate():
st.warning(tr("settings.not_configured"))
# Check if assets are provided
assets = video_params.get("assets", [])
if not assets:
st.info(tr("asset_based.output.no_assets"))
st.button(
tr("btn.generate"),
type="primary",
use_container_width=True,
disabled=True,
key="asset_generate_disabled"
)
return
# Show asset summary
st.info(tr("asset_based.output.ready", count=len(assets)))
# Generate button
if st.button(tr("btn.generate"), type="primary", use_container_width=True, key="asset_generate"):
# Validate
if not config_manager.validate():
st.error(tr("settings.not_configured"))
st.stop()
# Show progress
progress_bar = st.progress(0)
status_text = st.empty()
start_time = time.time()
try:
# Import pipeline
from pixelle_video.pipelines.asset_based import AssetBasedPipeline
# Create pipeline
pipeline = AssetBasedPipeline(pixelle_video)
# Progress callback
def update_progress(event: ProgressEvent):
if event.event_type == "analyzing_assets":
if event.extra_info == "start":
message = tr("asset_based.progress.analyzing_start", total=event.frame_total)
else:
message = tr("asset_based.progress.analyzing_complete", count=event.frame_total)
elif event.event_type == "analyzing_asset":
message = tr(
"asset_based.progress.analyzing_asset",
current=event.frame_current,
total=event.frame_total,
name=event.extra_info or ""
)
elif event.event_type == "generating_script":
if event.extra_info == "complete":
message = tr("asset_based.progress.script_complete")
else:
message = tr("asset_based.progress.generating_script")
elif event.event_type == "frame_step":
action_key = f"progress.step_{event.action}"
action_text = tr(action_key)
message = tr(
"progress.frame_step",
current=event.frame_current,
total=event.frame_total,
step=event.step,
action=action_text
)
elif event.event_type == "processing_frame":
message = tr(
"progress.frame",
current=event.frame_current,
total=event.frame_total
)
elif event.event_type == "concatenating":
if event.extra_info == "complete":
message = tr("asset_based.progress.concat_complete")
else:
message = tr("progress.concatenating")
elif event.event_type == "completed":
message = tr("progress.completed")
else:
message = tr(f"progress.{event.event_type}")
status_text.text(message)
progress_bar.progress(min(int(event.progress * 100), 99))
# Execute pipeline with progress callback
ctx = run_async(pipeline(
assets=video_params["assets"],
video_title=video_params.get("video_title", ""),
intent=video_params.get("intent"),
duration=video_params.get("duration", 30),
source=video_params.get("source", "runninghub"),
bgm_path=video_params.get("bgm_path"),
bgm_volume=video_params.get("bgm_volume", 0.2),
bgm_mode=video_params.get("bgm_mode", "loop"),
voice_id=video_params.get("voice_id", "zh-CN-YunjianNeural"),
tts_speed=video_params.get("tts_speed", 1.2),
progress_callback=update_progress
))
total_time = time.time() - start_time
progress_bar.progress(100)
status_text.text(tr("status.success"))
# Display result
st.success(tr("status.video_generated", path=ctx.final_video_path))
st.markdown("---")
# Video info
if os.path.exists(ctx.final_video_path):
file_size_mb = os.path.getsize(ctx.final_video_path) / (1024 * 1024)
n_scenes = len(ctx.storyboard.frames) if ctx.storyboard else 0
info_text = (
f"⏱️ {tr('info.generation_time')} {total_time:.1f}s "
f"📦 {file_size_mb:.2f}MB "
f"🎬 {n_scenes}{tr('info.scenes_unit')}"
)
st.caption(info_text)
st.markdown("---")
# Video preview
st.video(ctx.final_video_path)
# Download button
with open(ctx.final_video_path, "rb") as video_file:
video_bytes = video_file.read()
video_filename = os.path.basename(ctx.final_video_path)
st.download_button(
label="⬇️ 下载视频" if get_language() == "zh_CN" else "⬇️ Download Video",
data=video_bytes,
file_name=video_filename,
mime="video/mp4",
use_container_width=True
)
else:
st.error(tr("status.video_not_found", path=ctx.final_video_path))
except Exception as e:
status_text.text("")
progress_bar.empty()
st.error(tr("status.error", error=str(e)))
logger.exception(e)
st.stop()
# Register self
register_pipeline_ui(AssetBasedPipelineUI)

View File

@@ -1,69 +0,0 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Demo Pipeline UI
Implements a custom layout for the Demo Pipeline.
"""
import streamlit as st
from typing import Any
from web.i18n import tr
from web.pipelines.base import PipelineUI, register_pipeline_ui
class DemoPipelineUI(PipelineUI):
"""
Demo UI to verify the full-page plugin system.
Uses a completely different layout (2 columns).
"""
name = "demo"
icon = ""
@property
def display_name(self):
return tr("pipeline.demo.name")
@property
def description(self):
return tr("pipeline.demo.description")
def render(self, pixelle_video: Any):
st.markdown("### ✨ Demo Pipeline Custom Layout")
st.info("This pipeline uses a custom 2-column layout, demonstrating full UI control.")
col1, col2 = st.columns([2, 1])
with col1:
with st.container(border=True):
st.subheader("1. Input")
topic = st.text_input("Enter Topic", placeholder="e.g. AI News")
mood = st.selectbox("Mood", ["Happy", "Serious", "Funny"])
st.markdown("---")
st.subheader("2. Settings")
# Simplified settings for demo
n_scenes = st.slider("Scenes", 3, 10, 5)
with col2:
with st.container(border=True):
st.subheader("3. Generate")
if st.button("🚀 Generate Demo Video", type="primary", use_container_width=True):
# Mock generation logic or call backend
st.success(f"Generating video for '{topic}' ({mood}) with {n_scenes} scenes...")
st.balloons()
# Register self
register_pipeline_ui(DemoPipelineUI)