diff --git a/pixelle_video/pipelines/asset_based.py b/pixelle_video/pipelines/asset_based.py index 082ddb8..88c511b 100644 --- a/pixelle_video/pipelines/asset_based.py +++ b/pixelle_video/pipelines/asset_based.py @@ -27,23 +27,27 @@ Example: result = await pipeline( assets=["/path/img1.jpg", "/path/img2.jpg"], video_title="Pet Store Year-End Sale", - style="warm and friendly", + intent="Promote our pet store's year-end sale with a warm and friendly tone", duration=30 ) """ -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any, Optional, Callable from pathlib import Path from loguru import logger from pydantic import BaseModel, Field from pixelle_video.pipelines.linear import LinearVideoPipeline, PipelineContext +from pixelle_video.models.progress import ProgressEvent from pixelle_video.utils.os_util import ( create_task_output_dir, get_task_final_video_path ) +# Type alias for progress callback +ProgressCallback = Optional[Callable[[ProgressEvent], None]] + # ==================== Structured Output Models ==================== @@ -82,12 +86,12 @@ class AssetBasedPipeline(LinearVideoPipeline): assets: List[str], video_title: str = "", intent: Optional[str] = None, - style: str = "professional and engaging", duration: int = 30, source: str = "runninghub", bgm_path: Optional[str] = None, bgm_volume: float = 0.2, bgm_mode: str = "loop", + progress_callback: ProgressCallback = None, **kwargs ) -> PipelineContext: """ @@ -97,12 +101,12 @@ class AssetBasedPipeline(LinearVideoPipeline): assets: List of asset file paths video_title: Video title intent: Video intent/purpose (defaults to video_title) - style: Video style duration: Target duration in seconds source: Workflow source ("runninghub" or "selfhost") bgm_path: Path to background music file (optional) bgm_volume: BGM volume (0.0-1.0, default 0.2) bgm_mode: BGM mode ("loop" or "once", default "loop") + progress_callback: Optional callback for progress updates **kwargs: Additional parameters Returns: @@ -110,6 +114,9 @@ class AssetBasedPipeline(LinearVideoPipeline): """ from pixelle_video.pipelines.linear import PipelineContext + # Store progress callback + self._progress_callback = progress_callback + # Create custom context with asset-specific parameters ctx = PipelineContext( input_text=intent or video_title, # Use intent or title as input_text @@ -117,7 +124,6 @@ class AssetBasedPipeline(LinearVideoPipeline): "assets": assets, "video_title": video_title, "intent": intent or video_title, - "style": style, "duration": duration, "source": source, "bgm_path": bgm_path, @@ -147,6 +153,11 @@ class AssetBasedPipeline(LinearVideoPipeline): await self.handle_exception(ctx, e) raise + def _emit_progress(self, event: ProgressEvent): + """Emit progress event to callback if available""" + if self._progress_callback: + self._progress_callback(event) + async def setup_environment(self, context: PipelineContext) -> PipelineContext: """ Analyze uploaded assets and build asset index @@ -172,7 +183,17 @@ class AssetBasedPipeline(LinearVideoPipeline): if not assets: raise ValueError("No assets provided. 
Please upload at least one image or video.") - logger.info(f"Found {len(assets)} assets to analyze") + total_assets = len(assets) + logger.info(f"Found {total_assets} assets to analyze") + + # Emit initial progress (0-15% for asset analysis) + self._emit_progress(ProgressEvent( + event_type="analyzing_assets", + progress=0.01, + frame_current=0, + frame_total=total_assets, + extra_info="start" + )) self.asset_index = {} @@ -183,7 +204,17 @@ class AssetBasedPipeline(LinearVideoPipeline): logger.warning(f"Asset not found: {asset_path}") continue - logger.info(f"Analyzing asset {i}/{len(assets)}: {asset_path_obj.name}") + logger.info(f"Analyzing asset {i}/{total_assets}: {asset_path_obj.name}") + + # Emit progress for this asset + progress = 0.01 + (i - 1) / total_assets * 0.14 # 1% - 15% + self._emit_progress(ProgressEvent( + event_type="analyzing_asset", + progress=progress, + frame_current=i, + frame_total=total_assets, + extra_info=asset_path_obj.name + )) # Determine asset type asset_type = self._get_asset_type(asset_path_obj) @@ -222,34 +253,35 @@ class AssetBasedPipeline(LinearVideoPipeline): # Store asset index in context context.asset_index = self.asset_index + # Emit completion of asset analysis + self._emit_progress(ProgressEvent( + event_type="analyzing_assets", + progress=0.15, + frame_current=total_assets, + frame_total=total_assets, + extra_info="complete" + )) + return context async def determine_title(self, context: PipelineContext) -> PipelineContext: """ - Use user-provided title or generate one via LLM + Use user-provided title if available, otherwise leave empty Args: context: Pipeline context Returns: - Updated context with title + Updated context with title (may be empty) """ - from pixelle_video.utils.content_generators import generate_title - title = context.request.get("video_title") if title: context.title = title logger.info(f"📝 Video title: {title} (user-specified)") else: - # Generate title from intent using LLM - intent = context.request.get("intent", context.input_text) - context.title = await generate_title( - self.core.llm, - content=intent, - strategy="llm" - ) - logger.info(f"📝 Video title: {context.title} (LLM-generated)") + context.title = "" + logger.info(f"📝 No video title specified (will be hidden in template)") return context @@ -267,10 +299,16 @@ class AssetBasedPipeline(LinearVideoPipeline): """ logger.info("🤖 Generating video script with LLM...") + # Emit progress for script generation (15% - 25%) + self._emit_progress(ProgressEvent( + event_type="generating_script", + progress=0.16 + )) + # Build prompt for LLM - intent = context.request.get("intent", context.title) - style = context.request.get("style", "professional and engaging") + intent = context.request.get("intent", context.input_text) duration = context.request.get("duration", 30) + title = context.title # May be empty if user didn't provide one # Prepare asset descriptions with full paths for LLM to reference asset_info = [] @@ -279,11 +317,13 @@ class AssetBasedPipeline(LinearVideoPipeline): assets_text = "\n".join(asset_info) + # Build title section for prompt (only if title is provided) + title_section = f"- Video Title: {title}\n" if title else "" + prompt = f"""You are a video script writer. Generate a {duration}-second video script. ## Requirements -- Intent: {intent} -- Style: {style} +{title_section}- Intent: {intent} - Target Duration: {duration} seconds ## Available Assets (use the exact path in your response) @@ -295,6 +335,7 @@ class AssetBasedPipeline(LinearVideoPipeline): 3. 
Each scene can have 1-5 narration sentences 4. Try to use all available assets, but it's OK to reuse if needed 5. Total duration of all scenes should be approximately {duration} seconds +{f"6. The narrations should align with the video title: {title}" if title else ""} ## Output Requirements For each scene, provide: @@ -337,6 +378,13 @@ Generate the video script now:""" logger.success(f"✅ Generated script with {len(context.script)} scenes") + # Emit progress after script generation + self._emit_progress(ProgressEvent( + event_type="generating_script", + progress=0.25, + extra_info="complete" + )) + # Log script preview for scene in context.script: narrations = scene.get("narrations", []) @@ -413,7 +461,7 @@ Generate the video script now:""" context.narrations = all_narrations # Get template dimensions - template_name = context.params.get("frame_template", "1080x1920/image_default.html") + template_name = "1080x1920/image_pure.html" # Extract dimensions from template name (e.g., "1080x1920") try: dims = template_name.split("/")[0].split("x") @@ -492,9 +540,25 @@ Generate the video script now:""" storyboard = context.storyboard config = context.config + total_frames = len(storyboard.frames) + + # Progress range: 30% - 85% for frame production + base_progress = 0.30 + progress_range = 0.55 # 85% - 30% for i, frame in enumerate(storyboard.frames, 1): - logger.info(f"Producing scene {i}/{len(storyboard.frames)}...") + logger.info(f"Producing scene {i}/{total_frames}...") + + # Emit progress for this frame (each frame has 4 steps: audio, combine, duration, compose) + frame_progress = base_progress + (i - 1) / total_frames * progress_range + self._emit_progress(ProgressEvent( + event_type="frame_step", + progress=frame_progress, + frame_current=i, + frame_total=total_frames, + step=1, + action="audio" + )) # Get scene data with narrations scene = frame._scene_data @@ -524,6 +588,17 @@ Generate the video script now:""" if len(narration_audios) > 1: from pixelle_video.utils.os_util import get_task_frame_path + # Emit progress for combining audio + frame_progress = base_progress + ((i - 1) + 0.25) / total_frames * progress_range + self._emit_progress(ProgressEvent( + event_type="frame_step", + progress=frame_progress, + frame_current=i, + frame_total=total_frames, + step=2, + action="audio" + )) + combined_audio_path = Path(context.task_dir) / "frames" / f"{i:02d}_audio.mp3" # Use FFmpeg to concatenate audio files @@ -564,6 +639,17 @@ Generate the video script now:""" # Since we already have the audio and image, we bypass some steps # by manually calling the composition steps + # Emit progress for duration calculation + frame_progress = base_progress + ((i - 1) + 0.5) / total_frames * progress_range + self._emit_progress(ProgressEvent( + event_type="frame_step", + progress=frame_progress, + frame_current=i, + frame_total=total_frames, + step=3, + action="compose" + )) + # Get audio duration for frame duration import subprocess duration_cmd = [ @@ -576,16 +662,35 @@ Generate the video script now:""" duration_result = subprocess.run(duration_cmd, capture_output=True, text=True, check=True) frame.duration = float(duration_result.stdout.strip()) + # Emit progress for video composition + frame_progress = base_progress + ((i - 1) + 0.75) / total_frames * progress_range + self._emit_progress(ProgressEvent( + event_type="frame_step", + progress=frame_progress, + frame_current=i, + frame_total=total_frames, + step=4, + action="video" + )) + # Use FrameProcessor for proper composition processed_frame = await 
self.core.frame_processor( frame=frame, storyboard=storyboard, config=config, - total_frames=len(storyboard.frames) + total_frames=total_frames ) logger.success(f"✅ Scene {i} complete") + # Emit completion of frame production + self._emit_progress(ProgressEvent( + event_type="processing_frame", + progress=0.85, + frame_current=total_frames, + frame_total=total_frames + )) + return context async def post_production(self, context: PipelineContext) -> PipelineContext: @@ -600,6 +705,12 @@ Generate the video script now:""" """ logger.info("🎞️ Concatenating scenes...") + # Emit progress for concatenation (85% - 95%) + self._emit_progress(ProgressEvent( + event_type="concatenating", + progress=0.86 + )) + # Collect video segments from storyboard frames scene_videos = [frame.video_segment_path for frame in context.storyboard.frames] @@ -626,6 +737,13 @@ Generate the video script now:""" logger.success(f"✅ Final video: {final_video_path}") + # Emit completion of concatenation + self._emit_progress(ProgressEvent( + event_type="concatenating", + progress=0.95, + extra_info="complete" + )) + return context async def finalize(self, context: PipelineContext) -> PipelineContext: @@ -641,8 +759,84 @@ Generate the video script now:""" logger.success(f"🎉 Asset-based video generation complete!") logger.info(f"Video: {context.final_video_path}") + # Emit completion + self._emit_progress(ProgressEvent( + event_type="completed", + progress=1.0 + )) + + # Persist metadata for history tracking + await self._persist_task_data(context) + return context + async def _persist_task_data(self, ctx: PipelineContext): + """ + Persist task metadata and storyboard to filesystem for history tracking + """ + from pathlib import Path + + try: + storyboard = ctx.storyboard + task_id = ctx.task_id + + if not task_id: + logger.warning("No task_id in context, skipping persistence") + return + + # Get file size + video_path_obj = Path(ctx.final_video_path) + file_size = video_path_obj.stat().st_size if video_path_obj.exists() else 0 + + # Build metadata + input_params = { + "text": ctx.input_text, + "mode": "asset_based", + "title": ctx.title or "", + "n_scenes": len(storyboard.frames) if storyboard else 0, + "assets": ctx.request.get("assets", []), + "intent": ctx.request.get("intent"), + "duration": ctx.request.get("duration"), + "source": ctx.request.get("source"), + "voice_id": ctx.request.get("voice_id"), + "tts_speed": ctx.request.get("tts_speed"), + } + + metadata = { + "task_id": task_id, + "created_at": storyboard.created_at.isoformat() if storyboard and storyboard.created_at else None, + "completed_at": storyboard.completed_at.isoformat() if storyboard and storyboard.completed_at else None, + "status": "completed", + + "input": input_params, + + "result": { + "video_path": ctx.final_video_path, + "duration": storyboard.total_duration if storyboard else 0, + "file_size": file_size, + "n_frames": len(storyboard.frames) if storyboard else 0 + }, + + "config": { + "llm_model": self.core.config.get("llm", {}).get("model", "unknown"), + "llm_base_url": self.core.config.get("llm", {}).get("base_url", "unknown"), + "source": ctx.request.get("source", "runninghub"), + } + } + + # Save metadata + await self.core.persistence.save_task_metadata(task_id, metadata) + logger.info(f"💾 Saved task metadata: {task_id}") + + # Save storyboard + if storyboard: + await self.core.persistence.save_storyboard(task_id, storyboard) + logger.info(f"💾 Saved storyboard: {task_id}") + + except Exception as e: + logger.error(f"Failed to persist task 
data: {e}") + # Don't raise - persistence failure shouldn't break video generation + # Helper methods def _get_asset_type(self, path: Path) -> str: diff --git a/templates/1080x1920/image_pure.html b/templates/1080x1920/image_pure.html new file mode 100644 index 0000000..880d42f --- /dev/null +++ b/templates/1080x1920/image_pure.html @@ -0,0 +1,145 @@ + + + + + + + + + +
[HTML/CSS markup of the new image_pure.html template was stripped during extraction; the surviving fragments indicate a full-screen background image layer, an optional {{title}} overlay, and a {{text}} subtitle block]
+ + \ No newline at end of file diff --git a/web/components/content_input.py b/web/components/content_input.py index 02363b0..a283c90 100644 --- a/web/components/content_input.py +++ b/web/components/content_input.py @@ -169,7 +169,7 @@ def render_content_input(): } -def render_bgm_section(): +def render_bgm_section(key_prefix=""): """Render BGM selection section""" with st.container(border=True): st.markdown(f"**{tr('section.bgm')}**") @@ -204,7 +204,8 @@ def render_bgm_section(): "BGM", bgm_options, index=default_index, - label_visibility="collapsed" + label_visibility="collapsed", + key=f"{key_prefix}bgm_selector" ) # BGM volume slider (only show when BGM is selected) @@ -216,7 +217,7 @@ def render_bgm_section(): value=0.2, step=0.01, format="%.2f", - key="bgm_volume_slider", + key=f"{key_prefix}bgm_volume_slider", help=tr("bgm.volume_help") ) else: @@ -224,7 +225,7 @@ def render_bgm_section(): # BGM preview button (only if BGM is not "None") if bgm_choice != tr("bgm.none"): - if st.button(tr("bgm.preview"), key="preview_bgm", use_container_width=True): + if st.button(tr("bgm.preview"), key=f"{key_prefix}preview_bgm", use_container_width=True): from pixelle_video.utils.os_util import get_resource_path, resource_exists try: if resource_exists("bgm", bgm_choice): diff --git a/web/i18n/locales/en_US.json b/web/i18n/locales/en_US.json index 7925a7f..28156a6 100644 --- a/web/i18n/locales/en_US.json +++ b/web/i18n/locales/en_US.json @@ -332,7 +332,44 @@ "batch.error": "Error", "batch.error_detail": "View detailed error stack", "pipeline.standard.name": "Standard Video", - "pipeline.demo.name": "Demo Feature", - "pipeline.demo.description": "A demo pipeline with a custom layout" + "pipeline.asset_based.name": "Asset-Based Video", + "pipeline.asset_based.description": "Generate videos from user-provided assets", + "asset_based.section.assets": "📦 Asset Upload", + "asset_based.section.video_info": "📝 Video Information", + "asset_based.section.source": "⚙️ Service Configuration", + "asset_based.assets.what": "Upload your images or video assets, AI will automatically analyze them and generate a video script", + "asset_based.assets.how": "Supports JPG/PNG/GIF/WebP images and MP4/MOV/AVI videos. 
Each asset should be clear and relevant", + "asset_based.assets.upload": "Upload Assets", + "asset_based.assets.upload_help": "Supports multiple image or video files", + "asset_based.assets.count": "✅ Uploaded {count} assets", + "asset_based.assets.preview": "📷 Asset Preview", + "asset_based.assets.empty_hint": "💡 Please upload at least one image or video asset", + "asset_based.video_title": "Video Title (Optional)", + "asset_based.video_title_placeholder": "e.g., Pet Store Year-End Sale", + "asset_based.video_title_help": "Main title for the video, leave empty to hide title", + "asset_based.intent": "Video Intent", + "asset_based.intent_placeholder": "e.g., Promote our pet store's year-end special offers to attract more customers, use a warm and friendly tone", + "asset_based.intent_help": "Describe the purpose, message, and desired style of this video", + "asset_based.duration": "Target Duration (seconds)", + "asset_based.duration_help": "Expected video duration, AI will adjust based on asset count", + "asset_based.duration_label": "Target Duration: {seconds}s", + "asset_based.source.what": "Select the service provider for image analysis", + "asset_based.source.how": "RunningHub is a cloud service requiring API Key; SelfHost uses local ComfyUI", + "asset_based.source.select": "Select Service", + "asset_based.source.runninghub": "☁️ RunningHub (Cloud)", + "asset_based.source.selfhost": "🖥️ SelfHost (Local)", + "asset_based.source.runninghub_hint": "💡 Using RunningHub cloud service for asset analysis", + "asset_based.source.selfhost_hint": "💡 Using local ComfyUI service for asset analysis", + "asset_based.source.runninghub_not_configured": "⚠️ RunningHub API Key not configured", + "asset_based.source.selfhost_not_configured": "⚠️ Local ComfyUI URL not configured", + "asset_based.output.no_assets": "💡 Please upload assets on the left first", + "asset_based.output.ready": "📦 {count} assets ready, you can start generating", + "asset_based.progress.analyzing": "🔍 Analyzing assets...", + "asset_based.progress.analyzing_start": "🔍 Starting to analyze {total} assets...", + "asset_based.progress.analyzing_asset": "🔍 Analyzing asset {current}/{total}: {name}", + "asset_based.progress.analyzing_complete": "✅ Asset analysis complete ({count} total)", + "asset_based.progress.generating_script": "📝 Generating video script...", + "asset_based.progress.script_complete": "✅ Script generation complete", + "asset_based.progress.concat_complete": "✅ Video concatenation complete" } } \ No newline at end of file diff --git a/web/i18n/locales/zh_CN.json b/web/i18n/locales/zh_CN.json index 836562c..21d1ae0 100644 --- a/web/i18n/locales/zh_CN.json +++ b/web/i18n/locales/zh_CN.json @@ -332,7 +332,44 @@ "batch.error": "错误信息", "batch.error_detail": "查看详细错误堆栈", "pipeline.standard.name": "标准视频", - "pipeline.demo.name": "演示功能", - "pipeline.demo.description": "具有自定义布局的演示 Pipeline" + "pipeline.asset_based.name": "素材视频", + "pipeline.asset_based.description": "基于用户上传的素材生成视频", + "asset_based.section.assets": "📦 素材上传", + "asset_based.section.video_info": "📝 视频信息", + "asset_based.section.source": "⚙️ 服务配置", + "asset_based.assets.what": "上传您的图片或视频素材,AI 将自动分析并生成视频脚本", + "asset_based.assets.how": "支持 JPG/PNG/GIF/WebP 图片和 MP4/MOV/AVI 等视频格式,建议每个素材清晰且内容相关", + "asset_based.assets.upload": "上传素材", + "asset_based.assets.upload_help": "支持多个图片或视频文件", + "asset_based.assets.count": "✅ 已上传 {count} 个素材", + "asset_based.assets.preview": "📷 素材预览", + "asset_based.assets.empty_hint": "💡 请上传至少一个图片或视频素材", + "asset_based.video_title": "视频标题(选填)", 
+ "asset_based.video_title_placeholder": "例如:宠物店年终大促", + "asset_based.video_title_help": "视频的主标题,留空则不显示标题", + "asset_based.intent": "视频意图", + "asset_based.intent_placeholder": "例如:宣传我们的宠物店年终特惠活动,吸引更多客户到店消费,风格要温馨亲切", + "asset_based.intent_help": "描述这个视频的目的、想传达的信息以及期望的风格", + "asset_based.duration": "目标时长(秒)", + "asset_based.duration_help": "视频的预期时长,AI 会根据素材数量和时长进行调整", + "asset_based.duration_label": "目标时长:{seconds} 秒", + "asset_based.source.what": "选择用于图像分析的服务提供商", + "asset_based.source.how": "RunningHub 是云端服务,需配置 API Key;SelfHost 是本地 ComfyUI 服务", + "asset_based.source.select": "选择服务", + "asset_based.source.runninghub": "☁️ RunningHub(云端)", + "asset_based.source.selfhost": "🖥️ SelfHost(本地)", + "asset_based.source.runninghub_hint": "💡 使用 RunningHub 云端服务分析素材", + "asset_based.source.selfhost_hint": "💡 使用本地 ComfyUI 服务分析素材", + "asset_based.source.runninghub_not_configured": "⚠️ 未配置 RunningHub API Key", + "asset_based.source.selfhost_not_configured": "⚠️ 未配置本地 ComfyUI 地址", + "asset_based.output.no_assets": "💡 请先在左侧上传素材", + "asset_based.output.ready": "📦 已准备好 {count} 个素材,可以开始生成", + "asset_based.progress.analyzing": "🔍 正在分析素材...", + "asset_based.progress.analyzing_start": "🔍 开始分析 {total} 个素材...", + "asset_based.progress.analyzing_asset": "🔍 分析素材 {current}/{total}:{name}", + "asset_based.progress.analyzing_complete": "✅ 素材分析完成(共 {count} 个)", + "asset_based.progress.generating_script": "📝 正在生成视频脚本...", + "asset_based.progress.script_complete": "✅ 脚本生成完成", + "asset_based.progress.concat_complete": "✅ 视频合成完成" } } \ No newline at end of file diff --git a/web/pipelines/__init__.py b/web/pipelines/__init__.py index 03b722d..1c5efa7 100644 --- a/web/pipelines/__init__.py +++ b/web/pipelines/__init__.py @@ -25,7 +25,7 @@ from web.pipelines.base import ( # Import all pipeline UI modules to ensure they register themselves from web.pipelines import standard -from web.pipelines import demo +from web.pipelines import asset_based __all__ = [ "PipelineUI", diff --git a/web/pipelines/asset_based.py b/web/pipelines/asset_based.py new file mode 100644 index 0000000..cf19c25 --- /dev/null +++ b/web/pipelines/asset_based.py @@ -0,0 +1,447 @@ +# Copyright (C) 2025 AIDC-AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Asset-Based Pipeline UI + +Implements the UI for generating videos from user-provided assets. +""" + +import os +import time +from pathlib import Path +from typing import Any + +import streamlit as st +from loguru import logger + +from web.i18n import tr, get_language +from web.pipelines.base import PipelineUI, register_pipeline_ui +from web.components.content_input import render_bgm_section, render_version_info +from web.utils.async_helpers import run_async +from pixelle_video.config import config_manager +from pixelle_video.models.progress import ProgressEvent + + +class AssetBasedPipelineUI(PipelineUI): + """ + UI for the Asset-Based Video Generation Pipeline. + Generates videos from user-provided assets (images/videos). 
+ """ + name = "asset_based" + icon = "📦" + + @property + def display_name(self): + return tr("pipeline.asset_based.name") + + @property + def description(self): + return tr("pipeline.asset_based.description") + + def render(self, pixelle_video: Any): + # Three-column layout + left_col, middle_col, right_col = st.columns([1, 1, 1]) + + # ==================================================================== + # Left Column: Asset Upload & Video Info + # ==================================================================== + with left_col: + asset_params = self._render_asset_input() + bgm_params = render_bgm_section(key_prefix="asset_") + render_version_info() + + # ==================================================================== + # Middle Column: Video Configuration + # ==================================================================== + with middle_col: + config_params = self._render_video_config(pixelle_video) + + # ==================================================================== + # Right Column: Output Preview + # ==================================================================== + with right_col: + # Combine all parameters + video_params = { + "pipeline": self.name, + **asset_params, + **bgm_params, + **config_params + } + + self._render_output_preview(pixelle_video, video_params) + + def _render_asset_input(self) -> dict: + """Render asset upload section""" + with st.container(border=True): + st.markdown(f"**{tr('asset_based.section.assets')}**") + + with st.expander(tr("help.feature_description"), expanded=False): + st.markdown(f"**{tr('help.what')}**") + st.markdown(tr("asset_based.assets.what")) + st.markdown(f"**{tr('help.how')}**") + st.markdown(tr("asset_based.assets.how")) + + # File uploader for multiple files + uploaded_files = st.file_uploader( + tr("asset_based.assets.upload"), + type=["jpg", "jpeg", "png", "gif", "webp", "mp4", "mov", "avi", "mkv", "webm"], + accept_multiple_files=True, + help=tr("asset_based.assets.upload_help"), + key="asset_files" + ) + + # Save uploaded files to temp directory with unique session ID + asset_paths = [] + if uploaded_files: + import uuid + session_id = str(uuid.uuid4()).replace('-', '')[:12] + temp_dir = Path(f"temp/assets_{session_id}") + temp_dir.mkdir(parents=True, exist_ok=True) + + for uploaded_file in uploaded_files: + file_path = temp_dir / uploaded_file.name + with open(file_path, "wb") as f: + f.write(uploaded_file.getbuffer()) + asset_paths.append(str(file_path.absolute())) + + st.success(tr("asset_based.assets.count", count=len(asset_paths))) + + # Preview uploaded assets + with st.expander(tr("asset_based.assets.preview"), expanded=True): + # Show in a grid (3 columns) + cols = st.columns(3) + for i, (file, path) in enumerate(zip(uploaded_files, asset_paths)): + with cols[i % 3]: + # Check if image or video + ext = Path(path).suffix.lower() + if ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]: + st.image(file, caption=file.name, use_container_width=True) + elif ext in [".mp4", ".mov", ".avi", ".mkv", ".webm"]: + st.video(file) + st.caption(file.name) + else: + st.info(tr("asset_based.assets.empty_hint")) + + # Video title & intent + with st.container(border=True): + st.markdown(f"**{tr('asset_based.section.video_info')}**") + + video_title = st.text_input( + tr("asset_based.video_title"), + placeholder=tr("asset_based.video_title_placeholder"), + help=tr("asset_based.video_title_help"), + key="asset_video_title" + ) + + intent = st.text_area( + tr("asset_based.intent"), + 
placeholder=tr("asset_based.intent_placeholder"), + help=tr("asset_based.intent_help"), + height=100, + key="asset_intent" + ) + + return { + "assets": asset_paths, + "video_title": video_title, + "intent": intent if intent else None + } + + def _render_video_config(self, pixelle_video: Any) -> dict: + """Render video configuration section""" + # Duration configuration + with st.container(border=True): + st.markdown(f"**{tr('video.title')}**") + + # Duration slider + duration = st.slider( + tr("asset_based.duration"), + min_value=15, + max_value=120, + value=30, + step=5, + help=tr("asset_based.duration_help"), + key="asset_duration" + ) + st.caption(tr("asset_based.duration_label", seconds=duration)) + + # Workflow source selection + with st.container(border=True): + st.markdown(f"**{tr('asset_based.section.source')}**") + + with st.expander(tr("help.feature_description"), expanded=False): + st.markdown(f"**{tr('help.what')}**") + st.markdown(tr("asset_based.source.what")) + st.markdown(f"**{tr('help.how')}**") + st.markdown(tr("asset_based.source.how")) + + source_options = { + "runninghub": tr("asset_based.source.runninghub"), + "selfhost": tr("asset_based.source.selfhost") + } + + # Check if RunningHub API key is configured + comfyui_config = config_manager.get_comfyui_config() + has_runninghub = bool(comfyui_config.get("runninghub_api_key")) + has_selfhost = bool(comfyui_config.get("comfyui_url")) + + # Default to available source + if has_runninghub: + default_source_index = 0 + elif has_selfhost: + default_source_index = 1 + else: + default_source_index = 0 + + source = st.radio( + tr("asset_based.source.select"), + options=list(source_options.keys()), + format_func=lambda x: source_options[x], + index=default_source_index, + horizontal=True, + key="asset_source", + label_visibility="collapsed" + ) + + # Show hint based on selection + if source == "runninghub": + if not has_runninghub: + st.warning(tr("asset_based.source.runninghub_not_configured")) + else: + st.info(tr("asset_based.source.runninghub_hint")) + else: + if not has_selfhost: + st.warning(tr("asset_based.source.selfhost_not_configured")) + else: + st.info(tr("asset_based.source.selfhost_hint")) + + # TTS configuration + with st.container(border=True): + st.markdown(f"**{tr('section.tts')}**") + + # Import voice configuration + from pixelle_video.tts_voices import EDGE_TTS_VOICES, get_voice_display_name + + # Get saved voice from config + comfyui_config = config_manager.get_comfyui_config() + tts_config = comfyui_config.get("tts", {}) + local_config = tts_config.get("local", {}) + saved_voice = local_config.get("voice", "zh-CN-YunjianNeural") + saved_speed = local_config.get("speed", 1.2) + + # Build voice options with i18n + voice_options = [] + voice_ids = [] + default_voice_index = 0 + + for idx, voice_config in enumerate(EDGE_TTS_VOICES): + voice_id = voice_config["id"] + display_name = get_voice_display_name(voice_id, tr, get_language()) + voice_options.append(display_name) + voice_ids.append(voice_id) + + if voice_id == saved_voice: + default_voice_index = idx + + # Two-column layout + voice_col, speed_col = st.columns([1, 1]) + + with voice_col: + selected_voice_display = st.selectbox( + tr("tts.voice_selector"), + voice_options, + index=default_voice_index, + key="asset_tts_voice" + ) + selected_voice_index = voice_options.index(selected_voice_display) + voice_id = voice_ids[selected_voice_index] + + with speed_col: + tts_speed = st.slider( + tr("tts.speed"), + min_value=0.5, + max_value=2.0, + value=saved_speed, 
+ step=0.1, + format="%.1fx", + key="asset_tts_speed" + ) + st.caption(tr("tts.speed_label", speed=f"{tts_speed:.1f}")) + + return { + "duration": duration, + "source": source, + "voice_id": voice_id, + "tts_speed": tts_speed + } + + def _render_output_preview(self, pixelle_video: Any, video_params: dict): + """Render output preview section""" + with st.container(border=True): + st.markdown(f"**{tr('section.video_generation')}**") + + # Check configuration + if not config_manager.validate(): + st.warning(tr("settings.not_configured")) + + # Check if assets are provided + assets = video_params.get("assets", []) + if not assets: + st.info(tr("asset_based.output.no_assets")) + st.button( + tr("btn.generate"), + type="primary", + use_container_width=True, + disabled=True, + key="asset_generate_disabled" + ) + return + + # Show asset summary + st.info(tr("asset_based.output.ready", count=len(assets))) + + # Generate button + if st.button(tr("btn.generate"), type="primary", use_container_width=True, key="asset_generate"): + # Validate + if not config_manager.validate(): + st.error(tr("settings.not_configured")) + st.stop() + + # Show progress + progress_bar = st.progress(0) + status_text = st.empty() + + start_time = time.time() + + try: + # Import pipeline + from pixelle_video.pipelines.asset_based import AssetBasedPipeline + + # Create pipeline + pipeline = AssetBasedPipeline(pixelle_video) + + # Progress callback + def update_progress(event: ProgressEvent): + if event.event_type == "analyzing_assets": + if event.extra_info == "start": + message = tr("asset_based.progress.analyzing_start", total=event.frame_total) + else: + message = tr("asset_based.progress.analyzing_complete", count=event.frame_total) + elif event.event_type == "analyzing_asset": + message = tr( + "asset_based.progress.analyzing_asset", + current=event.frame_current, + total=event.frame_total, + name=event.extra_info or "" + ) + elif event.event_type == "generating_script": + if event.extra_info == "complete": + message = tr("asset_based.progress.script_complete") + else: + message = tr("asset_based.progress.generating_script") + elif event.event_type == "frame_step": + action_key = f"progress.step_{event.action}" + action_text = tr(action_key) + message = tr( + "progress.frame_step", + current=event.frame_current, + total=event.frame_total, + step=event.step, + action=action_text + ) + elif event.event_type == "processing_frame": + message = tr( + "progress.frame", + current=event.frame_current, + total=event.frame_total + ) + elif event.event_type == "concatenating": + if event.extra_info == "complete": + message = tr("asset_based.progress.concat_complete") + else: + message = tr("progress.concatenating") + elif event.event_type == "completed": + message = tr("progress.completed") + else: + message = tr(f"progress.{event.event_type}") + + status_text.text(message) + progress_bar.progress(min(int(event.progress * 100), 99)) + + # Execute pipeline with progress callback + ctx = run_async(pipeline( + assets=video_params["assets"], + video_title=video_params.get("video_title", ""), + intent=video_params.get("intent"), + duration=video_params.get("duration", 30), + source=video_params.get("source", "runninghub"), + bgm_path=video_params.get("bgm_path"), + bgm_volume=video_params.get("bgm_volume", 0.2), + bgm_mode=video_params.get("bgm_mode", "loop"), + voice_id=video_params.get("voice_id", "zh-CN-YunjianNeural"), + tts_speed=video_params.get("tts_speed", 1.2), + progress_callback=update_progress + )) + + total_time = 
time.time() - start_time + + progress_bar.progress(100) + status_text.text(tr("status.success")) + + # Display result + st.success(tr("status.video_generated", path=ctx.final_video_path)) + + st.markdown("---") + + # Video info + if os.path.exists(ctx.final_video_path): + file_size_mb = os.path.getsize(ctx.final_video_path) / (1024 * 1024) + n_scenes = len(ctx.storyboard.frames) if ctx.storyboard else 0 + + info_text = ( + f"⏱️ {tr('info.generation_time')} {total_time:.1f}s " + f"📦 {file_size_mb:.2f}MB " + f"🎬 {n_scenes}{tr('info.scenes_unit')}" + ) + st.caption(info_text) + + st.markdown("---") + + # Video preview + st.video(ctx.final_video_path) + + # Download button + with open(ctx.final_video_path, "rb") as video_file: + video_bytes = video_file.read() + video_filename = os.path.basename(ctx.final_video_path) + st.download_button( + label="⬇️ 下载视频" if get_language() == "zh_CN" else "⬇️ Download Video", + data=video_bytes, + file_name=video_filename, + mime="video/mp4", + use_container_width=True + ) + else: + st.error(tr("status.video_not_found", path=ctx.final_video_path)) + + except Exception as e: + status_text.text("") + progress_bar.empty() + st.error(tr("status.error", error=str(e))) + logger.exception(e) + st.stop() + + +# Register self +register_pipeline_ui(AssetBasedPipelineUI) + diff --git a/web/pipelines/demo.py b/web/pipelines/demo.py deleted file mode 100644 index 03cc1a6..0000000 --- a/web/pipelines/demo.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2025 AIDC-AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Demo Pipeline UI - -Implements a custom layout for the Demo Pipeline. -""" - -import streamlit as st -from typing import Any -from web.i18n import tr - -from web.pipelines.base import PipelineUI, register_pipeline_ui - - -class DemoPipelineUI(PipelineUI): - """ - Demo UI to verify the full-page plugin system. - Uses a completely different layout (2 columns). - """ - name = "demo" - icon = "✨" - - @property - def display_name(self): - return tr("pipeline.demo.name") - - @property - def description(self): - return tr("pipeline.demo.description") - - def render(self, pixelle_video: Any): - st.markdown("### ✨ Demo Pipeline Custom Layout") - st.info("This pipeline uses a custom 2-column layout, demonstrating full UI control.") - - col1, col2 = st.columns([2, 1]) - - with col1: - with st.container(border=True): - st.subheader("1. Input") - topic = st.text_input("Enter Topic", placeholder="e.g. AI News") - mood = st.selectbox("Mood", ["Happy", "Serious", "Funny"]) - - st.markdown("---") - st.subheader("2. Settings") - # Simplified settings for demo - n_scenes = st.slider("Scenes", 3, 10, 5) - - with col2: - with st.container(border=True): - st.subheader("3. Generate") - if st.button("🚀 Generate Demo Video", type="primary", use_container_width=True): - # Mock generation logic or call backend - st.success(f"Generating video for '{topic}' ({mood}) with {n_scenes} scenes...") - st.balloons() - - -# Register self -register_pipeline_ui(DemoPipelineUI)
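
A minimal sketch (not part of the diff) of how the new progress_callback parameter added in this change might be consumed outside the Streamlit UI. The pipeline call mirrors the signature shown above (assets, video_title, intent, duration, source, progress_callback), while the `core` argument, the `main` scaffolding, and the print-based callback are illustrative assumptions rather than code from this patch.

import asyncio
from pixelle_video.pipelines.asset_based import AssetBasedPipeline
from pixelle_video.models.progress import ProgressEvent


def on_progress(event: ProgressEvent) -> None:
    # progress is a 0.0-1.0 fraction; per the diff, roughly 0-15% is asset analysis,
    # 15-25% script generation, 30-85% frame production, 85-95% concatenation,
    # and 1.0 is emitted on completion. extra_info carries "start"/"complete" or names.
    print(f"[{event.progress:5.0%}] {event.event_type} {event.extra_info or ''}")


async def main(core) -> None:
    # `core` stands in for the initialized application object that the Streamlit UI
    # passes in as `pixelle_video`; its construction is not shown in this diff.
    pipeline = AssetBasedPipeline(core)
    ctx = await pipeline(
        assets=["/path/img1.jpg", "/path/img2.jpg"],
        video_title="Pet Store Year-End Sale",
        intent="Promote our pet store's year-end sale with a warm and friendly tone",
        duration=30,
        source="runninghub",
        progress_callback=on_progress,
    )
    print("Final video:", ctx.final_video_path)


# asyncio.run(main(core))  # requires a configured core/application instance

The Streamlit UI in web/pipelines/asset_based.py wires the same callback pattern into a progress bar and status text via its update_progress function.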