开发基于图片素材生成视频的webui功能

2025-12-04 11:14:14 +08:00
parent ea784e0d06
commit 7425b9d23d
8 changed files with 896 additions and 104 deletions
--- a/pixelle_video/pipelines/asset_based.py
+++ b/pixelle_video/pipelines/asset_based.py
@@ -27,23 +27,27 @@ Example:
    result = await pipeline(
        assets=["/path/img1.jpg", "/path/img2.jpg"],
        video_title="Pet Store Year-End Sale",
-        style="warm and friendly",
+        intent="Promote our pet store's year-end sale with a warm and friendly tone",
        duration=30
    )
 """

-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Callable
 from pathlib import Path

 from loguru import logger
 from pydantic import BaseModel, Field

 from pixelle_video.pipelines.linear import LinearVideoPipeline, PipelineContext
+from pixelle_video.models.progress import ProgressEvent
 from pixelle_video.utils.os_util import (
    create_task_output_dir,
    get_task_final_video_path
 )

+# Type alias for progress callback
+ProgressCallback = Optional[Callable[[ProgressEvent], None]]
+

 # ==================== Structured Output Models ====================

@@ -82,12 +86,12 @@ class AssetBasedPipeline(LinearVideoPipeline):
        assets: List[str],
        video_title: str = "",
        intent: Optional[str] = None,
-        style: str = "professional and engaging",
        duration: int = 30,
        source: str = "runninghub",
        bgm_path: Optional[str] = None,
        bgm_volume: float = 0.2,
        bgm_mode: str = "loop",
+        progress_callback: ProgressCallback = None,
        **kwargs
    ) -> PipelineContext:
        """
@@ -97,12 +101,12 @@ class AssetBasedPipeline(LinearVideoPipeline):
            assets: List of asset file paths
            video_title: Video title
            intent: Video intent/purpose (defaults to video_title)
-            style: Video style
            duration: Target duration in seconds
            source: Workflow source ("runninghub" or "selfhost")
            bgm_path: Path to background music file (optional)
            bgm_volume: BGM volume (0.0-1.0, default 0.2)
            bgm_mode: BGM mode ("loop" or "once", default "loop")
+            progress_callback: Optional callback for progress updates
            **kwargs: Additional parameters
        
        Returns:
@@ -110,6 +114,9 @@ class AssetBasedPipeline(LinearVideoPipeline):
        """
        from pixelle_video.pipelines.linear import PipelineContext
        
+        # Store progress callback
+        self._progress_callback = progress_callback
+        
        # Create custom context with asset-specific parameters
        ctx = PipelineContext(
            input_text=intent or video_title,  # Use intent or title as input_text
@@ -117,7 +124,6 @@ class AssetBasedPipeline(LinearVideoPipeline):
                "assets": assets,
                "video_title": video_title,
                "intent": intent or video_title,
-                "style": style,
                "duration": duration,
                "source": source,
                "bgm_path": bgm_path,
@@ -147,6 +153,11 @@ class AssetBasedPipeline(LinearVideoPipeline):
            await self.handle_exception(ctx, e)
            raise
    
+    def _emit_progress(self, event: ProgressEvent):
+        """Emit progress event to callback if available"""
+        if self._progress_callback:
+            self._progress_callback(event)
+    
    async def setup_environment(self, context: PipelineContext) -> PipelineContext:
        """
        Analyze uploaded assets and build asset index
@@ -172,7 +183,17 @@ class AssetBasedPipeline(LinearVideoPipeline):
        if not assets:
            raise ValueError("No assets provided. Please upload at least one image or video.")
        
-        logger.info(f"Found {len(assets)} assets to analyze")
+        total_assets = len(assets)
+        logger.info(f"Found {total_assets} assets to analyze")
+        
+        # Emit initial progress (0-15% for asset analysis)
+        self._emit_progress(ProgressEvent(
+            event_type="analyzing_assets",
+            progress=0.01,
+            frame_current=0,
+            frame_total=total_assets,
+            extra_info="start"
+        ))
        
        self.asset_index = {}
        
@@ -183,7 +204,17 @@ class AssetBasedPipeline(LinearVideoPipeline):
                logger.warning(f"Asset not found: {asset_path}")
                continue
            
-            logger.info(f"Analyzing asset {i}/{len(assets)}: {asset_path_obj.name}")
+            logger.info(f"Analyzing asset {i}/{total_assets}: {asset_path_obj.name}")
+            
+            # Emit progress for this asset
+            progress = 0.01 + (i - 1) / total_assets * 0.14  # 1% - 15%
+            self._emit_progress(ProgressEvent(
+                event_type="analyzing_asset",
+                progress=progress,
+                frame_current=i,
+                frame_total=total_assets,
+                extra_info=asset_path_obj.name
+            ))
            
            # Determine asset type
            asset_type = self._get_asset_type(asset_path_obj)
@@ -222,34 +253,35 @@ class AssetBasedPipeline(LinearVideoPipeline):
        # Store asset index in context
        context.asset_index = self.asset_index
        
+        # Emit completion of asset analysis
+        self._emit_progress(ProgressEvent(
+            event_type="analyzing_assets",
+            progress=0.15,
+            frame_current=total_assets,
+            frame_total=total_assets,
+            extra_info="complete"
+        ))
+        
        return context
    
    async def determine_title(self, context: PipelineContext) -> PipelineContext:
        """
-        Use user-provided title or generate one via LLM
+        Use user-provided title if available, otherwise leave empty
        
        Args:
            context: Pipeline context
        
        Returns:
-            Updated context with title
+            Updated context with title (may be empty)
        """
-        from pixelle_video.utils.content_generators import generate_title
-        
        title = context.request.get("video_title")
        
        if title:
            context.title = title
            logger.info(f"📝 Video title: {title} (user-specified)")
        else:
-            # Generate title from intent using LLM
-            intent = context.request.get("intent", context.input_text)
-            context.title = await generate_title(
-                self.core.llm,
-                content=intent,
-                strategy="llm"
-            )
-            logger.info(f"📝 Video title: {context.title} (LLM-generated)")
+            context.title = ""
+            logger.info(f"📝 No video title specified (will be hidden in template)")
        
        return context
    
@@ -267,10 +299,16 @@ class AssetBasedPipeline(LinearVideoPipeline):
        """
        logger.info("🤖 Generating video script with LLM...")
        
+        # Emit progress for script generation (15% - 25%)
+        self._emit_progress(ProgressEvent(
+            event_type="generating_script",
+            progress=0.16
+        ))
+        
        # Build prompt for LLM
-        intent = context.request.get("intent", context.title)
-        style = context.request.get("style", "professional and engaging")
+        intent = context.request.get("intent", context.input_text)
        duration = context.request.get("duration", 30)
+        title = context.title  # May be empty if user didn't provide one
        
        # Prepare asset descriptions with full paths for LLM to reference
        asset_info = []
@@ -279,11 +317,13 @@ class AssetBasedPipeline(LinearVideoPipeline):
        
        assets_text = "\n".join(asset_info)
        
+        # Build title section for prompt (only if title is provided)
+        title_section = f"- Video Title: {title}\n" if title else ""
+        
        prompt = f"""You are a video script writer. Generate a {duration}-second video script.

 ## Requirements
- Intent: {intent}
- Style: {style}
+{title_section}- Intent: {intent}
 - Target Duration: {duration} seconds

 ## Available Assets (use the exact path in your response)
@@ -295,6 +335,7 @@ class AssetBasedPipeline(LinearVideoPipeline):
 3. Each scene can have 1-5 narration sentences
 4. Try to use all available assets, but it's OK to reuse if needed
 5. Total duration of all scenes should be approximately {duration} seconds
+{f"6. The narrations should align with the video title: {title}" if title else ""}

 ## Output Requirements
 For each scene, provide:
@@ -337,6 +378,13 @@ Generate the video script now:"""
        
        logger.success(f"✅ Generated script with {len(context.script)} scenes")
        
+        # Emit progress after script generation
+        self._emit_progress(ProgressEvent(
+            event_type="generating_script",
+            progress=0.25,
+            extra_info="complete"
+        ))
+        
        # Log script preview
        for scene in context.script:
            narrations = scene.get("narrations", [])
@@ -413,7 +461,7 @@ Generate the video script now:"""
        context.narrations = all_narrations
        
        # Get template dimensions
-        template_name = context.params.get("frame_template", "1080x1920/image_default.html")
+        template_name = "1080x1920/image_pure.html"
        # Extract dimensions from template name (e.g., "1080x1920")
        try:
            dims = template_name.split("/")[0].split("x")
@@ -492,9 +540,25 @@ Generate the video script now:"""
        
        storyboard = context.storyboard
        config = context.config
+        total_frames = len(storyboard.frames)
+        
+        # Progress range: 30% - 85% for frame production
+        base_progress = 0.30
+        progress_range = 0.55  # 85% - 30%
        
        for i, frame in enumerate(storyboard.frames, 1):
-            logger.info(f"Producing scene {i}/{len(storyboard.frames)}...")
+            logger.info(f"Producing scene {i}/{total_frames}...")
+            
+            # Emit progress for this frame (each frame has 4 steps: audio, combine, duration, compose)
+            frame_progress = base_progress + (i - 1) / total_frames * progress_range
+            self._emit_progress(ProgressEvent(
+                event_type="frame_step",
+                progress=frame_progress,
+                frame_current=i,
+                frame_total=total_frames,
+                step=1,
+                action="audio"
+            ))
            
            # Get scene data with narrations
            scene = frame._scene_data
@@ -524,6 +588,17 @@ Generate the video script now:"""
            if len(narration_audios) > 1:
                from pixelle_video.utils.os_util import get_task_frame_path
                
+                # Emit progress for combining audio
+                frame_progress = base_progress + ((i - 1) + 0.25) / total_frames * progress_range
+                self._emit_progress(ProgressEvent(
+                    event_type="frame_step",
+                    progress=frame_progress,
+                    frame_current=i,
+                    frame_total=total_frames,
+                    step=2,
+                    action="audio"
+                ))
+                
                combined_audio_path = Path(context.task_dir) / "frames" / f"{i:02d}_audio.mp3"
                
                # Use FFmpeg to concatenate audio files
@@ -564,6 +639,17 @@ Generate the video script now:"""
            # Since we already have the audio and image, we bypass some steps
            # by manually calling the composition steps
            
+            # Emit progress for duration calculation
+            frame_progress = base_progress + ((i - 1) + 0.5) / total_frames * progress_range
+            self._emit_progress(ProgressEvent(
+                event_type="frame_step",
+                progress=frame_progress,
+                frame_current=i,
+                frame_total=total_frames,
+                step=3,
+                action="compose"
+            ))
+            
            # Get audio duration for frame duration
            import subprocess
            duration_cmd = [
@@ -576,16 +662,35 @@ Generate the video script now:"""
            duration_result = subprocess.run(duration_cmd, capture_output=True, text=True, check=True)
            frame.duration = float(duration_result.stdout.strip())
            
+            # Emit progress for video composition
+            frame_progress = base_progress + ((i - 1) + 0.75) / total_frames * progress_range
+            self._emit_progress(ProgressEvent(
+                event_type="frame_step",
+                progress=frame_progress,
+                frame_current=i,
+                frame_total=total_frames,
+                step=4,
+                action="video"
+            ))
+            
            # Use FrameProcessor for proper composition
            processed_frame = await self.core.frame_processor(
                frame=frame,
                storyboard=storyboard,
                config=config,
-                total_frames=len(storyboard.frames)
+                total_frames=total_frames
            )
            
            logger.success(f"✅ Scene {i} complete")
        
+        # Emit completion of frame production
+        self._emit_progress(ProgressEvent(
+            event_type="processing_frame",
+            progress=0.85,
+            frame_current=total_frames,
+            frame_total=total_frames
+        ))
+        
        return context
    
    async def post_production(self, context: PipelineContext) -> PipelineContext:
@@ -600,6 +705,12 @@ Generate the video script now:"""
        """
        logger.info("🎞️ Concatenating scenes...")
        
+        # Emit progress for concatenation (85% - 95%)
+        self._emit_progress(ProgressEvent(
+            event_type="concatenating",
+            progress=0.86
+        ))
+        
        # Collect video segments from storyboard frames
        scene_videos = [frame.video_segment_path for frame in context.storyboard.frames]
        
@@ -626,6 +737,13 @@ Generate the video script now:"""
        
        logger.success(f"✅ Final video: {final_video_path}")
        
+        # Emit completion of concatenation
+        self._emit_progress(ProgressEvent(
+            event_type="concatenating",
+            progress=0.95,
+            extra_info="complete"
+        ))
+        
        return context
    
    async def finalize(self, context: PipelineContext) -> PipelineContext:
@@ -641,8 +759,84 @@ Generate the video script now:"""
        logger.success(f"🎉 Asset-based video generation complete!")
        logger.info(f"Video: {context.final_video_path}")
        
+        # Emit completion
+        self._emit_progress(ProgressEvent(
+            event_type="completed",
+            progress=1.0
+        ))
+        
+        # Persist metadata for history tracking
+        await self._persist_task_data(context)
+        
        return context
    
+    async def _persist_task_data(self, ctx: PipelineContext):
+        """
+        Persist task metadata and storyboard to filesystem for history tracking
+        """
+        from pathlib import Path
+        
+        try:
+            storyboard = ctx.storyboard
+            task_id = ctx.task_id
+            
+            if not task_id:
+                logger.warning("No task_id in context, skipping persistence")
+                return
+            
+            # Get file size
+            video_path_obj = Path(ctx.final_video_path)
+            file_size = video_path_obj.stat().st_size if video_path_obj.exists() else 0
+            
+            # Build metadata
+            input_params = {
+                "text": ctx.input_text,
+                "mode": "asset_based",
+                "title": ctx.title or "",
+                "n_scenes": len(storyboard.frames) if storyboard else 0,
+                "assets": ctx.request.get("assets", []),
+                "intent": ctx.request.get("intent"),
+                "duration": ctx.request.get("duration"),
+                "source": ctx.request.get("source"),
+                "voice_id": ctx.request.get("voice_id"),
+                "tts_speed": ctx.request.get("tts_speed"),
+            }
+            
+            metadata = {
+                "task_id": task_id,
+                "created_at": storyboard.created_at.isoformat() if storyboard and storyboard.created_at else None,
+                "completed_at": storyboard.completed_at.isoformat() if storyboard and storyboard.completed_at else None,
+                "status": "completed",
+                
+                "input": input_params,
+                
+                "result": {
+                    "video_path": ctx.final_video_path,
+                    "duration": storyboard.total_duration if storyboard else 0,
+                    "file_size": file_size,
+                    "n_frames": len(storyboard.frames) if storyboard else 0
+                },
+                
+                "config": {
+                    "llm_model": self.core.config.get("llm", {}).get("model", "unknown"),
+                    "llm_base_url": self.core.config.get("llm", {}).get("base_url", "unknown"),
+                    "source": ctx.request.get("source", "runninghub"),
+                }
+            }
+            
+            # Save metadata
+            await self.core.persistence.save_task_metadata(task_id, metadata)
+            logger.info(f"💾 Saved task metadata: {task_id}")
+            
+            # Save storyboard
+            if storyboard:
+                await self.core.persistence.save_storyboard(task_id, storyboard)
+                logger.info(f"💾 Saved storyboard: {task_id}")
+            
+        except Exception as e:
+            logger.error(f"Failed to persist task data: {e}")
+            # Don't raise - persistence failure shouldn't break video generation
+    
    # Helper methods
    
    def _get_asset_type(self, path: Path) -> str:
--- a/templates/1080x1920/image_pure.html
+++ b/templates/1080x1920/image_pure.html
@@ -0,0 +1,145 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="template:media-width" content="1024">
+    <meta name="template:media-height" content="1024">
+    <style>
+        html {
+            margin: 0;
+            padding: 0;
+        }
+        
+        body {
+            margin: 0;
+            padding: 0;
+            width: 1080px;
+            height: 1920px;
+            font-family: 'PingFang SC', 'Source Han Sans', 'Microsoft YaHei', sans-serif;
+            background: #000;
+            overflow: hidden;
+        }
+        
+        .page-container {
+            width: 1080px;
+            height: 1920px;
+            position: relative;
+            overflow: hidden;
+        }
+
+        /* 1. Background Image Layer (垫底图片) */
+        .background-layer {
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            z-index: 0;
+        }
+
+        .background-layer img {
+            width: 100%;
+            height: 100%;
+            object-fit: contain;
+            display: block;
+        }
+
+        /* 2. Gradient Overlay (渐变遮罩) 
+           Ensures text readability regardless of image brightness
+           Top: Darker for Title
+           Middle: Transparent for Image visibility
+           Bottom: Darker for Subtitles
+        */
+        .gradient-overlay {
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            z-index: 1;
+            background: linear-gradient(
+                to bottom,
+                rgba(0,0,0,0.6) 0%,
+                rgba(0,0,0,0.1) 25%,
+                rgba(0,0,0,0.1) 60%,
+                rgba(0,0,0,0.8) 100%
+            );
+        }
+
+        /* 3. Content Layer (内容层) */
+        .content-layer {
+            position: relative;
+            z-index: 2;
+            width: 100%;
+            height: 100%;
+            padding: 120px 80px 0px 80px; /* Top, Right, Bottom, Left */
+            box-sizing: border-box;
+            display: flex;
+            flex-direction: column;
+            justify-content: flex-start;
+            color: #ffffff;
+        }
+
+        /* Title Styling */
+        .video-title {
+            font-size: 80px;
+            font-weight: 700;
+            line-height: 1.2;
+            text-shadow: 0 4px 12px rgba(0,0,0,0.5);
+            margin-bottom: 40px;
+            text-align: center;
+        }
+
+        /* Hide title when empty */
+        .video-title:empty {
+            display: none;
+        }
+
+        /* Flex spacer to push subtitle to bottom */
+        .spacer {
+            flex-grow: 1;
+        }
+
+        /* Narration/Subtitle Styling */
+        .subtitle-wrapper {
+            margin-bottom: 60px;
+        }
+
+        .text {
+            font-size: 52px;
+            font-weight: 500;
+            line-height: 1.6;
+            text-align: center;
+            text-shadow: 0 2px 8px rgba(0,0,0,0.6);
+            backdrop-filter: blur(4px);
+        }
+    </style>
+</head>
+<body>
+    <div class="page-container">
+        <!-- Background Image -->
+        <div class="background-layer">
+            <img src="{{image}}" alt="Background">
+        </div>
+
+        <!-- Shadow Overlay for Text Readability -->
+        <div class="gradient-overlay"></div>
+
+        <!-- Main Content -->
+        <div class="content-layer">
+            <!-- Top Section: Title.
+                 Tag and placeholder stay on one line: whitespace inside the div
+                 counts as content, so :empty would never hide a blank title. -->
+            <div class="video-title">{{title}}</div>
+
+            <!-- Spacer pushes content apart -->
+            <div class="spacer"></div>
+
+            <!-- Bottom Section: Narration/Text -->
+            <div class="subtitle-wrapper">
+                <div class="text">{{text}}</div>
+            </div>
+        </div>
+    </div>
+</body>
+</html>
--- a/web/components/content_input.py
+++ b/web/components/content_input.py
@@ -169,7 +169,7 @@ def render_content_input():
            }


-def render_bgm_section():
+def render_bgm_section(key_prefix=""):
    """Render BGM selection section"""
    with st.container(border=True):
        st.markdown(f"**{tr('section.bgm')}**")
@@ -204,7 +204,8 @@ def render_bgm_section():
            "BGM",
            bgm_options,
            index=default_index,
-            label_visibility="collapsed"
+            label_visibility="collapsed",
+            key=f"{key_prefix}bgm_selector"
        )
        
        # BGM volume slider (only show when BGM is selected)
@@ -216,7 +217,7 @@ def render_bgm_section():
                value=0.2,
                step=0.01,
                format="%.2f",
-                key="bgm_volume_slider",
+                key=f"{key_prefix}bgm_volume_slider",
                help=tr("bgm.volume_help")
            )
        else:
@@ -224,7 +225,7 @@ def render_bgm_section():
        
        # BGM preview button (only if BGM is not "None")
        if bgm_choice != tr("bgm.none"):
-            if st.button(tr("bgm.preview"), key="preview_bgm", use_container_width=True):
+            if st.button(tr("bgm.preview"), key=f"{key_prefix}preview_bgm", use_container_width=True):
                from pixelle_video.utils.os_util import get_resource_path, resource_exists
                try:
                    if resource_exists("bgm", bgm_choice):
--- a/web/i18n/locales/en_US.json
+++ b/web/i18n/locales/en_US.json
@@ -332,7 +332,44 @@
    "batch.error": "Error",
    "batch.error_detail": "View detailed error stack",
    "pipeline.standard.name": "Standard Video",
-    "pipeline.demo.name": "Demo Feature",
-    "pipeline.demo.description": "A demo pipeline with a custom layout"
+    "pipeline.asset_based.name": "Asset-Based Video",
+    "pipeline.asset_based.description": "Generate videos from user-provided assets",
+    "asset_based.section.assets": "📦 Asset Upload",
+    "asset_based.section.video_info": "📝 Video Information",
+    "asset_based.section.source": "⚙️ Service Configuration",
+    "asset_based.assets.what": "Upload your image or video assets; AI will automatically analyze them and generate a video script",
+    "asset_based.assets.how": "Supports JPG/PNG/GIF/WebP images and MP4/MOV/AVI videos. Each asset should be clear and relevant",
+    "asset_based.assets.upload": "Upload Assets",
+    "asset_based.assets.upload_help": "Supports multiple image or video files",
+    "asset_based.assets.count": "✅ Uploaded {count} assets",
+    "asset_based.assets.preview": "📷 Asset Preview",
+    "asset_based.assets.empty_hint": "💡 Please upload at least one image or video asset",
+    "asset_based.video_title": "Video Title (Optional)",
+    "asset_based.video_title_placeholder": "e.g., Pet Store Year-End Sale",
+    "asset_based.video_title_help": "Main title for the video; leave empty to hide the title",
+    "asset_based.intent": "Video Intent",
+    "asset_based.intent_placeholder": "e.g., Promote our pet store's year-end special offers to attract more customers, use a warm and friendly tone",
+    "asset_based.intent_help": "Describe the purpose, message, and desired style of this video",
+    "asset_based.duration": "Target Duration (seconds)",
+    "asset_based.duration_help": "Expected video duration; AI will adjust it based on the number and length of assets",
+    "asset_based.duration_label": "Target Duration: {seconds}s",
+    "asset_based.source.what": "Select the service provider for image analysis",
+    "asset_based.source.how": "RunningHub is a cloud service requiring API Key; SelfHost uses local ComfyUI",
+    "asset_based.source.select": "Select Service",
+    "asset_based.source.runninghub": "☁️ RunningHub (Cloud)",
+    "asset_based.source.selfhost": "🖥️ SelfHost (Local)",
+    "asset_based.source.runninghub_hint": "💡 Using RunningHub cloud service for asset analysis",
+    "asset_based.source.selfhost_hint": "💡 Using local ComfyUI service for asset analysis",
+    "asset_based.source.runninghub_not_configured": "⚠️ RunningHub API Key not configured",
+    "asset_based.source.selfhost_not_configured": "⚠️ Local ComfyUI URL not configured",
+    "asset_based.output.no_assets": "💡 Please upload assets on the left first",
+    "asset_based.output.ready": "📦 {count} assets ready, you can start generating",
+    "asset_based.progress.analyzing": "🔍 Analyzing assets...",
+    "asset_based.progress.analyzing_start": "🔍 Starting to analyze {total} assets...",
+    "asset_based.progress.analyzing_asset": "🔍 Analyzing asset {current}/{total}: {name}",
+    "asset_based.progress.analyzing_complete": "✅ Asset analysis complete ({count} total)",
+    "asset_based.progress.generating_script": "📝 Generating video script...",
+    "asset_based.progress.script_complete": "✅ Script generation complete",
+    "asset_based.progress.concat_complete": "✅ Video concatenation complete"
  }
 }
--- a/web/i18n/locales/zh_CN.json
+++ b/web/i18n/locales/zh_CN.json
@@ -332,7 +332,44 @@
    "batch.error": "错误信息",
    "batch.error_detail": "查看详细错误堆栈",
    "pipeline.standard.name": "标准视频",
-    "pipeline.demo.name": "演示功能",
-    "pipeline.demo.description": "具有自定义布局的演示 Pipeline"
+    "pipeline.asset_based.name": "素材视频",
+    "pipeline.asset_based.description": "基于用户上传的素材生成视频",
+    "asset_based.section.assets": "📦 素材上传",
+    "asset_based.section.video_info": "📝 视频信息",
+    "asset_based.section.source": "⚙️ 服务配置",
+    "asset_based.assets.what": "上传您的图片或视频素材，AI 将自动分析并生成视频脚本",
+    "asset_based.assets.how": "支持 JPG/PNG/GIF/WebP 图片和 MP4/MOV/AVI 等视频格式，建议每个素材清晰且内容相关",
+    "asset_based.assets.upload": "上传素材",
+    "asset_based.assets.upload_help": "支持多个图片或视频文件",
+    "asset_based.assets.count": "✅ 已上传 {count} 个素材",
+    "asset_based.assets.preview": "📷 素材预览",
+    "asset_based.assets.empty_hint": "💡 请上传至少一个图片或视频素材",
+    "asset_based.video_title": "视频标题（选填）",
+    "asset_based.video_title_placeholder": "例如：宠物店年终大促",
+    "asset_based.video_title_help": "视频的主标题，留空则不显示标题",
+    "asset_based.intent": "视频意图",
+    "asset_based.intent_placeholder": "例如：宣传我们的宠物店年终特惠活动，吸引更多客户到店消费，风格要温馨亲切",
+    "asset_based.intent_help": "描述这个视频的目的、想传达的信息以及期望的风格",
+    "asset_based.duration": "目标时长（秒）",
+    "asset_based.duration_help": "视频的预期时长，AI 会根据素材数量和时长进行调整",
+    "asset_based.duration_label": "目标时长：{seconds} 秒",
+    "asset_based.source.what": "选择用于图像分析的服务提供商",
+    "asset_based.source.how": "RunningHub 是云端服务，需配置 API Key；SelfHost 是本地 ComfyUI 服务",
+    "asset_based.source.select": "选择服务",
+    "asset_based.source.runninghub": "☁️ RunningHub（云端）",
+    "asset_based.source.selfhost": "🖥️ SelfHost（本地）",
+    "asset_based.source.runninghub_hint": "💡 使用 RunningHub 云端服务分析素材",
+    "asset_based.source.selfhost_hint": "💡 使用本地 ComfyUI 服务分析素材",
+    "asset_based.source.runninghub_not_configured": "⚠️ 未配置 RunningHub API Key",
+    "asset_based.source.selfhost_not_configured": "⚠️ 未配置本地 ComfyUI 地址",
+    "asset_based.output.no_assets": "💡 请先在左侧上传素材",
+    "asset_based.output.ready": "📦 已准备好 {count} 个素材，可以开始生成",
+    "asset_based.progress.analyzing": "🔍 正在分析素材...",
+    "asset_based.progress.analyzing_start": "🔍 开始分析 {total} 个素材...",
+    "asset_based.progress.analyzing_asset": "🔍 分析素材 {current}/{total}：{name}",
+    "asset_based.progress.analyzing_complete": "✅ 素材分析完成（共 {count} 个）",
+    "asset_based.progress.generating_script": "📝 正在生成视频脚本...",
+    "asset_based.progress.script_complete": "✅ 脚本生成完成",
+    "asset_based.progress.concat_complete": "✅ 视频合成完成"
  }
 }
--- a/web/pipelines/__init__.py
+++ b/web/pipelines/__init__.py
@@ -25,7 +25,7 @@ from web.pipelines.base import (

 # Import all pipeline UI modules to ensure they register themselves
 from web.pipelines import standard
-from web.pipelines import demo
+from web.pipelines import asset_based

 __all__ = [
    "PipelineUI",
--- a/web/pipelines/asset_based.py
+++ b/web/pipelines/asset_based.py
@@ -0,0 +1,447 @@
+# Copyright (C) 2025 AIDC-AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Asset-Based Pipeline UI
+
+Implements the UI for generating videos from user-provided assets.
+"""
+
+import os
+import time
+from pathlib import Path
+from typing import Any
+
+import streamlit as st
+from loguru import logger
+
+from web.i18n import tr, get_language
+from web.pipelines.base import PipelineUI, register_pipeline_ui
+from web.components.content_input import render_bgm_section, render_version_info
+from web.utils.async_helpers import run_async
+from pixelle_video.config import config_manager
+from pixelle_video.models.progress import ProgressEvent
+
+
+class AssetBasedPipelineUI(PipelineUI):
+    """
+    UI for the Asset-Based Video Generation Pipeline.
+
+    Generates videos from user-provided assets (images/videos). The page is
+    rendered as three columns: asset upload and video info, video
+    configuration, and the generate/preview panel.
+    """
+    # Pipeline identifier; render() also sends it as the "pipeline" entry of
+    # the parameter dict handed to the output panel.
+    name = "asset_based"
+    # Emoji icon for this pipeline (presumably shown wherever PipelineUI
+    # subclasses are listed -- confirm against PipelineUI).
+    icon = "📦"
+    
+    @property
+    def display_name(self):
+        """Return the translated pipeline name (i18n key ``pipeline.asset_based.name``)."""
+        return tr("pipeline.asset_based.name")
+    
+    @property
+    def description(self):
+        """Return the translated pipeline description (i18n key ``pipeline.asset_based.description``)."""
+        return tr("pipeline.asset_based.description")
+    
+    def render(self, pixelle_video: Any):
+        """
+        Draw the whole page as three equally weighted columns.
+
+        Inputs are collected in the first two columns and merged into a single
+        parameter dict, which is handed to the output panel in the third.
+
+        Args:
+            pixelle_video: Object forwarded unchanged to the configuration and
+                output sections.
+        """
+        input_column, settings_column, output_column = st.columns([1, 1, 1])
+
+        # Column 1: asset upload, video info, background music, version info
+        with input_column:
+            uploads = self._render_asset_input()
+            music = render_bgm_section(key_prefix="asset_")
+            render_version_info()
+
+        # Column 2: duration, workflow source and TTS settings
+        with settings_column:
+            settings = self._render_video_config(pixelle_video)
+
+        # Later sections win on key clashes, matching a left-to-right merge.
+        merged_params = {"pipeline": self.name}
+        for section in (uploads, music, settings):
+            merged_params.update(section)
+
+        # Column 3: generate button, progress and result preview
+        with output_column:
+            self._render_output_preview(pixelle_video, merged_params)
+    
+    def _render_asset_input(self) -> dict:
+        """
+        Render the asset upload section and the video title/intent inputs.
+
+        Uploaded files are written below ``temp/assets_<id>`` so they can be
+        passed on as file paths. The ``<id>`` is stored in ``st.session_state``
+        so that script reruns reuse one directory per browser session instead
+        of creating a fresh directory on every rerun.
+
+        Returns:
+            dict with keys:
+                - "assets": absolute paths of the saved uploads (empty list
+                  when nothing is uploaded)
+                - "video_title": content of the title input
+                - "intent": content of the intent text area, or None when empty
+        """
+        with st.container(border=True):
+            st.markdown(f"**{tr('asset_based.section.assets')}**")
+
+            with st.expander(tr("help.feature_description"), expanded=False):
+                st.markdown(f"**{tr('help.what')}**")
+                st.markdown(tr("asset_based.assets.what"))
+                st.markdown(f"**{tr('help.how')}**")
+                st.markdown(tr("asset_based.assets.how"))
+
+            # File uploader for multiple files
+            uploaded_files = st.file_uploader(
+                tr("asset_based.assets.upload"),
+                type=["jpg", "jpeg", "png", "gif", "webp", "mp4", "mov", "avi", "mkv", "webm"],
+                accept_multiple_files=True,
+                help=tr("asset_based.assets.upload_help"),
+                key="asset_files"
+            )
+
+            # Save uploaded files to a temp directory owned by this session
+            asset_paths = []
+            if uploaded_files:
+                import uuid
+
+                # Streamlit re-executes the script on every widget interaction.
+                # Generating a new id each time would leave one orphaned
+                # directory (with a full copy of the uploads) per rerun, so the
+                # id is created once and remembered for the session.
+                if "asset_upload_session_id" not in st.session_state:
+                    st.session_state["asset_upload_session_id"] = uuid.uuid4().hex[:12]
+                session_id = st.session_state["asset_upload_session_id"]
+                temp_dir = Path(f"temp/assets_{session_id}")
+                temp_dir.mkdir(parents=True, exist_ok=True)
+
+                used_names = set()
+                for index, uploaded_file in enumerate(uploaded_files):
+                    # The name comes from the client: keep only the final path
+                    # component so it cannot point outside temp_dir.
+                    safe_name = Path(uploaded_file.name).name
+                    if safe_name in ("", ".", ".."):
+                        safe_name = f"asset_{index}"
+                    # Two uploads may share a name; prefix the later one so it
+                    # does not overwrite the earlier file.
+                    if safe_name in used_names:
+                        safe_name = f"{index}_{safe_name}"
+                    used_names.add(safe_name)
+
+                    file_path = temp_dir / safe_name
+                    with open(file_path, "wb") as f:
+                        f.write(uploaded_file.getbuffer())
+                    asset_paths.append(str(file_path.absolute()))
+
+                st.success(tr("asset_based.assets.count", count=len(asset_paths)))
+
+                # Preview uploaded assets
+                with st.expander(tr("asset_based.assets.preview"), expanded=True):
+                    # Show in a grid (3 columns)
+                    cols = st.columns(3)
+                    for i, (file, path) in enumerate(zip(uploaded_files, asset_paths)):
+                        with cols[i % 3]:
+                            # Pick the widget from the file extension
+                            ext = Path(path).suffix.lower()
+                            if ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]:
+                                st.image(file, caption=file.name, use_container_width=True)
+                            elif ext in [".mp4", ".mov", ".avi", ".mkv", ".webm"]:
+                                st.video(file)
+                                st.caption(file.name)
+            else:
+                st.info(tr("asset_based.assets.empty_hint"))
+
+        # Video title & intent
+        with st.container(border=True):
+            st.markdown(f"**{tr('asset_based.section.video_info')}**")
+
+            video_title = st.text_input(
+                tr("asset_based.video_title"),
+                placeholder=tr("asset_based.video_title_placeholder"),
+                help=tr("asset_based.video_title_help"),
+                key="asset_video_title"
+            )
+
+            intent = st.text_area(
+                tr("asset_based.intent"),
+                placeholder=tr("asset_based.intent_placeholder"),
+                help=tr("asset_based.intent_help"),
+                height=100,
+                key="asset_intent"
+            )
+
+        return {
+            "assets": asset_paths,
+            "video_title": video_title,
+            # An empty text area is reported as "no intent given"
+            "intent": intent if intent else None
+        }
+    
+    def _render_video_config(self, pixelle_video: Any) -> dict:
+        """
+        Render the duration, workflow source and TTS configuration widgets.
+
+        Args:
+            pixelle_video: Not used by this section; accepted so all section
+                renderers can be called with the same argument.
+
+        Returns:
+            dict with keys "duration" (seconds), "source" ("runninghub" or
+            "selfhost"), "voice_id" and "tts_speed".
+        """
+        # Bounds of the TTS speed slider; the saved default is clamped to them.
+        min_speed = 0.5
+        max_speed = 2.0
+        fallback_speed = 1.2
+
+        # Read once and reuse for both the source and the TTS sections
+        comfyui_config = config_manager.get_comfyui_config()
+
+        # Duration configuration
+        with st.container(border=True):
+            st.markdown(f"**{tr('video.title')}**")
+
+            # Duration slider
+            duration = st.slider(
+                tr("asset_based.duration"),
+                min_value=15,
+                max_value=120,
+                value=30,
+                step=5,
+                help=tr("asset_based.duration_help"),
+                key="asset_duration"
+            )
+            st.caption(tr("asset_based.duration_label", seconds=duration))
+
+        # Workflow source selection
+        with st.container(border=True):
+            st.markdown(f"**{tr('asset_based.section.source')}**")
+
+            with st.expander(tr("help.feature_description"), expanded=False):
+                st.markdown(f"**{tr('help.what')}**")
+                st.markdown(tr("asset_based.source.what"))
+                st.markdown(f"**{tr('help.how')}**")
+                st.markdown(tr("asset_based.source.how"))
+
+            source_options = {
+                "runninghub": tr("asset_based.source.runninghub"),
+                "selfhost": tr("asset_based.source.selfhost")
+            }
+
+            # A source counts as configured when its config entry is non-empty
+            has_runninghub = bool(comfyui_config.get("runninghub_api_key"))
+            has_selfhost = bool(comfyui_config.get("comfyui_url"))
+
+            # Preselect SelfHost only when it is the sole configured source;
+            # otherwise (RunningHub configured, or nothing configured) use RunningHub.
+            default_source_index = 1 if (has_selfhost and not has_runninghub) else 0
+
+            source = st.radio(
+                tr("asset_based.source.select"),
+                options=list(source_options.keys()),
+                format_func=lambda x: source_options[x],
+                index=default_source_index,
+                horizontal=True,
+                key="asset_source",
+                label_visibility="collapsed"
+            )
+
+            # Show hint based on selection
+            if source == "runninghub":
+                if not has_runninghub:
+                    st.warning(tr("asset_based.source.runninghub_not_configured"))
+                else:
+                    st.info(tr("asset_based.source.runninghub_hint"))
+            else:
+                if not has_selfhost:
+                    st.warning(tr("asset_based.source.selfhost_not_configured"))
+                else:
+                    st.info(tr("asset_based.source.selfhost_hint"))
+
+        # TTS configuration
+        with st.container(border=True):
+            st.markdown(f"**{tr('section.tts')}**")
+
+            # Import voice configuration
+            from pixelle_video.tts_voices import EDGE_TTS_VOICES, get_voice_display_name
+
+            # Saved defaults; tolerate sections that are present but null
+            tts_config = comfyui_config.get("tts") or {}
+            local_config = tts_config.get("local") or {}
+            saved_voice = local_config.get("voice", "zh-CN-YunjianNeural")
+
+            # st.slider rejects a default whose type differs from min/max
+            # (e.g. an integer 1 from the config file) or that lies outside the
+            # range, so coerce to float and clamp before using it.
+            try:
+                saved_speed = float(local_config.get("speed", fallback_speed))
+            except (TypeError, ValueError):
+                saved_speed = fallback_speed
+            saved_speed = min(max(saved_speed, min_speed), max_speed)
+
+            # Build voice options with i18n
+            voice_options = []
+            voice_ids = []
+            default_voice_index = 0
+
+            for idx, voice_config in enumerate(EDGE_TTS_VOICES):
+                voice_id = voice_config["id"]
+                display_name = get_voice_display_name(voice_id, tr, get_language())
+                voice_options.append(display_name)
+                voice_ids.append(voice_id)
+
+                if voice_id == saved_voice:
+                    default_voice_index = idx
+
+            # Two-column layout
+            voice_col, speed_col = st.columns([1, 1])
+
+            with voice_col:
+                selected_voice_display = st.selectbox(
+                    tr("tts.voice_selector"),
+                    voice_options,
+                    index=default_voice_index,
+                    key="asset_tts_voice"
+                )
+                # Map the chosen display name back to its voice id
+                selected_voice_index = voice_options.index(selected_voice_display)
+                voice_id = voice_ids[selected_voice_index]
+
+            with speed_col:
+                tts_speed = st.slider(
+                    tr("tts.speed"),
+                    min_value=min_speed,
+                    max_value=max_speed,
+                    value=saved_speed,
+                    step=0.1,
+                    format="%.1fx",
+                    key="asset_tts_speed"
+                )
+                st.caption(tr("tts.speed_label", speed=f"{tts_speed:.1f}"))
+
+        return {
+            "duration": duration,
+            "source": source,
+            "voice_id": voice_id,
+            "tts_speed": tts_speed
+        }
+    
+    def _render_output_preview(self, pixelle_video: Any, video_params: dict):
+        """
+        Render the generate button, live progress and the finished video.
+
+        Args:
+            pixelle_video: Object passed to the AssetBasedPipeline constructor.
+            video_params: Merged parameters from the other sections; must
+                contain "assets" to enable generation, other keys fall back to
+                defaults.
+
+        Any exception raised while generating is shown as an error, logged, and
+        the script run is stopped.
+        """
+        with st.container(border=True):
+            st.markdown(f"**{tr('section.video_generation')}**")
+
+            # Check configuration
+            if not config_manager.validate():
+                st.warning(tr("settings.not_configured"))
+
+            # Without assets only a disabled button is shown
+            assets = video_params.get("assets", [])
+            if not assets:
+                st.info(tr("asset_based.output.no_assets"))
+                st.button(
+                    tr("btn.generate"),
+                    type="primary",
+                    use_container_width=True,
+                    disabled=True,
+                    key="asset_generate_disabled"
+                )
+                return
+
+            # Show asset summary
+            st.info(tr("asset_based.output.ready", count=len(assets)))
+
+            # Generate button
+            if st.button(tr("btn.generate"), type="primary", use_container_width=True, key="asset_generate"):
+                # Validate
+                if not config_manager.validate():
+                    st.error(tr("settings.not_configured"))
+                    st.stop()
+
+                # Show progress
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                start_time = time.time()
+
+                try:
+                    # Import pipeline
+                    from pixelle_video.pipelines.asset_based import AssetBasedPipeline
+
+                    # Create pipeline
+                    pipeline = AssetBasedPipeline(pixelle_video)
+
+                    # Progress callback
+                    def update_progress(event: ProgressEvent):
+                        status_text.text(self._progress_message(event))
+                        # Stay within 0..99 while running; 100 is set only
+                        # after the pipeline has returned.
+                        percent = int(event.progress * 100)
+                        progress_bar.progress(max(0, min(percent, 99)))
+
+                    # Execute pipeline with progress callback
+                    ctx = run_async(pipeline(
+                        assets=video_params["assets"],
+                        video_title=video_params.get("video_title", ""),
+                        intent=video_params.get("intent"),
+                        duration=video_params.get("duration", 30),
+                        source=video_params.get("source", "runninghub"),
+                        bgm_path=video_params.get("bgm_path"),
+                        bgm_volume=video_params.get("bgm_volume", 0.2),
+                        bgm_mode=video_params.get("bgm_mode", "loop"),
+                        voice_id=video_params.get("voice_id", "zh-CN-YunjianNeural"),
+                        tts_speed=video_params.get("tts_speed", 1.2),
+                        progress_callback=update_progress
+                    ))
+
+                    total_time = time.time() - start_time
+
+                    progress_bar.progress(100)
+                    status_text.text(tr("status.success"))
+
+                    # Display result
+                    st.success(tr("status.video_generated", path=ctx.final_video_path))
+
+                    st.markdown("---")
+
+                    # Video info
+                    if os.path.exists(ctx.final_video_path):
+                        file_size_mb = os.path.getsize(ctx.final_video_path) / (1024 * 1024)
+                        n_scenes = len(ctx.storyboard.frames) if ctx.storyboard else 0
+
+                        info_text = (
+                            f"⏱️ {tr('info.generation_time')} {total_time:.1f}s   "
+                            f"📦 {file_size_mb:.2f}MB   "
+                            f"🎬 {n_scenes}{tr('info.scenes_unit')}"
+                        )
+                        st.caption(info_text)
+
+                        st.markdown("---")
+
+                        # Video preview
+                        st.video(ctx.final_video_path)
+
+                        # Download button
+                        with open(ctx.final_video_path, "rb") as video_file:
+                            video_bytes = video_file.read()
+                            video_filename = os.path.basename(ctx.final_video_path)
+                            st.download_button(
+                                label="⬇️ 下载视频" if get_language() == "zh_CN" else "⬇️ Download Video",
+                                data=video_bytes,
+                                file_name=video_filename,
+                                mime="video/mp4",
+                                use_container_width=True
+                            )
+                    else:
+                        st.error(tr("status.video_not_found", path=ctx.final_video_path))
+
+                except Exception as e:
+                    # Clear the progress widgets before reporting the failure
+                    status_text.text("")
+                    progress_bar.empty()
+                    st.error(tr("status.error", error=str(e)))
+                    logger.exception(e)
+                    st.stop()
+
+    @staticmethod
+    def _progress_message(event: ProgressEvent):
+        """Return the translated status line for a pipeline progress event."""
+        kind = event.event_type
+
+        if kind == "analyzing_assets":
+            if event.extra_info == "start":
+                return tr("asset_based.progress.analyzing_start", total=event.frame_total)
+            return tr("asset_based.progress.analyzing_complete", count=event.frame_total)
+
+        if kind == "analyzing_asset":
+            return tr(
+                "asset_based.progress.analyzing_asset",
+                current=event.frame_current,
+                total=event.frame_total,
+                name=event.extra_info or ""
+            )
+
+        if kind == "generating_script":
+            if event.extra_info == "complete":
+                return tr("asset_based.progress.script_complete")
+            return tr("asset_based.progress.generating_script")
+
+        if kind == "frame_step":
+            action_text = tr(f"progress.step_{event.action}")
+            return tr(
+                "progress.frame_step",
+                current=event.frame_current,
+                total=event.frame_total,
+                step=event.step,
+                action=action_text
+            )
+
+        if kind == "processing_frame":
+            return tr(
+                "progress.frame",
+                current=event.frame_current,
+                total=event.frame_total
+            )
+
+        if kind == "concatenating":
+            if event.extra_info == "complete":
+                return tr("asset_based.progress.concat_complete")
+            return tr("progress.concatenating")
+
+        # Every other event type, including "completed", maps directly to the
+        # i18n key "progress.<event_type>".
+        return tr(f"progress.{kind}")
+
+
+# Register this UI class as an import-time side effect; the package init
+# module imports this module so that the registration runs.
+register_pipeline_ui(AssetBasedPipelineUI)
+
--- a/web/pipelines/demo.py
+++ b/web/pipelines/demo.py
@@ -1,69 +0,0 @@
-# Copyright (C) 2025 AIDC-AI
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#     http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Demo Pipeline UI
-
-Implements a custom layout for the Demo Pipeline.
-"""
-
-import streamlit as st
-from typing import Any
-from web.i18n import tr
-
-from web.pipelines.base import PipelineUI, register_pipeline_ui
-
-
-class DemoPipelineUI(PipelineUI):
-    """
-    Demo UI to verify the full-page plugin system.
-    Uses a completely different layout (2 columns).
-    """
-    name = "demo"
-    icon = "✨"
-    
-    @property
-    def display_name(self):
-        return tr("pipeline.demo.name")
-        
-    @property
-    def description(self):
-        return tr("pipeline.demo.description")
-    
-    def render(self, pixelle_video: Any):
-        st.markdown("### ✨ Demo Pipeline Custom Layout")
-        st.info("This pipeline uses a custom 2-column layout, demonstrating full UI control.")
-        
-        col1, col2 = st.columns([2, 1])
-        
-        with col1:
-            with st.container(border=True):
-                st.subheader("1. Input")
-                topic = st.text_input("Enter Topic", placeholder="e.g. AI News")
-                mood = st.selectbox("Mood", ["Happy", "Serious", "Funny"])
-                
-                st.markdown("---")
-                st.subheader("2. Settings")
-                # Simplified settings for demo
-                n_scenes = st.slider("Scenes", 3, 10, 5)
-        
-        with col2:
-            with st.container(border=True):
-                st.subheader("3. Generate")
-                if st.button("🚀 Generate Demo Video", type="primary", use_container_width=True):
-                    # Mock generation logic or call backend
-                    st.success(f"Generating video for '{topic}' ({mood}) with {n_scenes} scenes...")
-                    st.balloons()
-
-
-# Register self
-register_pipeline_ui(DemoPipelineUI)