Merge branch 'dev_video'

This commit is contained in:
puke
2025-11-12 21:22:08 +08:00
53 changed files with 2158 additions and 438 deletions

View File

@@ -13,7 +13,7 @@
只需输入一个 **主题**,Pixelle-Video 就能自动完成:
- ✍️ 撰写视频文案
- 🎨 生成 AI 配图
- 🎨 生成 AI 配图/视频
- 🗣️ 合成语音解说
- 🎵 添加背景音乐
- 🎬 一键合成视频
@@ -32,6 +32,7 @@
-**全自动生成** - 输入主题,自动生成完整视频
-**AI 智能文案** - 根据主题智能创作解说词,无需自己写脚本
-**AI 生成配图** - 每句话都配上精美的 AI 插图
-**AI 生成视频** - 支持使用 AI 视频生成模型(如 WAN 2.1)创建动态视频内容
-**AI 生成语音** - 支持 Edge-TTS、Index-TTS 等众多主流 TTS 方案
-**背景音乐** - 支持添加 BGM,让视频更有氛围
-**视觉风格** - 多种模板可选,打造独特视频风格
@@ -281,6 +282,12 @@ uv run streamlit run web/app.py
#### 视频模板
决定视频画面的布局和设计。
**模板命名规范**
- `static_*.html`: 静态模板(无需AI生成媒体,纯文字样式)
- `image_*.html`: 图片模板(使用AI生成的图片作为背景)
- `video_*.html`: 视频模板(使用AI生成的视频作为背景)
**使用方法**
- 从下拉菜单选择模板,按尺寸分组显示(竖屏/横屏/方形)
- 点击「预览模板」可以自定义参数测试效果
- 如果懂 HTML,可以在 `templates/` 文件夹创建自己的模板

View File

@@ -13,7 +13,7 @@
Just input a **topic**, and Pixelle-Video will automatically:
- ✍️ Write video script
- 🎨 Generate AI images
- 🎨 Generate AI images/videos
- 🗣️ Synthesize voice narration
- 🎵 Add background music
- 🎬 Create video with one click
@@ -32,6 +32,7 @@ Just input a **topic**, and Pixelle-Video will automatically:
-**Fully Automatic Generation** - Input a topic, automatically generate complete video
-**AI Smart Copywriting** - Intelligently create narration based on topic, no need to write scripts yourself
-**AI Generated Images** - Each sentence comes with beautiful AI illustrations
-**AI Generated Videos** - Support AI video generation models (like WAN 2.1) to create dynamic video content
-**AI Generated Voice** - Support Edge-TTS, Index-TTS and many other mainstream TTS solutions
-**Background Music** - Support adding BGM to make videos more atmospheric
-**Visual Styles** - Multiple templates to choose from, create unique video styles
@@ -281,6 +282,12 @@ Determine what style of images AI generates.
#### Video Template
Determines video layout and design.
**Template Naming Convention**
- `static_*.html`: Static templates (no AI-generated media, text-only styles)
- `image_*.html`: Image templates (uses AI-generated images as background)
- `video_*.html`: Video templates (uses AI-generated videos as background)
**Usage**
- Select template from dropdown menu, displayed grouped by dimension (portrait/landscape/square)
- Click "Preview Template" to test effect with custom parameters
- If you know HTML, you can create your own templates in the `templates/` folder

View File

@@ -43,18 +43,27 @@ async def image_generate(
try:
logger.info(f"Image generation request: {request.prompt[:50]}...")
# Call image service
image_path = await pixelle_video.image(
# Call media service (backward compatible with image API)
media_result = await pixelle_video.media(
prompt=request.prompt,
width=request.width,
height=request.height,
workflow=request.workflow
)
# For backward compatibility, only support image results in /image endpoint
if media_result.is_video:
raise HTTPException(
status_code=400,
detail="Video workflow used. Please use /media/generate endpoint for video generation."
)
return ImageGenerateResponse(
image_path=image_path
image_path=media_result.url
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Image generation error: {e}")
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -73,8 +73,7 @@ async def generate_video_sync(
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
"image_width": request_body.image_width,
"image_height": request_body.image_height,
# Note: image_width and image_height are now auto-determined from template
"image_workflow": request_body.image_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,
@@ -161,8 +160,7 @@ async def generate_video_async(
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
"image_width": request_body.image_width,
"image_height": request_body.image_height,
# Note: image_width and image_height are now auto-determined from template
"image_workflow": request_body.image_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,

View File

@@ -57,8 +57,7 @@ class VideoGenerateRequest(BaseModel):
max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
# === Image Parameters ===
image_width: int = Field(1024, description="Image width")
image_height: int = Field(1024, description="Image height")
# Note: image_width and image_height are now auto-determined from template meta tags
image_workflow: Optional[str] = Field(None, description="Custom image workflow")
# === Video Parameters ===

View File

@@ -37,15 +37,29 @@ comfyui:
# Image prompt prefix (optional)
prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"
# Video-specific configuration
video:
# Required: Default workflow to use (no fallback)
# Options: runninghub/video_wan2.1_fusionx.json (recommended, no local setup)
# selfhost/video_wan2.1_fusionx.json (requires local ComfyUI)
default_workflow: runninghub/video_wan2.1_fusionx.json
# Video prompt prefix (optional)
prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"
# ==================== Template Configuration ====================
# Configure default template for video generation
template:
# Default frame template to use when not explicitly specified
# Determines video aspect ratio and layout style
# Template naming convention:
# - static_*.html: Static style templates (no AI-generated media)
# - image_*.html: Templates requiring AI-generated images
# - video_*.html: Templates requiring AI-generated videos
# Options:
# - 1080x1920 (vertical/portrait): default.html, modern.html, elegant.html, etc.
# - 1080x1080 (square): minimal_framed.html, magazine_cover.html, etc.
# - 1920x1080 (horizontal/landscape): film.html, full.html, etc.
# - 1080x1920 (vertical/portrait): image_default.html, image_modern.html, image_elegant.html, static_simple.html, etc.
# - 1080x1080 (square): image_minimal_framed.html, etc.
# - 1920x1080 (horizontal/landscape): image_film.html, image_full.html, etc.
# See templates/ directory for all available templates
default_template: "1080x1920/default.html"
default_template: "1080x1920/image_default.html"

View File

@@ -21,6 +21,10 @@ comfyui:
default_workflow: "runninghub/image_flux.json"
prompt_prefix: "Minimalist illustration style"
video:
default_workflow: "runninghub/video_wan2.1_fusionx.json"
prompt_prefix: "Minimalist illustration style"
tts:
default_workflow: "selfhost/tts_edge.json"
```
@@ -48,6 +52,13 @@ comfyui:
- `default_workflow`: Default image generation workflow
- `prompt_prefix`: Prompt prefix
### Video Configuration
- `default_workflow`: Default video generation workflow
- `runninghub/video_wan2.1_fusionx.json`: Cloud workflow (recommended, no local setup required)
- `selfhost/video_wan2.1_fusionx.json`: Local workflow (requires local ComfyUI support)
- `prompt_prefix`: Video prompt prefix (controls video generation style)
### TTS Configuration
- `default_workflow`: Default TTS workflow

View File

@@ -154,15 +154,39 @@ Suitable for Instagram, WeChat Moments, and other platforms.
---
## Template Naming Convention
Templates follow a unified naming convention to distinguish different types:
- **`static_*.html`**: Static templates
- No AI-generated media content required
- Pure text style rendering
- Suitable for quick generation and low-cost scenarios
- **`image_*.html`**: Image templates
- Uses AI-generated images as background
- Invokes ComfyUI image generation workflows
- Suitable for content requiring visual illustrations
- **`video_*.html`**: Video templates
- Uses AI-generated videos as background
- Invokes ComfyUI video generation workflows
- Creates dynamic video content with enhanced expressiveness
## Template Structure
Templates are located in the `templates/` directory, grouped by size:
```
templates/
├── 1080x1920/ # Portrait (11 templates)
├── 1920x1080/ # Landscape (2 templates)
└── 1080x1080/ # Square (1 template)
├── 1080x1920/ # Portrait
│ ├── static_*.html # Static templates
│ ├── image_*.html # Image templates
│ └── video_*.html # Video templates
├── 1920x1080/ # Landscape
│ └── image_*.html # Image templates
└── 1080x1080/ # Square
└── image_*.html # Image templates
```
---

View File

@@ -16,10 +16,42 @@ Pixelle-Video is built on the ComfyUI architecture and supports custom workflows
Located in `workflows/selfhost/` or `workflows/runninghub/`
Used for Text-to-Speech, supporting various TTS engines:
- Edge-TTS
- Index-TTS (supports voice cloning)
- Other ComfyUI-compatible TTS nodes
### Image Generation Workflows
Located in `workflows/selfhost/` or `workflows/runninghub/`
Used for generating static images as video backgrounds:
- FLUX series models
- Stable Diffusion series models
- Other image generation models
### Video Generation Workflows
Located in `workflows/selfhost/` or `workflows/runninghub/`
**New Feature**: Supports AI video generation to create dynamic video content.
**Preset Workflows**:
- `runninghub/video_wan2.1_fusionx.json`: Cloud workflow (recommended)
- Based on WAN 2.1 model
- No local setup required, accessed via RunningHub API
- Supports Text-to-Video generation
- `selfhost/video_wan2.1_fusionx.json`: Local workflow
- Requires local ComfyUI environment
- Requires installation of corresponding video generation nodes
- Suitable for users with local GPU
**Use Cases**:
- Works with `video_*.html` templates
- Automatically generates dynamic video backgrounds based on scripts
- Enhances visual expressiveness and viewing experience
---
## Custom Workflows

View File

@@ -21,6 +21,10 @@ comfyui:
default_workflow: "runninghub/image_flux.json"
prompt_prefix: "Minimalist illustration style"
video:
default_workflow: "runninghub/video_wan2.1_fusionx.json"
prompt_prefix: "Minimalist illustration style"
tts:
default_workflow: "selfhost/tts_edge.json"
```
@@ -48,6 +52,13 @@ comfyui:
- `default_workflow`: 默认图像生成工作流
- `prompt_prefix`: 提示词前缀
### 视频配置
- `default_workflow`: 默认视频生成工作流
- `runninghub/video_wan2.1_fusionx.json`: 云端工作流(推荐,无需本地环境)
- `selfhost/video_wan2.1_fusionx.json`: 本地工作流(需要本地 ComfyUI 支持)
- `prompt_prefix`: 视频提示词前缀(用于控制视频生成风格)
### TTS 配置
- `default_workflow`: 默认 TTS 工作流

View File

@@ -154,15 +154,39 @@
---
## 模板命名规范
模板采用统一的命名规范来区分不同类型:
- **`static_*.html`**: 静态模板
- 无需 AI 生成任何媒体内容
- 纯文字样式渲染
- 适合快速生成、低成本场景
- **`image_*.html`**: 图片模板
- 使用 AI 生成的图片作为背景
- 调用 ComfyUI 的图像生成工作流
- 适合需要视觉配图的内容
- **`video_*.html`**: 视频模板
- 使用 AI 生成的视频作为背景
- 调用 ComfyUI 的视频生成工作流
- 创建动态视频内容,增强表现力
## 模板结构
模板位于 `templates/` 目录,按尺寸分组:
```
templates/
├── 1080x1920/ # 竖屏11个模板
├── 1920x1080/ # 横屏2个模板
└── 1080x1080/ # 方形1个模板
├── 1080x1920/ # 竖屏
│ ├── static_*.html # 静态模板
│ ├── image_*.html # 图片模板
│ └── video_*.html # 视频模板
├── 1920x1080/ # 横屏
│ └── image_*.html # 图片模板
└── 1080x1080/ # 方形
└── image_*.html # 图片模板
```
---

View File

@@ -16,10 +16,42 @@ Pixelle-Video 基于 ComfyUI 架构,支持自定义工作流。
位于 `workflows/selfhost/` 或 `workflows/runninghub/`
用于文本转语音(Text-to-Speech),支持多种 TTS 引擎:
- Edge-TTS
- Index-TTS支持声音克隆
- 其他 ComfyUI 兼容的 TTS 节点
### 图像生成工作流
位于 `workflows/selfhost/` 或 `workflows/runninghub/`
用于生成静态图像作为视频背景:
- FLUX 系列模型
- Stable Diffusion 系列模型
- 其他图像生成模型
### 视频生成工作流
位于 `workflows/selfhost/` 或 `workflows/runninghub/`
**新功能**:支持 AI 视频生成,创建动态视频内容。
**预置工作流**
- `runninghub/video_wan2.1_fusionx.json`: 云端工作流(推荐)
- 基于 WAN 2.1 模型
- 无需本地环境,通过 RunningHub API 调用
- 支持文本到视频Text-to-Video
- `selfhost/video_wan2.1_fusionx.json`: 本地工作流
- 需要本地 ComfyUI 环境
- 需要安装相应的视频生成节点
- 适合有本地 GPU 的用户
**使用场景**
- 配合 `video_*.html` 模板使用
- 自动根据文案生成动态视频背景
- 增强视频的视觉表现力和观看体验
---
## 自定义工作流

View File

@@ -0,0 +1,61 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Media generation result models
"""
from typing import Literal, Optional
from pydantic import BaseModel, Field
class MediaResult(BaseModel):
    """Result produced by a media-generation workflow run.

    A ComfyUI workflow can emit either an image or a video; the
    ``media_type`` field records which kind this result holds, and the
    ``is_image`` / ``is_video`` properties offer convenient checks.

    Examples:
        >>> MediaResult(media_type="image", url="http://example.com/image.png")
        >>> MediaResult(media_type="video", url="http://example.com/video.mp4", duration=5.2)
    """

    # Discriminator for the kind of media the workflow produced.
    media_type: Literal["image", "video"] = Field(
        description="Type of generated media"
    )
    # Location of the generated file (remote URL or local path).
    url: str = Field(
        description="URL or path to the generated media file"
    )
    # Only meaningful for videos; stays None for images.
    duration: Optional[float] = Field(
        None,
        description="Duration in seconds (only applicable for video)"
    )

    @property
    def is_image(self) -> bool:
        """True when the generated media is an image."""
        return self.media_type == "image"

    @property
    def is_video(self) -> bool:
        """True when the generated media is a video."""
        return self.media_type == "video"

View File

@@ -57,16 +57,18 @@ class StoryboardFrame:
"""Single storyboard frame"""
index: int # Frame index (0-based)
narration: str # Narration text
image_prompt: str # Image generation prompt
image_prompt: str # Image generation prompt (can be None for text-only or video)
# Generated resource paths
audio_path: Optional[str] = None # Audio file path
image_path: Optional[str] = None # Original image path
composed_image_path: Optional[str] = None # Composed image path (with subtitles)
video_segment_path: Optional[str] = None # Video segment path
audio_path: Optional[str] = None # Audio file path (narration)
media_type: Optional[str] = None # Media type: "image" or "video" (None if no media)
image_path: Optional[str] = None # Original image path (for image type)
video_path: Optional[str] = None # Original video path (for video type, before composition)
composed_image_path: Optional[str] = None # Composed image path (with subtitles, for image type)
video_segment_path: Optional[str] = None # Final video segment path
# Metadata
duration: float = 0.0 # Audio duration (seconds)
duration: float = 0.0 # Frame duration (seconds, from audio or video)
created_at: Optional[datetime] = None
def __post_init__(self):

View File

@@ -63,8 +63,11 @@ class BasePipeline(ABC):
# Quick access to services (convenience)
self.llm = pixelle_video_core.llm
self.tts = pixelle_video_core.tts
self.image = pixelle_video_core.image
self.media = pixelle_video_core.media
self.video = pixelle_video_core.video
# Backward compatibility alias
self.image = pixelle_video_core.media
@abstractmethod
async def __call__(

View File

@@ -92,8 +92,7 @@ class CustomPipeline(BasePipeline):
ref_audio: Optional[str] = None,
image_workflow: Optional[str] = None,
image_width: int = 1024,
image_height: int = 1024,
# Note: image_width and image_height are now auto-determined from template
frame_template: Optional[str] = None,
video_fps: int = 30,
@@ -118,9 +117,10 @@ class CustomPipeline(BasePipeline):
VideoGenerationResult
Image Generation Logic:
- If template has {{image}} → automatically generates images
- If template has no {{image}} → skips image generation (faster, cheaper)
- To customize: Override the template_requires_image logic in your subclass
- image_*.html templates → automatically generates images
- video_*.html templates → automatically generates videos
- static_*.html templates → skips media generation (faster, cheaper)
- To customize: Override the template type detection logic in your subclass
"""
logger.info("Starting CustomPipeline")
logger.info(f"Input text length: {len(text)} chars")
@@ -152,19 +152,27 @@ class CustomPipeline(BasePipeline):
frame_template = template_config.get("default_template", "1080x1920/default.html")
# ========== Step 0.5: Check template requirements ==========
# Detect if template requires {{image}} parameter
# This allows skipping the entire image generation pipeline for text-only templates
# Detect template type by filename prefix
from pathlib import Path
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import resolve_template_path
from pixelle_video.utils.template_util import resolve_template_path, get_template_type
template_name = Path(frame_template).name
template_type = get_template_type(template_name)
template_requires_image = (template_type == "image")
# Read media size from template meta tags
template_path = resolve_template_path(frame_template)
generator = HTMLFrameGenerator(template_path)
template_requires_image = generator.requires_image()
image_width, image_height = generator.get_media_size()
logger.info(f"📐 Media size from template: {image_width}x{image_height}")
if template_requires_image:
if template_type == "image":
logger.info(f"📸 Template requires image generation")
else:
logger.info(f" Template does not require images - skipping image generation pipeline")
elif template_type == "video":
logger.info(f"🎬 Template requires video generation")
else: # static
logger.info(f"⚡ Static template - skipping media generation pipeline")
logger.info(f" 💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
# ========== Step 1: Process content (CUSTOMIZE THIS) ==========
@@ -194,8 +202,8 @@ class CustomPipeline(BasePipeline):
# ========== Step 2: Generate image prompts (CONDITIONAL - CUSTOMIZE THIS) ==========
self._report_progress(progress_callback, "generating_image_prompts", 0.25)
# IMPORTANT: Check if template actually needs images
# If your template doesn't use {{image}}, you can skip this entire step!
# IMPORTANT: Check if template is image type
# If your template is static_*.html, you can skip this entire step!
if template_requires_image:
# Template requires images - generate image prompts using LLM
from pixelle_video.utils.content_generators import generate_image_prompts

View File

@@ -94,8 +94,7 @@ class StandardPipeline(BasePipeline):
max_image_prompt_words: int = 60,
# === Image Parameters ===
image_width: int = 1024,
image_height: int = 1024,
# Note: image_width and image_height are now auto-determined from template meta tags
image_workflow: Optional[str] = None,
# === Video Parameters ===
@@ -151,9 +150,8 @@ class StandardPipeline(BasePipeline):
min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length
image_width: Generated image width (default 1024)
image_height: Generated image height (default 1024)
image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
Note: Image/video size is now auto-determined from template meta tags
video_fps: Video frame rate (default 30)
@@ -239,6 +237,16 @@ class StandardPipeline(BasePipeline):
template_config = self.core.config.get("template", {})
frame_template = template_config.get("default_template", "1080x1920/default.html")
# Read media size from template meta tags
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import resolve_template_path
template_path = resolve_template_path(frame_template)
temp_generator = HTMLFrameGenerator(template_path)
image_width, image_height = temp_generator.get_media_size()
logger.info(f"📐 Media size from template: {image_width}x{image_height}")
# Create storyboard config
config = StoryboardConfig(
task_id=task_id,
@@ -269,11 +277,13 @@ class StandardPipeline(BasePipeline):
)
# ========== Step 0.8: Check template requirements ==========
template_requires_image = self._check_template_requires_image(config.frame_template)
if template_requires_image:
template_media_type = self._check_template_media_type(config.frame_template)
if template_media_type == "video":
logger.info(f"🎬 Template requires video generation")
elif template_media_type == "image":
logger.info(f"📸 Template requires image generation")
else:
logger.info(f"Template does not require images - skipping image generation pipeline")
else: # static
logger.info(f"Static template - skipping media generation pipeline")
logger.info(f" 💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
try:
@@ -294,8 +304,61 @@ class StandardPipeline(BasePipeline):
logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
# ========== Step 2: Generate image prompts (conditional) ==========
if template_requires_image:
# ========== Step 2: Generate media prompts (conditional) ==========
if template_media_type == "video":
# Video template: generate video prompts
self._report_progress(progress_callback, "generating_video_prompts", 0.15)
from pixelle_video.utils.content_generators import generate_video_prompts
# Override prompt_prefix if provided
original_prefix = None
if prompt_prefix is not None:
image_config = self.core.config.get("comfyui", {}).get("image", {})
original_prefix = image_config.get("prompt_prefix")
image_config["prompt_prefix"] = prompt_prefix
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
try:
# Create progress callback wrapper for video prompt generation
def video_prompt_progress(completed: int, total: int, message: str):
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15)
self._report_progress(
progress_callback,
"generating_video_prompts",
overall_progress,
extra_info=message
)
# Generate base video prompts
base_image_prompts = await generate_video_prompts(
self.llm,
narrations=narrations,
min_words=min_image_prompt_words,
max_words=max_image_prompt_words,
progress_callback=video_prompt_progress
)
# Apply prompt prefix
from pixelle_video.utils.prompt_helper import build_image_prompt
image_config = self.core.config.get("comfyui", {}).get("image", {})
prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
image_prompts = []
for base_prompt in base_image_prompts:
final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
image_prompts.append(final_prompt)
finally:
# Restore original prompt_prefix
if original_prefix is not None:
image_config["prompt_prefix"] = original_prefix
logger.info(f"✅ Generated {len(image_prompts)} video prompts")
elif template_media_type == "image":
# Image template: generate image prompts
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
# Override prompt_prefix if provided
@@ -343,12 +406,13 @@ class StandardPipeline(BasePipeline):
image_config["prompt_prefix"] = original_prefix
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
else:
# Skip image prompt generation
else: # text
# Text-only template: skip media prompt generation
image_prompts = [None] * len(narrations)
self._report_progress(progress_callback, "preparing_frames", 0.15)
logger.info(f"⚡ Skipped image prompt generation (template doesn't need images)")
logger.info(f" 💡 Savings: {len(narrations)} LLM calls + {len(narrations)} image generations")
logger.info(f"⚡ Skipped media prompt generation (text-only template)")
logger.info(f" 💡 Savings: {len(narrations)} LLM calls + {len(narrations)} media generations")
# ========== Step 3: Create frames ==========
for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
@@ -452,29 +516,32 @@ class StandardPipeline(BasePipeline):
logger.error(f"❌ Video generation failed: {e}")
raise
def _check_template_requires_image(self, frame_template: str) -> bool:
def _check_template_media_type(self, frame_template: str) -> str:
"""
Check if template requires image generation
Check template media type requirement
This is checked at pipeline level to avoid unnecessary:
- LLM calls (generating image_prompts)
- Image generation API calls
- LLM calls (generating media prompts)
- Media generation API calls
- ComfyUI dependency
Template naming convention:
- static_*.html: Static style template (returns "static")
- image_*.html: Image template (returns "image")
- video_*.html: Video template (returns "video")
Args:
frame_template: Template path (e.g., "1080x1920/default.html")
frame_template: Template path (e.g., "1080x1920/image_default.html" or "1080x1920/video_default.html")
Returns:
True if template contains {{image}}, False otherwise
"static", "image", or "video"
"""
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import resolve_template_path
from pixelle_video.utils.template_util import get_template_type
template_path = resolve_template_path(frame_template)
generator = HTMLFrameGenerator(template_path)
# Determine type by template filename prefix
template_name = Path(frame_template).name
template_type = get_template_type(template_name)
requires = generator.requires_image()
logger.debug(f"Template '{frame_template}' requires_image={requires}")
return requires
logger.debug(f"Template '{frame_template}' is {template_type} template")
return template_type

View File

@@ -0,0 +1,133 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Video prompt generation template
For generating video prompts from narrations.
"""
import json
from typing import List
# LLM prompt template for turning narrations into English video-generation
# prompts.  Placeholders filled by build_video_prompt_prompt():
#   {narrations_count}, {narrations_json}, {min_words}, {max_words}
VIDEO_PROMPT_GENERATION_PROMPT = """# 角色定位
你是一个专业的视频创意设计师,擅长为视频脚本创作富有动感和表现力的视频生成提示词,将叙述内容转化为生动的视频画面。
# 核心任务
基于已有的视频脚本,为每个分镜的"旁白内容"创作对应的**英文**视频生成提示词,确保视频画面与叙述内容完美配合,通过动态画面增强观众的理解和记忆。
**重要:输入包含 {narrations_count} 个旁白,你必须为每个旁白都生成一个对应的视频提示词,总共输出 {narrations_count} 个视频提示词。**
# 输入内容
{narrations_json}
# 输出要求
## 视频提示词规范
- 语言:**必须使用英文**(用于 AI 视频生成模型)
- 描述结构scene + character action + camera movement + emotion + atmosphere
- 描述长度:确保描述清晰完整且富有创意(建议 {min_words}-{max_words} 个英文单词)
- 动态元素:强调动作、运动、变化等动态效果
## 视觉创意要求
- 每个视频都要准确反映对应旁白的具体内容和情感
- 突出画面的动态性:角色动作、物体运动、镜头移动、场景转换等
- 使用象征手法将抽象概念视觉化(如用流动的水代表时间流逝,用上升的阶梯代表进步等)
- 画面要表现出丰富的情感和动作,增强视觉冲击力
- 通过镜头语言(推拉摇移)和剪辑节奏增强表现力
## 关键英文词汇参考
- 动作moving, running, flowing, transforming, growing, falling
- 镜头camera pan, zoom in, zoom out, tracking shot, aerial view
- 转场transition, fade in, fade out, dissolve
- 氛围dynamic, energetic, peaceful, dramatic, mysterious
- 光影lighting changes, shadows moving, sunlight streaming
## 视频与文案配合原则
- 视频要服务于文案,成为文案内容的视觉延伸
- 避免与文案内容无关或矛盾的视觉元素
- 选择最能增强文案说服力的动态表现方式
- 确保观众能通过视频动态快速理解文案的核心观点
## 创意指导
1. **现象描述类文案**:用动态场景表现社会现象的发生过程
2. **原因分析类文案**:用因果关系的动态演变表现内在逻辑
3. **影响论证类文案**:用后果场景的动态展开或对比表现影响程度
4. **深入探讨类文案**:用抽象概念的动态具象化表现深刻思考
5. **结论启发类文案**:用开放式动态场景或指引性运动表现启发性
## 视频特有注意事项
- 强调动态:每个视频都应该包含明显的动作或运动
- 镜头语言:适当使用推拉摇移等镜头技巧增强表现力
- 时长考虑:视频应该是连贯的动态过程,不是静态画面
- 流畅性:注意动作的流畅性和自然性
# 输出格式
严格按照以下JSON格式输出**视频提示词必须是英文**
```json
{{
"video_prompts": [
"[detailed English video prompt with dynamic elements and camera movements]",
"[detailed English video prompt with dynamic elements and camera movements]"
]
}}
```
# 重要提醒
1. 只输出JSON格式内容不要添加任何解释说明
2. 确保JSON格式严格正确可以被程序直接解析
3. 输入是 {{"narrations": [旁白数组]}} 格式,输出是 {{"video_prompts": [视频提示词数组]}} 格式
4. **输出的video_prompts数组必须恰好包含 {narrations_count} 个元素与输入的narrations数组一一对应**
5. **视频提示词必须使用英文**for AI video generation models
6. 视频提示词必须准确反映对应旁白的具体内容和情感
7. 每个视频都要强调动态性和运动感,避免静态描述
8. 适当使用镜头语言增强表现力
9. 确保视频画面能增强文案的说服力和观众的理解度
现在,请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**视频提示词。只输出JSON不要其他内容。
"""


def build_video_prompt_prompt(
    narrations: List[str],
    min_words: int,
    max_words: int
) -> str:
    """
    Build the LLM prompt for generating per-narration video prompts.

    Fix: ``min_words``/``max_words`` were previously passed to ``.format()``
    but the template contained no matching placeholders, so the requested
    word-count range was silently ignored (the template hardcoded 50-100).
    The template now embeds ``{min_words}-{max_words}``.

    Args:
        narrations: Narration text for each storyboard frame.
        min_words: Minimum suggested word count for each video prompt.
        max_words: Maximum suggested word count for each video prompt.

    Returns:
        Fully formatted prompt string ready to send to the LLM.

    Example:
        >>> build_video_prompt_prompt(["a narration"], 50, 100)  # doctest: +SKIP
    """
    # Embed the narrations as pretty-printed JSON so the LLM sees the exact
    # input structure it must mirror in its JSON output.
    narrations_json = json.dumps(
        {"narrations": narrations},
        ensure_ascii=False,
        indent=2
    )
    return VIDEO_PROMPT_GENERATION_PROMPT.format(
        narrations_json=narrations_json,
        narrations_count=len(narrations),
        min_words=min_words,
        max_words=max_words
    )

View File

@@ -23,7 +23,7 @@ from loguru import logger
from pixelle_video.config import config_manager
from pixelle_video.services.llm_service import LLMService
from pixelle_video.services.tts_service import TTSService
from pixelle_video.services.image import ImageService
from pixelle_video.services.media import MediaService
from pixelle_video.services.video import VideoService
from pixelle_video.services.frame_processor import FrameProcessor
from pixelle_video.pipelines.standard import StandardPipeline
@@ -45,7 +45,7 @@ class PixelleVideoCore:
# Use capabilities directly
answer = await pixelle_video.llm("Explain atomic habits")
audio = await pixelle_video.tts("Hello world")
image = await pixelle_video.image(prompt="a cat")
media = await pixelle_video.media(prompt="a cat")
# Check active capabilities
print(f"Using LLM: {pixelle_video.llm.active}")
@@ -56,7 +56,7 @@ class PixelleVideoCore:
├── config (configuration)
├── llm (LLM service - direct OpenAI SDK)
├── tts (TTS service - ComfyKit workflows)
├── image (Image service - ComfyKit workflows)
├── media (Media service - ComfyKit workflows, supports image & video)
└── pipelines (video generation pipelines)
├── standard (standard workflow)
├── custom (custom workflow template)
@@ -77,7 +77,7 @@ class PixelleVideoCore:
# Core services (initialized in initialize())
self.llm: Optional[LLMService] = None
self.tts: Optional[TTSService] = None
self.image: Optional[ImageService] = None
self.media: Optional[MediaService] = None
self.video: Optional[VideoService] = None
self.frame_processor: Optional[FrameProcessor] = None
@@ -105,7 +105,7 @@ class PixelleVideoCore:
# 1. Initialize core services
self.llm = LLMService(self.config)
self.tts = TTSService(self.config)
self.image = ImageService(self.config)
self.media = MediaService(self.config)
self.video = VideoService()
self.frame_processor = FrameProcessor(self)

View File

@@ -18,7 +18,7 @@ Core services providing atomic capabilities.
Services:
- LLMService: LLM text generation
- TTSService: Text-to-speech
- ImageService: Image generation
- MediaService: Media generation (image & video)
- VideoService: Video processing
- FrameProcessor: Frame processing orchestrator
- ComfyBaseService: Base class for ComfyUI-based services
@@ -27,15 +27,19 @@ Services:
from pixelle_video.services.comfy_base_service import ComfyBaseService
from pixelle_video.services.llm_service import LLMService
from pixelle_video.services.tts_service import TTSService
from pixelle_video.services.image import ImageService
from pixelle_video.services.media import MediaService
from pixelle_video.services.video import VideoService
from pixelle_video.services.frame_processor import FrameProcessor
# Backward compatibility alias
ImageService = MediaService
__all__ = [
"ComfyBaseService",
"LLMService",
"TTSService",
"ImageService",
"MediaService",
"ImageService", # Backward compatibility
"VideoService",
"FrameProcessor",
]

View File

@@ -77,21 +77,6 @@ class HTMLFrameGenerator:
self._check_linux_dependencies()
logger.debug(f"Loaded HTML template: {template_path} (size: {self.width}x{self.height})")
def requires_image(self) -> bool:
    """
    Detect if template requires {{image}} parameter

    Checks whether the loaded template text references the {{image}}
    variable. When it does not, the caller can skip the entire image
    generation pipeline, which improves:
    - Generation speed (no image generation API calls)
    - Cost efficiency (no LLM calls for image prompts)
    - Dependency requirements (no ComfyUI needed)

    Returns:
        True if template contains {{image}}, False otherwise
    """
    placeholder = '{{image}}'
    return self.template.find(placeholder) >= 0
def _check_linux_dependencies(self):
"""Check Linux system dependencies and warn if missing"""
@@ -141,6 +126,58 @@ class HTMLFrameGenerator:
logger.debug(f"Template loaded: {len(content)} chars")
return content
def _parse_media_size_from_meta(self) -> tuple[Optional[int], Optional[int]]:
    """
    Parse media size from meta tags in template

    Looks for meta tags:
    - <meta name="template:media-width" content="1024">
    - <meta name="template:media-height" content="1024">

    Both tags must be present and hold positive integer content for a
    size to be returned; otherwise (None, None) is returned.

    Returns:
        Tuple of (width, height) or (None, None) if not found
    """
    # NOTE(review): bs4 is imported outside the try block, so a missing
    # bs4 install raises ImportError to the caller instead of being
    # downgraded to the warning below — confirm this is intended.
    from bs4 import BeautifulSoup

    try:
        soup = BeautifulSoup(self.template, 'html.parser')

        # Find width and height meta tags
        width_meta = soup.find('meta', attrs={'name': 'template:media-width'})
        height_meta = soup.find('meta', attrs={'name': 'template:media-height'})

        if width_meta and height_meta:
            # Non-numeric content raises ValueError here, which is caught
            # below and treated the same as "no size declared".
            width = int(width_meta.get('content', 0))
            height = int(height_meta.get('content', 0))

            if width > 0 and height > 0:
                logger.debug(f"Found media size in meta tags: {width}x{height}")
                return width, height

        return None, None

    except Exception as e:
        logger.warning(f"Failed to parse media size from meta tags: {e}")
        return None, None
def get_media_size(self) -> tuple[int, int]:
    """
    Get media size for image/video generation

    Returns the size declared in the template's meta tags. When the
    template declares no size, falls back to 1024x1024 with a warning
    (properly configured templates should always declare a size).

    Returns:
        Tuple of (width, height)
    """
    declared = self._parse_media_size_from_meta()
    if all(declared):
        return declared

    # Fallback to default if not specified (should not happen with properly configured templates)
    logger.warning(f"No media size meta tags found in template {self.template_path}, using fallback 1024x1024")
    return 1024, 1024
def parse_template_parameters(self) -> Dict[str, Dict[str, Any]]:
"""
Parse custom parameters from HTML template

View File

@@ -84,7 +84,7 @@ class FrameProcessor:
))
await self._step_generate_audio(frame, config)
# Step 2: Generate image (conditional)
# Step 2: Generate media (image or video, conditional)
if needs_image:
if progress_callback:
progress_callback(ProgressEvent(
@@ -93,12 +93,13 @@ class FrameProcessor:
frame_current=frame_num,
frame_total=total_frames,
step=2,
action="image"
action="media"
))
await self._step_generate_image(frame, config)
await self._step_generate_media(frame, config)
else:
frame.image_path = None
logger.debug(f" 2/4: Skipped image generation (not required by template)")
frame.media_type = None
logger.debug(f" 2/4: Skipped media generation (not required by template)")
# Step 3: Compose frame (add subtitle)
if progress_callback:
@@ -176,27 +177,66 @@ class FrameProcessor:
logger.debug(f" ✓ Audio generated: {audio_path} ({frame.duration:.2f}s)")
async def _step_generate_image(
async def _step_generate_media(
self,
frame: StoryboardFrame,
config: StoryboardConfig
):
"""Step 2: Generate image using ComfyKit"""
logger.debug(f" 2/4: Generating image for frame {frame.index}...")
"""Step 2: Generate media (image or video) using ComfyKit"""
logger.debug(f" 2/4: Generating media for frame {frame.index}...")
# Call Image generation (with optional preset)
image_url = await self.core.image(
# Determine media type based on workflow
# video_ prefix in workflow name indicates video generation
workflow_name = config.image_workflow or ""
is_video_workflow = "video_" in workflow_name.lower()
media_type = "video" if is_video_workflow else "image"
logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})")
# Call Media generation (with optional preset)
media_result = await self.core.media(
prompt=frame.image_prompt,
workflow=config.image_workflow, # Pass workflow from config (None = use default)
media_type=media_type,
width=config.image_width,
height=config.image_height
)
# Download image to local (pass task_id)
local_path = await self._download_image(image_url, frame.index, config.task_id)
frame.image_path = local_path
# Store media type
frame.media_type = media_result.media_type
logger.debug(f" ✓ Image generated: {local_path}")
if media_result.is_image:
# Download image to local (pass task_id)
local_path = await self._download_media(
media_result.url,
frame.index,
config.task_id,
media_type="image"
)
frame.image_path = local_path
logger.debug(f" ✓ Image generated: {local_path}")
elif media_result.is_video:
# Download video to local (pass task_id)
local_path = await self._download_media(
media_result.url,
frame.index,
config.task_id,
media_type="video"
)
frame.video_path = local_path
# Update duration from video if available
if media_result.duration:
frame.duration = media_result.duration
logger.debug(f" ✓ Video generated: {local_path} (duration: {frame.duration:.2f}s)")
else:
# Get video duration from file
frame.duration = await self._get_video_duration(local_path)
logger.debug(f" ✓ Video generated: {local_path} (duration: {frame.duration:.2f}s)")
else:
raise ValueError(f"Unknown media type: {media_result.media_type}")
async def _step_compose_frame(
self,
@@ -211,7 +251,9 @@ class FrameProcessor:
from pixelle_video.utils.os_util import get_task_frame_path
output_path = get_task_frame_path(config.task_id, frame.index, "composed")
# Use HTML template to compose frame
# For video type: render HTML as transparent overlay image
# For image type: render HTML with image background
# In both cases, we need the composed image
composed_path = await self._compose_frame_html(frame, storyboard, config, output_path)
frame.composed_image_path = composed_path
@@ -264,23 +306,60 @@ class FrameProcessor:
frame: StoryboardFrame,
config: StoryboardConfig
):
"""Step 4: Create video segment from image + audio"""
"""Step 4: Create video segment from media + audio"""
logger.debug(f" 4/4: Creating video segment for frame {frame.index}...")
# Generate output path using task_id
from pixelle_video.utils.os_util import get_task_frame_path
output_path = get_task_frame_path(config.task_id, frame.index, "segment")
# Call video compositor to create video from image + audio
from pixelle_video.services.video import VideoService
video_service = VideoService()
segment_path = video_service.create_video_from_image(
image=frame.composed_image_path,
audio=frame.audio_path,
output=output_path,
fps=config.video_fps
)
# Branch based on media type
if frame.media_type == "video":
# Video workflow: overlay HTML template on video, then add audio
logger.debug(f" → Using video-based composition with HTML overlay")
# Step 1: Overlay transparent HTML image on video
# The composed_image_path contains the rendered HTML with transparent background
temp_video_with_overlay = get_task_frame_path(config.task_id, frame.index, "video") + "_overlay.mp4"
video_service.overlay_image_on_video(
video=frame.video_path,
overlay_image=frame.composed_image_path,
output=temp_video_with_overlay,
scale_mode="contain" # Scale video to fit template size (contain mode)
)
# Step 2: Add narration audio to the overlaid video
# Note: The video might have audio (replaced) or be silent (audio added)
segment_path = video_service.merge_audio_video(
video=temp_video_with_overlay,
audio=frame.audio_path,
output=output_path,
replace_audio=True, # Replace video audio with narration
audio_volume=1.0
)
# Clean up temp file
import os
if os.path.exists(temp_video_with_overlay):
os.unlink(temp_video_with_overlay)
elif frame.media_type == "image" or frame.media_type is None:
# Image workflow: create video from image + audio
logger.debug(f" → Using image-based composition")
segment_path = video_service.create_video_from_image(
image=frame.composed_image_path,
audio=frame.audio_path,
output=output_path,
fps=config.video_fps
)
else:
raise ValueError(f"Unknown media type: {frame.media_type}")
frame.video_segment_path = segment_path
@@ -303,10 +382,16 @@ class FrameProcessor:
estimated_duration = file_size / 2000
return max(1.0, estimated_duration) # At least 1 second
async def _download_image(self, url: str, frame_index: int, task_id: str) -> str:
"""Download image from URL to local file"""
async def _download_media(
self,
url: str,
frame_index: int,
task_id: str,
media_type: str
) -> str:
"""Download media (image or video) from URL to local file"""
from pixelle_video.utils.os_util import get_task_frame_path
output_path = get_task_frame_path(task_id, frame_index, "image")
output_path = get_task_frame_path(task_id, frame_index, media_type)
async with httpx.AsyncClient() as client:
response = await client.get(url)
@@ -316,4 +401,16 @@ class FrameProcessor:
f.write(response.content)
return output_path
async def _get_video_duration(self, video_path: str) -> float:
    """
    Get video duration in seconds via ffprobe.

    Args:
        video_path: Path to the video file to probe

    Returns:
        Duration in seconds, or 1.0 if the duration cannot be determined
        (ffmpeg missing, file unreadable, or malformed metadata).
    """
    try:
        import ffmpeg
        probe = ffmpeg.probe(video_path)
        return float(probe['format']['duration'])
    except Exception as e:
        # No reliable fallback exists here (the frame's audio duration is
        # not available in this scope), so degrade to a 1-second default.
        # Fixed: the previous message claimed "using audio duration" even
        # though a fixed 1.0s was returned.
        logger.warning(f"Failed to get video duration: {e}, falling back to 1.0s default")
        return 1.0

View File

@@ -1,192 +0,0 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Image Generation Service - ComfyUI Workflow-based implementation
"""
from typing import Optional
from comfykit import ComfyKit
from loguru import logger
from pixelle_video.services.comfy_base_service import ComfyBaseService
class ImageService(ComfyBaseService):
    """
    Image generation service - Workflow-based

    Uses ComfyKit to execute image generation workflows.

    NOTE(review): superseded by MediaService (services/media.py); the
    package __init__ keeps ``ImageService`` as a backward-compatibility
    alias for it.

    Usage:
        # Use default workflow (workflows/image_flux.json)
        image_url = await pixelle_video.image(prompt="a cat")

        # Use specific workflow
        image_url = await pixelle_video.image(
            prompt="a cat",
            workflow="image_flux.json"
        )

        # List available workflows
        workflows = pixelle_video.image.list_workflows()
    """

    # Only workflow files named "image_*.json" are exposed by this service.
    WORKFLOW_PREFIX = "image_"
    DEFAULT_WORKFLOW = None  # No hardcoded default, must be configured
    WORKFLOWS_DIR = "workflows"

    def __init__(self, config: dict) -> None:
        """
        Initialize image service

        Args:
            config: Full application config dict
        """
        super().__init__(config, service_name="image")

    async def __call__(
        self,
        prompt: str,
        workflow: Optional[str] = None,
        # ComfyUI connection (optional overrides)
        comfyui_url: Optional[str] = None,
        runninghub_api_key: Optional[str] = None,
        # Common workflow parameters
        width: Optional[int] = None,
        height: Optional[int] = None,
        negative_prompt: Optional[str] = None,
        steps: Optional[int] = None,
        seed: Optional[int] = None,
        cfg: Optional[float] = None,
        sampler: Optional[str] = None,
        **params
    ) -> str:
        """
        Generate image using workflow

        Args:
            prompt: Image generation prompt
            workflow: Workflow filename (default: from config or "image_flux.json")
            comfyui_url: ComfyUI URL (optional, overrides config)
            runninghub_api_key: RunningHub API key (optional, overrides config)
            width: Image width
            height: Image height
            negative_prompt: Negative prompt
            steps: Sampling steps
            seed: Random seed
            cfg: CFG scale
            sampler: Sampler name
            **params: Additional workflow parameters

        Returns:
            Generated image URL/path

        Raises:
            Exception: If workflow execution fails or produces no images.

        Examples:
            # Simplest: use default workflow (workflows/image_flux.json)
            image_url = await pixelle_video.image(prompt="a beautiful cat")

            # Use specific workflow
            image_url = await pixelle_video.image(
                prompt="a cat",
                workflow="image_flux.json"
            )

            # With additional parameters
            image_url = await pixelle_video.image(
                prompt="a cat",
                workflow="image_flux.json",
                width=1024,
                height=1024,
                steps=20,
                seed=42
            )

            # With absolute path
            image_url = await pixelle_video.image(
                prompt="a cat",
                workflow="/path/to/custom.json"
            )

            # With custom ComfyUI server
            image_url = await pixelle_video.image(
                prompt="a cat",
                comfyui_url="http://192.168.1.100:8188"
            )
        """
        # 1. Resolve workflow (returns structured info)
        workflow_info = self._resolve_workflow(workflow=workflow)

        # 2. Prepare ComfyKit config (supports both selfhost and runninghub)
        kit_config = self._prepare_comfykit_config(
            comfyui_url=comfyui_url,
            runninghub_api_key=runninghub_api_key
        )

        # 3. Build workflow parameters
        workflow_params = {"prompt": prompt}

        # Add optional parameters — only forwarded when explicitly provided,
        # so the workflow JSON's own defaults apply otherwise.
        if width is not None:
            workflow_params["width"] = width
        if height is not None:
            workflow_params["height"] = height
        if negative_prompt is not None:
            workflow_params["negative_prompt"] = negative_prompt
        if steps is not None:
            workflow_params["steps"] = steps
        if seed is not None:
            workflow_params["seed"] = seed
        if cfg is not None:
            workflow_params["cfg"] = cfg
        if sampler is not None:
            workflow_params["sampler"] = sampler

        # Add any additional parameters
        workflow_params.update(params)

        logger.debug(f"Workflow parameters: {workflow_params}")

        # 4. Execute workflow (ComfyKit auto-detects based on input type)
        try:
            kit = ComfyKit(**kit_config)

            # Determine what to pass to ComfyKit based on source
            if workflow_info["source"] == "runninghub" and "workflow_id" in workflow_info:
                # RunningHub: pass workflow_id (ComfyKit will use runninghub backend)
                workflow_input = workflow_info["workflow_id"]
                logger.info(f"Executing RunningHub workflow: {workflow_input}")
            else:
                # Selfhost: pass file path (ComfyKit will use local ComfyUI)
                workflow_input = workflow_info["path"]
                logger.info(f"Executing selfhost workflow: {workflow_input}")

            result = await kit.execute(workflow_input, workflow_params)

            # 5. Handle result
            if result.status != "completed":
                error_msg = result.msg or "Unknown error"
                logger.error(f"Image generation failed: {error_msg}")
                raise Exception(f"Image generation failed: {error_msg}")

            if not result.images:
                logger.error("No images generated")
                raise Exception("No images generated")

            image_url = result.images[0]
            logger.info(f"✅ Generated image: {image_url}")
            return image_url

        except Exception as e:
            # Log and re-raise so callers see the original failure.
            logger.error(f"Image generation error: {e}")
            raise

View File

@@ -0,0 +1,285 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Media Generation Service - ComfyUI Workflow-based implementation
Supports both image and video generation workflows.
Automatically detects output type based on ExecuteResult.
"""
from typing import Optional
from comfykit import ComfyKit
from loguru import logger
from pixelle_video.services.comfy_base_service import ComfyBaseService
from pixelle_video.models.media import MediaResult
class MediaService(ComfyBaseService):
    """
    Media generation service - Workflow-based

    Uses ComfyKit to execute image/video generation workflows.
    Supports both image_ and video_ workflow prefixes.

    Usage:
        # Use default workflow (workflows/image_flux.json)
        media = await pixelle_video.media(prompt="a cat")
        if media.is_image:
            print(f"Generated image: {media.url}")
        elif media.is_video:
            print(f"Generated video: {media.url} ({media.duration}s)")

        # Use specific workflow
        media = await pixelle_video.media(
            prompt="a cat",
            workflow="image_flux.json"
        )

        # List available workflows
        workflows = pixelle_video.media.list_workflows()
    """

    # Unused by this service: _scan_workflows below matches both the
    # image_ and video_ prefixes instead of a single class-level prefix.
    WORKFLOW_PREFIX = ""
    DEFAULT_WORKFLOW = None  # No hardcoded default, must be configured
    WORKFLOWS_DIR = "workflows"

    def __init__(self, config: dict) -> None:
        """
        Initialize media service

        Args:
            config: Full application config dict
        """
        super().__init__(config, service_name="image")  # Keep "image" for config compatibility

    def _scan_workflows(self):
        """
        Scan workflows for both image_ and video_ prefixes

        Override parent method to support multiple prefixes.

        Returns:
            List of workflow info dicts sorted by "key" (source/name);
            empty list when no workflow source directories exist.
        """
        from pixelle_video.utils.os_util import list_resource_dirs, list_resource_files, get_resource_path
        from pathlib import Path

        workflows = []

        # Get all workflow source directories
        source_dirs = list_resource_dirs("workflows")
        if not source_dirs:
            logger.warning("No workflow source directories found")
            return workflows

        # Scan each source directory for workflow files
        for source_name in source_dirs:
            # Get all JSON files for this source
            workflow_files = list_resource_files("workflows", source_name)

            # Filter to only files matching image_ or video_ prefix
            matching_files = [
                f for f in workflow_files
                if (f.startswith("image_") or f.startswith("video_")) and f.endswith('.json')
            ]

            for filename in matching_files:
                try:
                    # Get actual file path
                    file_path = Path(get_resource_path("workflows", source_name, filename))
                    workflow_info = self._parse_workflow_file(file_path, source_name)
                    workflows.append(workflow_info)
                    logger.debug(f"Found workflow: {workflow_info['key']}")
                except Exception as e:
                    # Fixed: report the actual filename (was a literal "(unknown)" placeholder)
                    logger.error(f"Failed to parse workflow {source_name}/{filename}: {e}")

        # Sort by key (source/name)
        return sorted(workflows, key=lambda w: w["key"])

    async def __call__(
        self,
        prompt: str,
        workflow: Optional[str] = None,
        # Media type specification (required for proper handling)
        media_type: str = "image",  # "image" or "video"
        # ComfyUI connection (optional overrides)
        comfyui_url: Optional[str] = None,
        runninghub_api_key: Optional[str] = None,
        # Common workflow parameters
        width: Optional[int] = None,
        height: Optional[int] = None,
        negative_prompt: Optional[str] = None,
        steps: Optional[int] = None,
        seed: Optional[int] = None,
        cfg: Optional[float] = None,
        sampler: Optional[str] = None,
        **params
    ) -> MediaResult:
        """
        Generate media (image or video) using workflow

        Media type must be specified explicitly via media_type parameter.
        Returns a MediaResult object containing media type and URL.

        Args:
            prompt: Media generation prompt
            workflow: Workflow filename (default: from config or "image_flux.json")
            media_type: Type of media to generate - "image" or "video" (default: "image")
            comfyui_url: ComfyUI URL (optional, overrides config)
            runninghub_api_key: RunningHub API key (optional, overrides config)
            width: Media width
            height: Media height
            negative_prompt: Negative prompt
            steps: Sampling steps
            seed: Random seed
            cfg: CFG scale
            sampler: Sampler name
            **params: Additional workflow parameters

        Returns:
            MediaResult object with media_type ("image" or "video") and url

        Raises:
            ValueError: If media_type is not "image" or "video"
            Exception: If workflow execution fails or produces no output

        Examples:
            # Simplest: use default workflow (workflows/image_flux.json)
            media = await pixelle_video.media(prompt="a beautiful cat")
            if media.is_image:
                print(f"Image: {media.url}")

            # Use specific workflow
            media = await pixelle_video.media(
                prompt="a cat",
                workflow="image_flux.json"
            )

            # Video workflow (media_type="video" selects the video output)
            media = await pixelle_video.media(
                prompt="a cat running",
                workflow="video_wan.json",
                media_type="video"
            )
            if media.is_video:
                print(f"Video: {media.url}, duration: {media.duration}s")

            # With additional parameters
            media = await pixelle_video.media(
                prompt="a cat",
                workflow="image_flux.json",
                width=1024,
                height=1024,
                steps=20,
                seed=42
            )

            # With absolute path
            media = await pixelle_video.media(
                prompt="a cat",
                workflow="/path/to/custom.json"
            )

            # With custom ComfyUI server
            media = await pixelle_video.media(
                prompt="a cat",
                comfyui_url="http://192.168.1.100:8188"
            )
        """
        # 0. Validate media_type early so a typo fails fast instead of
        #    silently falling through to the image branch below.
        if media_type not in ("image", "video"):
            raise ValueError(f"Invalid media_type: {media_type!r} (expected 'image' or 'video')")

        # 1. Resolve workflow (returns structured info)
        workflow_info = self._resolve_workflow(workflow=workflow)

        # 2. Prepare ComfyKit config (supports both selfhost and runninghub)
        kit_config = self._prepare_comfykit_config(
            comfyui_url=comfyui_url,
            runninghub_api_key=runninghub_api_key
        )

        # 3. Build workflow parameters
        workflow_params = {"prompt": prompt}

        # Add optional parameters — only forwarded when explicitly provided,
        # so the workflow JSON's own defaults apply otherwise.
        if width is not None:
            workflow_params["width"] = width
        if height is not None:
            workflow_params["height"] = height
        if negative_prompt is not None:
            workflow_params["negative_prompt"] = negative_prompt
        if steps is not None:
            workflow_params["steps"] = steps
        if seed is not None:
            workflow_params["seed"] = seed
        if cfg is not None:
            workflow_params["cfg"] = cfg
        if sampler is not None:
            workflow_params["sampler"] = sampler

        # Add any additional parameters
        workflow_params.update(params)

        logger.debug(f"Workflow parameters: {workflow_params}")

        # 4. Execute workflow (ComfyKit auto-detects based on input type)
        try:
            kit = ComfyKit(**kit_config)

            # Determine what to pass to ComfyKit based on source
            if workflow_info["source"] == "runninghub" and "workflow_id" in workflow_info:
                # RunningHub: pass workflow_id (ComfyKit will use runninghub backend)
                workflow_input = workflow_info["workflow_id"]
                logger.info(f"Executing RunningHub workflow: {workflow_input}")
            else:
                # Selfhost: pass file path (ComfyKit will use local ComfyUI)
                workflow_input = workflow_info["path"]
                logger.info(f"Executing selfhost workflow: {workflow_input}")

            result = await kit.execute(workflow_input, workflow_params)

            # 5. Handle result based on specified media_type
            if result.status != "completed":
                error_msg = result.msg or "Unknown error"
                logger.error(f"Media generation failed: {error_msg}")
                raise Exception(f"Media generation failed: {error_msg}")

            # Extract media based on specified type
            if media_type == "video":
                # Video workflow - get video from result
                if not result.videos:
                    logger.error("No video generated (workflow returned no videos)")
                    raise Exception("No video generated")

                video_url = result.videos[0]
                logger.info(f"✅ Generated video: {video_url}")

                # Try to extract duration from result (if available)
                duration = None
                if hasattr(result, 'duration') and result.duration:
                    duration = result.duration

                return MediaResult(
                    media_type="video",
                    url=video_url,
                    duration=duration
                )
            else:  # image
                # Image workflow - get image from result
                if not result.images:
                    logger.error("No image generated (workflow returned no images)")
                    raise Exception("No image generated")

                image_url = result.images[0]
                logger.info(f"✅ Generated image: {image_url}")

                return MediaResult(
                    media_type="image",
                    url=image_url
                )

        except Exception as e:
            # Log and re-raise so callers see the original failure.
            logger.error(f"Media generation error: {e}")
            raise

View File

@@ -224,20 +224,88 @@ class VideoService:
-map "[v]" -map "[a]" output.mp4
"""
try:
inputs = [ffmpeg.input(v) for v in videos]
(
ffmpeg
.concat(*inputs, v=1, a=1)
.output(output)
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
# Build filter_complex string manually
n = len(videos)
# Build input stream labels: [0:v][0:a][1:v][1:a]...
stream_spec = "".join([f"[{i}:v][{i}:a]" for i in range(n)])
filter_complex = f"{stream_spec}concat=n={n}:v=1:a=1[v][a]"
# Build ffmpeg command
cmd = ['ffmpeg']
for video in videos:
cmd.extend(['-i', video])
cmd.extend([
'-filter_complex', filter_complex,
'-map', '[v]',
'-map', '[a]',
'-y', # Overwrite output
output
])
# Run command
import subprocess
result = subprocess.run(
cmd,
capture_output=True,
text=True,
check=True
)
logger.success(f"Videos concatenated successfully: {output}")
return output
except ffmpeg.Error as e:
error_msg = e.stderr.decode() if e.stderr else str(e)
except subprocess.CalledProcessError as e:
error_msg = e.stderr if e.stderr else str(e)
logger.error(f"FFmpeg concat filter error: {error_msg}")
raise RuntimeError(f"Failed to concatenate videos: {error_msg}")
except Exception as e:
logger.error(f"Concatenation error: {e}")
raise RuntimeError(f"Failed to concatenate videos: {e}")
def _get_video_duration(self, video: str) -> float:
    """Return the duration of *video* in seconds, or 0.0 when probing fails."""
    try:
        metadata = ffmpeg.probe(video)
        return float(metadata['format']['duration'])
    except Exception as err:
        # Probe failures (bad path, unreadable container) degrade to 0.0.
        logger.warning(f"Failed to get video duration: {err}")
        return 0.0
def _get_audio_duration(self, audio: str) -> float:
    """Return the duration of *audio* in seconds.

    When probing fails, falls back to a very rough file-size estimate
    (~16kbps MP3, i.e. 2KB per second), floored at 1 second.
    """
    try:
        return float(ffmpeg.probe(audio)['format']['duration'])
    except Exception as err:
        logger.warning(f"Failed to get audio duration: {err}, using estimate")
        import os
        size_bytes = os.path.getsize(audio)
        # ~16kbps MP3 => roughly 2000 bytes per second of audio
        return max(1.0, size_bytes / 2000)
def has_audio_stream(self, video: str) -> bool:
    """
    Check if video has audio stream

    Args:
        video: Video file path

    Returns:
        True if video has audio stream, False otherwise (including when
        the file cannot be probed, which is treated as "no audio").
    """
    try:
        streams = ffmpeg.probe(video).get('streams', [])
        has_audio = any(s['codec_type'] == 'audio' for s in streams)
        logger.debug(f"Video {video} has_audio={has_audio}")
        return has_audio
    except Exception as err:
        logger.warning(f"Failed to probe video audio streams: {err}, assuming no audio")
        return False
def merge_audio_video(
self,
@@ -247,9 +315,18 @@ class VideoService:
replace_audio: bool = True,
audio_volume: float = 1.0,
video_volume: float = 0.0,
pad_strategy: str = "freeze", # "freeze" (freeze last frame) or "black" (black screen)
) -> str:
"""
Merge audio with video
Merge audio with video, using the longer duration
The output video duration will be the maximum of video and audio duration.
If audio is longer than video, the video will be padded using the specified strategy.
Automatically handles videos with or without audio streams.
- If video has no audio: adds the audio track
- If video has audio and replace_audio=True: replaces with new audio
- If video has audio and replace_audio=False: mixes both audio tracks
Args:
video: Video file path
@@ -259,6 +336,9 @@ class VideoService:
audio_volume: Volume of the new audio (0.0 to 1.0+)
video_volume: Volume of original video audio (0.0 to 1.0+)
Only used when replace_audio=False
pad_strategy: Strategy to pad video if audio is longer
- "freeze": Freeze last frame (default)
- "black": Fill with black screen
Returns:
Path to the output video file
@@ -267,28 +347,115 @@ class VideoService:
RuntimeError: If FFmpeg execution fails
Note:
- When replace_audio=True, video's original audio is removed
- When replace_audio=False, original and new audio are mixed
- Audio is trimmed/extended to match video duration
- Uses the longer duration between video and audio
- When audio is longer, video is padded using pad_strategy
- When video is longer, audio is looped or extended
- Automatically detects if video has audio
- When video is silent, audio is added regardless of replace_audio
- When replace_audio=True and video has audio, original audio is removed
- When replace_audio=False and video has audio, original and new audio are mixed
"""
# Get durations of video and audio
video_duration = self._get_video_duration(video)
audio_duration = self._get_audio_duration(audio)
logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s")
# Determine target duration (max of both)
target_duration = max(video_duration, audio_duration)
logger.info(f"Target output duration: {target_duration:.2f}s")
# Check if video has audio stream
video_has_audio = self.has_audio_stream(video)
# Prepare video stream (potentially with padding)
input_video = ffmpeg.input(video)
video_stream = input_video.video
# Pad video if audio is longer
if audio_duration > video_duration:
pad_duration = audio_duration - video_duration
logger.info(f"Audio is longer, padding video by {pad_duration:.2f}s using '{pad_strategy}' strategy")
if pad_strategy == "freeze":
# Freeze last frame: tpad filter
video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
else: # black
# Generate black frames for padding duration
from pixelle_video.utils.os_util import get_temp_path
import os
# Get video properties
probe = ffmpeg.probe(video)
video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
width = int(video_info['width'])
height = int(video_info['height'])
fps_str = video_info['r_frame_rate']
fps_num, fps_den = map(int, fps_str.split('/'))
fps = fps_num / fps_den if fps_den != 0 else 30
# Create black video for padding
black_video_path = get_temp_path(f"black_pad_{os.path.basename(output)}")
black_input = ffmpeg.input(
f'color=c=black:s={width}x{height}:r={fps}',
f='lavfi',
t=pad_duration
)
# Concatenate original video with black padding
video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)
# Prepare audio stream (pad if needed to match target duration)
input_audio = ffmpeg.input(audio)
audio_stream = input_audio.audio.filter('volume', audio_volume)
# Pad audio with silence if video is longer
if video_duration > audio_duration:
pad_duration = video_duration - audio_duration
logger.info(f"Video is longer, padding audio with {pad_duration:.2f}s silence")
# Use apad to add silence at the end
audio_stream = audio_stream.filter('apad', whole_dur=target_duration)
if not video_has_audio:
logger.info(f"Video has no audio stream, adding audio track")
# Video is silent, just add the audio
try:
(
ffmpeg
.output(
video_stream,
audio_stream,
output,
vcodec='libx264', # Re-encode video if padded
acodec='aac',
audio_bitrate='192k'
)
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
)
logger.success(f"Audio added to silent video: {output}")
return output
except ffmpeg.Error as e:
error_msg = e.stderr.decode() if e.stderr else str(e)
logger.error(f"FFmpeg error adding audio to silent video: {error_msg}")
raise RuntimeError(f"Failed to add audio to video: {error_msg}")
# Video has audio, proceed with merging
logger.info(f"Merging audio with video (replace={replace_audio})")
try:
input_video = ffmpeg.input(video)
input_audio = ffmpeg.input(audio)
if replace_audio:
# Replace audio: use only new audio, ignore original
(
ffmpeg
.output(
input_video.video,
input_audio.audio.filter('volume', audio_volume),
video_stream,
audio_stream,
output,
vcodec='copy',
vcodec='libx264', # Re-encode video if padded
acodec='aac',
audio_bitrate='192k',
shortest=None
audio_bitrate='192k'
)
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
@@ -298,20 +465,20 @@ class VideoService:
mixed_audio = ffmpeg.filter(
[
input_video.audio.filter('volume', video_volume),
input_audio.audio.filter('volume', audio_volume)
audio_stream
],
'amix',
inputs=2,
duration='first'
duration='longest' # Use longest audio
)
(
ffmpeg
.output(
input_video.video,
video_stream,
mixed_audio,
output,
vcodec='copy',
vcodec='libx264', # Re-encode video if padded
acodec='aac',
audio_bitrate='192k'
)
@@ -326,6 +493,92 @@ class VideoService:
logger.error(f"FFmpeg merge error: {error_msg}")
raise RuntimeError(f"Failed to merge audio and video: {error_msg}")
def overlay_image_on_video(
self,
video: str,
overlay_image: str,
output: str,
scale_mode: str = "contain"
) -> str:
"""
Overlay a transparent image on top of video
Args:
video: Base video file path
overlay_image: Transparent overlay image path (e.g., rendered HTML with transparent background)
output: Output video file path
scale_mode: How to scale the base video to fit the overlay size
- "contain": Scale video to fit within overlay dimensions (letterbox/pillarbox)
- "cover": Scale video to cover overlay dimensions (may crop)
- "stretch": Stretch video to exact overlay dimensions
Returns:
Path to the output video file
Raises:
RuntimeError: If FFmpeg execution fails
Note:
- Overlay image should have transparent background
- Video is scaled to match overlay dimensions based on scale_mode
- Final video size matches overlay image size
- Video codec is re-encoded to support overlay
"""
logger.info(f"Overlaying image on video (scale_mode={scale_mode})")
try:
# Get overlay image dimensions
overlay_probe = ffmpeg.probe(overlay_image)
overlay_stream = next(s for s in overlay_probe['streams'] if s['codec_type'] == 'video')
overlay_width = int(overlay_stream['width'])
overlay_height = int(overlay_stream['height'])
logger.debug(f"Overlay dimensions: {overlay_width}x{overlay_height}")
input_video = ffmpeg.input(video)
input_overlay = ffmpeg.input(overlay_image)
# Scale video to fit overlay size using scale_mode
if scale_mode == "contain":
# Scale to fit (letterbox/pillarbox if aspect ratio differs)
# Use scale filter with force_original_aspect_ratio=decrease and pad to center
scaled_video = (
input_video
.filter('scale', overlay_width, overlay_height, force_original_aspect_ratio='decrease')
.filter('pad', overlay_width, overlay_height, '(ow-iw)/2', '(oh-ih)/2', color='black')
)
elif scale_mode == "cover":
# Scale to cover (crop if aspect ratio differs)
scaled_video = (
input_video
.filter('scale', overlay_width, overlay_height, force_original_aspect_ratio='increase')
.filter('crop', overlay_width, overlay_height)
)
else: # stretch
# Stretch to exact dimensions
scaled_video = input_video.filter('scale', overlay_width, overlay_height)
# Overlay the transparent image on top of the scaled video
output_stream = ffmpeg.overlay(scaled_video, input_overlay)
(
ffmpeg
.output(output_stream, output,
vcodec='libx264',
pix_fmt='yuv420p',
preset='medium',
crf=23)
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
)
logger.success(f"Image overlaid on video: {output}")
return output
except ffmpeg.Error as e:
error_msg = e.stderr.decode() if e.stderr else str(e)
logger.error(f"FFmpeg overlay error: {error_msg}")
raise RuntimeError(f"Failed to overlay image on video: {error_msg}")
def create_video_from_image(
self,
image: str,

View File

@@ -321,6 +321,98 @@ async def generate_image_prompts(
return all_prompts
async def generate_video_prompts(
    llm_service,
    narrations: List[str],
    min_words: int = 30,
    max_words: int = 60,
    batch_size: int = 10,
    max_retries: int = 3,
    progress_callback: Optional[callable] = None
) -> List[str]:
    """
    Turn narration sentences into video-generation prompts via the LLM.

    Narrations are processed in batches; each batch is retried on parse or
    validation failure, and the last failure is re-raised once the retry
    budget is exhausted.

    Args:
        llm_service: Async callable LLM service
        narrations: Narration sentences, one prompt is produced per entry
        min_words: Minimum prompt length (words)
        max_words: Maximum prompt length (words)
        batch_size: Narrations sent to the LLM per request (default: 10)
        max_retries: Attempts allowed per batch before giving up (default: 3)
        progress_callback: Optional callback(completed, total, message)

    Returns:
        Base video prompts in narration order (no prefix applied).
    """
    from pixelle_video.prompts.video_generation import build_video_prompt_prompt

    logger.info(f"Generating video prompts for {len(narrations)} narrations (batch_size={batch_size})")

    # Chunk the narrations so each LLM request stays a manageable size.
    chunks = [narrations[start:start + batch_size] for start in range(0, len(narrations), batch_size)]
    logger.info(f"Split into {len(chunks)} batches")

    collected: List[str] = []
    for idx, chunk in enumerate(chunks, 1):
        logger.info(f"Processing batch {idx}/{len(chunks)} ({len(chunk)} narrations)")

        # Per-batch retry loop; the final failed attempt re-raises.
        for attempt in range(1, max_retries + 1):
            try:
                request_text = build_video_prompt_prompt(
                    narrations=chunk,
                    min_words=min_words,
                    max_words=max_words
                )

                response = await llm_service(
                    prompt=request_text,
                    temperature=0.7,
                    max_tokens=8192
                )
                logger.debug(f"Batch {idx} attempt {attempt}: LLM response length: {len(response)} chars")

                parsed = _parse_json(response)
                if "video_prompts" not in parsed:
                    raise KeyError("Invalid response format: missing 'video_prompts'")
                chunk_prompts = parsed["video_prompts"]

                # The LLM must return exactly one prompt per narration.
                if len(chunk_prompts) != len(chunk):
                    raise ValueError(
                        f"Prompt count mismatch: expected {len(chunk)}, got {len(chunk_prompts)}"
                    )

                collected.extend(chunk_prompts)
                logger.info(f"✓ Batch {idx} completed: {len(chunk_prompts)} video prompts")

                if progress_callback:
                    progress_callback(len(collected), len(narrations), f"Batch {idx}/{len(chunks)} completed")

                break  # batch done, continue with the next one
            except Exception as e:
                logger.warning(f"✗ Batch {idx} attempt {attempt} failed: {e}")
                if attempt >= max_retries:
                    raise
                logger.info(f"Retrying batch {idx}...")

    logger.info(f"✅ Generated {len(collected)} video prompts")
    return collected
def _parse_json(text: str) -> dict:
"""
Parse JSON from text, with fallback to extract JSON from markdown code blocks

View File

@@ -260,7 +260,7 @@ def get_task_path(task_id: str, *paths: str) -> str:
def get_task_frame_path(
task_id: str,
frame_index: int,
file_type: Literal["audio", "image", "composed", "segment"]
file_type: Literal["audio", "image", "video", "composed", "segment"]
) -> str:
"""
Get frame file path within task directory
@@ -268,7 +268,7 @@ def get_task_frame_path(
Args:
task_id: Task ID
frame_index: Frame index (0-based internally, but filename starts from 01)
file_type: File type (audio/image/composed/segment)
file_type: File type (audio/image/video/composed/segment)
Returns:
Absolute path to frame file
@@ -280,6 +280,7 @@ def get_task_frame_path(
ext_map = {
"audio": "mp3",
"image": "png",
"video": "mp4",
"composed": "png",
"segment": "mp4"
}

View File

@@ -18,6 +18,7 @@ import os
from pathlib import Path
from typing import List, Tuple, Optional, Literal
from pydantic import BaseModel, Field
import logging
from pixelle_video.utils.os_util import (
get_resource_path,
@@ -26,6 +27,8 @@ from pixelle_video.utils.os_util import (
resource_exists
)
logger = logging.getLogger(__name__)
def parse_template_size(template_path: str) -> Tuple[int, int]:
"""
@@ -316,7 +319,7 @@ def resolve_template_path(template_input: Optional[str]) -> str:
Args:
template_input: Can be:
- None: Use default "1080x1920/default.html"
- None: Use default "1080x1920/image_default.html"
- "template.html": Use default size + this template
- "1080x1920/template.html": Full relative path
- "templates/1080x1920/template.html": Absolute-ish path (legacy)
@@ -330,15 +333,15 @@ def resolve_template_path(template_input: Optional[str]) -> str:
Examples:
>>> resolve_template_path(None)
'templates/1080x1920/default.html'
>>> resolve_template_path("modern.html")
'templates/1080x1920/modern.html'
>>> resolve_template_path("1920x1080/default.html")
'templates/1920x1080/default.html'
'templates/1080x1920/image_default.html'
>>> resolve_template_path("image_modern.html")
'templates/1080x1920/image_modern.html'
>>> resolve_template_path("1920x1080/image_default.html")
'templates/1920x1080/image_default.html'
"""
# Default case
if template_input is None:
template_input = "1080x1920/default.html"
template_input = "1080x1920/image_default.html"
# Parse input to extract size and template name
size = None
@@ -359,6 +362,18 @@ def resolve_template_path(template_input: Optional[str]) -> str:
size = "1080x1920"
template_name = template_input
# Backward compatibility: migrate "default.html" to "image_default.html"
if template_name == "default.html":
migrated_name = "image_default.html"
try:
# Try migrated name first
path = get_resource_path("templates", size, migrated_name)
logger.info(f"Backward compatibility: migrated '{template_input}' to '{size}/{migrated_name}'")
return path
except FileNotFoundError:
# Fall through to try original name
logger.warning(f"Migrated template '{size}/{migrated_name}' not found, trying original name")
# Use resource API to resolve path (custom > default)
try:
return get_resource_path("templates", size, template_name)
@@ -367,6 +382,120 @@ def resolve_template_path(template_input: Optional[str]) -> str:
raise FileNotFoundError(
f"Template not found: {size}/{template_name}\n"
f"Available sizes: {available_sizes}\n"
f"Hint: Use format 'SIZExSIZE/template.html' (e.g., '1080x1920/default.html')"
f"Hint: Use format 'SIZExSIZE/template.html' (e.g., '1080x1920/image_default.html')"
)
def get_template_type(template_name: str) -> Literal['static', 'image', 'video']:
    """
    Infer a template's media type from its filename prefix.

    Naming convention:
        - ``static_*.html``: no AI-generated media (text-only styling)
        - ``image_*.html``: requires an AI-generated image
        - ``video_*.html``: requires an AI-generated video

    Args:
        template_name: Filename (a path is fine; only the basename is used),
            e.g. "image_default.html" or "1080x1920/video_simple.html"

    Returns:
        One of 'static', 'image', or 'video'. Names that don't follow the
        convention fall back to 'image' with a warning.

    Examples:
        >>> get_template_type("static_simple.html")
        'static'
        >>> get_template_type("image_default.html")
        'image'
        >>> get_template_type("video_simple.html")
        'video'
    """
    stem = Path(template_name).name
    for prefix, kind in (("static_", "static"), ("video_", "video"), ("image_", "image")):
        if stem.startswith(prefix):
            return kind
    # Legacy / non-conforming names: assume the historical image behavior.
    logger.warning(
        f"Template '{template_name}' doesn't follow naming convention (static_/image_/video_). "
        f"Defaulting to 'image' type."
    )
    return "image"
def filter_templates_by_type(
    templates: List[TemplateInfo],
    template_type: Literal['static', 'image', 'video']
) -> List[TemplateInfo]:
    """
    Keep only the templates whose filename prefix matches the given type.

    Type detection delegates to :func:`get_template_type`, which inspects
    the template's display name.

    Args:
        templates: TemplateInfo objects to filter
        template_type: 'static', 'image', or 'video'

    Returns:
        New list containing only templates of the requested type,
        preserving the input order.
    """
    return [
        candidate
        for candidate in templates
        if get_template_type(candidate.display_info.name) == template_type
    ]
def get_templates_grouped_by_size_and_type(
    template_type: Optional[Literal['static', 'image', 'video']] = None
) -> dict:
    """
    Group available templates by canvas size, optionally filtered by type.

    Args:
        template_type: Optional filter ('static', 'image', or 'video');
            None keeps every template.

    Returns:
        Dict mapping size string -> list of TemplateInfo. Size groups are
        ordered portrait > landscape > square (then by size name), and
        templates inside each group are sorted by display name.

    Examples:
        >>> all_grouped = get_templates_grouped_by_size_and_type()
        >>> image_grouped = get_templates_grouped_by_size_and_type('image')
    """
    templates = get_all_templates_with_info()
    if template_type is not None:
        templates = filter_templates_by_type(templates, template_type)

    # Bucket by canvas size string (e.g. "1080x1920").
    buckets: dict = {}
    for info in templates:
        buckets.setdefault(info.display_info.size, []).append(info)

    # Orientation rank: portrait first, landscape second, square last;
    # unknown orientations sort after everything else.
    rank = {'portrait': 0, 'landscape': 1, 'square': 2}

    def _group_order(size: str):
        # Each bucket is non-empty by construction, so [0] is safe.
        return (rank.get(buckets[size][0].display_info.orientation, 3), size)

    # Dict insertion order preserves the sorted group ordering.
    return {
        size: sorted(buckets[size], key=lambda t: t.display_info.name)
        for size in sorted(buckets, key=_group_order)
    }

View File

@@ -25,6 +25,7 @@ dependencies = [
"uvicorn[standard]>=0.32.0",
"python-multipart>=0.0.12",
"comfykit>=0.1.9",
"beautifulsoup4>=4.14.2",
]
[project.optional-dependencies]

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1080, height=1080">
<title>极简边框风格 - 1080x1080</title>
<style>

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1080, height=1920">
<title>模糊背景卡片 - 1080x1920</title>
<!-- Google Fonts - 中文字体 -->

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{title}}</title>
<style>

View File

@@ -2,6 +2,8 @@
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<style>
html {
margin: 0;

View File

@@ -2,6 +2,8 @@
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<style>
html {
margin: 0;

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1080, height=1920">
<title>时尚复古风格 - 1080x1920</title>
<style>

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1080, height=1920">
<title>全屏图片 - 1080x1920</title>
<!-- Google Fonts - 中文字体 -->

View File

@@ -2,6 +2,8 @@
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Ma+Shan+Zheng&family=ZCOOL+KuaiLe&display=swap" rel="stylesheet">

View File

@@ -2,6 +2,8 @@
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<style>
html {
margin: 0;

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>{{title}}</title>
<style>

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1080, height=1920">
<title>心理卡片风 - 1080x1920</title>
<style>

View File

@@ -2,6 +2,8 @@
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<style>
html {
margin: 0;

View File

@@ -2,6 +2,8 @@
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<style>
html {
margin: 0;

View File

@@ -0,0 +1,185 @@
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <!-- AI-generated media size consumed by the frame generator.
         NOTE(review): 512x288 is 16:9, matching the 1080x607 overlay slot
         below — presumably the generated video is upscaled to fit; confirm. -->
    <meta name="template:media-width" content="512">
    <meta name="template:media-height" content="288">
    <style>
        html {
            margin: 0;
            padding: 0;
            height: 100%;
        }

        body {
            margin: 0;
            padding: 0;
            width: 100%;
            height: 100vh;
            font-family: 'PingFang SC', 'Source Han Sans', 'Microsoft YaHei', sans-serif;
            overflow: hidden;
            /* background-color: #000; */
            display: flex;
            justify-content: center;
            align-items: center;
        }

        /* Main container - centered, holds all content */
        .main-container {
            position: relative;
            width: 1080px;
            height: 1920px;
        }

        /* Background image layer (customizable using <img> tag) */
        .background-image {
            position: absolute;
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            z-index: 0;
        }

        /* Video overlay - centered relative to main-container */
        .video-overlay {
            position: absolute;
            top: 50%;
            left: 50%;
            transform: translate(-50%, -50%);
            width: 1080px;
            height: 607px;
            /* background: #f00; */
            z-index: 1;
        }

        /* Title section - positioned above video */
        .video-title-wrapper {
            position: absolute;
            top: calc(50% - 607px / 2 - 130px);
            left: 50%;
            transform: translateX(-50%);
            max-width: 900px;
            width: 900px;
            text-align: center;
            z-index: 2;
        }

        .video-title {
            font-size: 72px;
            font-weight: 700;
            color: #ffffff;
            line-height: 1.3;
            letter-spacing: 3px;
            text-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
            margin-bottom: 20px;
        }

        /* Subtitle area - aligned to the bottom edge of the video */
        .content {
            position: absolute;
            bottom: calc(50% - 607px / 2 + 0px);
            left: 50%;
            transform: translateX(-50%);
            width: 900px;
            z-index: 4;
        }

        .text {
            font-size: 40px;
            color: #ffffff;
            text-align: center;
            line-height: 1.6;
            font-weight: 500;
            text-shadow:
                2px 2px 4px rgba(0, 0, 0, 0.9),
                0 0 8px rgba(0, 0, 0, 0.8),
                0 0 16px rgba(0, 0, 0, 0.6);
            padding: 10px 0px;
            /* background-color: aqua; */
        }

        /* Footer - positioned below video */
        .footer {
            position: absolute;
            top: calc(50% + 607px / 2 + 50px);
            left: 50%;
            transform: translateX(-50%);
            width: 900px;
            display: flex;
            align-items: center;
            justify-content: space-between;
            padding-top: 40px;
            border-top: 2px solid rgba(255, 255, 255, 0.3);
            z-index: 2;
        }

        .author-section {
            display: flex;
            flex-direction: column;
            gap: 8px;
        }

        .author {
            font-size: 32px;
            font-weight: 600;
            color: #ffffff;
            text-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
        }

        .author-desc {
            font-size: 24px;
            color: rgba(255, 255, 255, 0.9);
            font-weight: 400;
        }

        .logo-section {
            display: flex;
            flex-direction: column;
            align-items: flex-end;
            gap: 10px;
        }

        .logo {
            font-size: 28px;
            font-weight: 600;
            color: #ffffff;
            letter-spacing: 2px;
            text-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
        }
    </style>
</head>
<body>
    <!-- Main container - every element inside is positioned relative to video-overlay -->
    <div class="main-container">
        <!-- Background image layer (customizable via background parameter) -->
        <div class="background-image">
        </div>

        <!-- Video overlay - centering reference point -->
        <div class="video-overlay"></div>

        <!-- Video title - positioned above video -->
        <div class="video-title-wrapper">
            <div class="video-title">{{title}}</div>
        </div>

        <!-- Subtitle area - independently positioned at the bottom of the video -->
        <div class="content">
            <div class="text">{{text}}</div>
        </div>

        <!-- Footer - positioned below video -->
        <div class="footer">
            <div class="author-section">
                <div class="author">{{author=@Pixelle.AI}}</div>
                <div class="author-desc">{{describe=Open Source Omnimodal AI Creative Agent}}</div>
            </div>
            <div class="logo-section">
                <div class="logo">{{brand=Pixelle-Video}}</div>
            </div>
        </div>
    </div>
</body>
</html>

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1920, height=1080">
<title>视频模板 - 电影风格</title>
<style>

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1920, height=1080">
<title>全屏图片 - 1920x1080</title>
<!-- Google Fonts - 中文字体 -->

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1920, height=1080">
<title>视频模板 - 极简风格</title>
<style>

View File

@@ -2,6 +2,8 @@
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
<meta name="template:media-height" content="1024">
<meta name="viewport" content="width=1920, height=1080">
<title>视频模板 - 横屏科技风格</title>
<style>

24
uv.lock generated
View File

@@ -226,6 +226,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2f/eb/f25ad1a7726b2fe21005c3580b35fa7bfe09646faf7c8f41867747987a35/beartype-0.22.4-py3-none-any.whl", hash = "sha256:7967a1cee01fee42e47da69c58c92da10ba5bcfb8072686e48487be5201e3d10", size = 1318387 },
]
[[package]]
name = "beautifulsoup4"
version = "4.14.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "soupsieve" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392 },
]
[[package]]
name = "blinker"
version = "1.9.0"
@@ -1653,6 +1666,7 @@ name = "pixelle-video"
version = "0.1.2"
source = { editable = "." }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "certifi" },
{ name = "comfykit" },
{ name = "edge-tts" },
@@ -1680,6 +1694,7 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "beautifulsoup4", specifier = ">=4.14.2" },
{ name = "certifi", specifier = ">=2025.10.5" },
{ name = "comfykit", specifier = ">=0.1.9" },
{ name = "edge-tts", specifier = ">=7.2.3" },
@@ -2461,6 +2476,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
]
[[package]]
name = "soupsieve"
version = "2.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679 },
]
[[package]]
name = "sse-starlette"
version = "3.0.3"

View File

@@ -684,13 +684,41 @@ def main():
st.markdown(f"🔗 [{tr('template.preview_link')}]({template_docs_url})")
# Import template utilities
from pixelle_video.utils.template_util import get_templates_grouped_by_size
from pixelle_video.utils.template_util import get_templates_grouped_by_size_and_type, get_template_type
# Get templates grouped by size
grouped_templates = get_templates_grouped_by_size()
# Template type selector
st.markdown(f"**{tr('template.type_selector')}**")
template_type_options = {
'static': tr('template.type.static'),
'image': tr('template.type.image'),
'video': tr('template.type.video')
}
# Radio buttons in horizontal layout
selected_template_type = st.radio(
tr('template.type_selector'),
options=list(template_type_options.keys()),
format_func=lambda x: template_type_options[x],
index=1, # Default to 'image'
key="template_type_selector",
label_visibility="collapsed",
horizontal=True
)
# Display hint based on selected type (below radio buttons)
if selected_template_type == 'static':
st.info(tr('template.type.static_hint'))
elif selected_template_type == 'image':
st.info(tr('template.type.image_hint'))
elif selected_template_type == 'video':
st.info(tr('template.type.video_hint'))
# Get templates grouped by size, filtered by selected type
grouped_templates = get_templates_grouped_by_size_and_type(selected_template_type)
if not grouped_templates:
st.error("No templates found. Please ensure templates are in templates/ directory with proper structure (e.g., templates/1080x1920/default.html).")
st.warning(f"No {template_type_options[selected_template_type]} templates found. Please select a different type or add templates.")
st.stop()
# Build display options with group separators
@@ -707,7 +735,19 @@ def main():
# Get default template from config
template_config = pixelle_video.config.get("template", {})
config_default_template = template_config.get("default_template", "1080x1920/default.html")
config_default_template = template_config.get("default_template", "1080x1920/image_default.html")
# Backward compatibility
if config_default_template == "1080x1920/default.html":
config_default_template = "1080x1920/image_default.html"
# Determine type-specific default template
type_default_templates = {
'static': '1080x1920/static_default.html',
'image': '1080x1920/image_default.html',
'video': '1080x1920/video_default.html'
}
type_specific_default = type_default_templates.get(selected_template_type, config_default_template)
for size, templates in grouped_templates.items():
if not templates:
@@ -733,10 +773,12 @@ def main():
display_options.append(display_name)
template_paths_ordered.append(t.template_path) # Add to ordered list
# Set default based on config (priority: config > first default.html in portrait)
# Set default: priority is config > type-specific default > first in portrait
if t.template_path == config_default_template:
default_index = current_index
elif default_index == 0 and "default.html" in t.display_info.name and t.display_info.orientation == 'portrait':
elif default_index == 0 and t.template_path == type_specific_default:
default_index = current_index
elif default_index == 0 and t.display_info.orientation == 'portrait':
default_index = current_index
current_index += 1
@@ -782,10 +824,25 @@ def main():
generator_for_params = HTMLFrameGenerator(template_path_for_params)
custom_params_for_video = generator_for_params.parse_template_parameters()
# Detect if template requires image generation
template_requires_image = generator_for_params.requires_image()
# Store in session state for Image Section to read
st.session_state['template_requires_image'] = template_requires_image
# Get media size from template (for image/video generation)
media_width, media_height = generator_for_params.get_media_size()
st.session_state['template_media_width'] = media_width
st.session_state['template_media_height'] = media_height
# Detect template media type
from pathlib import Path
from pixelle_video.utils.template_util import get_template_type
template_name = Path(frame_template).name
template_media_type = get_template_type(template_name)
template_requires_media = (template_media_type in ["image", "video"])
# Store in session state for workflow filtering
st.session_state['template_media_type'] = template_media_type
st.session_state['template_requires_media'] = template_requires_media
# Backward compatibility
st.session_state['template_requires_image'] = (template_media_type == "image")
custom_values_for_video = {}
if custom_params_for_video:
@@ -928,25 +985,51 @@ def main():
logger.exception(e)
# ====================================================================
# Image Generation Section (conditional based on template)
# Media Generation Section (conditional based on template)
# ====================================================================
# Check if current template requires image generation
if st.session_state.get('template_requires_image', True):
# Template requires images - show full Image Section
# Check if current template requires media generation
template_media_type = st.session_state.get('template_media_type', 'image')
template_requires_media = st.session_state.get('template_requires_media', True)
if template_requires_media:
# Template requires media - show Media Generation Section
with st.container(border=True):
st.markdown(f"**{tr('section.image')}**")
# Dynamic section title based on template type
if template_media_type == "video":
section_title = tr('section.video')
else:
section_title = tr('section.image')
st.markdown(f"**{section_title}**")
# 1. ComfyUI Workflow selection
with st.expander(tr("help.feature_description"), expanded=False):
st.markdown(f"**{tr('help.what')}**")
st.markdown(tr("style.workflow_what"))
if template_media_type == "video":
st.markdown(tr('style.video_workflow_what'))
else:
st.markdown(tr("style.workflow_what"))
st.markdown(f"**{tr('help.how')}**")
st.markdown(tr("style.workflow_how"))
if template_media_type == "video":
st.markdown(tr('style.video_workflow_how'))
else:
st.markdown(tr("style.workflow_how"))
st.markdown(f"**{tr('help.note')}**")
st.markdown(tr("style.image_size_note"))
if template_media_type == "video":
st.markdown(tr('style.video_size_note'))
else:
st.markdown(tr("style.image_size_note"))
# Get available workflows from pixelle_video (with source info)
workflows = pixelle_video.image.list_workflows()
# Get available workflows and filter by template type
all_workflows = pixelle_video.media.list_workflows()
# Filter workflows based on template media type
if template_media_type == "video":
# Only show video_ workflows
workflows = [wf for wf in all_workflows if "video_" in wf["key"].lower()]
else:
# Only show image_ workflows (exclude video_)
workflows = [wf for wf in all_workflows if "video_" not in wf["key"].lower()]
# Build options for selectbox
# Display: "image_flux.json - Runninghub"
@@ -959,7 +1042,9 @@ def main():
# If user has a saved preference in config, try to match it
comfyui_config = config_manager.get_comfyui_config()
saved_workflow = comfyui_config["image"]["default_workflow"]
# Select config based on template type (image or video)
media_config_key = "video" if template_media_type == "video" else "image"
saved_workflow = comfyui_config.get(media_config_key, {}).get("default_workflow", "")
if saved_workflow and saved_workflow in workflow_keys:
default_workflow_index = workflow_keys.index(saved_workflow)
@@ -978,31 +1063,20 @@ def main():
else:
workflow_key = "runninghub/image_flux.json" # fallback
# Get media size from template
image_width = st.session_state.get('template_media_width', 1024)
image_height = st.session_state.get('template_media_height', 1024)
# Display media size info (read-only)
if template_media_type == "video":
size_info_text = tr('style.video_size_info', width=image_width, height=image_height)
else:
size_info_text = tr('style.image_size_info', width=image_width, height=image_height)
st.info(f"📐 {size_info_text}")
# 2. Image size input
col1, col2 = st.columns(2)
with col1:
image_width = st.number_input(
tr('style.image_width'),
min_value=128,
value=1024,
step=1,
label_visibility="visible",
help=tr('style.image_width_help')
)
with col2:
image_height = st.number_input(
tr('style.image_height'),
min_value=128,
value=1024,
step=1,
label_visibility="visible",
help=tr('style.image_height_help')
)
# 3. Prompt prefix input
# Get current prompt_prefix from config
current_prefix = comfyui_config["image"]["prompt_prefix"]
# Prompt prefix input
# Get current prompt_prefix from config (based on media type)
current_prefix = comfyui_config.get(media_config_key, {}).get("prompt_prefix", "")
# Prompt prefix input (temporary, not saved to config)
prompt_prefix = st.text_area(
@@ -1014,54 +1088,71 @@ def main():
help=tr("style.prompt_prefix_help")
)
# Style preview expander (similar to template preview)
with st.expander(tr("style.preview_title"), expanded=False):
# Media preview expander
preview_title = tr("style.video_preview_title") if template_media_type == "video" else tr("style.preview_title")
with st.expander(preview_title, expanded=False):
# Test prompt input
if template_media_type == "video":
test_prompt_label = tr("style.test_video_prompt")
test_prompt_value = "a dog running in the park"
else:
test_prompt_label = tr("style.test_prompt")
test_prompt_value = "a dog"
test_prompt = st.text_input(
tr("style.test_prompt"),
value="a dog",
test_prompt_label,
value=test_prompt_value,
help=tr("style.test_prompt_help"),
key="style_test_prompt"
)
# Preview button
if st.button(tr("style.preview"), key="preview_style", use_container_width=True):
with st.spinner(tr("style.previewing")):
preview_button_label = tr("style.video_preview") if template_media_type == "video" else tr("style.preview")
if st.button(preview_button_label, key="preview_style", use_container_width=True):
previewing_text = tr("style.video_previewing") if template_media_type == "video" else tr("style.previewing")
with st.spinner(previewing_text):
try:
from pixelle_video.utils.prompt_helper import build_image_prompt
# Build final prompt with prefix
final_prompt = build_image_prompt(test_prompt, prompt_prefix)
# Generate preview image (use user-specified size)
preview_image_path = run_async(pixelle_video.image(
# Generate preview media (use user-specified size and media type)
media_result = run_async(pixelle_video.media(
prompt=final_prompt,
workflow=workflow_key,
media_type=template_media_type,
width=int(image_width),
height=int(image_height)
))
preview_media_path = media_result.url
# Display preview (support both URL and local path)
if preview_image_path:
st.success(tr("style.preview_success"))
if preview_media_path:
success_text = tr("style.video_preview_success") if template_media_type == "video" else tr("style.preview_success")
st.success(success_text)
# Read and encode image
if preview_image_path.startswith('http'):
# URL - use directly
img_html = f'<div class="preview-image"><img src="{preview_image_path}" alt="Style Preview"/></div>'
if template_media_type == "video":
# Display video
st.video(preview_media_path)
else:
# Local file - encode as base64
with open(preview_image_path, 'rb') as f:
img_data = base64.b64encode(f.read()).decode()
img_html = f'<div class="preview-image"><img src="data:image/png;base64,{img_data}" alt="Style Preview"/></div>'
st.markdown(img_html, unsafe_allow_html=True)
# Display image
if preview_media_path.startswith('http'):
# URL - use directly
img_html = f'<div class="preview-image"><img src="{preview_media_path}" alt="Style Preview"/></div>'
else:
# Local file - encode as base64
with open(preview_media_path, 'rb') as f:
img_data = base64.b64encode(f.read()).decode()
img_html = f'<div class="preview-image"><img src="data:image/png;base64,{img_data}" alt="Style Preview"/></div>'
st.markdown(img_html, unsafe_allow_html=True)
# Show the final prompt used
st.info(f"**{tr('style.final_prompt_label')}**\n{final_prompt}")
# Show file path
st.caption(f"📁 {preview_image_path}")
st.caption(f"📁 {preview_media_path}")
else:
st.error(tr("style.preview_failed_general"))
except Exception as e:
@@ -1076,10 +1167,12 @@ def main():
st.info(" " + tr("image.not_required"))
st.caption(tr("image.not_required_hint"))
# Get media size from template (even though not used, for consistency)
image_width = st.session_state.get('template_media_width', 1024)
image_height = st.session_state.get('template_media_height', 1024)
# Set default values for later use
workflow_key = None
image_width = 1024
image_height = 1024
prompt_prefix = ""
@@ -1149,14 +1242,13 @@ def main():
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
# Generate video (directly pass parameters)
# Note: image_width and image_height are now auto-determined from template
video_params = {
"text": text,
"mode": mode,
"title": title if title else None,
"n_scenes": n_scenes,
"image_workflow": workflow_key,
"image_width": int(image_width),
"image_height": int(image_height),
"frame_template": frame_template,
"prompt_prefix": prompt_prefix,
"bgm_path": bgm_path,
@@ -1211,6 +1303,18 @@ def main():
# Video preview
if os.path.exists(result.video_path):
st.video(result.video_path)
# Download button
with open(result.video_path, "rb") as video_file:
video_bytes = video_file.read()
video_filename = os.path.basename(result.video_path)
st.download_button(
label="⬇️ 下载视频" if get_language() == "zh_CN" else "⬇️ Download Video",
data=video_bytes,
file_name=video_filename,
mime="video/mp4",
use_container_width=True
)
else:
st.error(tr("status.video_not_found", path=result.video_path))

View File

@@ -8,6 +8,8 @@
"section.bgm": "🎵 Background Music",
"section.tts": "🎤 Voiceover",
"section.image": "🎨 Image Generation",
"section.video": "🎬 Video Generation",
"section.media": "🎨 Media Generation",
"section.template": "📐 Storyboard Template",
"section.video_generation": "🎬 Generate Video",
@@ -45,12 +47,10 @@
"style.workflow": "Workflow Selection",
"style.workflow_what": "Determines how each frame's illustration is generated and its effect (e.g., using FLUX, SD models)",
"style.workflow_how": "Place the exported image_xxx.json workflow file(API format) into the workflows/selfhost/ folder (for local ComfyUI) or the workflows/runninghub/ folder (for cloud)",
"style.image_size": "Image Size",
"style.image_width": "Width",
"style.image_height": "Height",
"style.image_width_help": "Width of AI-generated images (Note: This is the image size, not the final video size. Video size is determined by the template)",
"style.image_height_help": "Height of AI-generated images (Note: This is the image size, not the final video size. Video size is determined by the template)",
"style.image_size_note": "Image size controls the dimensions of AI-generated illustrations, and does not affect the final video size. Video size is determined by the Storyboard Template below.",
"style.video_workflow_what": "Determines how each frame's video clip is generated and its effect (e.g., using different video generation models)",
"style.video_workflow_how": "Place the exported video_xxx.json workflow file(API format) into the workflows/selfhost/ folder (for local ComfyUI) or the workflows/runninghub/ folder (for cloud)",
"style.image_size_info": "Image Size: {width}x{height} (auto-determined by template)",
"style.video_size_info": "Video Size: {width}x{height} (auto-determined by template)",
"style.prompt_prefix": "Prompt Prefix",
"style.prompt_prefix_what": "Automatically added before all image prompts to control the illustration style uniformly (e.g., cartoon, realistic)",
"style.prompt_prefix_how": "Enter style description in the input box below. To save permanently, edit the config.yaml file",
@@ -60,11 +60,16 @@
"style.description": "Style Description",
"style.description_placeholder": "Describe the illustration style you want (any language)...",
"style.preview_title": "Preview Style",
"style.video_preview_title": "Preview Video",
"style.test_prompt": "Test Prompt",
"style.test_video_prompt": "Test Video Prompt",
"style.test_prompt_help": "Enter test prompt to preview style effect",
"style.preview": "🖼️ Generate Preview",
"style.video_preview": "🎬 Generate Video Preview",
"style.previewing": "Generating style preview...",
"style.video_previewing": "Generating video preview...",
"style.preview_success": "✅ Preview generated successfully!",
"style.video_preview_success": "✅ Video preview generated successfully!",
"style.preview_caption": "Style Preview",
"style.preview_failed": "Preview failed: {error}",
"style.preview_failed_general": "Failed to generate preview image",
@@ -81,8 +86,15 @@
"template.modern": "Modern",
"template.neon": "Neon",
"template.what": "Controls the visual layout and design style of each frame (title, text, image arrangement)",
"template.how": "Place .html template files in templates/SIZE/ directories (e.g., templates/1080x1920/). Templates are automatically grouped by size. Custom CSS styles are supported.\n\n**Note**\n\nAt least one of the following browsers must be installed on your computer for proper operation:\n1. Google Chrome (Windows, macOS)\n2. Chromium Browser (Linux)\n3. Microsoft Edge",
"template.how": "Place .html template files in templates/SIZE/ directories (e.g., templates/1080x1920/). Templates are automatically grouped by size. Custom CSS styles are supported.\n\n**Template Naming Convention**\n\n- `static_*.html` → Static style templates (no AI-generated media)\n- `image_*.html` → Image generation templates (AI-generated images)\n- `video_*.html` → Video generation templates (AI-generated videos)\n\n**Note**\n\nAt least one of the following browsers must be installed on your computer for proper operation:\n1. Google Chrome (Windows, macOS)\n2. Chromium Browser (Linux)\n3. Microsoft Edge",
"template.size_info": "Template Size",
"template.type_selector": "Template Type",
"template.type.static": "📄 Static Style",
"template.type.image": "🖼️ Generate Images",
"template.type.video": "🎬 Generate Videos",
"template.type.static_hint": "Uses template's built-in styles, no AI-generated media required. You can customize background images and other parameters in the template.",
"template.type.image_hint": "AI automatically generates illustrations matching the narration content. Image size is determined by the template.",
"template.type.video_hint": "AI automatically generates video clips matching the narration content. Video size is determined by the template.",
"orientation.portrait": "Portrait",
"orientation.landscape": "Landscape",
@@ -140,12 +152,16 @@
"progress.generating_narrations": "Generating narrations...",
"progress.splitting_script": "Splitting script...",
"progress.generating_image_prompts": "Generating image prompts...",
"progress.generating_video_prompts": "Generating video prompts...",
"progress.preparing_frames": "Preparing frames...",
"progress.frame": "Frame {current}/{total}",
"progress.frame_step": "Frame {current}/{total} - Step {step}/4: {action}",
"progress.step_audio": "Generating audio...",
"progress.step_image": "Generating image...",
"progress.step_compose": "Composing frame...",
"progress.step_video": "Creating video segment...",
"progress.processing_frame": "Processing frame {current}/{total}...",
"progress.step_audio": "Generating audio",
"progress.step_image": "Generating image",
"progress.step_media": "Generating media",
"progress.step_compose": "Composing frame",
"progress.step_video": "Creating video segment",
"progress.concatenating": "Concatenating video...",
"progress.finalizing": "Finalizing...",
"progress.completed": "✅ Completed",

View File

@@ -8,6 +8,8 @@
"section.bgm": "🎵 背景音乐",
"section.tts": "🎤 配音合成",
"section.image": "🎨 插图生成",
"section.video": "🎬 视频生成",
"section.media": "🎨 媒体生成",
"section.template": "📐 分镜模板",
"section.video_generation": "🎬 生成视频",
@@ -45,12 +47,10 @@
"style.workflow": "工作流选择",
"style.workflow_what": "决定视频中每帧插图的生成方式和效果(如使用 FLUX、SD 等模型)",
"style.workflow_how": "将导出的 image_xxx.json 工作流文件API格式放入 workflows/selfhost/(本地 ComfyUI或 workflows/runninghub/(云端)文件夹",
"style.image_size": "图片尺寸",
"style.image_width": "宽度",
"style.image_height": "高度",
"style.image_width_help": "AI 生成插图的宽度(注意:这是插图尺寸,不是最终视频尺寸。视频尺寸由模板决定)",
"style.image_height_help": "AI 生成插图的高度(注意:这是插图尺寸,不是最终视频尺寸。视频尺寸由模板决定)",
"style.image_size_note": "图片尺寸控制 AI 生成的插图大小,不影响最终视频尺寸。视频尺寸由下方的「📐 分镜模板」决定。",
"style.video_workflow_what": "决定视频中每帧视频片段的生成方式和效果(如使用不同的视频生成模型)",
"style.video_workflow_how": "将导出的 video_xxx.json 工作流文件API格式放入 workflows/selfhost/(本地 ComfyUI或 workflows/runninghub/(云端)文件夹",
"style.image_size_info": "插图尺寸:{width}x{height}(由模板自动决定)",
"style.video_size_info": "视频尺寸:{width}x{height}由模板自动决定)",
"style.prompt_prefix": "提示词前缀",
"style.prompt_prefix_what": "自动添加到所有图片提示词前面,统一控制插图风格(如:卡通风格、写实风格等)",
"style.prompt_prefix_how": "直接在下方输入框填写风格描述。若要永久保存,需编辑 config.yaml 文件",
@@ -60,11 +60,16 @@
"style.description": "风格描述",
"style.description_placeholder": "描述您想要的插图风格(任何语言)...",
"style.preview_title": "预览风格",
"style.video_preview_title": "预览视频",
"style.test_prompt": "测试提示词",
"style.test_video_prompt": "测试视频提示词",
"style.test_prompt_help": "输入测试提示词来预览风格效果",
"style.preview": "🖼️ 生成预览",
"style.video_preview": "🎬 生成视频预览",
"style.previewing": "正在生成风格预览...",
"style.video_previewing": "正在生成视频预览...",
"style.preview_success": "✅ 预览生成成功!",
"style.video_preview_success": "✅ 视频预览生成成功!",
"style.preview_caption": "风格预览",
"style.preview_failed": "预览失败:{error}",
"style.preview_failed_general": "预览图片生成失败",
@@ -81,8 +86,15 @@
"template.modern": "现代",
"template.neon": "霓虹",
"template.what": "控制视频每一帧的视觉布局和设计风格(标题、文本、图片的排版样式)",
"template.how": "将 .html 模板文件放入 templates/尺寸/ 目录(如 templates/1080x1920/),系统会自动按尺寸分组。支持自定义 CSS 样式。\n\n**注意**\n\n您的计算机上必须安装以下至少一种浏览器才能正常运行\n1. Google ChromeWindows、MacOS\n2. Chromium 浏览器Linux\n3. Microsoft Edge",
"template.how": "将 .html 模板文件放入 templates/尺寸/ 目录(如 templates/1080x1920/),系统会自动按尺寸分组。支持自定义 CSS 样式。\n\n**模板命名规范**\n\n- `static_*.html` → 静态样式模板无需AI生成媒体\n- `image_*.html` → 生成插图模板AI生成图片\n- `video_*.html` → 生成视频模板AI生成视频\n\n**注意**\n\n您的计算机上必须安装以下至少一种浏览器才能正常运行\n1. Google ChromeWindows、MacOS\n2. Chromium 浏览器Linux\n3. Microsoft Edge",
"template.size_info": "模板尺寸",
"template.type_selector": "分镜类型",
"template.type.static": "📄 静态样式",
"template.type.image": "🖼️ 生成插图",
"template.type.video": "🎬 生成视频",
"template.type.static_hint": "使用模板自带样式无需AI生成媒体。可在模板中自定义背景图片等参数。",
"template.type.image_hint": "AI自动根据文案内容生成与之匹配的插图插图尺寸由模板决定。",
"template.type.video_hint": "AI自动根据文案内容生成与之匹配的视频片段视频尺寸由模板决定。",
"orientation.portrait": "竖屏",
"orientation.landscape": "横屏",
@@ -140,12 +152,16 @@
"progress.generating_narrations": "生成旁白...",
"progress.splitting_script": "切分脚本...",
"progress.generating_image_prompts": "生成图片提示词...",
"progress.generating_video_prompts": "生成视频提示词...",
"progress.preparing_frames": "准备分镜...",
"progress.frame": "分镜 {current}/{total}",
"progress.frame_step": "分镜 {current}/{total} - 步骤 {step}/4: {action}",
"progress.step_audio": "生成语音...",
"progress.step_image": "生成插图...",
"progress.step_compose": "合成画面...",
"progress.step_video": "创建视频片段...",
"progress.processing_frame": "处理分镜 {current}/{total}...",
"progress.step_audio": "生成语音",
"progress.step_image": "生成插图",
"progress.step_media": "生成媒体",
"progress.step_compose": "合成画面",
"progress.step_video": "创建视频片段",
"progress.concatenating": "正在拼接视频...",
"progress.finalizing": "完成中...",
"progress.completed": "✅ 生成完成",

View File

@@ -0,0 +1,5 @@
{
"source": "runninghub",
"workflow_id": "1985909483975188481"
}

View File

@@ -0,0 +1,187 @@
{
"3": {
"inputs": {
"seed": 576600626757621,
"steps": 10,
"cfg": 1,
"sampler_name": "uni_pc",
"scheduler": "normal",
"denoise": 1,
"model": [
"48",
0
],
"positive": [
"6",
0
],
"negative": [
"7",
0
],
"latent_image": [
"40",
0
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"6": {
"inputs": {
"text": [
"49",
0
],
"clip": [
"38",
0
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Positive Prompt)"
}
},
"7": {
"inputs": {
"text": "色调艳丽过曝静态细节模糊不清字幕风格作品画作画面静止整体发灰最差质量低质量JPEG压缩残留丑陋的残缺的多余的手指画得不好的手部画得不好的脸部畸形的毁容的形态畸形的肢体手指融合静止不动的画面杂乱的背景三条腿背景人很多倒着走",
"clip": [
"38",
0
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Negative Prompt)"
}
},
"8": {
"inputs": {
"samples": [
"3",
0
],
"vae": [
"39",
0
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"30": {
"inputs": {
"frame_rate": 16,
"loop_count": 0,
"filename_prefix": "Video",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"trim_to_audio": false,
"pingpong": false,
"save_output": true,
"images": [
"8",
0
]
},
"class_type": "VHS_VideoCombine",
"_meta": {
"title": "Video Combine 🎥🅥🅗🅢"
}
},
"37": {
"inputs": {
"unet_name": "wan-fusionx/WanT2V_MasterModel.safetensors",
"weight_dtype": "default"
},
"class_type": "UNETLoader",
"_meta": {
"title": "Load Diffusion Model"
}
},
"38": {
"inputs": {
"clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
"type": "wan",
"device": "default"
},
"class_type": "CLIPLoader",
"_meta": {
"title": "Load CLIP"
}
},
"39": {
"inputs": {
"vae_name": "wan_2.1_vae.safetensors"
},
"class_type": "VAELoader",
"_meta": {
"title": "Load VAE"
}
},
"40": {
"inputs": {
"width": [
"50",
0
],
"height": [
"51",
0
],
"length": 81,
"batch_size": 1
},
"class_type": "EmptyHunyuanLatentVideo",
"_meta": {
"title": "EmptyHunyuanLatentVideo"
}
},
"48": {
"inputs": {
"shift": 1,
"model": [
"37",
0
]
},
"class_type": "ModelSamplingSD3",
"_meta": {
"title": "Shift"
}
},
"49": {
"inputs": {
"value": "草地上有个小狗在奔跑"
},
"class_type": "PrimitiveStringMultiline",
"_meta": {
"title": "$prompt.value!"
}
},
"50": {
"inputs": {
"value": 512
},
"class_type": "easy int",
"_meta": {
"title": "$width.value"
}
},
"51": {
"inputs": {
"value": 288
},
"class_type": "easy int",
"_meta": {
"title": "$height.value"
}
}
}