Merge branch 'dev_video'
This commit is contained in:
@@ -13,7 +13,7 @@
|
||||
|
||||
只需输入一个 **主题**,Pixelle-Video 就能自动完成:
|
||||
- ✍️ 撰写视频文案
|
||||
- 🎨 生成 AI 配图
|
||||
- 🎨 生成 AI 配图/视频
|
||||
- 🗣️ 合成语音解说
|
||||
- 🎵 添加背景音乐
|
||||
- 🎬 一键合成视频
|
||||
@@ -32,6 +32,7 @@
|
||||
- ✅ **全自动生成** - 输入主题,自动生成完整视频
|
||||
- ✅ **AI 智能文案** - 根据主题智能创作解说词,无需自己写脚本
|
||||
- ✅ **AI 生成配图** - 每句话都配上精美的 AI 插图
|
||||
- ✅ **AI 生成视频** - 支持使用 AI 视频生成模型(如 WAN 2.1)创建动态视频内容
|
||||
- ✅ **AI 生成语音** - 支持 Edge-TTS、Index-TTS 等众多主流 TTS 方案
|
||||
- ✅ **背景音乐** - 支持添加 BGM,让视频更有氛围
|
||||
- ✅ **视觉风格** - 多种模板可选,打造独特视频风格
|
||||
@@ -281,6 +282,12 @@ uv run streamlit run web/app.py
|
||||
#### 视频模板
|
||||
决定视频画面的布局和设计。
|
||||
|
||||
**模板命名规范**
|
||||
- `static_*.html`: 静态模板(无需AI生成媒体,纯文字样式)
|
||||
- `image_*.html`: 图片模板(使用AI生成的图片作为背景)
|
||||
- `video_*.html`: 视频模板(使用AI生成的视频作为背景)
|
||||
|
||||
**使用方法**
|
||||
- 从下拉菜单选择模板,按尺寸分组显示(竖屏/横屏/方形)
|
||||
- 点击「预览模板」可以自定义参数测试效果
|
||||
- 如果懂 HTML,可以在 `templates/` 文件夹创建自己的模板
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
Just input a **topic**, and Pixelle-Video will automatically:
|
||||
- ✍️ Write video script
|
||||
- 🎨 Generate AI images
|
||||
- 🎨 Generate AI images/videos
|
||||
- 🗣️ Synthesize voice narration
|
||||
- 🎵 Add background music
|
||||
- 🎬 Create video with one click
|
||||
@@ -32,6 +32,7 @@ Just input a **topic**, and Pixelle-Video will automatically:
|
||||
- ✅ **Fully Automatic Generation** - Enter a topic and a complete video is generated automatically
|
||||
- ✅ **AI Smart Copywriting** - Intelligently create narration based on topic, no need to write scripts yourself
|
||||
- ✅ **AI Generated Images** - Each sentence comes with beautiful AI illustrations
|
||||
- ✅ **AI Generated Videos** - Supports AI video generation models (such as WAN 2.1) for creating dynamic video content
|
||||
- ✅ **AI Generated Voice** - Supports Edge-TTS, Index-TTS, and many other mainstream TTS solutions
|
||||
- ✅ **Background Music** - Support adding BGM to make videos more atmospheric
|
||||
- ✅ **Visual Styles** - Multiple templates to choose from, create unique video styles
|
||||
@@ -281,6 +282,12 @@ Determine what style of images AI generates.
|
||||
#### Video Template
|
||||
Determines video layout and design.
|
||||
|
||||
**Template Naming Convention**
|
||||
- `static_*.html`: Static templates (no AI-generated media, text-only styles)
|
||||
- `image_*.html`: Image templates (uses AI-generated images as background)
|
||||
- `video_*.html`: Video templates (uses AI-generated videos as background)
|
||||
|
||||
**Usage**
|
||||
- Select a template from the dropdown menu; templates are grouped by orientation (portrait/landscape/square)
|
||||
- Click "Preview Template" to try out a template with custom parameters
|
||||
- If you know HTML, you can create your own templates in the `templates/` folder
|
||||
|
||||
@@ -43,18 +43,27 @@ async def image_generate(
|
||||
try:
|
||||
logger.info(f"Image generation request: {request.prompt[:50]}...")
|
||||
|
||||
# Call image service
|
||||
image_path = await pixelle_video.image(
|
||||
# Call media service (backward compatible with image API)
|
||||
media_result = await pixelle_video.media(
|
||||
prompt=request.prompt,
|
||||
width=request.width,
|
||||
height=request.height,
|
||||
workflow=request.workflow
|
||||
)
|
||||
|
||||
# For backward compatibility, only support image results in /image endpoint
|
||||
if media_result.is_video:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Video workflow used. Please use /media/generate endpoint for video generation."
|
||||
)
|
||||
|
||||
return ImageGenerateResponse(
|
||||
image_path=image_path
|
||||
image_path=media_result.url
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Image generation error: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -73,8 +73,7 @@ async def generate_video_sync(
|
||||
"max_narration_words": request_body.max_narration_words,
|
||||
"min_image_prompt_words": request_body.min_image_prompt_words,
|
||||
"max_image_prompt_words": request_body.max_image_prompt_words,
|
||||
"image_width": request_body.image_width,
|
||||
"image_height": request_body.image_height,
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
"image_workflow": request_body.image_workflow,
|
||||
"video_fps": request_body.video_fps,
|
||||
"frame_template": request_body.frame_template,
|
||||
@@ -161,8 +160,7 @@ async def generate_video_async(
|
||||
"max_narration_words": request_body.max_narration_words,
|
||||
"min_image_prompt_words": request_body.min_image_prompt_words,
|
||||
"max_image_prompt_words": request_body.max_image_prompt_words,
|
||||
"image_width": request_body.image_width,
|
||||
"image_height": request_body.image_height,
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
"image_workflow": request_body.image_workflow,
|
||||
"video_fps": request_body.video_fps,
|
||||
"frame_template": request_body.frame_template,
|
||||
|
||||
@@ -57,8 +57,7 @@ class VideoGenerateRequest(BaseModel):
|
||||
max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
|
||||
|
||||
# === Image Parameters ===
|
||||
image_width: int = Field(1024, description="Image width")
|
||||
image_height: int = Field(1024, description="Image height")
|
||||
# Note: image_width and image_height are now auto-determined from template meta tags
|
||||
image_workflow: Optional[str] = Field(None, description="Custom image workflow")
|
||||
|
||||
# === Video Parameters ===
|
||||
|
||||
@@ -37,15 +37,29 @@ comfyui:
|
||||
|
||||
# Image prompt prefix (optional)
|
||||
prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"
|
||||
|
||||
# Video-specific configuration
|
||||
video:
|
||||
# Required: Default workflow to use (no fallback)
|
||||
# Options: runninghub/video_wan2.1_fusionx.json (recommended, no local setup)
|
||||
# selfhost/video_wan2.1_fusionx.json (requires local ComfyUI)
|
||||
default_workflow: runninghub/video_wan2.1_fusionx.json
|
||||
|
||||
# Video prompt prefix (optional)
|
||||
prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"
|
||||
|
||||
# ==================== Template Configuration ====================
|
||||
# Configure default template for video generation
|
||||
template:
|
||||
# Default frame template to use when not explicitly specified
|
||||
# Determines video aspect ratio and layout style
|
||||
# Template naming convention:
|
||||
# - static_*.html: Static style templates (no AI-generated media)
|
||||
# - image_*.html: Templates requiring AI-generated images
|
||||
# - video_*.html: Templates requiring AI-generated videos
|
||||
# Options:
|
||||
# - 1080x1920 (vertical/portrait): default.html, modern.html, elegant.html, etc.
|
||||
# - 1080x1080 (square): minimal_framed.html, magazine_cover.html, etc.
|
||||
# - 1920x1080 (horizontal/landscape): film.html, full.html, etc.
|
||||
# - 1080x1920 (vertical/portrait): image_default.html, image_modern.html, image_elegant.html, static_simple.html, etc.
|
||||
# - 1080x1080 (square): image_minimal_framed.html, etc.
|
||||
# - 1920x1080 (horizontal/landscape): image_film.html, image_full.html, etc.
|
||||
# See templates/ directory for all available templates
|
||||
default_template: "1080x1920/default.html"
|
||||
default_template: "1080x1920/image_default.html"
|
||||
|
||||
@@ -21,6 +21,10 @@ comfyui:
|
||||
default_workflow: "runninghub/image_flux.json"
|
||||
prompt_prefix: "Minimalist illustration style"
|
||||
|
||||
video:
|
||||
default_workflow: "runninghub/video_wan2.1_fusionx.json"
|
||||
prompt_prefix: "Minimalist illustration style"
|
||||
|
||||
tts:
|
||||
default_workflow: "selfhost/tts_edge.json"
|
||||
```
|
||||
@@ -48,6 +52,13 @@ comfyui:
|
||||
- `default_workflow`: Default image generation workflow
|
||||
- `prompt_prefix`: Prompt prefix
|
||||
|
||||
### Video Configuration
|
||||
|
||||
- `default_workflow`: Default video generation workflow
|
||||
- `runninghub/video_wan2.1_fusionx.json`: Cloud workflow (recommended, no local setup required)
|
||||
- `selfhost/video_wan2.1_fusionx.json`: Local workflow (requires local ComfyUI support)
|
||||
- `prompt_prefix`: Video prompt prefix (controls video generation style)
|
||||
|
||||
### TTS Configuration
|
||||
|
||||
- `default_workflow`: Default TTS workflow
|
||||
|
||||
@@ -154,15 +154,39 @@ Suitable for Instagram, WeChat Moments, and other platforms.
|
||||
|
||||
---
|
||||
|
||||
## Template Naming Convention
|
||||
|
||||
Templates follow a unified naming convention to distinguish different types:
|
||||
|
||||
- **`static_*.html`**: Static templates
|
||||
- No AI-generated media content required
|
||||
- Pure text style rendering
|
||||
- Suitable for quick generation and low-cost scenarios
|
||||
|
||||
- **`image_*.html`**: Image templates
|
||||
- Uses AI-generated images as background
|
||||
- Invokes ComfyUI image generation workflows
|
||||
- Suitable for content requiring visual illustrations
|
||||
|
||||
- **`video_*.html`**: Video templates
|
||||
- Uses AI-generated videos as background
|
||||
- Invokes ComfyUI video generation workflows
|
||||
- Creates dynamic video content with enhanced expressiveness
|
||||
|
||||
## Template Structure
|
||||
|
||||
Templates are located in the `templates/` directory, grouped by size:
|
||||
|
||||
```
|
||||
templates/
|
||||
├── 1080x1920/ # Portrait (11 templates)
|
||||
├── 1920x1080/ # Landscape (2 templates)
|
||||
└── 1080x1080/ # Square (1 template)
|
||||
├── 1080x1920/ # Portrait
|
||||
│ ├── static_*.html # Static templates
|
||||
│ ├── image_*.html # Image templates
|
||||
│ └── video_*.html # Video templates
|
||||
├── 1920x1080/ # Landscape
|
||||
│ └── image_*.html # Image templates
|
||||
└── 1080x1080/ # Square
|
||||
└── image_*.html # Image templates
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -16,10 +16,42 @@ Pixelle-Video is built on the ComfyUI architecture and supports custom workflows
|
||||
|
||||
Located in `workflows/selfhost/` or `workflows/runninghub/`
|
||||
|
||||
Used for Text-to-Speech, supporting various TTS engines:
|
||||
- Edge-TTS
|
||||
- Index-TTS (supports voice cloning)
|
||||
- Other ComfyUI-compatible TTS nodes
|
||||
|
||||
### Image Generation Workflows
|
||||
|
||||
Located in `workflows/selfhost/` or `workflows/runninghub/`
|
||||
|
||||
Used for generating static images as video backgrounds:
|
||||
- FLUX series models
|
||||
- Stable Diffusion series models
|
||||
- Other image generation models
|
||||
|
||||
### Video Generation Workflows
|
||||
|
||||
Located in `workflows/selfhost/` or `workflows/runninghub/`
|
||||
|
||||
**New Feature**: Supports AI video generation to create dynamic video content.
|
||||
|
||||
**Preset Workflows**:
|
||||
- `runninghub/video_wan2.1_fusionx.json`: Cloud workflow (recommended)
|
||||
- Based on WAN 2.1 model
|
||||
- No local setup required, accessed via RunningHub API
|
||||
- Supports Text-to-Video generation
|
||||
|
||||
- `selfhost/video_wan2.1_fusionx.json`: Local workflow
|
||||
- Requires local ComfyUI environment
|
||||
- Requires installation of corresponding video generation nodes
|
||||
- Suitable for users with local GPU
|
||||
|
||||
**Use Cases**:
|
||||
- Works with `video_*.html` templates
|
||||
- Automatically generates dynamic video backgrounds based on scripts
|
||||
- Enhances visual expressiveness and viewing experience
|
||||
|
||||
---
|
||||
|
||||
## Custom Workflows
|
||||
|
||||
@@ -21,6 +21,10 @@ comfyui:
|
||||
default_workflow: "runninghub/image_flux.json"
|
||||
prompt_prefix: "Minimalist illustration style"
|
||||
|
||||
video:
|
||||
default_workflow: "runninghub/video_wan2.1_fusionx.json"
|
||||
prompt_prefix: "Minimalist illustration style"
|
||||
|
||||
tts:
|
||||
default_workflow: "selfhost/tts_edge.json"
|
||||
```
|
||||
@@ -48,6 +52,13 @@ comfyui:
|
||||
- `default_workflow`: 默认图像生成工作流
|
||||
- `prompt_prefix`: 提示词前缀
|
||||
|
||||
### 视频配置
|
||||
|
||||
- `default_workflow`: 默认视频生成工作流
|
||||
- `runninghub/video_wan2.1_fusionx.json`: 云端工作流(推荐,无需本地环境)
|
||||
- `selfhost/video_wan2.1_fusionx.json`: 本地工作流(需要本地 ComfyUI 支持)
|
||||
- `prompt_prefix`: 视频提示词前缀(用于控制视频生成风格)
|
||||
|
||||
### TTS 配置
|
||||
|
||||
- `default_workflow`: 默认 TTS 工作流
|
||||
|
||||
@@ -154,15 +154,39 @@
|
||||
|
||||
---
|
||||
|
||||
## 模板命名规范
|
||||
|
||||
模板采用统一的命名规范来区分不同类型:
|
||||
|
||||
- **`static_*.html`**: 静态模板
|
||||
- 无需 AI 生成任何媒体内容
|
||||
- 纯文字样式渲染
|
||||
- 适合快速生成、低成本场景
|
||||
|
||||
- **`image_*.html`**: 图片模板
|
||||
- 使用 AI 生成的图片作为背景
|
||||
- 调用 ComfyUI 的图像生成工作流
|
||||
- 适合需要视觉配图的内容
|
||||
|
||||
- **`video_*.html`**: 视频模板
|
||||
- 使用 AI 生成的视频作为背景
|
||||
- 调用 ComfyUI 的视频生成工作流
|
||||
- 创建动态视频内容,增强表现力
|
||||
|
||||
## 模板结构
|
||||
|
||||
模板位于 `templates/` 目录,按尺寸分组:
|
||||
|
||||
```
|
||||
templates/
|
||||
├── 1080x1920/ # 竖屏(11个模板)
|
||||
├── 1920x1080/ # 横屏(2个模板)
|
||||
└── 1080x1080/ # 方形(1个模板)
|
||||
├── 1080x1920/ # 竖屏
|
||||
│ ├── static_*.html # 静态模板
|
||||
│ ├── image_*.html # 图片模板
|
||||
│ └── video_*.html # 视频模板
|
||||
├── 1920x1080/ # 横屏
|
||||
│ └── image_*.html # 图片模板
|
||||
└── 1080x1080/ # 方形
|
||||
└── image_*.html # 图片模板
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -16,10 +16,42 @@ Pixelle-Video 基于 ComfyUI 架构,支持自定义工作流。
|
||||
|
||||
位于 `workflows/selfhost/` 或 `workflows/runninghub/`
|
||||
|
||||
用于文本转语音(Text-to-Speech),支持多种 TTS 引擎:
|
||||
- Edge-TTS
|
||||
- Index-TTS(支持声音克隆)
|
||||
- 其他 ComfyUI 兼容的 TTS 节点
|
||||
|
||||
### 图像生成工作流
|
||||
|
||||
位于 `workflows/selfhost/` 或 `workflows/runninghub/`
|
||||
|
||||
用于生成静态图像作为视频背景:
|
||||
- FLUX 系列模型
|
||||
- Stable Diffusion 系列模型
|
||||
- 其他图像生成模型
|
||||
|
||||
### 视频生成工作流
|
||||
|
||||
位于 `workflows/selfhost/` 或 `workflows/runninghub/`
|
||||
|
||||
**新功能**:支持 AI 视频生成,创建动态视频内容。
|
||||
|
||||
**预置工作流**:
|
||||
- `runninghub/video_wan2.1_fusionx.json`: 云端工作流(推荐)
|
||||
- 基于 WAN 2.1 模型
|
||||
- 无需本地环境,通过 RunningHub API 调用
|
||||
- 支持文本到视频(Text-to-Video)
|
||||
|
||||
- `selfhost/video_wan2.1_fusionx.json`: 本地工作流
|
||||
- 需要本地 ComfyUI 环境
|
||||
- 需要安装相应的视频生成节点
|
||||
- 适合有本地 GPU 的用户
|
||||
|
||||
**使用场景**:
|
||||
- 配合 `video_*.html` 模板使用
|
||||
- 自动根据文案生成动态视频背景
|
||||
- 增强视频的视觉表现力和观看体验
|
||||
|
||||
---
|
||||
|
||||
## 自定义工作流
|
||||
|
||||
61
pixelle_video/models/media.py
Normal file
61
pixelle_video/models/media.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# Copyright (C) 2025 AIDC-AI
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Media generation result models
|
||||
"""
|
||||
|
||||
from typing import Literal, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class MediaResult(BaseModel):
    """
    Outcome of a single media-generation workflow run.

    A result describes either an image or a video; ``media_type`` records
    which of the two was produced.

    Attributes:
        media_type: Kind of media produced, either "image" or "video".
        url: URL or path of the generated media file.
        duration: Length in seconds. Intended for video results; defaults
            to None when not supplied.

    Examples:
        # Image result
        MediaResult(media_type="image", url="http://example.com/image.png")

        # Video result
        MediaResult(media_type="video", url="http://example.com/video.mp4", duration=5.2)
    """

    # Required discriminator: restricted to the two supported media kinds.
    media_type: Literal["image", "video"] = Field(description="Type of generated media")
    # Required location of the generated file.
    url: str = Field(description="URL or path to the generated media file")
    # Optional; left as None unless the caller provides a value.
    duration: Optional[float] = Field(
        default=None,
        description="Duration in seconds (only applicable for video)",
    )

    @property
    def is_video(self) -> bool:
        """Return True when ``media_type`` is "video"."""
        return self.media_type == "video"

    @property
    def is_image(self) -> bool:
        """Return True when ``media_type`` is "image"."""
        return self.media_type == "image"
|
||||
|
||||
@@ -57,16 +57,18 @@ class StoryboardFrame:
|
||||
"""Single storyboard frame"""
|
||||
index: int # Frame index (0-based)
|
||||
narration: str # Narration text
|
||||
image_prompt: str # Image generation prompt
|
||||
image_prompt: str # Image generation prompt (can be None for text-only or video)
|
||||
|
||||
# Generated resource paths
|
||||
audio_path: Optional[str] = None # Audio file path
|
||||
image_path: Optional[str] = None # Original image path
|
||||
composed_image_path: Optional[str] = None # Composed image path (with subtitles)
|
||||
video_segment_path: Optional[str] = None # Video segment path
|
||||
audio_path: Optional[str] = None # Audio file path (narration)
|
||||
media_type: Optional[str] = None # Media type: "image" or "video" (None if no media)
|
||||
image_path: Optional[str] = None # Original image path (for image type)
|
||||
video_path: Optional[str] = None # Original video path (for video type, before composition)
|
||||
composed_image_path: Optional[str] = None # Composed image path (with subtitles, for image type)
|
||||
video_segment_path: Optional[str] = None # Final video segment path
|
||||
|
||||
# Metadata
|
||||
duration: float = 0.0 # Audio duration (seconds)
|
||||
duration: float = 0.0 # Frame duration (seconds, from audio or video)
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
def __post_init__(self):
|
||||
|
||||
@@ -63,8 +63,11 @@ class BasePipeline(ABC):
|
||||
# Quick access to services (convenience)
|
||||
self.llm = pixelle_video_core.llm
|
||||
self.tts = pixelle_video_core.tts
|
||||
self.image = pixelle_video_core.image
|
||||
self.media = pixelle_video_core.media
|
||||
self.video = pixelle_video_core.video
|
||||
|
||||
# Backward compatibility alias
|
||||
self.image = pixelle_video_core.media
|
||||
|
||||
@abstractmethod
|
||||
async def __call__(
|
||||
|
||||
@@ -92,8 +92,7 @@ class CustomPipeline(BasePipeline):
|
||||
ref_audio: Optional[str] = None,
|
||||
|
||||
image_workflow: Optional[str] = None,
|
||||
image_width: int = 1024,
|
||||
image_height: int = 1024,
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
|
||||
frame_template: Optional[str] = None,
|
||||
video_fps: int = 30,
|
||||
@@ -118,9 +117,10 @@ class CustomPipeline(BasePipeline):
|
||||
VideoGenerationResult
|
||||
|
||||
Image Generation Logic:
|
||||
- If template has {{image}} → automatically generates images
|
||||
- If template has no {{image}} → skips image generation (faster, cheaper)
|
||||
- To customize: Override the template_requires_image logic in your subclass
|
||||
- image_*.html templates → automatically generates images
|
||||
- video_*.html templates → automatically generates videos
|
||||
- static_*.html templates → skips media generation (faster, cheaper)
|
||||
- To customize: Override the template type detection logic in your subclass
|
||||
"""
|
||||
logger.info("Starting CustomPipeline")
|
||||
logger.info(f"Input text length: {len(text)} chars")
|
||||
@@ -152,19 +152,27 @@ class CustomPipeline(BasePipeline):
|
||||
frame_template = template_config.get("default_template", "1080x1920/default.html")
|
||||
|
||||
# ========== Step 0.5: Check template requirements ==========
|
||||
# Detect if template requires {{image}} parameter
|
||||
# This allows skipping the entire image generation pipeline for text-only templates
|
||||
# Detect template type by filename prefix
|
||||
from pathlib import Path
|
||||
from pixelle_video.services.frame_html import HTMLFrameGenerator
|
||||
from pixelle_video.utils.template_util import resolve_template_path
|
||||
from pixelle_video.utils.template_util import resolve_template_path, get_template_type
|
||||
|
||||
template_name = Path(frame_template).name
|
||||
template_type = get_template_type(template_name)
|
||||
template_requires_image = (template_type == "image")
|
||||
|
||||
# Read media size from template meta tags
|
||||
template_path = resolve_template_path(frame_template)
|
||||
generator = HTMLFrameGenerator(template_path)
|
||||
template_requires_image = generator.requires_image()
|
||||
image_width, image_height = generator.get_media_size()
|
||||
logger.info(f"📐 Media size from template: {image_width}x{image_height}")
|
||||
|
||||
if template_requires_image:
|
||||
if template_type == "image":
|
||||
logger.info(f"📸 Template requires image generation")
|
||||
else:
|
||||
logger.info(f"⚡ Template does not require images - skipping image generation pipeline")
|
||||
elif template_type == "video":
|
||||
logger.info(f"🎬 Template requires video generation")
|
||||
else: # static
|
||||
logger.info(f"⚡ Static template - skipping media generation pipeline")
|
||||
logger.info(f" 💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
|
||||
|
||||
# ========== Step 1: Process content (CUSTOMIZE THIS) ==========
|
||||
@@ -194,8 +202,8 @@ class CustomPipeline(BasePipeline):
|
||||
# ========== Step 2: Generate image prompts (CONDITIONAL - CUSTOMIZE THIS) ==========
|
||||
self._report_progress(progress_callback, "generating_image_prompts", 0.25)
|
||||
|
||||
# IMPORTANT: Check if template actually needs images
|
||||
# If your template doesn't use {{image}}, you can skip this entire step!
|
||||
# IMPORTANT: Check if template is image type
|
||||
# If your template is static_*.html, you can skip this entire step!
|
||||
if template_requires_image:
|
||||
# Template requires images - generate image prompts using LLM
|
||||
from pixelle_video.utils.content_generators import generate_image_prompts
|
||||
|
||||
@@ -94,8 +94,7 @@ class StandardPipeline(BasePipeline):
|
||||
max_image_prompt_words: int = 60,
|
||||
|
||||
# === Image Parameters ===
|
||||
image_width: int = 1024,
|
||||
image_height: int = 1024,
|
||||
# Note: image_width and image_height are now auto-determined from template meta tags
|
||||
image_workflow: Optional[str] = None,
|
||||
|
||||
# === Video Parameters ===
|
||||
@@ -151,9 +150,8 @@ class StandardPipeline(BasePipeline):
|
||||
min_image_prompt_words: Min image prompt length
|
||||
max_image_prompt_words: Max image prompt length
|
||||
|
||||
image_width: Generated image width (default 1024)
|
||||
image_height: Generated image height (default 1024)
|
||||
image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
|
||||
Note: Image/video size is now auto-determined from template meta tags
|
||||
|
||||
video_fps: Video frame rate (default 30)
|
||||
|
||||
@@ -239,6 +237,16 @@ class StandardPipeline(BasePipeline):
|
||||
template_config = self.core.config.get("template", {})
|
||||
frame_template = template_config.get("default_template", "1080x1920/default.html")
|
||||
|
||||
# Read media size from template meta tags
|
||||
from pixelle_video.services.frame_html import HTMLFrameGenerator
|
||||
from pixelle_video.utils.template_util import resolve_template_path
|
||||
|
||||
template_path = resolve_template_path(frame_template)
|
||||
temp_generator = HTMLFrameGenerator(template_path)
|
||||
image_width, image_height = temp_generator.get_media_size()
|
||||
|
||||
logger.info(f"📐 Media size from template: {image_width}x{image_height}")
|
||||
|
||||
# Create storyboard config
|
||||
config = StoryboardConfig(
|
||||
task_id=task_id,
|
||||
@@ -269,11 +277,13 @@ class StandardPipeline(BasePipeline):
|
||||
)
|
||||
|
||||
# ========== Step 0.8: Check template requirements ==========
|
||||
template_requires_image = self._check_template_requires_image(config.frame_template)
|
||||
if template_requires_image:
|
||||
template_media_type = self._check_template_media_type(config.frame_template)
|
||||
if template_media_type == "video":
|
||||
logger.info(f"🎬 Template requires video generation")
|
||||
elif template_media_type == "image":
|
||||
logger.info(f"📸 Template requires image generation")
|
||||
else:
|
||||
logger.info(f"⚡ Template does not require images - skipping image generation pipeline")
|
||||
else: # static
|
||||
logger.info(f"⚡ Static template - skipping media generation pipeline")
|
||||
logger.info(f" 💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
|
||||
|
||||
try:
|
||||
@@ -294,8 +304,61 @@ class StandardPipeline(BasePipeline):
|
||||
logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
|
||||
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
|
||||
|
||||
# ========== Step 2: Generate image prompts (conditional) ==========
|
||||
if template_requires_image:
|
||||
# ========== Step 2: Generate media prompts (conditional) ==========
|
||||
if template_media_type == "video":
|
||||
# Video template: generate video prompts
|
||||
self._report_progress(progress_callback, "generating_video_prompts", 0.15)
|
||||
|
||||
from pixelle_video.utils.content_generators import generate_video_prompts
|
||||
|
||||
# Override prompt_prefix if provided
|
||||
original_prefix = None
|
||||
if prompt_prefix is not None:
|
||||
image_config = self.core.config.get("comfyui", {}).get("image", {})
|
||||
original_prefix = image_config.get("prompt_prefix")
|
||||
image_config["prompt_prefix"] = prompt_prefix
|
||||
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
|
||||
|
||||
try:
|
||||
# Create progress callback wrapper for video prompt generation
|
||||
def video_prompt_progress(completed: int, total: int, message: str):
|
||||
batch_progress = completed / total if total > 0 else 0
|
||||
overall_progress = 0.15 + (batch_progress * 0.15)
|
||||
self._report_progress(
|
||||
progress_callback,
|
||||
"generating_video_prompts",
|
||||
overall_progress,
|
||||
extra_info=message
|
||||
)
|
||||
|
||||
# Generate base video prompts
|
||||
base_image_prompts = await generate_video_prompts(
|
||||
self.llm,
|
||||
narrations=narrations,
|
||||
min_words=min_image_prompt_words,
|
||||
max_words=max_image_prompt_words,
|
||||
progress_callback=video_prompt_progress
|
||||
)
|
||||
|
||||
# Apply prompt prefix
|
||||
from pixelle_video.utils.prompt_helper import build_image_prompt
|
||||
image_config = self.core.config.get("comfyui", {}).get("image", {})
|
||||
prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
|
||||
|
||||
image_prompts = []
|
||||
for base_prompt in base_image_prompts:
|
||||
final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
|
||||
image_prompts.append(final_prompt)
|
||||
|
||||
finally:
|
||||
# Restore original prompt_prefix
|
||||
if original_prefix is not None:
|
||||
image_config["prompt_prefix"] = original_prefix
|
||||
|
||||
logger.info(f"✅ Generated {len(image_prompts)} video prompts")
|
||||
|
||||
elif template_media_type == "image":
|
||||
# Image template: generate image prompts
|
||||
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
|
||||
|
||||
# Override prompt_prefix if provided
|
||||
@@ -343,12 +406,13 @@ class StandardPipeline(BasePipeline):
|
||||
image_config["prompt_prefix"] = original_prefix
|
||||
|
||||
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
|
||||
else:
|
||||
# Skip image prompt generation
|
||||
|
||||
else: # text
|
||||
# Text-only template: skip media prompt generation
|
||||
image_prompts = [None] * len(narrations)
|
||||
self._report_progress(progress_callback, "preparing_frames", 0.15)
|
||||
logger.info(f"⚡ Skipped image prompt generation (template doesn't need images)")
|
||||
logger.info(f" 💡 Savings: {len(narrations)} LLM calls + {len(narrations)} image generations")
|
||||
logger.info(f"⚡ Skipped media prompt generation (text-only template)")
|
||||
logger.info(f" 💡 Savings: {len(narrations)} LLM calls + {len(narrations)} media generations")
|
||||
|
||||
# ========== Step 3: Create frames ==========
|
||||
for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
|
||||
@@ -452,29 +516,32 @@ class StandardPipeline(BasePipeline):
|
||||
logger.error(f"❌ Video generation failed: {e}")
|
||||
raise
|
||||
|
||||
def _check_template_requires_image(self, frame_template: str) -> bool:
|
||||
def _check_template_media_type(self, frame_template: str) -> str:
|
||||
"""
|
||||
Check if template requires image generation
|
||||
Check template media type requirement
|
||||
|
||||
This is checked at pipeline level to avoid unnecessary:
|
||||
- LLM calls (generating image_prompts)
|
||||
- Image generation API calls
|
||||
- LLM calls (generating media prompts)
|
||||
- Media generation API calls
|
||||
- ComfyUI dependency
|
||||
|
||||
Template naming convention:
|
||||
- static_*.html: Static style template (returns "static")
|
||||
- image_*.html: Image template (returns "image")
|
||||
- video_*.html: Video template (returns "video")
|
||||
|
||||
Args:
|
||||
frame_template: Template path (e.g., "1080x1920/default.html")
|
||||
frame_template: Template path (e.g., "1080x1920/image_default.html" or "1080x1920/video_default.html")
|
||||
|
||||
Returns:
|
||||
True if template contains {{image}}, False otherwise
|
||||
"static", "image", or "video"
|
||||
"""
|
||||
from pixelle_video.services.frame_html import HTMLFrameGenerator
|
||||
from pixelle_video.utils.template_util import resolve_template_path
|
||||
from pixelle_video.utils.template_util import get_template_type
|
||||
|
||||
template_path = resolve_template_path(frame_template)
|
||||
generator = HTMLFrameGenerator(template_path)
|
||||
# Determine type by template filename prefix
|
||||
template_name = Path(frame_template).name
|
||||
template_type = get_template_type(template_name)
|
||||
|
||||
requires = generator.requires_image()
|
||||
logger.debug(f"Template '{frame_template}' requires_image={requires}")
|
||||
|
||||
return requires
|
||||
logger.debug(f"Template '{frame_template}' is {template_type} template")
|
||||
return template_type
|
||||
|
||||
|
||||
133
pixelle_video/prompts/video_generation.py
Normal file
133
pixelle_video/prompts/video_generation.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# Copyright (C) 2025 AIDC-AI
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Video prompt generation template
|
||||
|
||||
For generating video prompts from narrations.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
|
||||
# Prompt template sent to the LLM to turn narrations into video prompts.
# Placeholders filled by build_video_prompt_prompt() via str.format():
#   {narrations_json}          - JSON object of the form {"narrations": [...]}
#   {narrations_count}         - number of narrations (and of expected prompts)
#   {min_words} / {max_words}  - suggested length range of each prompt
# Literal braces in the JSON examples are doubled ({{ }}) so str.format()
# leaves them as single braces in the final prompt.
VIDEO_PROMPT_GENERATION_PROMPT = """# 角色定位
你是一个专业的视频创意设计师,擅长为视频脚本创作富有动感和表现力的视频生成提示词,将叙述内容转化为生动的视频画面。

# 核心任务
基于已有的视频脚本,为每个分镜的"旁白内容"创作对应的**英文**视频生成提示词,确保视频画面与叙述内容完美配合,通过动态画面增强观众的理解和记忆。

**重要:输入包含 {narrations_count} 个旁白,你必须为每个旁白都生成一个对应的视频提示词,总共输出 {narrations_count} 个视频提示词。**

# 输入内容
{narrations_json}

# 输出要求

## 视频提示词规范
- 语言:**必须使用英文**(用于 AI 视频生成模型)
- 描述结构:scene + character action + camera movement + emotion + atmosphere
- 描述长度:确保描述清晰完整且富有创意(建议 {min_words}-{max_words} 个英文单词)
- 动态元素:强调动作、运动、变化等动态效果

## 视觉创意要求
- 每个视频都要准确反映对应旁白的具体内容和情感
- 突出画面的动态性:角色动作、物体运动、镜头移动、场景转换等
- 使用象征手法将抽象概念视觉化(如用流动的水代表时间流逝,用上升的阶梯代表进步等)
- 画面要表现出丰富的情感和动作,增强视觉冲击力
- 通过镜头语言(推拉摇移)和剪辑节奏增强表现力

## 关键英文词汇参考
- 动作:moving, running, flowing, transforming, growing, falling
- 镜头:camera pan, zoom in, zoom out, tracking shot, aerial view
- 转场:transition, fade in, fade out, dissolve
- 氛围:dynamic, energetic, peaceful, dramatic, mysterious
- 光影:lighting changes, shadows moving, sunlight streaming

## 视频与文案配合原则
- 视频要服务于文案,成为文案内容的视觉延伸
- 避免与文案内容无关或矛盾的视觉元素
- 选择最能增强文案说服力的动态表现方式
- 确保观众能通过视频动态快速理解文案的核心观点

## 创意指导
1. **现象描述类文案**:用动态场景表现社会现象的发生过程
2. **原因分析类文案**:用因果关系的动态演变表现内在逻辑
3. **影响论证类文案**:用后果场景的动态展开或对比表现影响程度
4. **深入探讨类文案**:用抽象概念的动态具象化表现深刻思考
5. **结论启发类文案**:用开放式动态场景或指引性运动表现启发性

## 视频特有注意事项
- 强调动态:每个视频都应该包含明显的动作或运动
- 镜头语言:适当使用推拉摇移等镜头技巧增强表现力
- 时长考虑:视频应该是连贯的动态过程,不是静态画面
- 流畅性:注意动作的流畅性和自然性

# 输出格式
严格按照以下JSON格式输出,**视频提示词必须是英文**:

```json
{{
  "video_prompts": [
    "[detailed English video prompt with dynamic elements and camera movements]",
    "[detailed English video prompt with dynamic elements and camera movements]"
  ]
}}
```

# 重要提醒
1. 只输出JSON格式内容,不要添加任何解释说明
2. 确保JSON格式严格正确,可以被程序直接解析
3. 输入是 {{"narrations": [旁白数组]}} 格式,输出是 {{"video_prompts": [视频提示词数组]}} 格式
4. **输出的video_prompts数组必须恰好包含 {narrations_count} 个元素,与输入的narrations数组一一对应**
5. **视频提示词必须使用英文**(for AI video generation models)
6. 视频提示词必须准确反映对应旁白的具体内容和情感
7. 每个视频都要强调动态性和运动感,避免静态描述
8. 适当使用镜头语言增强表现力
9. 确保视频画面能增强文案的说服力和观众的理解度

现在,请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**视频提示词。只输出JSON,不要其他内容。
"""


def build_video_prompt_prompt(
    narrations: List[str],
    min_words: int,
    max_words: int
) -> str:
    """
    Build video prompt generation prompt

    Serializes the narrations as a JSON object ({"narrations": [...]}) and
    substitutes it, the narration count and the suggested word range into
    VIDEO_PROMPT_GENERATION_PROMPT.

    Args:
        narrations: List of narrations (one video prompt is requested per item)
        min_words: Minimum suggested word count of each video prompt
        max_words: Maximum suggested word count of each video prompt

    Returns:
        Formatted prompt for LLM

    Example:
        prompt = build_video_prompt_prompt(narrations, 50, 100)
    """
    # ensure_ascii=False keeps non-ASCII narration text readable in the prompt
    narrations_json = json.dumps(
        {"narrations": narrations},
        ensure_ascii=False,
        indent=2
    )

    return VIDEO_PROMPT_GENERATION_PROMPT.format(
        narrations_json=narrations_json,
        narrations_count=len(narrations),
        min_words=min_words,
        max_words=max_words
    )
|
||||
|
||||
@@ -23,7 +23,7 @@ from loguru import logger
|
||||
from pixelle_video.config import config_manager
|
||||
from pixelle_video.services.llm_service import LLMService
|
||||
from pixelle_video.services.tts_service import TTSService
|
||||
from pixelle_video.services.image import ImageService
|
||||
from pixelle_video.services.media import MediaService
|
||||
from pixelle_video.services.video import VideoService
|
||||
from pixelle_video.services.frame_processor import FrameProcessor
|
||||
from pixelle_video.pipelines.standard import StandardPipeline
|
||||
@@ -45,7 +45,7 @@ class PixelleVideoCore:
|
||||
# Use capabilities directly
|
||||
answer = await pixelle_video.llm("Explain atomic habits")
|
||||
audio = await pixelle_video.tts("Hello world")
|
||||
image = await pixelle_video.image(prompt="a cat")
|
||||
media = await pixelle_video.media(prompt="a cat")
|
||||
|
||||
# Check active capabilities
|
||||
print(f"Using LLM: {pixelle_video.llm.active}")
|
||||
@@ -56,7 +56,7 @@ class PixelleVideoCore:
|
||||
├── config (configuration)
|
||||
├── llm (LLM service - direct OpenAI SDK)
|
||||
├── tts (TTS service - ComfyKit workflows)
|
||||
├── image (Image service - ComfyKit workflows)
|
||||
├── media (Media service - ComfyKit workflows, supports image & video)
|
||||
└── pipelines (video generation pipelines)
|
||||
├── standard (standard workflow)
|
||||
├── custom (custom workflow template)
|
||||
@@ -77,7 +77,7 @@ class PixelleVideoCore:
|
||||
# Core services (initialized in initialize())
|
||||
self.llm: Optional[LLMService] = None
|
||||
self.tts: Optional[TTSService] = None
|
||||
self.image: Optional[ImageService] = None
|
||||
self.media: Optional[MediaService] = None
|
||||
self.video: Optional[VideoService] = None
|
||||
self.frame_processor: Optional[FrameProcessor] = None
|
||||
|
||||
@@ -105,7 +105,7 @@ class PixelleVideoCore:
|
||||
# 1. Initialize core services
|
||||
self.llm = LLMService(self.config)
|
||||
self.tts = TTSService(self.config)
|
||||
self.image = ImageService(self.config)
|
||||
self.media = MediaService(self.config)
|
||||
self.video = VideoService()
|
||||
self.frame_processor = FrameProcessor(self)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ Core services providing atomic capabilities.
|
||||
Services:
|
||||
- LLMService: LLM text generation
|
||||
- TTSService: Text-to-speech
|
||||
- ImageService: Image generation
|
||||
- MediaService: Media generation (image & video)
|
||||
- VideoService: Video processing
|
||||
- FrameProcessor: Frame processing orchestrator
|
||||
- ComfyBaseService: Base class for ComfyUI-based services
|
||||
@@ -27,15 +27,19 @@ Services:
|
||||
from pixelle_video.services.comfy_base_service import ComfyBaseService
|
||||
from pixelle_video.services.llm_service import LLMService
|
||||
from pixelle_video.services.tts_service import TTSService
|
||||
from pixelle_video.services.image import ImageService
|
||||
from pixelle_video.services.media import MediaService
|
||||
from pixelle_video.services.video import VideoService
|
||||
from pixelle_video.services.frame_processor import FrameProcessor
|
||||
|
||||
# Backward compatibility alias
|
||||
ImageService = MediaService
|
||||
|
||||
__all__ = [
|
||||
"ComfyBaseService",
|
||||
"LLMService",
|
||||
"TTSService",
|
||||
"ImageService",
|
||||
"MediaService",
|
||||
"ImageService", # Backward compatibility
|
||||
"VideoService",
|
||||
"FrameProcessor",
|
||||
]
|
||||
|
||||
@@ -77,21 +77,6 @@ class HTMLFrameGenerator:
|
||||
self._check_linux_dependencies()
|
||||
logger.debug(f"Loaded HTML template: {template_path} (size: {self.width}x{self.height})")
|
||||
|
||||
def requires_image(self) -> bool:
|
||||
"""
|
||||
Detect if template requires {{image}} parameter
|
||||
|
||||
This method checks if the template uses the {{image}} variable.
|
||||
If the template doesn't use images, the entire image generation
|
||||
pipeline can be skipped, significantly improving:
|
||||
- Generation speed (no image generation API calls)
|
||||
- Cost efficiency (no LLM calls for image prompts)
|
||||
- Dependency requirements (no ComfyUI needed)
|
||||
|
||||
Returns:
|
||||
True if template contains {{image}}, False otherwise
|
||||
"""
|
||||
return '{{image}}' in self.template
|
||||
|
||||
def _check_linux_dependencies(self):
|
||||
"""Check Linux system dependencies and warn if missing"""
|
||||
@@ -141,6 +126,58 @@ class HTMLFrameGenerator:
|
||||
logger.debug(f"Template loaded: {len(content)} chars")
|
||||
return content
|
||||
|
||||
def _parse_media_size_from_meta(self) -> tuple[Optional[int], Optional[int]]:
    """
    Read the media size declared by the template's meta tags

    Expected tags:
    - <meta name="template:media-width" content="1024">
    - <meta name="template:media-height" content="1024">

    Returns:
        (width, height) when both tags exist and both values are positive
        integers; (None, None) when a tag is missing, a value is not
        positive, or anything goes wrong while parsing (a warning is logged
        in that last case).
    """
    from bs4 import BeautifulSoup

    not_found = (None, None)

    try:
        document = BeautifulSoup(self.template, 'html.parser')

        # Look up both tags first, then decide
        width_tag = document.find('meta', attrs={'name': 'template:media-width'})
        height_tag = document.find('meta', attrs={'name': 'template:media-height'})

        if not (width_tag and height_tag):
            return not_found

        # A missing content attribute counts as 0, which is rejected below
        width = int(width_tag.get('content', 0))
        height = int(height_tag.get('content', 0))

        if not (width > 0 and height > 0):
            return not_found

        logger.debug(f"Found media size in meta tags: {width}x{height}")
        return width, height

    except Exception as e:
        # Best-effort: a malformed template must not break the caller
        logger.warning(f"Failed to parse media size from meta tags: {e}")
        return not_found
|
||||
|
||||
def get_media_size(self) -> tuple[int, int]:
    """
    Return the media size to use for image/video generation

    Uses the size declared in the template's meta tags when available.

    Returns:
        Tuple of (width, height); (1024, 1024) when the template does not
        declare a usable size (a warning is logged in that case).
    """
    width, height = self._parse_media_size_from_meta()

    if not (width and height):
        # Properly configured templates should never reach this fallback
        logger.warning(f"No media size meta tags found in template {self.template_path}, using fallback 1024x1024")
        return 1024, 1024

    return width, height
|
||||
|
||||
def parse_template_parameters(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
Parse custom parameters from HTML template
|
||||
|
||||
@@ -84,7 +84,7 @@ class FrameProcessor:
|
||||
))
|
||||
await self._step_generate_audio(frame, config)
|
||||
|
||||
# Step 2: Generate image (conditional)
|
||||
# Step 2: Generate media (image or video, conditional)
|
||||
if needs_image:
|
||||
if progress_callback:
|
||||
progress_callback(ProgressEvent(
|
||||
@@ -93,12 +93,13 @@ class FrameProcessor:
|
||||
frame_current=frame_num,
|
||||
frame_total=total_frames,
|
||||
step=2,
|
||||
action="image"
|
||||
action="media"
|
||||
))
|
||||
await self._step_generate_image(frame, config)
|
||||
await self._step_generate_media(frame, config)
|
||||
else:
|
||||
frame.image_path = None
|
||||
logger.debug(f" 2/4: Skipped image generation (not required by template)")
|
||||
frame.media_type = None
|
||||
logger.debug(f" 2/4: Skipped media generation (not required by template)")
|
||||
|
||||
# Step 3: Compose frame (add subtitle)
|
||||
if progress_callback:
|
||||
@@ -176,27 +177,66 @@ class FrameProcessor:
|
||||
|
||||
logger.debug(f" ✓ Audio generated: {audio_path} ({frame.duration:.2f}s)")
|
||||
|
||||
async def _step_generate_image(
|
||||
async def _step_generate_media(
|
||||
self,
|
||||
frame: StoryboardFrame,
|
||||
config: StoryboardConfig
|
||||
):
|
||||
"""Step 2: Generate image using ComfyKit"""
|
||||
logger.debug(f" 2/4: Generating image for frame {frame.index}...")
|
||||
"""Step 2: Generate media (image or video) using ComfyKit"""
|
||||
logger.debug(f" 2/4: Generating media for frame {frame.index}...")
|
||||
|
||||
# Call Image generation (with optional preset)
|
||||
image_url = await self.core.image(
|
||||
# Determine media type based on workflow
|
||||
# video_ prefix in workflow name indicates video generation
|
||||
workflow_name = config.image_workflow or ""
|
||||
is_video_workflow = "video_" in workflow_name.lower()
|
||||
media_type = "video" if is_video_workflow else "image"
|
||||
|
||||
logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})")
|
||||
|
||||
# Call Media generation (with optional preset)
|
||||
media_result = await self.core.media(
|
||||
prompt=frame.image_prompt,
|
||||
workflow=config.image_workflow, # Pass workflow from config (None = use default)
|
||||
media_type=media_type,
|
||||
width=config.image_width,
|
||||
height=config.image_height
|
||||
)
|
||||
|
||||
# Download image to local (pass task_id)
|
||||
local_path = await self._download_image(image_url, frame.index, config.task_id)
|
||||
frame.image_path = local_path
|
||||
# Store media type
|
||||
frame.media_type = media_result.media_type
|
||||
|
||||
logger.debug(f" ✓ Image generated: {local_path}")
|
||||
if media_result.is_image:
|
||||
# Download image to local (pass task_id)
|
||||
local_path = await self._download_media(
|
||||
media_result.url,
|
||||
frame.index,
|
||||
config.task_id,
|
||||
media_type="image"
|
||||
)
|
||||
frame.image_path = local_path
|
||||
logger.debug(f" ✓ Image generated: {local_path}")
|
||||
|
||||
elif media_result.is_video:
|
||||
# Download video to local (pass task_id)
|
||||
local_path = await self._download_media(
|
||||
media_result.url,
|
||||
frame.index,
|
||||
config.task_id,
|
||||
media_type="video"
|
||||
)
|
||||
frame.video_path = local_path
|
||||
|
||||
# Update duration from video if available
|
||||
if media_result.duration:
|
||||
frame.duration = media_result.duration
|
||||
logger.debug(f" ✓ Video generated: {local_path} (duration: {frame.duration:.2f}s)")
|
||||
else:
|
||||
# Get video duration from file
|
||||
frame.duration = await self._get_video_duration(local_path)
|
||||
logger.debug(f" ✓ Video generated: {local_path} (duration: {frame.duration:.2f}s)")
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown media type: {media_result.media_type}")
|
||||
|
||||
async def _step_compose_frame(
|
||||
self,
|
||||
@@ -211,7 +251,9 @@ class FrameProcessor:
|
||||
from pixelle_video.utils.os_util import get_task_frame_path
|
||||
output_path = get_task_frame_path(config.task_id, frame.index, "composed")
|
||||
|
||||
# Use HTML template to compose frame
|
||||
# For video type: render HTML as transparent overlay image
|
||||
# For image type: render HTML with image background
|
||||
# In both cases, we need the composed image
|
||||
composed_path = await self._compose_frame_html(frame, storyboard, config, output_path)
|
||||
|
||||
frame.composed_image_path = composed_path
|
||||
@@ -264,23 +306,60 @@ class FrameProcessor:
|
||||
frame: StoryboardFrame,
|
||||
config: StoryboardConfig
|
||||
):
|
||||
"""Step 4: Create video segment from image + audio"""
|
||||
"""Step 4: Create video segment from media + audio"""
|
||||
logger.debug(f" 4/4: Creating video segment for frame {frame.index}...")
|
||||
|
||||
# Generate output path using task_id
|
||||
from pixelle_video.utils.os_util import get_task_frame_path
|
||||
output_path = get_task_frame_path(config.task_id, frame.index, "segment")
|
||||
|
||||
# Call video compositor to create video from image + audio
|
||||
from pixelle_video.services.video import VideoService
|
||||
video_service = VideoService()
|
||||
|
||||
segment_path = video_service.create_video_from_image(
|
||||
image=frame.composed_image_path,
|
||||
audio=frame.audio_path,
|
||||
output=output_path,
|
||||
fps=config.video_fps
|
||||
)
|
||||
# Branch based on media type
|
||||
if frame.media_type == "video":
|
||||
# Video workflow: overlay HTML template on video, then add audio
|
||||
logger.debug(f" → Using video-based composition with HTML overlay")
|
||||
|
||||
# Step 1: Overlay transparent HTML image on video
|
||||
# The composed_image_path contains the rendered HTML with transparent background
|
||||
temp_video_with_overlay = get_task_frame_path(config.task_id, frame.index, "video") + "_overlay.mp4"
|
||||
|
||||
video_service.overlay_image_on_video(
|
||||
video=frame.video_path,
|
||||
overlay_image=frame.composed_image_path,
|
||||
output=temp_video_with_overlay,
|
||||
scale_mode="contain" # Scale video to fit template size (contain mode)
|
||||
)
|
||||
|
||||
# Step 2: Add narration audio to the overlaid video
|
||||
# Note: The video might have audio (replaced) or be silent (audio added)
|
||||
segment_path = video_service.merge_audio_video(
|
||||
video=temp_video_with_overlay,
|
||||
audio=frame.audio_path,
|
||||
output=output_path,
|
||||
replace_audio=True, # Replace video audio with narration
|
||||
audio_volume=1.0
|
||||
)
|
||||
|
||||
# Clean up temp file
|
||||
import os
|
||||
if os.path.exists(temp_video_with_overlay):
|
||||
os.unlink(temp_video_with_overlay)
|
||||
|
||||
elif frame.media_type == "image" or frame.media_type is None:
|
||||
# Image workflow: create video from image + audio
|
||||
logger.debug(f" → Using image-based composition")
|
||||
|
||||
segment_path = video_service.create_video_from_image(
|
||||
image=frame.composed_image_path,
|
||||
audio=frame.audio_path,
|
||||
output=output_path,
|
||||
fps=config.video_fps
|
||||
)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown media type: {frame.media_type}")
|
||||
|
||||
frame.video_segment_path = segment_path
|
||||
|
||||
@@ -303,10 +382,16 @@ class FrameProcessor:
|
||||
estimated_duration = file_size / 2000
|
||||
return max(1.0, estimated_duration) # At least 1 second
|
||||
|
||||
async def _download_image(self, url: str, frame_index: int, task_id: str) -> str:
|
||||
"""Download image from URL to local file"""
|
||||
async def _download_media(
|
||||
self,
|
||||
url: str,
|
||||
frame_index: int,
|
||||
task_id: str,
|
||||
media_type: str
|
||||
) -> str:
|
||||
"""Download media (image or video) from URL to local file"""
|
||||
from pixelle_video.utils.os_util import get_task_frame_path
|
||||
output_path = get_task_frame_path(task_id, frame_index, "image")
|
||||
output_path = get_task_frame_path(task_id, frame_index, media_type)
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(url)
|
||||
@@ -316,4 +401,16 @@ class FrameProcessor:
|
||||
f.write(response.content)
|
||||
|
||||
return output_path
|
||||
|
||||
async def _get_video_duration(self, video_path: str) -> float:
    """
    Get video duration in seconds

    Reads the `format.duration` field from the result of `ffmpeg.probe`.

    Args:
        video_path: Path of the video file to probe

    Returns:
        Duration in seconds, or 1.0 when the duration cannot be determined
        (ffmpeg missing, probe failure, unexpected probe output, ...)
    """
    try:
        import ffmpeg
        probe = ffmpeg.probe(video_path)
        return float(probe['format']['duration'])
    except Exception as e:
        # Best-effort: only the video path is known here, so there is no
        # audio duration to fall back to. Use a fixed default and say so in
        # the log (the previous message claimed the audio duration was used).
        logger.warning(f"Failed to get video duration: {e}, using default duration of 1.0s")
        return 1.0
|
||||
|
||||
|
||||
@@ -1,192 +0,0 @@
|
||||
# Copyright (C) 2025 AIDC-AI
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Image Generation Service - ComfyUI Workflow-based implementation
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from comfykit import ComfyKit
|
||||
from loguru import logger
|
||||
|
||||
from pixelle_video.services.comfy_base_service import ComfyBaseService
|
||||
|
||||
|
||||
class ImageService(ComfyBaseService):
|
||||
"""
|
||||
Image generation service - Workflow-based
|
||||
|
||||
Uses ComfyKit to execute image generation workflows.
|
||||
|
||||
Usage:
|
||||
# Use default workflow (workflows/image_flux.json)
|
||||
image_url = await pixelle_video.image(prompt="a cat")
|
||||
|
||||
# Use specific workflow
|
||||
image_url = await pixelle_video.image(
|
||||
prompt="a cat",
|
||||
workflow="image_flux.json"
|
||||
)
|
||||
|
||||
# List available workflows
|
||||
workflows = pixelle_video.image.list_workflows()
|
||||
"""
|
||||
|
||||
WORKFLOW_PREFIX = "image_"
|
||||
DEFAULT_WORKFLOW = None # No hardcoded default, must be configured
|
||||
WORKFLOWS_DIR = "workflows"
|
||||
|
||||
def __init__(self, config: dict):
|
||||
"""
|
||||
Initialize image service
|
||||
|
||||
Args:
|
||||
config: Full application config dict
|
||||
"""
|
||||
super().__init__(config, service_name="image")
|
||||
|
||||
async def __call__(
|
||||
self,
|
||||
prompt: str,
|
||||
workflow: Optional[str] = None,
|
||||
# ComfyUI connection (optional overrides)
|
||||
comfyui_url: Optional[str] = None,
|
||||
runninghub_api_key: Optional[str] = None,
|
||||
# Common workflow parameters
|
||||
width: Optional[int] = None,
|
||||
height: Optional[int] = None,
|
||||
negative_prompt: Optional[str] = None,
|
||||
steps: Optional[int] = None,
|
||||
seed: Optional[int] = None,
|
||||
cfg: Optional[float] = None,
|
||||
sampler: Optional[str] = None,
|
||||
**params
|
||||
) -> str:
|
||||
"""
|
||||
Generate image using workflow
|
||||
|
||||
Args:
|
||||
prompt: Image generation prompt
|
||||
workflow: Workflow filename (default: from config or "image_flux.json")
|
||||
comfyui_url: ComfyUI URL (optional, overrides config)
|
||||
runninghub_api_key: RunningHub API key (optional, overrides config)
|
||||
width: Image width
|
||||
height: Image height
|
||||
negative_prompt: Negative prompt
|
||||
steps: Sampling steps
|
||||
seed: Random seed
|
||||
cfg: CFG scale
|
||||
sampler: Sampler name
|
||||
**params: Additional workflow parameters
|
||||
|
||||
Returns:
|
||||
Generated image URL/path
|
||||
|
||||
Examples:
|
||||
# Simplest: use default workflow (workflows/image_flux.json)
|
||||
image_url = await pixelle_video.image(prompt="a beautiful cat")
|
||||
|
||||
# Use specific workflow
|
||||
image_url = await pixelle_video.image(
|
||||
prompt="a cat",
|
||||
workflow="image_flux.json"
|
||||
)
|
||||
|
||||
# With additional parameters
|
||||
image_url = await pixelle_video.image(
|
||||
prompt="a cat",
|
||||
workflow="image_flux.json",
|
||||
width=1024,
|
||||
height=1024,
|
||||
steps=20,
|
||||
seed=42
|
||||
)
|
||||
|
||||
# With absolute path
|
||||
image_url = await pixelle_video.image(
|
||||
prompt="a cat",
|
||||
workflow="/path/to/custom.json"
|
||||
)
|
||||
|
||||
# With custom ComfyUI server
|
||||
image_url = await pixelle_video.image(
|
||||
prompt="a cat",
|
||||
comfyui_url="http://192.168.1.100:8188"
|
||||
)
|
||||
"""
|
||||
# 1. Resolve workflow (returns structured info)
|
||||
workflow_info = self._resolve_workflow(workflow=workflow)
|
||||
|
||||
# 2. Prepare ComfyKit config (supports both selfhost and runninghub)
|
||||
kit_config = self._prepare_comfykit_config(
|
||||
comfyui_url=comfyui_url,
|
||||
runninghub_api_key=runninghub_api_key
|
||||
)
|
||||
|
||||
# 3. Build workflow parameters
|
||||
workflow_params = {"prompt": prompt}
|
||||
|
||||
# Add optional parameters
|
||||
if width is not None:
|
||||
workflow_params["width"] = width
|
||||
if height is not None:
|
||||
workflow_params["height"] = height
|
||||
if negative_prompt is not None:
|
||||
workflow_params["negative_prompt"] = negative_prompt
|
||||
if steps is not None:
|
||||
workflow_params["steps"] = steps
|
||||
if seed is not None:
|
||||
workflow_params["seed"] = seed
|
||||
if cfg is not None:
|
||||
workflow_params["cfg"] = cfg
|
||||
if sampler is not None:
|
||||
workflow_params["sampler"] = sampler
|
||||
|
||||
# Add any additional parameters
|
||||
workflow_params.update(params)
|
||||
|
||||
logger.debug(f"Workflow parameters: {workflow_params}")
|
||||
|
||||
# 4. Execute workflow (ComfyKit auto-detects based on input type)
|
||||
try:
|
||||
kit = ComfyKit(**kit_config)
|
||||
|
||||
# Determine what to pass to ComfyKit based on source
|
||||
if workflow_info["source"] == "runninghub" and "workflow_id" in workflow_info:
|
||||
# RunningHub: pass workflow_id (ComfyKit will use runninghub backend)
|
||||
workflow_input = workflow_info["workflow_id"]
|
||||
logger.info(f"Executing RunningHub workflow: {workflow_input}")
|
||||
else:
|
||||
# Selfhost: pass file path (ComfyKit will use local ComfyUI)
|
||||
workflow_input = workflow_info["path"]
|
||||
logger.info(f"Executing selfhost workflow: {workflow_input}")
|
||||
|
||||
result = await kit.execute(workflow_input, workflow_params)
|
||||
|
||||
# 5. Handle result
|
||||
if result.status != "completed":
|
||||
error_msg = result.msg or "Unknown error"
|
||||
logger.error(f"Image generation failed: {error_msg}")
|
||||
raise Exception(f"Image generation failed: {error_msg}")
|
||||
|
||||
if not result.images:
|
||||
logger.error("No images generated")
|
||||
raise Exception("No images generated")
|
||||
|
||||
image_url = result.images[0]
|
||||
logger.info(f"✅ Generated image: {image_url}")
|
||||
return image_url
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Image generation error: {e}")
|
||||
raise
|
||||
285
pixelle_video/services/media.py
Normal file
285
pixelle_video/services/media.py
Normal file
@@ -0,0 +1,285 @@
|
||||
# Copyright (C) 2025 AIDC-AI
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Media Generation Service - ComfyUI Workflow-based implementation
|
||||
|
||||
Supports both image and video generation workflows.
|
||||
Automatically detects output type based on ExecuteResult.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from comfykit import ComfyKit
|
||||
from loguru import logger
|
||||
|
||||
from pixelle_video.services.comfy_base_service import ComfyBaseService
|
||||
from pixelle_video.models.media import MediaResult
|
||||
|
||||
|
||||
class MediaService(ComfyBaseService):
    """
    Media generation service - Workflow-based

    Uses ComfyKit to execute image/video generation workflows.
    Supports both image_ and video_ workflow prefixes.

    The caller chooses which output to read via ``media_type``; it is NOT
    inferred from the workflow name or from the execution result.

    Usage:
        # Image generation (media_type defaults to "image")
        media = await pixelle_video.media(prompt="a cat")
        if media.is_image:
            print(f"Generated image: {media.url}")

        # Use specific workflow
        media = await pixelle_video.media(
            prompt="a cat",
            workflow="image_flux.json"
        )

        # Video generation: media_type must be passed explicitly
        media = await pixelle_video.media(
            prompt="a cat running",
            workflow="video_custom.json",
            media_type="video"
        )
        if media.is_video:
            print(f"Generated video: {media.url} ({media.duration}s)")

        # List available workflows
        workflows = pixelle_video.media.list_workflows()
    """

    # Empty on purpose: _scan_workflows below does its own image_/video_ filtering
    WORKFLOW_PREFIX = ""
    DEFAULT_WORKFLOW = None  # No hardcoded default, must be configured
    WORKFLOWS_DIR = "workflows"

    def __init__(self, config: dict):
        """
        Initialize media service

        Args:
            config: Full application config dict
        """
        super().__init__(config, service_name="image")  # Keep "image" for config compatibility

    def _scan_workflows(self):
        """
        Scan workflows for both image_ and video_ prefixes

        Override parent method to support multiple prefixes.

        Returns:
            List of workflow info dicts (as produced by _parse_workflow_file),
            sorted by their "key"; empty list when no source directory exists.
            Files that fail to parse are logged and skipped.
        """
        from pixelle_video.utils.os_util import list_resource_dirs, list_resource_files, get_resource_path
        from pathlib import Path

        workflows = []

        # Get all workflow source directories
        source_dirs = list_resource_dirs("workflows")

        if not source_dirs:
            logger.warning("No workflow source directories found")
            return workflows

        # Scan each source directory for workflow files
        for source_name in source_dirs:
            # Get all files for this source
            workflow_files = list_resource_files("workflows", source_name)

            # Keep only JSON files named image_* or video_*
            matching_files = [
                f for f in workflow_files
                if f.startswith(("image_", "video_")) and f.endswith('.json')
            ]

            for filename in matching_files:
                try:
                    # Get actual file path
                    file_path = Path(get_resource_path("workflows", source_name, filename))
                    workflow_info = self._parse_workflow_file(file_path, source_name)
                    workflows.append(workflow_info)
                    logger.debug(f"Found workflow: {workflow_info['key']}")
                except Exception as e:
                    # One broken workflow file must not hide the others
                    logger.error(f"Failed to parse workflow {source_name}/{filename}: {e}")

        # Sort by key (source/name)
        return sorted(workflows, key=lambda w: w["key"])

    async def __call__(
        self,
        prompt: str,
        workflow: Optional[str] = None,
        # Media type specification (required for proper handling)
        media_type: str = "image",  # "image" or "video"
        # ComfyUI connection (optional overrides)
        comfyui_url: Optional[str] = None,
        runninghub_api_key: Optional[str] = None,
        # Common workflow parameters
        width: Optional[int] = None,
        height: Optional[int] = None,
        negative_prompt: Optional[str] = None,
        steps: Optional[int] = None,
        seed: Optional[int] = None,
        cfg: Optional[float] = None,
        sampler: Optional[str] = None,
        **params
    ) -> MediaResult:
        """
        Generate media (image or video) using workflow

        Media type must be specified explicitly via media_type parameter:
        "video" reads the workflow's video outputs, any other value reads
        its image outputs.

        Args:
            prompt: Media generation prompt
            workflow: Workflow to run; passed to _resolve_workflow (None lets it pick the configured default)
            media_type: Type of media to generate - "image" or "video" (default: "image")
            comfyui_url: ComfyUI URL (optional, overrides config)
            runninghub_api_key: RunningHub API key (optional, overrides config)
            width: Media width
            height: Media height
            negative_prompt: Negative prompt
            steps: Sampling steps
            seed: Random seed
            cfg: CFG scale
            sampler: Sampler name
            **params: Additional workflow parameters (override the named ones on key clash)

        Returns:
            MediaResult object with media_type ("image" or "video") and url.
            For videos, duration is set when the execution result exposes a
            non-empty ``duration``; otherwise it is None.

        Raises:
            Exception: If the workflow does not complete, or completes without
                an output of the requested media type. Errors are logged and
                re-raised.

        Examples:
            # Simplest: image generation with the configured workflow
            media = await pixelle_video.media(prompt="a beautiful cat")
            if media.is_image:
                print(f"Image: {media.url}")

            # Use specific workflow
            media = await pixelle_video.media(
                prompt="a cat",
                workflow="image_flux.json"
            )

            # Video workflow (media_type is required to get a video back)
            media = await pixelle_video.media(
                prompt="a cat running",
                workflow="video_custom.json",
                media_type="video"
            )
            if media.is_video:
                print(f"Video: {media.url}, duration: {media.duration}s")

            # With additional parameters
            media = await pixelle_video.media(
                prompt="a cat",
                workflow="image_flux.json",
                width=1024,
                height=1024,
                steps=20,
                seed=42
            )

            # With absolute path
            media = await pixelle_video.media(
                prompt="a cat",
                workflow="/path/to/custom.json"
            )

            # With custom ComfyUI server
            media = await pixelle_video.media(
                prompt="a cat",
                comfyui_url="http://192.168.1.100:8188"
            )
        """
        # 1. Resolve workflow (returns structured info)
        workflow_info = self._resolve_workflow(workflow=workflow)

        # 2. Prepare ComfyKit config (supports both selfhost and runninghub)
        kit_config = self._prepare_comfykit_config(
            comfyui_url=comfyui_url,
            runninghub_api_key=runninghub_api_key
        )

        # 3. Build workflow parameters: prompt first, then every optional
        #    parameter that was actually provided, then free-form extras
        optional_params = {
            "width": width,
            "height": height,
            "negative_prompt": negative_prompt,
            "steps": steps,
            "seed": seed,
            "cfg": cfg,
            "sampler": sampler,
        }
        workflow_params = {"prompt": prompt}
        workflow_params.update(
            {name: value for name, value in optional_params.items() if value is not None}
        )
        workflow_params.update(params)

        logger.debug(f"Workflow parameters: {workflow_params}")

        # 4. Execute workflow (ComfyKit auto-detects based on input type)
        try:
            kit = ComfyKit(**kit_config)

            # Determine what to pass to ComfyKit based on source
            if workflow_info["source"] == "runninghub" and "workflow_id" in workflow_info:
                # RunningHub: pass workflow_id (ComfyKit will use runninghub backend)
                workflow_input = workflow_info["workflow_id"]
                logger.info(f"Executing RunningHub workflow: {workflow_input}")
            else:
                # Selfhost: pass file path (ComfyKit will use local ComfyUI)
                workflow_input = workflow_info["path"]
                logger.info(f"Executing selfhost workflow: {workflow_input}")

            result = await kit.execute(workflow_input, workflow_params)

            # 5. Handle result based on specified media_type
            if result.status != "completed":
                error_msg = result.msg or "Unknown error"
                logger.error(f"Media generation failed: {error_msg}")
                raise Exception(f"Media generation failed: {error_msg}")

            if media_type == "video":
                # Video workflow - get video from result
                if not result.videos:
                    logger.error("No video generated (workflow returned no videos)")
                    raise Exception("No video generated")

                video_url = result.videos[0]
                logger.info(f"✅ Generated video: {video_url}")

                # Duration is optional on the result; a missing or empty value becomes None
                duration = getattr(result, 'duration', None) or None

                return MediaResult(
                    media_type="video",
                    url=video_url,
                    duration=duration
                )

            # Anything other than "video" is treated as an image request
            if not result.images:
                logger.error("No image generated (workflow returned no images)")
                raise Exception("No image generated")

            image_url = result.images[0]
            logger.info(f"✅ Generated image: {image_url}")

            return MediaResult(
                media_type="image",
                url=image_url
            )

        except Exception as e:
            logger.error(f"Media generation error: {e}")
            raise
|
||||
@@ -224,20 +224,88 @@ class VideoService:
|
||||
-map "[v]" -map "[a]" output.mp4
|
||||
"""
|
||||
try:
|
||||
inputs = [ffmpeg.input(v) for v in videos]
|
||||
(
|
||||
ffmpeg
|
||||
.concat(*inputs, v=1, a=1)
|
||||
.output(output)
|
||||
.overwrite_output()
|
||||
.run(capture_stdout=True, capture_stderr=True)
|
||||
# Build filter_complex string manually
|
||||
n = len(videos)
|
||||
|
||||
# Build input stream labels: [0:v][0:a][1:v][1:a]...
|
||||
stream_spec = "".join([f"[{i}:v][{i}:a]" for i in range(n)])
|
||||
filter_complex = f"{stream_spec}concat=n={n}:v=1:a=1[v][a]"
|
||||
|
||||
# Build ffmpeg command
|
||||
cmd = ['ffmpeg']
|
||||
for video in videos:
|
||||
cmd.extend(['-i', video])
|
||||
cmd.extend([
|
||||
'-filter_complex', filter_complex,
|
||||
'-map', '[v]',
|
||||
'-map', '[a]',
|
||||
'-y', # Overwrite output
|
||||
output
|
||||
])
|
||||
|
||||
# Run command
|
||||
import subprocess
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True
|
||||
)
|
||||
|
||||
logger.success(f"Videos concatenated successfully: {output}")
|
||||
return output
|
||||
except ffmpeg.Error as e:
|
||||
error_msg = e.stderr.decode() if e.stderr else str(e)
|
||||
except subprocess.CalledProcessError as e:
|
||||
error_msg = e.stderr if e.stderr else str(e)
|
||||
logger.error(f"FFmpeg concat filter error: {error_msg}")
|
||||
raise RuntimeError(f"Failed to concatenate videos: {error_msg}")
|
||||
except Exception as e:
|
||||
logger.error(f"Concatenation error: {e}")
|
||||
raise RuntimeError(f"Failed to concatenate videos: {e}")
|
||||
|
||||
def _get_video_duration(self, video: str) -> float:
    """
    Return the duration of a video file in seconds.

    Args:
        video: Video file path

    Returns:
        The ``format.duration`` value from ``ffmpeg.probe`` as a float,
        or 0.0 if probing or parsing fails (a warning is logged).
    """
    try:
        container = ffmpeg.probe(video)['format']
        return float(container['duration'])
    except Exception as err:
        # Best effort: callers get 0.0 instead of an exception.
        logger.warning(f"Failed to get video duration: {err}")
        return 0.0
|
||||
|
||||
def _get_audio_duration(self, audio: str) -> float:
    """
    Return the duration of an audio file in seconds.

    Args:
        audio: Audio file path

    Returns:
        The ``format.duration`` value from ``ffmpeg.probe`` as a float.
        If probing fails, a rough estimate derived from the file size is
        returned instead (never less than 1.0).
    """
    try:
        return float(ffmpeg.probe(audio)['format']['duration'])
    except Exception as err:
        logger.warning(f"Failed to get audio duration: {err}, using estimate")
        # Very rough fallback: treat the file as ~16 kbps audio,
        # i.e. about 2000 bytes per second.
        import os
        size_bytes = os.path.getsize(audio)
        # Clamp so callers never receive a near-zero duration.
        return max(1.0, size_bytes / 2000)
|
||||
|
||||
def has_audio_stream(self, video: str) -> bool:
    """
    Report whether a video file contains at least one audio stream.

    Args:
        video: Video file path

    Returns:
        True if any probed stream has codec type 'audio'; False if none
        does, or if probing fails (a warning is logged in that case).
    """
    try:
        streams = ffmpeg.probe(video).get('streams', [])
        # Collect every stream's codec type so a malformed entry still
        # triggers the fallback below, regardless of stream order.
        codec_types = [stream['codec_type'] for stream in streams]
        has_audio = 'audio' in codec_types
        logger.debug(f"Video {video} has_audio={has_audio}")
        return has_audio
    except Exception as err:
        logger.warning(f"Failed to probe video audio streams: {err}, assuming no audio")
        return False
|
||||
|
||||
def merge_audio_video(
|
||||
self,
|
||||
@@ -247,9 +315,18 @@ class VideoService:
|
||||
replace_audio: bool = True,
|
||||
audio_volume: float = 1.0,
|
||||
video_volume: float = 0.0,
|
||||
pad_strategy: str = "freeze", # "freeze" (freeze last frame) or "black" (black screen)
|
||||
) -> str:
|
||||
"""
|
||||
Merge audio with video
|
||||
Merge audio with video, using the longer duration
|
||||
|
||||
The output video duration will be the maximum of video and audio duration.
|
||||
If audio is longer than video, the video will be padded using the specified strategy.
|
||||
|
||||
Automatically handles videos with or without audio streams.
|
||||
- If video has no audio: adds the audio track
|
||||
- If video has audio and replace_audio=True: replaces with new audio
|
||||
- If video has audio and replace_audio=False: mixes both audio tracks
|
||||
|
||||
Args:
|
||||
video: Video file path
|
||||
@@ -259,6 +336,9 @@ class VideoService:
|
||||
audio_volume: Volume of the new audio (0.0 to 1.0+)
|
||||
video_volume: Volume of original video audio (0.0 to 1.0+)
|
||||
Only used when replace_audio=False
|
||||
pad_strategy: Strategy to pad video if audio is longer
|
||||
- "freeze": Freeze last frame (default)
|
||||
- "black": Fill with black screen
|
||||
|
||||
Returns:
|
||||
Path to the output video file
|
||||
@@ -267,28 +347,115 @@ class VideoService:
|
||||
RuntimeError: If FFmpeg execution fails
|
||||
|
||||
Note:
|
||||
- When replace_audio=True, video's original audio is removed
|
||||
- When replace_audio=False, original and new audio are mixed
|
||||
- Audio is trimmed/extended to match video duration
|
||||
- Uses the longer duration between video and audio
|
||||
- When audio is longer, video is padded using pad_strategy
|
||||
- When video is longer, audio is looped or extended
|
||||
- Automatically detects if video has audio
|
||||
- When video is silent, audio is added regardless of replace_audio
|
||||
- When replace_audio=True and video has audio, original audio is removed
|
||||
- When replace_audio=False and video has audio, original and new audio are mixed
|
||||
"""
|
||||
# Get durations of video and audio
|
||||
video_duration = self._get_video_duration(video)
|
||||
audio_duration = self._get_audio_duration(audio)
|
||||
|
||||
logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s")
|
||||
|
||||
# Determine target duration (max of both)
|
||||
target_duration = max(video_duration, audio_duration)
|
||||
logger.info(f"Target output duration: {target_duration:.2f}s")
|
||||
|
||||
# Check if video has audio stream
|
||||
video_has_audio = self.has_audio_stream(video)
|
||||
|
||||
# Prepare video stream (potentially with padding)
|
||||
input_video = ffmpeg.input(video)
|
||||
video_stream = input_video.video
|
||||
|
||||
# Pad video if audio is longer
|
||||
if audio_duration > video_duration:
|
||||
pad_duration = audio_duration - video_duration
|
||||
logger.info(f"Audio is longer, padding video by {pad_duration:.2f}s using '{pad_strategy}' strategy")
|
||||
|
||||
if pad_strategy == "freeze":
|
||||
# Freeze last frame: tpad filter
|
||||
video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
|
||||
else: # black
|
||||
# Generate black frames for padding duration
|
||||
from pixelle_video.utils.os_util import get_temp_path
|
||||
import os
|
||||
|
||||
# Get video properties
|
||||
probe = ffmpeg.probe(video)
|
||||
video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
|
||||
width = int(video_info['width'])
|
||||
height = int(video_info['height'])
|
||||
fps_str = video_info['r_frame_rate']
|
||||
fps_num, fps_den = map(int, fps_str.split('/'))
|
||||
fps = fps_num / fps_den if fps_den != 0 else 30
|
||||
|
||||
# Create black video for padding
|
||||
black_video_path = get_temp_path(f"black_pad_{os.path.basename(output)}")
|
||||
black_input = ffmpeg.input(
|
||||
f'color=c=black:s={width}x{height}:r={fps}',
|
||||
f='lavfi',
|
||||
t=pad_duration
|
||||
)
|
||||
|
||||
# Concatenate original video with black padding
|
||||
video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)
|
||||
|
||||
# Prepare audio stream (pad if needed to match target duration)
|
||||
input_audio = ffmpeg.input(audio)
|
||||
audio_stream = input_audio.audio.filter('volume', audio_volume)
|
||||
|
||||
# Pad audio with silence if video is longer
|
||||
if video_duration > audio_duration:
|
||||
pad_duration = video_duration - audio_duration
|
||||
logger.info(f"Video is longer, padding audio with {pad_duration:.2f}s silence")
|
||||
# Use apad to add silence at the end
|
||||
audio_stream = audio_stream.filter('apad', whole_dur=target_duration)
|
||||
|
||||
if not video_has_audio:
|
||||
logger.info(f"Video has no audio stream, adding audio track")
|
||||
# Video is silent, just add the audio
|
||||
try:
|
||||
(
|
||||
ffmpeg
|
||||
.output(
|
||||
video_stream,
|
||||
audio_stream,
|
||||
output,
|
||||
vcodec='libx264', # Re-encode video if padded
|
||||
acodec='aac',
|
||||
audio_bitrate='192k'
|
||||
)
|
||||
.overwrite_output()
|
||||
.run(capture_stdout=True, capture_stderr=True)
|
||||
)
|
||||
|
||||
logger.success(f"Audio added to silent video: {output}")
|
||||
return output
|
||||
except ffmpeg.Error as e:
|
||||
error_msg = e.stderr.decode() if e.stderr else str(e)
|
||||
logger.error(f"FFmpeg error adding audio to silent video: {error_msg}")
|
||||
raise RuntimeError(f"Failed to add audio to video: {error_msg}")
|
||||
|
||||
# Video has audio, proceed with merging
|
||||
logger.info(f"Merging audio with video (replace={replace_audio})")
|
||||
|
||||
try:
|
||||
input_video = ffmpeg.input(video)
|
||||
input_audio = ffmpeg.input(audio)
|
||||
|
||||
if replace_audio:
|
||||
# Replace audio: use only new audio, ignore original
|
||||
(
|
||||
ffmpeg
|
||||
.output(
|
||||
input_video.video,
|
||||
input_audio.audio.filter('volume', audio_volume),
|
||||
video_stream,
|
||||
audio_stream,
|
||||
output,
|
||||
vcodec='copy',
|
||||
vcodec='libx264', # Re-encode video if padded
|
||||
acodec='aac',
|
||||
audio_bitrate='192k',
|
||||
shortest=None
|
||||
audio_bitrate='192k'
|
||||
)
|
||||
.overwrite_output()
|
||||
.run(capture_stdout=True, capture_stderr=True)
|
||||
@@ -298,20 +465,20 @@ class VideoService:
|
||||
mixed_audio = ffmpeg.filter(
|
||||
[
|
||||
input_video.audio.filter('volume', video_volume),
|
||||
input_audio.audio.filter('volume', audio_volume)
|
||||
audio_stream
|
||||
],
|
||||
'amix',
|
||||
inputs=2,
|
||||
duration='first'
|
||||
duration='longest' # Use longest audio
|
||||
)
|
||||
|
||||
(
|
||||
ffmpeg
|
||||
.output(
|
||||
input_video.video,
|
||||
video_stream,
|
||||
mixed_audio,
|
||||
output,
|
||||
vcodec='copy',
|
||||
vcodec='libx264', # Re-encode video if padded
|
||||
acodec='aac',
|
||||
audio_bitrate='192k'
|
||||
)
|
||||
@@ -326,6 +493,92 @@ class VideoService:
|
||||
logger.error(f"FFmpeg merge error: {error_msg}")
|
||||
raise RuntimeError(f"Failed to merge audio and video: {error_msg}")
|
||||
|
||||
def overlay_image_on_video(
    self,
    video: str,
    overlay_image: str,
    output: str,
    scale_mode: str = "contain"
) -> str:
    """
    Overlay a transparent image on top of video

    Args:
        video: Base video file path
        overlay_image: Transparent overlay image path (e.g., rendered HTML with transparent background)
        output: Output video file path
        scale_mode: How to scale the base video to fit the overlay size
            - "contain": Scale video to fit within overlay dimensions (letterbox/pillarbox)
            - "cover": Scale video to cover overlay dimensions (may crop)
            - "stretch": Stretch video to exact overlay dimensions
            Any other value is treated as "stretch".

    Returns:
        Path to the output video file

    Raises:
        RuntimeError: If FFmpeg execution fails, or if the overlay image
            contains no video stream to read dimensions from

    Note:
        - Overlay image should have transparent background
        - Video is scaled to match overlay dimensions based on scale_mode
        - Final video size matches overlay image size
        - Video codec is re-encoded to support overlay
        - NOTE(review): only the composited video stream is passed to
          ffmpeg.output, so the source audio does not appear to be carried
          over - confirm callers merge audio separately
    """
    logger.info(f"Overlaying image on video (scale_mode={scale_mode})")

    try:
        # Get overlay image dimensions
        overlay_probe = ffmpeg.probe(overlay_image)
        # Use a default so a missing stream becomes a RuntimeError rather
        # than a bare StopIteration escaping from next().
        overlay_stream = next(
            (s for s in overlay_probe.get('streams', []) if s.get('codec_type') == 'video'),
            None
        )
        if overlay_stream is None:
            raise RuntimeError(
                f"Failed to overlay image on video: no video stream found in {overlay_image}"
            )
        overlay_width = int(overlay_stream['width'])
        overlay_height = int(overlay_stream['height'])

        logger.debug(f"Overlay dimensions: {overlay_width}x{overlay_height}")

        input_video = ffmpeg.input(video)
        input_overlay = ffmpeg.input(overlay_image)

        # Scale video to fit overlay size using scale_mode
        if scale_mode == "contain":
            # Shrink to fit inside the overlay, then pad with black so the
            # video is centered on an overlay-sized canvas
            scaled_video = (
                input_video
                .filter('scale', overlay_width, overlay_height, force_original_aspect_ratio='decrease')
                .filter('pad', overlay_width, overlay_height, '(ow-iw)/2', '(oh-ih)/2', color='black')
            )
        elif scale_mode == "cover":
            # Grow to cover the overlay, then crop the excess
            scaled_video = (
                input_video
                .filter('scale', overlay_width, overlay_height, force_original_aspect_ratio='increase')
                .filter('crop', overlay_width, overlay_height)
            )
        else:  # stretch
            # Stretch to exact dimensions (aspect ratio not preserved)
            scaled_video = input_video.filter('scale', overlay_width, overlay_height)

        # Overlay the transparent image on top of the scaled video
        output_stream = ffmpeg.overlay(scaled_video, input_overlay)

        (
            ffmpeg
            .output(output_stream, output,
                    vcodec='libx264',
                    pix_fmt='yuv420p',
                    preset='medium',
                    crf=23)
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )

        logger.success(f"Image overlaid on video: {output}")
        return output
    except ffmpeg.Error as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"FFmpeg overlay error: {error_msg}")
        raise RuntimeError(f"Failed to overlay image on video: {error_msg}")
|
||||
|
||||
def create_video_from_image(
|
||||
self,
|
||||
image: str,
|
||||
|
||||
@@ -321,6 +321,98 @@ async def generate_image_prompts(
|
||||
return all_prompts
|
||||
|
||||
|
||||
async def generate_video_prompts(
    llm_service,
    narrations: List[str],
    min_words: int = 30,
    max_words: int = 60,
    batch_size: int = 10,
    max_retries: int = 3,
    progress_callback: Optional[callable] = None
) -> List[str]:
    """
    Generate video prompts from narrations (with batching and retry)

    Args:
        llm_service: LLM service instance; awaited as
            ``llm_service(prompt=..., temperature=..., max_tokens=...)``
        narrations: List of narrations
        min_words: Min video prompt length
        max_words: Max video prompt length
        batch_size: Max narrations per batch (default: 10), must be >= 1
        max_retries: Max attempts per batch (default: 3). Values below 1
            are treated as 1 so every batch is attempted at least once.
        progress_callback: Optional callback(completed, total, message) for progress updates

    Returns:
        List of video prompts (base prompts, without prefix applied),
        one per narration and in the same order

    Raises:
        ValueError: If batch_size is smaller than 1
        Exception: Whatever the last failed attempt of a batch raised
            (LLM error, JSON parsing error, missing key, count mismatch)
    """
    # Reject a bad batch size up front with a clear message instead of
    # failing inside range() (0) or silently returning nothing (negative).
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    from pixelle_video.prompts.video_generation import build_video_prompt_prompt

    # With zero attempts a batch would be skipped silently, leaving the
    # result shorter than `narrations`.
    attempts = max(1, max_retries)

    logger.info(f"Generating video prompts for {len(narrations)} narrations (batch_size={batch_size})")

    # Split narrations into batches
    batches = [narrations[i:i + batch_size] for i in range(0, len(narrations), batch_size)]
    logger.info(f"Split into {len(batches)} batches")

    all_prompts = []

    # Process each batch
    for batch_idx, batch_narrations in enumerate(batches, 1):
        logger.info(f"Processing batch {batch_idx}/{len(batches)} ({len(batch_narrations)} narrations)")

        # Retry logic for this batch
        for attempt in range(1, attempts + 1):
            try:
                # Generate prompts for this batch
                prompt = build_video_prompt_prompt(
                    narrations=batch_narrations,
                    min_words=min_words,
                    max_words=max_words
                )

                response = await llm_service(
                    prompt=prompt,
                    temperature=0.7,
                    max_tokens=8192
                )

                logger.debug(f"Batch {batch_idx} attempt {attempt}: LLM response length: {len(response)} chars")

                # Parse JSON
                result = _parse_json(response)

                if "video_prompts" not in result:
                    raise KeyError("Invalid response format: missing 'video_prompts'")

                batch_prompts = result["video_prompts"]

                # A string or dict of the right length would pass the
                # count check below and corrupt the output.
                if not isinstance(batch_prompts, list):
                    raise ValueError(
                        f"Invalid response format: 'video_prompts' must be a list, got {type(batch_prompts).__name__}"
                    )

                # Validate batch result
                if len(batch_prompts) != len(batch_narrations):
                    raise ValueError(
                        f"Prompt count mismatch: expected {len(batch_narrations)}, got {len(batch_prompts)}"
                    )

                # Success - add to all_prompts
                all_prompts.extend(batch_prompts)
                logger.info(f"✓ Batch {batch_idx} completed: {len(batch_prompts)} video prompts")

                # Report progress
                if progress_callback:
                    completed = len(all_prompts)
                    total = len(narrations)
                    progress_callback(completed, total, f"Batch {batch_idx}/{len(batches)} completed")

                break  # Success, move to next batch

            except Exception as e:
                logger.warning(f"✗ Batch {batch_idx} attempt {attempt} failed: {e}")
                if attempt >= attempts:
                    # Out of attempts: surface the last failure to the caller
                    raise
                logger.info(f"Retrying batch {batch_idx}...")

    logger.info(f"✅ Generated {len(all_prompts)} video prompts")
    return all_prompts
|
||||
|
||||
|
||||
def _parse_json(text: str) -> dict:
|
||||
"""
|
||||
Parse JSON from text, with fallback to extract JSON from markdown code blocks
|
||||
|
||||
@@ -260,7 +260,7 @@ def get_task_path(task_id: str, *paths: str) -> str:
|
||||
def get_task_frame_path(
|
||||
task_id: str,
|
||||
frame_index: int,
|
||||
file_type: Literal["audio", "image", "composed", "segment"]
|
||||
file_type: Literal["audio", "image", "video", "composed", "segment"]
|
||||
) -> str:
|
||||
"""
|
||||
Get frame file path within task directory
|
||||
@@ -268,7 +268,7 @@ def get_task_frame_path(
|
||||
Args:
|
||||
task_id: Task ID
|
||||
frame_index: Frame index (0-based internally, but filename starts from 01)
|
||||
file_type: File type (audio/image/composed/segment)
|
||||
file_type: File type (audio/image/video/composed/segment)
|
||||
|
||||
Returns:
|
||||
Absolute path to frame file
|
||||
@@ -280,6 +280,7 @@ def get_task_frame_path(
|
||||
ext_map = {
|
||||
"audio": "mp3",
|
||||
"image": "png",
|
||||
"video": "mp4",
|
||||
"composed": "png",
|
||||
"segment": "mp4"
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import os
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional, Literal
|
||||
from pydantic import BaseModel, Field
|
||||
import logging
|
||||
|
||||
from pixelle_video.utils.os_util import (
|
||||
get_resource_path,
|
||||
@@ -26,6 +27,8 @@ from pixelle_video.utils.os_util import (
|
||||
resource_exists
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_template_size(template_path: str) -> Tuple[int, int]:
|
||||
"""
|
||||
@@ -316,7 +319,7 @@ def resolve_template_path(template_input: Optional[str]) -> str:
|
||||
|
||||
Args:
|
||||
template_input: Can be:
|
||||
- None: Use default "1080x1920/default.html"
|
||||
- None: Use default "1080x1920/image_default.html"
|
||||
- "template.html": Use default size + this template
|
||||
- "1080x1920/template.html": Full relative path
|
||||
- "templates/1080x1920/template.html": Absolute-ish path (legacy)
|
||||
@@ -330,15 +333,15 @@ def resolve_template_path(template_input: Optional[str]) -> str:
|
||||
|
||||
Examples:
|
||||
>>> resolve_template_path(None)
|
||||
'templates/1080x1920/default.html'
|
||||
>>> resolve_template_path("modern.html")
|
||||
'templates/1080x1920/modern.html'
|
||||
>>> resolve_template_path("1920x1080/default.html")
|
||||
'templates/1920x1080/default.html'
|
||||
'templates/1080x1920/image_default.html'
|
||||
>>> resolve_template_path("image_modern.html")
|
||||
'templates/1080x1920/image_modern.html'
|
||||
>>> resolve_template_path("1920x1080/image_default.html")
|
||||
'templates/1920x1080/image_default.html'
|
||||
"""
|
||||
# Default case
|
||||
if template_input is None:
|
||||
template_input = "1080x1920/default.html"
|
||||
template_input = "1080x1920/image_default.html"
|
||||
|
||||
# Parse input to extract size and template name
|
||||
size = None
|
||||
@@ -359,6 +362,18 @@ def resolve_template_path(template_input: Optional[str]) -> str:
|
||||
size = "1080x1920"
|
||||
template_name = template_input
|
||||
|
||||
# Backward compatibility: migrate "default.html" to "image_default.html"
|
||||
if template_name == "default.html":
|
||||
migrated_name = "image_default.html"
|
||||
try:
|
||||
# Try migrated name first
|
||||
path = get_resource_path("templates", size, migrated_name)
|
||||
logger.info(f"Backward compatibility: migrated '{template_input}' to '{size}/{migrated_name}'")
|
||||
return path
|
||||
except FileNotFoundError:
|
||||
# Fall through to try original name
|
||||
logger.warning(f"Migrated template '{size}/{migrated_name}' not found, trying original name")
|
||||
|
||||
# Use resource API to resolve path (custom > default)
|
||||
try:
|
||||
return get_resource_path("templates", size, template_name)
|
||||
@@ -367,6 +382,120 @@ def resolve_template_path(template_input: Optional[str]) -> str:
|
||||
raise FileNotFoundError(
|
||||
f"Template not found: {size}/{template_name}\n"
|
||||
f"Available sizes: {available_sizes}\n"
|
||||
f"Hint: Use format 'SIZExSIZE/template.html' (e.g., '1080x1920/default.html')"
|
||||
f"Hint: Use format 'SIZExSIZE/template.html' (e.g., '1080x1920/image_default.html')"
|
||||
)
|
||||
|
||||
|
||||
def get_template_type(template_name: str) -> Literal['static', 'image', 'video']:
    """
    Classify a template by the prefix of its filename

    Only the final path component is inspected, so both a bare filename
    and a path such as "1080x1920/image_default.html" are accepted.

    Naming convention:
        - static_*.html: Static style templates (no AI-generated media)
        - image_*.html: Templates requiring AI-generated images
        - video_*.html: Templates requiring AI-generated videos

    Args:
        template_name: Template filename like "image_default.html" or "video_simple.html"

    Returns:
        'static', 'image', or 'video'. Names that match none of the
        prefixes are reported with a warning and classified as 'image'.

    Examples:
        >>> get_template_type("static_simple.html")
        'static'
        >>> get_template_type("image_default.html")
        'image'
        >>> get_template_type("video_simple.html")
        'video'
    """
    filename = Path(template_name).name

    # The prefixes are mutually exclusive, so the check order is irrelevant.
    for kind in ("static", "video", "image"):
        if filename.startswith(f"{kind}_"):
            return kind

    # No recognised prefix (e.g. a legacy name): warn and treat as image.
    logger.warning(
        f"Template '{template_name}' doesn't follow naming convention (static_/image_/video_). "
        f"Defaulting to 'image' type."
    )
    return "image"
|
||||
|
||||
|
||||
def filter_templates_by_type(
    templates: List[TemplateInfo],
    template_type: Literal['static', 'image', 'video']
) -> List[TemplateInfo]:
    """
    Keep only the templates of a given type

    Each template is classified by passing its ``display_info.name`` to
    get_template_type; the input order is preserved.

    Args:
        templates: List of TemplateInfo objects
        template_type: Type to keep ('static', 'image', or 'video')

    Returns:
        New list containing the matching TemplateInfo objects

    Examples:
        >>> all_templates = get_all_templates_with_info()
        >>> image_templates = filter_templates_by_type(all_templates, 'image')
        >>> len(image_templates) > 0
        True
    """
    return [
        info
        for info in templates
        if get_template_type(info.display_info.name) == template_type
    ]
|
||||
|
||||
|
||||
def get_templates_grouped_by_size_and_type(
    template_type: Optional[Literal['static', 'image', 'video']] = None
) -> dict:
    """
    Collect templates into per-size groups, optionally restricted to one type

    Args:
        template_type: Optional type filter ('static', 'image', or 'video');
            None keeps every template

    Returns:
        Dict mapping size to a list of TemplateInfo sorted by display name.
        Sizes are ordered portrait > landscape > square (any other
        orientation last), then by size; the orientation of a group is
        taken from its first template.

    Examples:
        >>> # Get all templates
        >>> all_grouped = get_templates_grouped_by_size_and_type()

        >>> # Get only image templates
        >>> image_grouped = get_templates_grouped_by_size_and_type('image')
    """
    from collections import defaultdict

    candidates = get_all_templates_with_info()
    if template_type is not None:
        candidates = filter_templates_by_type(candidates, template_type)

    # Bucket templates by their size
    by_size = defaultdict(list)
    for info in candidates:
        by_size[info.display_info.size].append(info)

    orientation_rank = {'portrait': 0, 'landscape': 1, 'square': 2}

    def size_sort_key(size):
        # Rank by the first template's orientation, then by size as a tie-breaker.
        first = by_size[size][0]
        return (orientation_rank.get(first.display_info.orientation, 3), size)

    # Dict insertion order carries the group ordering to the caller.
    return {
        size: sorted(by_size[size], key=lambda info: info.display_info.name)
        for size in sorted(by_size.keys(), key=size_sort_key)
    }
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ dependencies = [
|
||||
"uvicorn[standard]>=0.32.0",
|
||||
"python-multipart>=0.0.12",
|
||||
"comfykit>=0.1.9",
|
||||
"beautifulsoup4>=4.14.2",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1080, height=1080">
|
||||
<title>极简边框风格 - 1080x1080</title>
|
||||
<style>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1080, height=1920">
|
||||
<title>模糊背景卡片 - 1080x1920</title>
|
||||
<!-- Google Fonts - 中文字体 -->
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>{{title}}</title>
|
||||
<style>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<style>
|
||||
html {
|
||||
margin: 0;
|
||||
@@ -2,6 +2,8 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<style>
|
||||
html {
|
||||
margin: 0;
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1080, height=1920">
|
||||
<title>时尚复古风格 - 1080x1920</title>
|
||||
<style>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1080, height=1920">
|
||||
<title>全屏图片 - 1080x1920</title>
|
||||
<!-- Google Fonts - 中文字体 -->
|
||||
@@ -2,6 +2,8 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||
<link href="https://fonts.googleapis.com/css2?family=Ma+Shan+Zheng&family=ZCOOL+KuaiLe&display=swap" rel="stylesheet">
|
||||
@@ -2,6 +2,8 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<style>
|
||||
html {
|
||||
margin: 0;
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>{{title}}</title>
|
||||
<style>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1080, height=1920">
|
||||
<title>心理卡片风 - 1080x1920</title>
|
||||
<style>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<style>
|
||||
html {
|
||||
margin: 0;
|
||||
@@ -2,6 +2,8 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<style>
|
||||
html {
|
||||
margin: 0;
|
||||
185
templates/1080x1920/video_default.html
Normal file
185
templates/1080x1920/video_default.html
Normal file
@@ -0,0 +1,185 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="512">
|
||||
<meta name="template:media-height" content="288">
|
||||
<style>
|
||||
html {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
width: 100%;
|
||||
height: 100vh;
|
||||
font-family: 'PingFang SC', 'Source Han Sans', 'Microsoft YaHei', sans-serif;
|
||||
overflow: hidden;
|
||||
/* background-color: #000; */
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
/* 主容器 - 居中并包含所有内容 */
|
||||
.main-container {
|
||||
position: relative;
|
||||
width: 1080px;
|
||||
height: 1920px;
|
||||
}
|
||||
|
||||
/* Background image layer (customizable using <img> tag) */
|
||||
.background-image {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
z-index: 0;
|
||||
}
|
||||
|
||||
/* Video overlay - 相对于main-container居中 */
|
||||
.video-overlay {
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
left: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
width: 1080px;
|
||||
height: 607px;
|
||||
/* background: #f00; */
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
/* Title section - positioned above video */
|
||||
.video-title-wrapper {
|
||||
position: absolute;
|
||||
top: calc(50% - 607px / 2 - 130px);
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
max-width: 900px;
|
||||
width: 900px;
|
||||
text-align: center;
|
||||
z-index: 2;
|
||||
}
|
||||
|
||||
.video-title {
|
||||
font-size: 72px;
|
||||
font-weight: 700;
|
||||
color: #ffffff;
|
||||
line-height: 1.3;
|
||||
letter-spacing: 3px;
|
||||
text-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
/* 字幕区域 - 对齐视频底部 */
|
||||
.content {
|
||||
position: absolute;
|
||||
bottom: calc(50% - 607px / 2 + 0px);
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
width: 900px;
|
||||
z-index: 4;
|
||||
}
|
||||
|
||||
.text {
|
||||
font-size: 40px;
|
||||
color: #ffffff;
|
||||
text-align: center;
|
||||
line-height: 1.6;
|
||||
font-weight: 500;
|
||||
text-shadow:
|
||||
2px 2px 4px rgba(0, 0, 0, 0.9),
|
||||
0 0 8px rgba(0, 0, 0, 0.8),
|
||||
0 0 16px rgba(0, 0, 0, 0.6);
|
||||
padding: 10px 0px;
|
||||
/* background-color: aqua; */
|
||||
}
|
||||
|
||||
/* Footer - positioned below video */
|
||||
.footer {
|
||||
position: absolute;
|
||||
top: calc(50% + 607px / 2 + 50px);
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
width: 900px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding-top: 40px;
|
||||
border-top: 2px solid rgba(255, 255, 255, 0.3);
|
||||
z-index: 2;
|
||||
}
|
||||
|
||||
.author-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.author {
|
||||
font-size: 32px;
|
||||
font-weight: 600;
|
||||
color: #ffffff;
|
||||
text-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
|
||||
.author-desc {
|
||||
font-size: 24px;
|
||||
color: rgba(255, 255, 255, 0.9);
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
.logo-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: flex-end;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.logo {
|
||||
font-size: 28px;
|
||||
font-weight: 600;
|
||||
color: #ffffff;
|
||||
letter-spacing: 2px;
|
||||
text-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<!-- Main container: holds every element; children are absolutely positioned relative to this container -->
|
||||
<div class="main-container">
|
||||
<!-- Background image layer (customizable via background parameter) -->
|
||||
<div class="background-image">
|
||||
|
||||
</div>
|
||||
|
||||
<!-- Video overlay - 居中参考点 -->
|
||||
<div class="video-overlay"></div>
|
||||
|
||||
<!-- Video title - positioned above video -->
|
||||
<div class="video-title-wrapper">
|
||||
<div class="video-title">{{title}}</div>
|
||||
</div>
|
||||
|
||||
<!-- 字幕区域 - 独立定位在视频底部 -->
|
||||
<div class="content">
|
||||
<div class="text">{{text}}</div>
|
||||
</div>
|
||||
|
||||
<!-- Footer - positioned below video -->
|
||||
<div class="footer">
|
||||
<div class="author-section">
|
||||
<div class="author">{{author=@Pixelle.AI}}</div>
|
||||
<div class="author-desc">{{describe=Open Source Omnimodal AI Creative Agent}}</div>
|
||||
</div>
|
||||
<div class="logo-section">
|
||||
<div class="logo">{{brand=Pixelle-Video}}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1920, height=1080">
|
||||
<title>视频模板 - 电影风格</title>
|
||||
<style>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1920, height=1080">
|
||||
<title>全屏图片 - 1920x1080</title>
|
||||
<!-- Google Fonts - 中文字体 -->
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1920, height=1080">
|
||||
<title>视频模板 - 极简风格</title>
|
||||
<style>
|
||||
@@ -2,6 +2,8 @@
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="template:media-width" content="1024">
|
||||
<meta name="template:media-height" content="1024">
|
||||
<meta name="viewport" content="width=1920, height=1080">
|
||||
<title>视频模板 - 横屏科技风格</title>
|
||||
<style>
|
||||
24
uv.lock
generated
24
uv.lock
generated
@@ -226,6 +226,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/eb/f25ad1a7726b2fe21005c3580b35fa7bfe09646faf7c8f41867747987a35/beartype-0.22.4-py3-none-any.whl", hash = "sha256:7967a1cee01fee42e47da69c58c92da10ba5bcfb8072686e48487be5201e3d10", size = 1318387 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.14.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "soupsieve" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blinker"
|
||||
version = "1.9.0"
|
||||
@@ -1653,6 +1666,7 @@ name = "pixelle-video"
|
||||
version = "0.1.2"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "certifi" },
|
||||
{ name = "comfykit" },
|
||||
{ name = "edge-tts" },
|
||||
@@ -1680,6 +1694,7 @@ dev = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "beautifulsoup4", specifier = ">=4.14.2" },
|
||||
{ name = "certifi", specifier = ">=2025.10.5" },
|
||||
{ name = "comfykit", specifier = ">=0.1.9" },
|
||||
{ name = "edge-tts", specifier = ">=7.2.3" },
|
||||
@@ -2461,6 +2476,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sse-starlette"
|
||||
version = "3.0.3"
|
||||
|
||||
246
web/app.py
246
web/app.py
@@ -684,13 +684,41 @@ def main():
|
||||
st.markdown(f"🔗 [{tr('template.preview_link')}]({template_docs_url})")
|
||||
|
||||
# Import template utilities
|
||||
from pixelle_video.utils.template_util import get_templates_grouped_by_size
|
||||
from pixelle_video.utils.template_util import get_templates_grouped_by_size_and_type, get_template_type
|
||||
|
||||
# Get templates grouped by size
|
||||
grouped_templates = get_templates_grouped_by_size()
|
||||
# Template type selector
|
||||
st.markdown(f"**{tr('template.type_selector')}**")
|
||||
|
||||
template_type_options = {
|
||||
'static': tr('template.type.static'),
|
||||
'image': tr('template.type.image'),
|
||||
'video': tr('template.type.video')
|
||||
}
|
||||
|
||||
# Radio buttons in horizontal layout
|
||||
selected_template_type = st.radio(
|
||||
tr('template.type_selector'),
|
||||
options=list(template_type_options.keys()),
|
||||
format_func=lambda x: template_type_options[x],
|
||||
index=1, # Default to 'image'
|
||||
key="template_type_selector",
|
||||
label_visibility="collapsed",
|
||||
horizontal=True
|
||||
)
|
||||
|
||||
# Display hint based on selected type (below radio buttons)
|
||||
if selected_template_type == 'static':
|
||||
st.info(tr('template.type.static_hint'))
|
||||
elif selected_template_type == 'image':
|
||||
st.info(tr('template.type.image_hint'))
|
||||
elif selected_template_type == 'video':
|
||||
st.info(tr('template.type.video_hint'))
|
||||
|
||||
# Get templates grouped by size, filtered by selected type
|
||||
grouped_templates = get_templates_grouped_by_size_and_type(selected_template_type)
|
||||
|
||||
if not grouped_templates:
|
||||
st.error("No templates found. Please ensure templates are in templates/ directory with proper structure (e.g., templates/1080x1920/default.html).")
|
||||
st.warning(f"No {template_type_options[selected_template_type]} templates found. Please select a different type or add templates.")
|
||||
st.stop()
|
||||
|
||||
# Build display options with group separators
|
||||
@@ -707,7 +735,19 @@ def main():
|
||||
|
||||
# Get default template from config
|
||||
template_config = pixelle_video.config.get("template", {})
|
||||
config_default_template = template_config.get("default_template", "1080x1920/default.html")
|
||||
config_default_template = template_config.get("default_template", "1080x1920/image_default.html")
|
||||
|
||||
# Backward compatibility
|
||||
if config_default_template == "1080x1920/default.html":
|
||||
config_default_template = "1080x1920/image_default.html"
|
||||
|
||||
# Determine type-specific default template
|
||||
type_default_templates = {
|
||||
'static': '1080x1920/static_default.html',
|
||||
'image': '1080x1920/image_default.html',
|
||||
'video': '1080x1920/video_default.html'
|
||||
}
|
||||
type_specific_default = type_default_templates.get(selected_template_type, config_default_template)
|
||||
|
||||
for size, templates in grouped_templates.items():
|
||||
if not templates:
|
||||
@@ -733,10 +773,12 @@ def main():
|
||||
display_options.append(display_name)
|
||||
template_paths_ordered.append(t.template_path) # Add to ordered list
|
||||
|
||||
# Set default based on config (priority: config > first default.html in portrait)
|
||||
# Set default: priority is config > type-specific default > first in portrait
|
||||
if t.template_path == config_default_template:
|
||||
default_index = current_index
|
||||
elif default_index == 0 and "default.html" in t.display_info.name and t.display_info.orientation == 'portrait':
|
||||
elif default_index == 0 and t.template_path == type_specific_default:
|
||||
default_index = current_index
|
||||
elif default_index == 0 and t.display_info.orientation == 'portrait':
|
||||
default_index = current_index
|
||||
|
||||
current_index += 1
|
||||
@@ -782,10 +824,25 @@ def main():
|
||||
generator_for_params = HTMLFrameGenerator(template_path_for_params)
|
||||
custom_params_for_video = generator_for_params.parse_template_parameters()
|
||||
|
||||
# Detect if template requires image generation
|
||||
template_requires_image = generator_for_params.requires_image()
|
||||
# Store in session state for Image Section to read
|
||||
st.session_state['template_requires_image'] = template_requires_image
|
||||
# Get media size from template (for image/video generation)
|
||||
media_width, media_height = generator_for_params.get_media_size()
|
||||
st.session_state['template_media_width'] = media_width
|
||||
st.session_state['template_media_height'] = media_height
|
||||
|
||||
# Detect template media type
|
||||
from pathlib import Path
|
||||
from pixelle_video.utils.template_util import get_template_type
|
||||
|
||||
template_name = Path(frame_template).name
|
||||
template_media_type = get_template_type(template_name)
|
||||
template_requires_media = (template_media_type in ["image", "video"])
|
||||
|
||||
# Store in session state for workflow filtering
|
||||
st.session_state['template_media_type'] = template_media_type
|
||||
st.session_state['template_requires_media'] = template_requires_media
|
||||
|
||||
# Backward compatibility
|
||||
st.session_state['template_requires_image'] = (template_media_type == "image")
|
||||
|
||||
custom_values_for_video = {}
|
||||
if custom_params_for_video:
|
||||
@@ -928,25 +985,51 @@ def main():
|
||||
logger.exception(e)
|
||||
|
||||
# ====================================================================
|
||||
# Image Generation Section (conditional based on template)
|
||||
# Media Generation Section (conditional based on template)
|
||||
# ====================================================================
|
||||
# Check if current template requires image generation
|
||||
if st.session_state.get('template_requires_image', True):
|
||||
# Template requires images - show full Image Section
|
||||
# Check if current template requires media generation
|
||||
template_media_type = st.session_state.get('template_media_type', 'image')
|
||||
template_requires_media = st.session_state.get('template_requires_media', True)
|
||||
|
||||
if template_requires_media:
|
||||
# Template requires media - show Media Generation Section
|
||||
with st.container(border=True):
|
||||
st.markdown(f"**{tr('section.image')}**")
|
||||
# Dynamic section title based on template type
|
||||
if template_media_type == "video":
|
||||
section_title = tr('section.video')
|
||||
else:
|
||||
section_title = tr('section.image')
|
||||
|
||||
st.markdown(f"**{section_title}**")
|
||||
|
||||
# 1. ComfyUI Workflow selection
|
||||
with st.expander(tr("help.feature_description"), expanded=False):
|
||||
st.markdown(f"**{tr('help.what')}**")
|
||||
st.markdown(tr("style.workflow_what"))
|
||||
if template_media_type == "video":
|
||||
st.markdown(tr('style.video_workflow_what'))
|
||||
else:
|
||||
st.markdown(tr("style.workflow_what"))
|
||||
st.markdown(f"**{tr('help.how')}**")
|
||||
st.markdown(tr("style.workflow_how"))
|
||||
if template_media_type == "video":
|
||||
st.markdown(tr('style.video_workflow_how'))
|
||||
else:
|
||||
st.markdown(tr("style.workflow_how"))
|
||||
st.markdown(f"**{tr('help.note')}**")
|
||||
st.markdown(tr("style.image_size_note"))
|
||||
if template_media_type == "video":
|
||||
st.markdown(tr('style.video_size_note'))
|
||||
else:
|
||||
st.markdown(tr("style.image_size_note"))
|
||||
|
||||
# Get available workflows from pixelle_video (with source info)
|
||||
workflows = pixelle_video.image.list_workflows()
|
||||
# Get available workflows and filter by template type
|
||||
all_workflows = pixelle_video.media.list_workflows()
|
||||
|
||||
# Filter workflows based on template media type
|
||||
if template_media_type == "video":
|
||||
# Only show video_ workflows
|
||||
workflows = [wf for wf in all_workflows if "video_" in wf["key"].lower()]
|
||||
else:
|
||||
# Only show image_ workflows (exclude video_)
|
||||
workflows = [wf for wf in all_workflows if "video_" not in wf["key"].lower()]
|
||||
|
||||
# Build options for selectbox
|
||||
# Display: "image_flux.json - Runninghub"
|
||||
@@ -959,7 +1042,9 @@ def main():
|
||||
|
||||
# If user has a saved preference in config, try to match it
|
||||
comfyui_config = config_manager.get_comfyui_config()
|
||||
saved_workflow = comfyui_config["image"]["default_workflow"]
|
||||
# Select config based on template type (image or video)
|
||||
media_config_key = "video" if template_media_type == "video" else "image"
|
||||
saved_workflow = comfyui_config.get(media_config_key, {}).get("default_workflow", "")
|
||||
if saved_workflow and saved_workflow in workflow_keys:
|
||||
default_workflow_index = workflow_keys.index(saved_workflow)
|
||||
|
||||
@@ -978,31 +1063,20 @@ def main():
|
||||
else:
|
||||
workflow_key = "runninghub/image_flux.json" # fallback
|
||||
|
||||
# Get media size from template
|
||||
image_width = st.session_state.get('template_media_width', 1024)
|
||||
image_height = st.session_state.get('template_media_height', 1024)
|
||||
|
||||
# Display media size info (read-only)
|
||||
if template_media_type == "video":
|
||||
size_info_text = tr('style.video_size_info', width=image_width, height=image_height)
|
||||
else:
|
||||
size_info_text = tr('style.image_size_info', width=image_width, height=image_height)
|
||||
st.info(f"📐 {size_info_text}")
|
||||
|
||||
# 2. Image size input
|
||||
col1, col2 = st.columns(2)
|
||||
with col1:
|
||||
image_width = st.number_input(
|
||||
tr('style.image_width'),
|
||||
min_value=128,
|
||||
value=1024,
|
||||
step=1,
|
||||
label_visibility="visible",
|
||||
help=tr('style.image_width_help')
|
||||
)
|
||||
with col2:
|
||||
image_height = st.number_input(
|
||||
tr('style.image_height'),
|
||||
min_value=128,
|
||||
value=1024,
|
||||
step=1,
|
||||
label_visibility="visible",
|
||||
help=tr('style.image_height_help')
|
||||
)
|
||||
|
||||
# 3. Prompt prefix input
|
||||
# Get current prompt_prefix from config
|
||||
current_prefix = comfyui_config["image"]["prompt_prefix"]
|
||||
# Prompt prefix input
|
||||
# Get current prompt_prefix from config (based on media type)
|
||||
current_prefix = comfyui_config.get(media_config_key, {}).get("prompt_prefix", "")
|
||||
|
||||
# Prompt prefix input (temporary, not saved to config)
|
||||
prompt_prefix = st.text_area(
|
||||
@@ -1014,54 +1088,71 @@ def main():
|
||||
help=tr("style.prompt_prefix_help")
|
||||
)
|
||||
|
||||
# Style preview expander (similar to template preview)
|
||||
with st.expander(tr("style.preview_title"), expanded=False):
|
||||
# Media preview expander
|
||||
preview_title = tr("style.video_preview_title") if template_media_type == "video" else tr("style.preview_title")
|
||||
with st.expander(preview_title, expanded=False):
|
||||
# Test prompt input
|
||||
if template_media_type == "video":
|
||||
test_prompt_label = tr("style.test_video_prompt")
|
||||
test_prompt_value = "a dog running in the park"
|
||||
else:
|
||||
test_prompt_label = tr("style.test_prompt")
|
||||
test_prompt_value = "a dog"
|
||||
|
||||
test_prompt = st.text_input(
|
||||
tr("style.test_prompt"),
|
||||
value="a dog",
|
||||
test_prompt_label,
|
||||
value=test_prompt_value,
|
||||
help=tr("style.test_prompt_help"),
|
||||
key="style_test_prompt"
|
||||
)
|
||||
|
||||
# Preview button
|
||||
if st.button(tr("style.preview"), key="preview_style", use_container_width=True):
|
||||
with st.spinner(tr("style.previewing")):
|
||||
preview_button_label = tr("style.video_preview") if template_media_type == "video" else tr("style.preview")
|
||||
if st.button(preview_button_label, key="preview_style", use_container_width=True):
|
||||
previewing_text = tr("style.video_previewing") if template_media_type == "video" else tr("style.previewing")
|
||||
with st.spinner(previewing_text):
|
||||
try:
|
||||
from pixelle_video.utils.prompt_helper import build_image_prompt
|
||||
|
||||
# Build final prompt with prefix
|
||||
final_prompt = build_image_prompt(test_prompt, prompt_prefix)
|
||||
|
||||
# Generate preview image (use user-specified size)
|
||||
preview_image_path = run_async(pixelle_video.image(
|
||||
# Generate preview media (use user-specified size and media type)
|
||||
media_result = run_async(pixelle_video.media(
|
||||
prompt=final_prompt,
|
||||
workflow=workflow_key,
|
||||
media_type=template_media_type,
|
||||
width=int(image_width),
|
||||
height=int(image_height)
|
||||
))
|
||||
preview_media_path = media_result.url
|
||||
|
||||
# Display preview (support both URL and local path)
|
||||
if preview_image_path:
|
||||
st.success(tr("style.preview_success"))
|
||||
if preview_media_path:
|
||||
success_text = tr("style.video_preview_success") if template_media_type == "video" else tr("style.preview_success")
|
||||
st.success(success_text)
|
||||
|
||||
# Read and encode image
|
||||
if preview_image_path.startswith('http'):
|
||||
# URL - use directly
|
||||
img_html = f'<div class="preview-image"><img src="{preview_image_path}" alt="Style Preview"/></div>'
|
||||
if template_media_type == "video":
|
||||
# Display video
|
||||
st.video(preview_media_path)
|
||||
else:
|
||||
# Local file - encode as base64
|
||||
with open(preview_image_path, 'rb') as f:
|
||||
img_data = base64.b64encode(f.read()).decode()
|
||||
img_html = f'<div class="preview-image"><img src="data:image/png;base64,{img_data}" alt="Style Preview"/></div>'
|
||||
|
||||
st.markdown(img_html, unsafe_allow_html=True)
|
||||
# Display image
|
||||
if preview_media_path.startswith('http'):
|
||||
# URL - use directly
|
||||
img_html = f'<div class="preview-image"><img src="{preview_media_path}" alt="Style Preview"/></div>'
|
||||
else:
|
||||
# Local file - encode as base64
|
||||
with open(preview_media_path, 'rb') as f:
|
||||
img_data = base64.b64encode(f.read()).decode()
|
||||
img_html = f'<div class="preview-image"><img src="data:image/png;base64,{img_data}" alt="Style Preview"/></div>'
|
||||
|
||||
st.markdown(img_html, unsafe_allow_html=True)
|
||||
|
||||
# Show the final prompt used
|
||||
st.info(f"**{tr('style.final_prompt_label')}**\n{final_prompt}")
|
||||
|
||||
# Show file path
|
||||
st.caption(f"📁 {preview_image_path}")
|
||||
st.caption(f"📁 {preview_media_path}")
|
||||
else:
|
||||
st.error(tr("style.preview_failed_general"))
|
||||
except Exception as e:
|
||||
@@ -1076,10 +1167,12 @@ def main():
|
||||
st.info("ℹ️ " + tr("image.not_required"))
|
||||
st.caption(tr("image.not_required_hint"))
|
||||
|
||||
# Get media size from template (even though not used, for consistency)
|
||||
image_width = st.session_state.get('template_media_width', 1024)
|
||||
image_height = st.session_state.get('template_media_height', 1024)
|
||||
|
||||
# Set default values for later use
|
||||
workflow_key = None
|
||||
image_width = 1024
|
||||
image_height = 1024
|
||||
prompt_prefix = ""
|
||||
|
||||
|
||||
@@ -1149,14 +1242,13 @@ def main():
|
||||
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
|
||||
|
||||
# Generate video (directly pass parameters)
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
video_params = {
|
||||
"text": text,
|
||||
"mode": mode,
|
||||
"title": title if title else None,
|
||||
"n_scenes": n_scenes,
|
||||
"image_workflow": workflow_key,
|
||||
"image_width": int(image_width),
|
||||
"image_height": int(image_height),
|
||||
"frame_template": frame_template,
|
||||
"prompt_prefix": prompt_prefix,
|
||||
"bgm_path": bgm_path,
|
||||
@@ -1211,6 +1303,18 @@ def main():
|
||||
# Video preview
|
||||
if os.path.exists(result.video_path):
|
||||
st.video(result.video_path)
|
||||
|
||||
# Download button
|
||||
with open(result.video_path, "rb") as video_file:
|
||||
video_bytes = video_file.read()
|
||||
video_filename = os.path.basename(result.video_path)
|
||||
st.download_button(
|
||||
label="⬇️ 下载视频" if get_language() == "zh_CN" else "⬇️ Download Video",
|
||||
data=video_bytes,
|
||||
file_name=video_filename,
|
||||
mime="video/mp4",
|
||||
use_container_width=True
|
||||
)
|
||||
else:
|
||||
st.error(tr("status.video_not_found", path=result.video_path))
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
"section.bgm": "🎵 Background Music",
|
||||
"section.tts": "🎤 Voiceover",
|
||||
"section.image": "🎨 Image Generation",
|
||||
"section.video": "🎬 Video Generation",
|
||||
"section.media": "🎨 Media Generation",
|
||||
"section.template": "📐 Storyboard Template",
|
||||
"section.video_generation": "🎬 Generate Video",
|
||||
|
||||
@@ -45,12 +47,10 @@
|
||||
"style.workflow": "Workflow Selection",
|
||||
"style.workflow_what": "Determines how each frame's illustration is generated and its effect (e.g., using FLUX, SD models)",
|
||||
"style.workflow_how": "Place the exported image_xxx.json workflow file (API format) into the workflows/selfhost/ folder (for local ComfyUI) or the workflows/runninghub/ folder (for cloud)",
|
||||
"style.image_size": "Image Size",
|
||||
"style.image_width": "Width",
|
||||
"style.image_height": "Height",
|
||||
"style.image_width_help": "Width of AI-generated images (Note: This is the image size, not the final video size. Video size is determined by the template)",
|
||||
"style.image_height_help": "Height of AI-generated images (Note: This is the image size, not the final video size. Video size is determined by the template)",
|
||||
"style.image_size_note": "Image size controls the dimensions of AI-generated illustrations, and does not affect the final video size. Video size is determined by the Storyboard Template below.",
|
||||
"style.video_workflow_what": "Determines how each frame's video clip is generated and its effect (e.g., using different video generation models)",
|
||||
"style.video_workflow_how": "Place the exported video_xxx.json workflow file (API format) into the workflows/selfhost/ folder (for local ComfyUI) or the workflows/runninghub/ folder (for cloud)",
|
||||
"style.image_size_info": "Image Size: {width}x{height} (auto-determined by template)",
|
||||
"style.video_size_info": "Video Size: {width}x{height} (auto-determined by template)",
|
||||
"style.prompt_prefix": "Prompt Prefix",
|
||||
"style.prompt_prefix_what": "Automatically added before all image prompts to control the illustration style uniformly (e.g., cartoon, realistic)",
|
||||
"style.prompt_prefix_how": "Enter style description in the input box below. To save permanently, edit the config.yaml file",
|
||||
@@ -60,11 +60,16 @@
|
||||
"style.description": "Style Description",
|
||||
"style.description_placeholder": "Describe the illustration style you want (any language)...",
|
||||
"style.preview_title": "Preview Style",
|
||||
"style.video_preview_title": "Preview Video",
|
||||
"style.test_prompt": "Test Prompt",
|
||||
"style.test_video_prompt": "Test Video Prompt",
|
||||
"style.test_prompt_help": "Enter a test prompt to preview the style effect",
|
||||
"style.preview": "🖼️ Generate Preview",
|
||||
"style.video_preview": "🎬 Generate Video Preview",
|
||||
"style.previewing": "Generating style preview...",
|
||||
"style.video_previewing": "Generating video preview...",
|
||||
"style.preview_success": "✅ Preview generated successfully!",
|
||||
"style.video_preview_success": "✅ Video preview generated successfully!",
|
||||
"style.preview_caption": "Style Preview",
|
||||
"style.preview_failed": "Preview failed: {error}",
|
||||
"style.preview_failed_general": "Failed to generate preview image",
|
||||
@@ -81,8 +86,15 @@
|
||||
"template.modern": "Modern",
|
||||
"template.neon": "Neon",
|
||||
"template.what": "Controls the visual layout and design style of each frame (title, text, image arrangement)",
|
||||
"template.how": "Place .html template files in templates/SIZE/ directories (e.g., templates/1080x1920/). Templates are automatically grouped by size. Custom CSS styles are supported.\n\n**Note**\n\nAt least one of the following browsers must be installed on your computer for proper operation:\n1. Google Chrome (Windows, macOS)\n2. Chromium Browser (Linux)\n3. Microsoft Edge",
|
||||
"template.how": "Place .html template files in templates/SIZE/ directories (e.g., templates/1080x1920/). Templates are automatically grouped by size. Custom CSS styles are supported.\n\n**Template Naming Convention**\n\n- `static_*.html` → Static style templates (no AI-generated media)\n- `image_*.html` → Image generation templates (AI-generated images)\n- `video_*.html` → Video generation templates (AI-generated videos)\n\n**Note**\n\nAt least one of the following browsers must be installed on your computer for proper operation:\n1. Google Chrome (Windows, macOS)\n2. Chromium Browser (Linux)\n3. Microsoft Edge",
|
||||
"template.size_info": "Template Size",
|
||||
"template.type_selector": "Template Type",
|
||||
"template.type.static": "📄 Static Style",
|
||||
"template.type.image": "🖼️ Generate Images",
|
||||
"template.type.video": "🎬 Generate Videos",
|
||||
"template.type.static_hint": "Uses the template's built-in styles; no AI-generated media is required. You can customize background images and other parameters in the template.",
|
||||
"template.type.image_hint": "AI automatically generates illustrations matching the narration content. Image size is determined by the template.",
|
||||
"template.type.video_hint": "AI automatically generates video clips matching the narration content. Video size is determined by the template.",
|
||||
|
||||
"orientation.portrait": "Portrait",
|
||||
"orientation.landscape": "Landscape",
|
||||
@@ -140,12 +152,16 @@
|
||||
"progress.generating_narrations": "Generating narrations...",
|
||||
"progress.splitting_script": "Splitting script...",
|
||||
"progress.generating_image_prompts": "Generating image prompts...",
|
||||
"progress.generating_video_prompts": "Generating video prompts...",
|
||||
"progress.preparing_frames": "Preparing frames...",
|
||||
"progress.frame": "Frame {current}/{total}",
|
||||
"progress.frame_step": "Frame {current}/{total} - Step {step}/4: {action}",
|
||||
"progress.step_audio": "Generating audio...",
|
||||
"progress.step_image": "Generating image...",
|
||||
"progress.step_compose": "Composing frame...",
|
||||
"progress.step_video": "Creating video segment...",
|
||||
"progress.processing_frame": "Processing frame {current}/{total}...",
|
||||
"progress.step_audio": "Generating audio",
|
||||
"progress.step_image": "Generating image",
|
||||
"progress.step_media": "Generating media",
|
||||
"progress.step_compose": "Composing frame",
|
||||
"progress.step_video": "Creating video segment",
|
||||
"progress.concatenating": "Concatenating video...",
|
||||
"progress.finalizing": "Finalizing...",
|
||||
"progress.completed": "✅ Completed",
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
"section.bgm": "🎵 背景音乐",
|
||||
"section.tts": "🎤 配音合成",
|
||||
"section.image": "🎨 插图生成",
|
||||
"section.video": "🎬 视频生成",
|
||||
"section.media": "🎨 媒体生成",
|
||||
"section.template": "📐 分镜模板",
|
||||
"section.video_generation": "🎬 生成视频",
|
||||
|
||||
@@ -45,12 +47,10 @@
|
||||
"style.workflow": "工作流选择",
|
||||
"style.workflow_what": "决定视频中每帧插图的生成方式和效果(如使用 FLUX、SD 等模型)",
|
||||
"style.workflow_how": "将导出的 image_xxx.json 工作流文件(API格式)放入 workflows/selfhost/(本地 ComfyUI)或 workflows/runninghub/(云端)文件夹",
|
||||
"style.image_size": "图片尺寸",
|
||||
"style.image_width": "宽度",
|
||||
"style.image_height": "高度",
|
||||
"style.image_width_help": "AI 生成插图的宽度(注意:这是插图尺寸,不是最终视频尺寸。视频尺寸由模板决定)",
|
||||
"style.image_height_help": "AI 生成插图的高度(注意:这是插图尺寸,不是最终视频尺寸。视频尺寸由模板决定)",
|
||||
"style.image_size_note": "图片尺寸控制 AI 生成的插图大小,不影响最终视频尺寸。视频尺寸由下方的「📐 分镜模板」决定。",
|
||||
"style.video_workflow_what": "决定视频中每帧视频片段的生成方式和效果(如使用不同的视频生成模型)",
|
||||
"style.video_workflow_how": "将导出的 video_xxx.json 工作流文件(API格式)放入 workflows/selfhost/(本地 ComfyUI)或 workflows/runninghub/(云端)文件夹",
|
||||
"style.image_size_info": "插图尺寸:{width}x{height}(由模板自动决定)",
|
||||
"style.video_size_info": "视频尺寸:{width}x{height}(由模板自动决定)",
|
||||
"style.prompt_prefix": "提示词前缀",
|
||||
"style.prompt_prefix_what": "自动添加到所有图片提示词前面,统一控制插图风格(如:卡通风格、写实风格等)",
|
||||
"style.prompt_prefix_how": "直接在下方输入框填写风格描述。若要永久保存,需编辑 config.yaml 文件",
|
||||
@@ -60,11 +60,16 @@
|
||||
"style.description": "风格描述",
|
||||
"style.description_placeholder": "描述您想要的插图风格(任何语言)...",
|
||||
"style.preview_title": "预览风格",
|
||||
"style.video_preview_title": "预览视频",
|
||||
"style.test_prompt": "测试提示词",
|
||||
"style.test_video_prompt": "测试视频提示词",
|
||||
"style.test_prompt_help": "输入测试提示词来预览风格效果",
|
||||
"style.preview": "🖼️ 生成预览",
|
||||
"style.video_preview": "🎬 生成视频预览",
|
||||
"style.previewing": "正在生成风格预览...",
|
||||
"style.video_previewing": "正在生成视频预览...",
|
||||
"style.preview_success": "✅ 预览生成成功!",
|
||||
"style.video_preview_success": "✅ 视频预览生成成功!",
|
||||
"style.preview_caption": "风格预览",
|
||||
"style.preview_failed": "预览失败:{error}",
|
||||
"style.preview_failed_general": "预览图片生成失败",
|
||||
@@ -81,8 +86,15 @@
|
||||
"template.modern": "现代",
|
||||
"template.neon": "霓虹",
|
||||
"template.what": "控制视频每一帧的视觉布局和设计风格(标题、文本、图片的排版样式)",
|
||||
"template.how": "将 .html 模板文件放入 templates/尺寸/ 目录(如 templates/1080x1920/),系统会自动按尺寸分组。支持自定义 CSS 样式。\n\n**注意**\n\n您的计算机上必须安装以下至少一种浏览器才能正常运行:\n1. Google Chrome(Windows、MacOS)\n2. Chromium 浏览器(Linux)\n3. Microsoft Edge",
|
||||
"template.how": "将 .html 模板文件放入 templates/尺寸/ 目录(如 templates/1080x1920/),系统会自动按尺寸分组。支持自定义 CSS 样式。\n\n**模板命名规范**\n\n- `static_*.html` → 静态样式模板(无需AI生成媒体)\n- `image_*.html` → 生成插图模板(AI生成图片)\n- `video_*.html` → 生成视频模板(AI生成视频)\n\n**注意**\n\n您的计算机上必须安装以下至少一种浏览器才能正常运行:\n1. Google Chrome(Windows、macOS)\n2. Chromium 浏览器(Linux)\n3. Microsoft Edge",
|
||||
"template.size_info": "模板尺寸",
|
||||
"template.type_selector": "分镜类型",
|
||||
"template.type.static": "📄 静态样式",
|
||||
"template.type.image": "🖼️ 生成插图",
|
||||
"template.type.video": "🎬 生成视频",
|
||||
"template.type.static_hint": "使用模板自带样式,无需AI生成媒体。可在模板中自定义背景图片等参数。",
|
||||
"template.type.image_hint": "AI自动根据文案内容生成与之匹配的插图,插图尺寸由模板决定。",
|
||||
"template.type.video_hint": "AI自动根据文案内容生成与之匹配的视频片段,视频尺寸由模板决定。",
|
||||
|
||||
"orientation.portrait": "竖屏",
|
||||
"orientation.landscape": "横屏",
|
||||
@@ -140,12 +152,16 @@
|
||||
"progress.generating_narrations": "生成旁白...",
|
||||
"progress.splitting_script": "切分脚本...",
|
||||
"progress.generating_image_prompts": "生成图片提示词...",
|
||||
"progress.generating_video_prompts": "生成视频提示词...",
|
||||
"progress.preparing_frames": "准备分镜...",
|
||||
"progress.frame": "分镜 {current}/{total}",
|
||||
"progress.frame_step": "分镜 {current}/{total} - 步骤 {step}/4: {action}",
|
||||
"progress.step_audio": "生成语音...",
|
||||
"progress.step_image": "生成插图...",
|
||||
"progress.step_compose": "合成画面...",
|
||||
"progress.step_video": "创建视频片段...",
|
||||
"progress.processing_frame": "处理分镜 {current}/{total}...",
|
||||
"progress.step_audio": "生成语音",
|
||||
"progress.step_image": "生成插图",
|
||||
"progress.step_media": "生成媒体",
|
||||
"progress.step_compose": "合成画面",
|
||||
"progress.step_video": "创建视频片段",
|
||||
"progress.concatenating": "正在拼接视频...",
|
||||
"progress.finalizing": "完成中...",
|
||||
"progress.completed": "✅ 生成完成",
|
||||
|
||||
5
workflows/runninghub/video_wan2.1_fusionx.json
Normal file
5
workflows/runninghub/video_wan2.1_fusionx.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"source": "runninghub",
|
||||
"workflow_id": "1985909483975188481"
|
||||
}
|
||||
|
||||
187
workflows/selfhost/video_wan2.1_fusionx.json
Normal file
187
workflows/selfhost/video_wan2.1_fusionx.json
Normal file
@@ -0,0 +1,187 @@
|
||||
{
|
||||
"3": {
|
||||
"inputs": {
|
||||
"seed": 576600626757621,
|
||||
"steps": 10,
|
||||
"cfg": 1,
|
||||
"sampler_name": "uni_pc",
|
||||
"scheduler": "normal",
|
||||
"denoise": 1,
|
||||
"model": [
|
||||
"48",
|
||||
0
|
||||
],
|
||||
"positive": [
|
||||
"6",
|
||||
0
|
||||
],
|
||||
"negative": [
|
||||
"7",
|
||||
0
|
||||
],
|
||||
"latent_image": [
|
||||
"40",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "KSampler",
|
||||
"_meta": {
|
||||
"title": "KSampler"
|
||||
}
|
||||
},
|
||||
"6": {
|
||||
"inputs": {
|
||||
"text": [
|
||||
"49",
|
||||
0
|
||||
],
|
||||
"clip": [
|
||||
"38",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "CLIPTextEncode",
|
||||
"_meta": {
|
||||
"title": "CLIP Text Encode (Positive Prompt)"
|
||||
}
|
||||
},
|
||||
"7": {
|
||||
"inputs": {
|
||||
"text": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
|
||||
"clip": [
|
||||
"38",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "CLIPTextEncode",
|
||||
"_meta": {
|
||||
"title": "CLIP Text Encode (Negative Prompt)"
|
||||
}
|
||||
},
|
||||
"8": {
|
||||
"inputs": {
|
||||
"samples": [
|
||||
"3",
|
||||
0
|
||||
],
|
||||
"vae": [
|
||||
"39",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "VAEDecode",
|
||||
"_meta": {
|
||||
"title": "VAE Decode"
|
||||
}
|
||||
},
|
||||
"30": {
|
||||
"inputs": {
|
||||
"frame_rate": 16,
|
||||
"loop_count": 0,
|
||||
"filename_prefix": "Video",
|
||||
"format": "video/h264-mp4",
|
||||
"pix_fmt": "yuv420p",
|
||||
"crf": 19,
|
||||
"save_metadata": true,
|
||||
"trim_to_audio": false,
|
||||
"pingpong": false,
|
||||
"save_output": true,
|
||||
"images": [
|
||||
"8",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "VHS_VideoCombine",
|
||||
"_meta": {
|
||||
"title": "Video Combine 🎥🅥🅗🅢"
|
||||
}
|
||||
},
|
||||
"37": {
|
||||
"inputs": {
|
||||
"unet_name": "wan-fusionx/WanT2V_MasterModel.safetensors",
|
||||
"weight_dtype": "default"
|
||||
},
|
||||
"class_type": "UNETLoader",
|
||||
"_meta": {
|
||||
"title": "Load Diffusion Model"
|
||||
}
|
||||
},
|
||||
"38": {
|
||||
"inputs": {
|
||||
"clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
|
||||
"type": "wan",
|
||||
"device": "default"
|
||||
},
|
||||
"class_type": "CLIPLoader",
|
||||
"_meta": {
|
||||
"title": "Load CLIP"
|
||||
}
|
||||
},
|
||||
"39": {
|
||||
"inputs": {
|
||||
"vae_name": "wan_2.1_vae.safetensors"
|
||||
},
|
||||
"class_type": "VAELoader",
|
||||
"_meta": {
|
||||
"title": "Load VAE"
|
||||
}
|
||||
},
|
||||
"40": {
|
||||
"inputs": {
|
||||
"width": [
|
||||
"50",
|
||||
0
|
||||
],
|
||||
"height": [
|
||||
"51",
|
||||
0
|
||||
],
|
||||
"length": 81,
|
||||
"batch_size": 1
|
||||
},
|
||||
"class_type": "EmptyHunyuanLatentVideo",
|
||||
"_meta": {
|
||||
"title": "EmptyHunyuanLatentVideo"
|
||||
}
|
||||
},
|
||||
"48": {
|
||||
"inputs": {
|
||||
"shift": 1,
|
||||
"model": [
|
||||
"37",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "ModelSamplingSD3",
|
||||
"_meta": {
|
||||
"title": "Shift"
|
||||
}
|
||||
},
|
||||
"49": {
|
||||
"inputs": {
|
||||
"value": "草地上有个小狗在奔跑"
|
||||
},
|
||||
"class_type": "PrimitiveStringMultiline",
|
||||
"_meta": {
|
||||
"title": "$prompt.value!"
|
||||
}
|
||||
},
|
||||
"50": {
|
||||
"inputs": {
|
||||
"value": 512
|
||||
},
|
||||
"class_type": "easy int",
|
||||
"_meta": {
|
||||
"title": "$width.value"
|
||||
}
|
||||
},
|
||||
"51": {
|
||||
"inputs": {
|
||||
"value": 288
|
||||
},
|
||||
"class_type": "easy int",
|
||||
"_meta": {
|
||||
"title": "$height.value"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user