# Pixelle-Video Configuration
# Copy this file to config.yaml and fill in your settings.
# ⚠️ Never commit config.yaml to Git! (it contains API keys)

project_name: Pixelle-Video

# ==================== LLM Configuration ====================
# Supports any OpenAI SDK compatible API
llm:
  api_key: ""   # API key for the selected endpoint
  base_url: ""  # Endpoint URL (see presets below)
  model: ""     # Model name (see presets below)

  # Popular presets:
  #   Qwen Max:       base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"  model: "qwen-max"
  #   OpenAI GPT-4o:  base_url: "https://api.openai.com/v1"                          model: "gpt-4o"
  #   DeepSeek:       base_url: "https://api.deepseek.com"                           model: "deepseek-chat"
  #   Ollama (Local): base_url: "http://localhost:11434/v1"                          model: "llama3.2"

# ==================== VLM Configuration (Vision Language Model) ====================
# Used for character analysis and image understanding
# If not configured, will try to use LLM config with vision model auto-detection
vlm:
  provider: "qwen"  # Options: qwen, glm, openai
  api_key: ""       # Leave empty to use DASHSCOPE_API_KEY or VLM_API_KEY env var
  base_url: ""      # Leave empty for auto-detection based on provider
  model: ""         # Leave empty for default model based on provider

  # VLM Provider presets:
  #   Qwen (Tongyi Qianwen): provider: "qwen"    model: "qwen-vl-plus" or "qwen-vl-max" or "qwen3-vl-plus"
  #   GLM (Zhipu AI):        provider: "glm"     model: "glm-4v-flash" or "glm-4v"
  #   OpenAI:                provider: "openai"  model: "gpt-4-vision-preview" or "gpt-4o"

# ==================== ComfyUI Configuration ====================
# NOTE(review): the tts/image/video sub-sections are nested under `comfyui`
# based on the section layout — confirm against the config loader.
comfyui:
  # Global ComfyUI settings
  # Note for Docker users: Use host.docker.internal:8188 (Mac/Windows) or host IP address (Linux)
  comfyui_url: http://127.0.0.1:8188  # ComfyUI server URL (required for selfhost workflows)
  comfyui_api_key: ""  # ComfyUI API key (optional, get from https://platform.comfy.org/profile/api-keys)
  runninghub_api_key: ""  # RunningHub API key (required for runninghub workflows)
  runninghub_concurrent_limit: 1  # Concurrent execution limit for RunningHub (1-10, default 1 for regular members)

  # TTS-specific configuration
  tts:
    default_workflow: selfhost/tts_edge.json  # TTS workflow to use

  # Image-specific configuration
  image:
    # Required: Default workflow to use (no fallback)
    # Options: runninghub/image_flux.json (recommended, no local setup)
    #          selfhost/image_flux.json (requires local ComfyUI)
    default_workflow: runninghub/image_flux.json

    # Image prompt prefix (optional)
    prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"

  # Video-specific configuration
  video:
    # Required: Default workflow to use (no fallback)
    # Options: runninghub/video_wan2.1_fusionx.json (recommended, no local setup)
    #          selfhost/video_wan2.1_fusionx.json (requires local ComfyUI)
    default_workflow: runninghub/video_wan2.1_fusionx.json

    # Video prompt prefix (optional)
    prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"

# ==================== Template Configuration ====================
# Configure default template for video generation
template:
  # Default frame template to use when not explicitly specified.
  # Determines video aspect ratio and layout style.
  #
  # Template naming convention:
  #   - static_*.html: Static style templates (no AI-generated media)
  #   - image_*.html:  Templates requiring AI-generated images
  #   - video_*.html:  Templates requiring AI-generated videos
  #
  # Options:
  #   - 1080x1920 (vertical/portrait): image_default.html, image_modern.html, image_elegant.html, static_simple.html, etc.
  #   - 1080x1080 (square): image_minimal_framed.html, etc.
  #   - 1920x1080 (horizontal/landscape): image_film.html, image_full.html, etc.
  #
  # See templates/ directory for all available templates
  default_template: "1080x1920/image_default.html"

# ==================== Quality Control Configuration ====================
# Configure quality evaluation for generated content
# NOTE(review): `hybrid` and `character` are nested under `quality` based on
# the section layout — confirm against the config loader.
quality:
  # Enable quality checking (set to false to skip all quality checks)
  enable_quality_check: true

  # Hybrid evaluation settings
  hybrid:
    enable_clip_score: true         # Use CLIP for image-text matching
    clip_model: "ViT-B/32"          # CLIP model variant
    enable_technical_metrics: true  # Use technical quality metrics
    enable_smart_skip: true         # Skip VLM when objective scores are good
    smart_skip_threshold: 0.75      # Threshold for smart skip

  # Character consistency settings
  character:
    enable_visual_features: true       # Enable CLIP visual features for characters
    visual_similarity_threshold: 0.75  # Min similarity for character consistency