# AI-Video/config.example.yaml

# Pixelle-Video Configuration
# Copy this file to config.yaml and fill in your settings
# ⚠️ Never commit config.yaml to Git!

# Project name.
# NOTE(review): how the application consumes this value is not visible in this file — confirm before changing it.
project_name: Pixelle-Video

# ==================== LLM Configuration ====================
# Supports any OpenAI SDK compatible API
llm:
  api_key: ""   # API key for the chosen OpenAI-SDK-compatible provider
  base_url: ""  # Provider API endpoint, e.g. "https://api.openai.com/v1"
  model: ""     # Model name served by that endpoint, e.g. "gpt-4o"

# Popular presets:
# Qwen Max:        base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"  model: "qwen-max"
# OpenAI GPT-4o:   base_url: "https://api.openai.com/v1"                          model: "gpt-4o"
# DeepSeek:        base_url: "https://api.deepseek.com"                           model: "deepseek-chat"
# Ollama (Local):  base_url: "http://localhost:11434/v1"                          model: "llama3.2"

# ==================== VLM Configuration (Vision Language Model) ====================
# Used for character analysis and image understanding
# If this section is not configured, the application falls back to the LLM config and tries to auto-detect a vision model
vlm:
  provider: "qwen"  # One of: qwen, glm, openai
  api_key: ""       # Leave empty to read the key from the DASHSCOPE_API_KEY or VLM_API_KEY environment variable
  base_url: ""      # Leave empty to auto-detect the endpoint from the provider
  model: ""         # Leave empty to use the provider's default model; otherwise e.g. "qwen-vl-max", "glm-4v", "gpt-4o"

# VLM Provider presets:
# Qwen (通义千问):   provider: "qwen"   model: "qwen-vl-plus" or "qwen-vl-max" or "qwen3-vl-plus"
# GLM (智谱):       provider: "glm"    model: "glm-4v-flash" or "glm-4v"
# OpenAI:          provider: "openai" model: "gpt-4-vision-preview" or "gpt-4o"

# ==================== ComfyUI Configuration ====================
comfyui:
  # ---- Global ComfyUI settings ----

  # ComfyUI server URL (required for selfhost workflows).
  # Docker users: point this at the host instead of 127.0.0.1, e.g.
  #   http://host.docker.internal:8188 (Mac/Windows) or http://<host-ip>:8188 (Linux)
  comfyui_url: "http://127.0.0.1:8188"

  # ComfyUI API key (optional, get from https://platform.comfy.org/profile/api-keys)
  comfyui_api_key: ""

  # RunningHub API key (required for runninghub workflows)
  runninghub_api_key: ""

  # Concurrent execution limit for RunningHub (1-10, default 1 for regular members)
  runninghub_concurrent_limit: 1

  # ---- TTS-specific configuration ----
  tts:
    # TTS workflow to use
    default_workflow: "selfhost/tts_edge.json"

  # ---- Image-specific configuration ----
  image:
    # Required: default workflow to use (there is no fallback).
    # Options:
    #   runninghub/image_flux.json  (recommended, no local setup)
    #   selfhost/image_flux.json    (requires local ComfyUI)
    default_workflow: "runninghub/image_flux.json"

    # Image prompt prefix (optional)
    prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"

  # ---- Video-specific configuration ----
  video:
    # Required: default workflow to use (there is no fallback).
    # Options:
    #   runninghub/video_wan2.1_fusionx.json  (recommended, no local setup)
    #   selfhost/video_wan2.1_fusionx.json    (requires local ComfyUI)
    default_workflow: "runninghub/video_wan2.1_fusionx.json"

    # Video prompt prefix (optional)
    prompt_prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style"

# ==================== Template Configuration ====================
# Configure default template for video generation
template:
  # Default frame template, used when a template is not explicitly specified.
  # It determines the video aspect ratio and layout style.
  # The value has the form "<width>x<height>/<template file>".
  # Template naming convention:
  #   - static_*.html: Static style templates (no AI-generated media)
  #   - image_*.html: Templates requiring AI-generated images
  #   - video_*.html: Templates requiring AI-generated videos
  # Options, grouped by resolution:
  #   - 1080x1920 (vertical/portrait): image_default.html, image_modern.html, image_elegant.html, static_simple.html, etc.
  #   - 1080x1080 (square): image_minimal_framed.html, etc.
  #   - 1920x1080 (horizontal/landscape): image_film.html, image_full.html, etc.
  # See the templates/ directory for all available templates.
  default_template: "1080x1920/image_default.html"

# ==================== Quality Control Configuration ====================
# Configure quality evaluation for generated content
quality:
  # Master switch for quality checking (set to false to skip all quality checks)
  enable_quality_check: true

  # Hybrid evaluation settings
  hybrid:
    enable_clip_score: true           # Use CLIP for image-text matching
    clip_model: "ViT-B/32"            # CLIP model variant
    enable_technical_metrics: true    # Use technical quality metrics
    enable_smart_skip: true           # Skip VLM evaluation when objective scores are good
    smart_skip_threshold: 0.75        # Score threshold for smart skip — NOTE(review): scale and comparison direction not visible here; confirm

  # Character consistency settings
  character:
    enable_visual_features: true      # Enable CLIP visual features for characters
    visual_similarity_threshold: 0.75 # Minimum similarity for characters to count as consistent