diff --git a/config.example.yaml b/config.example.yaml index 4a55a0a..71c1470 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -17,6 +17,20 @@ llm: # DeepSeek: base_url: "https://api.deepseek.com" model: "deepseek-chat" # Ollama (Local): base_url: "http://localhost:11434/v1" model: "llama3.2" +# ==================== VLM Configuration (Vision Language Model) ==================== +# Used for character analysis and image understanding +# If not configured, will try to use LLM config with vision model auto-detection +vlm: + provider: "qwen" # Options: qwen, glm, openai + api_key: "" # Used only when the VLM_API_KEY and DASHSCOPE_API_KEY env vars are both unset (env vars take priority) + base_url: "" # Leave empty for auto-detection based on provider + model: "" # Leave empty for default model based on provider + +# VLM Provider presets: +# Qwen (通义千问): provider: "qwen" model: "qwen-vl-plus" or "qwen-vl-max" or "qwen3-vl-plus" +# GLM (智谱): provider: "glm" model: "glm-4v-flash" or "glm-4v" +# OpenAI: provider: "openai" model: "gpt-4-vision-preview" or "gpt-4o" + # ==================== ComfyUI Configuration ==================== comfyui: # Global ComfyUI settings diff --git a/pixelle_video/services/quality/character_analyzer.py b/pixelle_video/services/quality/character_analyzer.py index 349559e..4cf607e 100644 --- a/pixelle_video/services/quality/character_analyzer.py +++ b/pixelle_video/services/quality/character_analyzer.py @@ -145,12 +145,18 @@ Output ONLY the JSON object, no additional text.""" } ] - # Get VLM configuration from environment or fallback to LLM config + # Get VLM configuration + # Priority: Environment variables > config.yaml > defaults import os - vlm_provider = os.getenv("VLM_PROVIDER", "qwen") # qwen, glm, openai - vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY") - vlm_base_url = os.getenv("VLM_BASE_URL") - vlm_model = os.getenv("VLM_MODEL") + from pixelle_video.config import config_manager + + # Try to get VLM config from config.yaml + vlm_config = 
getattr(config_manager.config, 'vlm', None) + + vlm_provider = os.getenv("VLM_PROVIDER") or (vlm_config.provider if vlm_config and hasattr(vlm_config, 'provider') else "qwen") + vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY") or (vlm_config.api_key if vlm_config and hasattr(vlm_config, 'api_key') else None) + vlm_base_url = os.getenv("VLM_BASE_URL") or (vlm_config.base_url if vlm_config and hasattr(vlm_config, 'base_url') else None) + vlm_model = os.getenv("VLM_MODEL") or (vlm_config.model if vlm_config and hasattr(vlm_config, 'model') else None) # Configure based on provider if vlm_provider == "qwen":