feat: Add VLM config to config.example.yaml with config.yaml support

2026-01-07 09:32:16 +08:00
parent 92183b083b
commit 9675b9c23b
2 changed files with 25 additions and 5 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -17,6 +17,20 @@ llm:
 # DeepSeek:        base_url: "https://api.deepseek.com"                           model: "deepseek-chat"
 # Ollama (Local):  base_url: "http://localhost:11434/v1"                          model: "llama3.2"

+# ==================== VLM Configuration (Vision Language Model) ====================
+# Used for character analysis and image understanding
+# If not configured, the LLM config is tried instead, with vision model auto-detection
+vlm:
+  provider: "qwen"  # Options: qwen, glm, openai
+  api_key: ""       # Fallback only: the VLM_API_KEY / DASHSCOPE_API_KEY env vars (checked in that order) take precedence when set
+  base_url: ""      # Leave empty for auto-detection based on provider
+  model: ""         # Leave empty for default model based on provider
+
+# VLM Provider presets:
+# Qwen (通义千问):   provider: "qwen"   model: "qwen-vl-plus" or "qwen-vl-max" or "qwen3-vl-plus"
+# GLM (智谱):       provider: "glm"    model: "glm-4v-flash" or "glm-4v"
+# OpenAI:          provider: "openai" model: "gpt-4-vision-preview" or "gpt-4o"
+
 # ==================== ComfyUI Configuration ====================
 comfyui:
  # Global ComfyUI settings
--- a/pixelle_video/services/quality/character_analyzer.py
+++ b/pixelle_video/services/quality/character_analyzer.py
@@ -145,12 +145,18 @@ Output ONLY the JSON object, no additional text."""
                }
            ]
            
-            # Get VLM configuration from environment or fallback to LLM config
+            # Get VLM configuration
+            # Priority: Environment variables > config.yaml > defaults
            import os
-            vlm_provider = os.getenv("VLM_PROVIDER", "qwen")  # qwen, glm, openai
-            vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY")
-            vlm_base_url = os.getenv("VLM_BASE_URL")
-            vlm_model = os.getenv("VLM_MODEL")
+            from pixelle_video.config import config_manager
+            
+            # Try to get VLM config from config.yaml
+            vlm_config = getattr(config_manager.config, 'vlm', None)
+            
+            vlm_provider = os.getenv("VLM_PROVIDER") or (vlm_config.provider if vlm_config and hasattr(vlm_config, 'provider') else "qwen")
+            vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY") or (vlm_config.api_key if vlm_config and hasattr(vlm_config, 'api_key') else None)
+            vlm_base_url = os.getenv("VLM_BASE_URL") or (vlm_config.base_url if vlm_config and hasattr(vlm_config, 'base_url') else None)
+            vlm_model = os.getenv("VLM_MODEL") or (vlm_config.model if vlm_config and hasattr(vlm_config, 'model') else None)
            
            # Configure based on provider
            if vlm_provider == "qwen":