feat: Add VLMConfig to schema for proper config.yaml VLM support

2026-01-07 09:38:45 +08:00
parent bc077475c6
commit f19804facb
3 changed files with 19 additions and 8 deletions
--- a/pixelle_video/config/init.py
+++ b/pixelle_video/config/init.py
@@ -29,7 +29,7 @@ Usage:
    if config_manager.validate():
        print("Config is valid!")
 """
-from .schema import PixelleVideoConfig, LLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig, VideoSubConfig
+from .schema import PixelleVideoConfig, LLMConfig, VLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig, VideoSubConfig
 from .manager import ConfigManager
 from .loader import load_config_dict, save_config_dict
@@ -38,7 +38,8 @@ config_manager = ConfigManager()
 __all__ = [
    "PixelleVideoConfig",
-    "LLMConfig", 
+    "LLMConfig",
    "VLMConfig",
    "ComfyUIConfig",
    "TTSSubConfig",
    "ImageSubConfig",
--- a/pixelle_video/config/schema.py
+++ b/pixelle_video/config/schema.py
@@ -26,6 +26,14 @@ class LLMConfig(BaseModel):
    model: str = Field(default="", description="LLM Model Name")
 class VLMConfig(BaseModel):
    """VLM (Vision Language Model) configuration for character analysis.

    Backs the ``vlm`` field of ``PixelleVideoConfig``. Every field has a
    default, so the section can be left out and a default instance is used.

    The character analyzer consults the environment variables VLM_PROVIDER,
    VLM_API_KEY / DASHSCOPE_API_KEY, VLM_BASE_URL and VLM_MODEL first and
    falls back to these values only when the variable is unset or empty.
    """
    # Provider identifier; the analyzer branches on this value (e.g. "qwen").
    provider: str = Field(default="qwen", description="VLM provider: qwen, glm, openai")
    # Empty string is the "not configured" value (falsy in the analyzer's `or` chain).
    api_key: str = Field(default="", description="VLM API Key")
    # Left empty by default; per the description the endpoint is then auto-detected.
    base_url: str = Field(default="", description="VLM API Base URL (auto-detected if empty)")
    # Left empty by default; per the description a provider-specific model is then used.
    model: str = Field(default="", description="VLM Model Name (defaults based on provider)")
 class TTSLocalConfig(BaseModel):
    """Local TTS configuration (Edge TTS)"""
    voice: str = Field(default="zh-CN-YunjianNeural", description="Edge TTS voice ID")
@@ -92,6 +100,7 @@ class PixelleVideoConfig(BaseModel):
    """Pixelle-Video main configuration"""
    project_name: str = Field(default="Pixelle-Video", description="Project name")
    llm: LLMConfig = Field(default_factory=LLMConfig)
    vlm: VLMConfig = Field(default_factory=VLMConfig)
    comfyui: ComfyUIConfig = Field(default_factory=ComfyUIConfig)
    template: TemplateConfig = Field(default_factory=TemplateConfig)
--- a/pixelle_video/services/quality/character_analyzer.py
+++ b/pixelle_video/services/quality/character_analyzer.py
@@ -149,13 +149,14 @@ Output ONLY the JSON object, no additional text."""
            # Priority: Environment variables > config.yaml > defaults
            from pixelle_video.config import config_manager
-            # Try to get VLM config from config.yaml
+            # VLM config from config.yaml (now part of PixelleVideoConfig)
-            vlm_config = getattr(config_manager.config, 'vlm', None)
+            vlm_config = config_manager.config.vlm
-            vlm_provider = os.getenv("VLM_PROVIDER") or (vlm_config.provider if vlm_config and hasattr(vlm_config, 'provider') else "qwen")
+            # Environment variables override config.yaml
-            vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY") or (vlm_config.api_key if vlm_config and hasattr(vlm_config, 'api_key') else None)
+            vlm_provider = os.getenv("VLM_PROVIDER") or vlm_config.provider or "qwen"
-            vlm_base_url = os.getenv("VLM_BASE_URL") or (vlm_config.base_url if vlm_config and hasattr(vlm_config, 'base_url') else None)
+            vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY") or vlm_config.api_key
-            vlm_model = os.getenv("VLM_MODEL") or (vlm_config.model if vlm_config and hasattr(vlm_config, 'model') else None)
+            vlm_base_url = os.getenv("VLM_BASE_URL") or vlm_config.base_url
            vlm_model = os.getenv("VLM_MODEL") or vlm_config.model
            # Configure based on provider
            if vlm_provider == "qwen":