feat: Add Qwen VL support for character analysis, configurable via VLM_PROVIDER

empty
2026-01-07 09:29:43 +08:00
parent be216eacad
commit 92183b083b


@@ -145,29 +145,51 @@ Output ONLY the JSON object, no additional text."""
             }
         ]
 
-        # Get LLM config
-        from pixelle_video.config import config_manager
-        llm_config = config_manager.config.llm
+        # Get VLM configuration from environment, or fall back to LLM config
+        import os
+        vlm_provider = os.getenv("VLM_PROVIDER", "qwen")  # qwen, glm, openai
+        vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY")
+        vlm_base_url = os.getenv("VLM_BASE_URL")
+        vlm_model = os.getenv("VLM_MODEL")
 
-        # Create OpenAI client for VLM call
+        # Configure based on provider
+        if vlm_provider == "qwen":
+            # Tongyi Qianwen (Qwen) VL
+            vlm_base_url = vlm_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
+            vlm_model = vlm_model or "qwen-vl-plus"  # or qwen-vl-max, qwen3-vl-plus
+            logger.info(f"Using Qwen VL: model={vlm_model}")
+        elif vlm_provider == "glm":
+            # ZhiPu GLM-4V
+            from pixelle_video.config import config_manager
+            llm_config = config_manager.config.llm
+            vlm_api_key = vlm_api_key or llm_config.api_key
+            vlm_base_url = vlm_base_url or llm_config.base_url
+            vlm_model = vlm_model or "glm-4v-flash"
+            logger.info(f"Using GLM VL: model={vlm_model}")
+        else:  # openai or other
+            from pixelle_video.config import config_manager
+            llm_config = config_manager.config.llm
+            vlm_api_key = vlm_api_key or llm_config.api_key
+            vlm_base_url = vlm_base_url or llm_config.base_url
+            vlm_model = vlm_model or llm_config.model
+            logger.info(f"Using {vlm_provider} VL: model={vlm_model}")
+
+        if not vlm_api_key:
+            logger.error("No VLM API key configured. Set VLM_API_KEY or DASHSCOPE_API_KEY environment variable.")
+            return CharacterAnalysisResult()
+
+        # Create OpenAI-compatible client
         client = AsyncOpenAI(
-            api_key=llm_config.api_key,
-            base_url=llm_config.base_url
+            api_key=vlm_api_key,
+            base_url=vlm_base_url
         )
 
-        # Use vision model - GLM-4V for ZhiPu, or fall back to configured model
-        # Vision models: glm-4v, glm-4v-flash, gpt-4-vision-preview
-        vision_model = llm_config.model
-        if "glm" in llm_config.model.lower() and "v" not in llm_config.model.lower():
-            vision_model = "glm-4v-flash"  # Use GLM-4V for vision tasks
-        logger.info(f"Using vision model: {vision_model}")
-
         # Call VLM
         response = await client.chat.completions.create(
-            model=vision_model,
+            model=vlm_model,
             messages=messages,
             temperature=0.3,
-            max_tokens=2000  # Increased to avoid truncation
+            max_tokens=2000
         )
 
         vlm_response = response.choices[0].message.content if response.choices else None
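
A minimal configuration sketch for the Qwen path added above, assuming the environment variables are set before this code reads them at call time; the API key value and the commented-out overrides are placeholders, not values from this commit:

    import os

    os.environ["VLM_PROVIDER"] = "qwen"          # or "glm", "openai"
    os.environ["DASHSCOPE_API_KEY"] = "sk-..."   # VLM_API_KEY is also accepted
    # Optional overrides; otherwise the Qwen defaults in the diff apply:
    # os.environ["VLM_BASE_URL"] = "https://dashscope.aliyuncs.com/compatible-mode/v1"
    # os.environ["VLM_MODEL"] = "qwen-vl-max"

With no VLM_* variables set at all, the glm and openai branches fall back to the existing config_manager LLM settings, while the qwen branch still requires VLM_API_KEY or DASHSCOPE_API_KEY and otherwise logs an error and returns an empty CharacterAnalysisResult.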