fix: Auto-detect and use GLM-4V vision model for character analysis
@@ -155,15 +155,27 @@ Output ONLY the JSON object, no additional text."""
             base_url=llm_config.base_url
         )
 
+        # Use vision model - GLM-4V for ZhiPu, or fall back to configured model
+        # Vision models: glm-4v, glm-4v-flash, gpt-4-vision-preview
+        vision_model = llm_config.model
+        if "glm" in llm_config.model.lower() and "v" not in llm_config.model.lower():
+            vision_model = "glm-4v-flash"  # Use GLM-4V for vision tasks
+        logger.info(f"Using vision model: {vision_model}")
+
         # Call VLM
         response = await client.chat.completions.create(
-            model=llm_config.model,
+            model=vision_model,
             messages=messages,
             temperature=0.3,
             max_tokens=800
         )
-        vlm_response = response.choices[0].message.content
-        logger.debug(f"VLM character analysis response: {vlm_response[:150]}...")
+
+        vlm_response = response.choices[0].message.content if response.choices else None
+
+        if vlm_response:
+            logger.debug(f"VLM character analysis response: {vlm_response[:150] if len(vlm_response) > 150 else vlm_response}...")
+        else:
+            logger.warning(f"VLM returned empty content. Full response: {response}")
 
         # Parse response
         return self._parse_response(vlm_response)
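Below is a minimal standalone sketch of the two behaviors this commit introduces: switching to a vision-capable model when the configured GLM model is text-only, and guarding against an empty choices list before reading the response content. The helper names select_vision_model and extract_content are hypothetical and not part of the repository; the branching mirrors the diff above. Note the heuristic treats any GLM model name containing the letter "v" as already vision-capable.

# Sketch only; helper names are hypothetical, logic mirrors the diff above.

def select_vision_model(configured_model: str) -> str:
    """Return a vision-capable model name.

    If the configured model looks like a GLM text model (name contains "glm"
    but no "v"), switch to "glm-4v-flash"; otherwise keep the configured model.
    """
    name = configured_model.lower()
    if "glm" in name and "v" not in name:
        return "glm-4v-flash"
    return configured_model


def extract_content(response) -> str | None:
    """Safely pull the message content out of a chat completion response,
    returning None instead of raising IndexError when choices is empty."""
    return response.choices[0].message.content if response.choices else None


if __name__ == "__main__":
    # "glm-4-plus" has no "v", so it is swapped for the vision model;
    # "glm-4v" and non-GLM models pass through unchanged.
    print(select_vision_model("glm-4-plus"))            # glm-4v-flash
    print(select_vision_model("glm-4v"))                # glm-4v
    print(select_vision_model("gpt-4-vision-preview"))  # gpt-4-vision-preview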