From 8d82cf91d54d0cf2ce07cdc557fd402bb43a659f Mon Sep 17 00:00:00 2001
From: empty
Date: Wed, 7 Jan 2026 03:33:56 +0800
Subject: [PATCH] fix: Auto-detect and use GLM-4V vision model for character
 analysis

---
 .../services/quality/character_analyzer.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/pixelle_video/services/quality/character_analyzer.py b/pixelle_video/services/quality/character_analyzer.py
index bd9249a..db906fa 100644
--- a/pixelle_video/services/quality/character_analyzer.py
+++ b/pixelle_video/services/quality/character_analyzer.py
@@ -155,15 +155,27 @@ Output ONLY the JSON object, no additional text."""
             base_url=llm_config.base_url
         )
 
+        # Use vision model - GLM-4V for ZhiPu, or fall back to configured model
+        # Vision models: glm-4v, glm-4v-flash, gpt-4-vision-preview
+        vision_model = llm_config.model
+        if "glm" in llm_config.model.lower() and "v" not in llm_config.model.lower():
+            vision_model = "glm-4v-flash"  # Use GLM-4V for vision tasks
+            logger.info(f"Using vision model: {vision_model}")
+
         # Call VLM
         response = await client.chat.completions.create(
-            model=llm_config.model,
+            model=vision_model,
             messages=messages,
             temperature=0.3,
             max_tokens=800
         )
-        vlm_response = response.choices[0].message.content
-        logger.debug(f"VLM character analysis response: {vlm_response[:150]}...")
+
+        vlm_response = response.choices[0].message.content if response.choices else None
+
+        if vlm_response:
+            logger.debug(f"VLM character analysis response: {vlm_response[:150] if len(vlm_response) > 150 else vlm_response}...")
+        else:
+            logger.warning(f"VLM returned empty content. Full response: {response}")
 
         # Parse response
         return self._parse_response(vlm_response)