From 8d82cf91d54d0cf2ce07cdc557fd402bb43a659f Mon Sep 17 00:00:00 2001
From: empty
Date: Wed, 7 Jan 2026 03:33:56 +0800
Subject: [PATCH] fix: Auto-detect and use GLM-4V vision model for character
 analysis

---
 .../services/quality/character_analyzer.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/pixelle_video/services/quality/character_analyzer.py b/pixelle_video/services/quality/character_analyzer.py
index bd9249a..db906fa 100644
--- a/pixelle_video/services/quality/character_analyzer.py
+++ b/pixelle_video/services/quality/character_analyzer.py
@@ -155,15 +155,27 @@ Output ONLY the JSON object, no additional text."""
             base_url=llm_config.base_url
         )
 
+        # Use vision model - GLM-4V for ZhiPu, or fall back to configured model
+        # Vision models: glm-4v, glm-4v-flash, gpt-4-vision-preview
+        vision_model = llm_config.model
+        if "glm" in llm_config.model.lower() and "v" not in llm_config.model.lower():
+            vision_model = "glm-4v-flash"  # Use GLM-4V for vision tasks
+            logger.info(f"Using vision model: {vision_model}")
+
         # Call VLM
         response = await client.chat.completions.create(
-            model=llm_config.model,
+            model=vision_model,
             messages=messages,
             temperature=0.3,
             max_tokens=800
         )
-        vlm_response = response.choices[0].message.content
-        logger.debug(f"VLM character analysis response: {vlm_response[:150]}...")
+
+        vlm_response = response.choices[0].message.content if response.choices else None
+
+        if vlm_response:
+            logger.debug(f"VLM character analysis response: {vlm_response[:150] if len(vlm_response) > 150 else vlm_response}...")
+        else:
+            logger.warning(f"VLM returned empty content. Full response: {response}")
 
         # Parse response
         return self._parse_response(vlm_response)