fix: Enhance VLM response parsing to handle markdown code blocks

2026-01-07 03:31:42 +08:00
parent 44249889df
commit 8c35b0066f
1 changed file with 35 additions and 8 deletions
--- a/pixelle_video/services/quality/character_analyzer.py
+++ b/pixelle_video/services/quality/character_analyzer.py
@@ -174,13 +174,33 @@ Output ONLY the JSON object, no additional text."""
    def _parse_response(self, response: str) -> CharacterAnalysisResult:
        """Parse VLM response into CharacterAnalysisResult"""
        if not response:
            logger.warning("Empty VLM response")
            return CharacterAnalysisResult()
        # Log full response for debugging
        logger.debug(f"Full VLM response:\n{response}")
        try:
            # Remove markdown code blocks if present
            cleaned = response.strip()
            if cleaned.startswith("```json"):
                cleaned = cleaned[7:]
            elif cleaned.startswith("```"):
                cleaned = cleaned[3:]
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
            cleaned = cleaned.strip()
            # Try to extract JSON from response
-            match = re.search(r'\{[\s\S]*\}', response)
+            match = re.search(r'\{[\s\S]*\}', cleaned)
            if match:
-                data = json.loads(match.group())
+                json_str = match.group()
                logger.debug(f"Extracted JSON: {json_str[:200]}...")
                data = json.loads(json_str)
            else:
-                data = json.loads(response)
+                logger.warning(f"No JSON found in response, trying direct parse")
                data = json.loads(cleaned)
            result = CharacterAnalysisResult(
                appearance_description=data.get("appearance_description", ""),
@@ -193,11 +213,18 @@ Output ONLY the JSON object, no additional text."""
        except (json.JSONDecodeError, KeyError) as e:
            logger.warning(f"Failed to parse VLM response: {e}")
            logger.debug(f"Response that failed to parse: {response[:500]}")
-            # Try to use the raw response as appearance description
+            # Try to use the raw response as appearance description (fallback)
-            if len(response) < 500 and len(response) > 20:
+            if response and 20 < len(response) < 500:
-                return CharacterAnalysisResult(
+                # Clean up the response
-                    appearance_description=response.strip()
+                fallback = response.strip()
-                )
+                if "```" in fallback:
                    fallback = re.sub(r'```.*?```', '', fallback, flags=re.DOTALL).strip()
                if fallback:
                    logger.info(f"Using raw response as appearance: {fallback[:80]}...")
                    return CharacterAnalysisResult(
                        appearance_description=fallback
                    )
            return CharacterAnalysisResult()