Improve screenshot analysis prompt and add debug logs

- Simplified prompt to force JSON-only response
- Added debug logs to track VLM response and parsing
- Better error messages for troubleshooting

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-09 23:37:35 +08:00
parent c4325d57d4
commit a823c03788
1 changed file with 14 additions and 15 deletions
--- a/phone_agent/video_learning.py
+++ b/phone_agent/video_learning.py
@@ -104,20 +104,13 @@ class LearningSession:
 class ScreenshotAnalyzer:
    """分析视频截图，提取内容信息"""

-    ANALYSIS_PROMPT = """分析这张短视频截图，提取以下信息并以JSON格式返回：
-{
-    "description": "视频描述文案（屏幕上显示的文字，如果有的话）",
-    "likes": 点赞数（纯数字，如12000，没有则为null）,
-    "comments": 评论数（纯数字，没有则为null）,
-    "shares": 分享数（纯数字，没有则为null）,
-    "tags": ["标签1", "标签2"],
-    "category": "视频类型（美食/旅行/搞笑/知识/生活/音乐/舞蹈/其他）",
-    "elements": ["画面中的主要元素，如：人物、食物、风景等"]
-}
-注意：
-1. 只返回JSON，不要其他文字
-2. 数字不要带单位，如"1.2万"应转为12000
-3. 如果无法识别某项，设为null或空数组"""
+    ANALYSIS_PROMPT = """分析这张短视频截图，只返回JSON，不要任何其他文字。
+
+格式：{"description":"描述","likes":数字,"comments":数字,"tags":["标签"],"category":"类型","elements":["元素"]}
+
+示例：{"description":"美食探店","likes":12000,"comments":500,"tags":["美食"],"category":"美食","elements":["食物"]}
+
+注意：数字如"1.2万"转为12000，无法识别则用null。只返回JSON："""

    def __init__(self, model_config: ModelConfig):
        """初始化分析器"""
@@ -151,13 +144,18 @@ class ScreenshotAnalyzer:
        """解析 VLM 返回的 JSON 结果"""
        import re

+        # 调试日志
+        print(f"[ScreenshotAnalyzer] Raw response: {text[:200]}...")
+
        # 尝试提取 JSON
        json_match = re.search(r'\{[\s\S]*\}', text)
        if not json_match:
+            print("[ScreenshotAnalyzer] No JSON found in response")
            return {}

        try:
            result = json.loads(json_match.group())
+            print(f"[ScreenshotAnalyzer] Parsed: {result}")
            # 确保数字字段是整数
            for field in ['likes', 'comments', 'shares']:
                if field in result and result[field] is not None:
@@ -166,7 +164,8 @@ class ScreenshotAnalyzer:
                    except (ValueError, TypeError):
                        result[field] = None
            return result
-        except json.JSONDecodeError:
+        except json.JSONDecodeError as e:
+            print(f"[ScreenshotAnalyzer] JSON parse error: {e}")
            return {}