diff --git a/phone_agent/video_learning.py b/phone_agent/video_learning.py index caa6245..0bfa636 100644 --- a/phone_agent/video_learning.py +++ b/phone_agent/video_learning.py @@ -104,20 +104,13 @@ class LearningSession: class ScreenshotAnalyzer: """分析视频截图,提取内容信息""" - ANALYSIS_PROMPT = """分析这张短视频截图,提取以下信息并以JSON格式返回: -{ - "description": "视频描述文案(屏幕上显示的文字,如果有的话)", - "likes": 点赞数(纯数字,如12000,没有则为null), - "comments": 评论数(纯数字,没有则为null), - "shares": 分享数(纯数字,没有则为null), - "tags": ["标签1", "标签2"], - "category": "视频类型(美食/旅行/搞笑/知识/生活/音乐/舞蹈/其他)", - "elements": ["画面中的主要元素,如:人物、食物、风景等"] -} -注意: -1. 只返回JSON,不要其他文字 -2. 数字不要带单位,如"1.2万"应转为12000 -3. 如果无法识别某项,设为null或空数组""" + ANALYSIS_PROMPT = """分析这张短视频截图,只返回JSON,不要任何其他文字。 + +格式:{"description":"描述","likes":数字,"comments":数字,"tags":["标签"],"category":"类型","elements":["元素"]} + +示例:{"description":"美食探店","likes":12000,"comments":500,"tags":["美食"],"category":"美食","elements":["食物"]} + +注意:数字如"1.2万"转为12000,无法识别则用null。只返回JSON:""" def __init__(self, model_config: ModelConfig): """初始化分析器""" @@ -151,13 +144,18 @@ class ScreenshotAnalyzer: """解析 VLM 返回的 JSON 结果""" import re + # 调试日志 + print(f"[ScreenshotAnalyzer] Raw response: {text[:200]}...") + # 尝试提取 JSON json_match = re.search(r'\{[\s\S]*\}', text) if not json_match: + print("[ScreenshotAnalyzer] No JSON found in response") return {} try: result = json.loads(json_match.group()) + print(f"[ScreenshotAnalyzer] Parsed: {result}") # 确保数字字段是整数 for field in ['likes', 'comments', 'shares']: if field in result and result[field] is not None: @@ -166,7 +164,8 @@ class ScreenshotAnalyzer: except (ValueError, TypeError): result[field] = None return result - except json.JSONDecodeError: + except json.JSONDecodeError as e: + print(f"[ScreenshotAnalyzer] JSON parse error: {e}") return {}