Improve Video Learning Agent with action-based detection and analysis toggle

- Change video detection from screenshot hash to action-based (Swipe detection)
- Add enable_analysis toggle to disable VLM screenshot analysis
- Improve task prompt to prevent VLM from stopping prematurely
- Add debug logging for action detection troubleshooting
- Fix ModelResponse attribute error (content -> raw_content)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-10 01:47:09 +08:00
parent 6b770832aa
commit b97d3f3a9f
4 changed files with 116 additions and 72 deletions
--- a/dashboard/api/video_learning.py
+++ b/dashboard/api/video_learning.py
@@ -26,6 +26,7 @@ class SessionCreateRequest(BaseModel):
    target_count: int = Field(10, description="Number of videos to watch", ge=1, le=100)
    category: Optional[str] = Field(None, description="Target category filter")
    watch_duration: float = Field(3.0, description="Watch duration per video (seconds)", ge=1.0, le=30.0)
+    enable_analysis: bool = Field(True, description="Enable VLM screenshot analysis")


 class SessionControlRequest(BaseModel):
@@ -100,6 +101,7 @@ async def create_session(
        model_config=model_config,
        platform=request.platform,
        output_dir=config.VIDEO_LEARNING_OUTPUT_DIR,
+        enable_analysis=request.enable_analysis,
    )

    # Setup callbacks for real-time updates
@@ -180,45 +182,53 @@ async def start_session(session_id: str) -> Dict[str, str]:
    app_name = info["name"]

    # Build clear task instructions
+    # 重要：不让 VLM 自己计数，而是持续执行直到被外部停止
+    # 这样可以确保 VideoLearningAgent 的计数与实际观看数一致
    if category:
-        task = f"""你是一个视频学习助手。请严格按照以下步骤执行：
+        task = f"""你是一个视频学习助手。你的任务是持续观看视频，不要自己判断任务是否完成。

-步骤1：启动应用
- 回到主屏幕
- 打开{app_name}应用
-
-步骤2：搜索内容
- 在{app_name}中搜索"{category}"
- 点击第一个搜索结果或进入相关页面
-
-步骤3：观看视频
- 观看视频，每个视频停留约{watch_duration}秒
- 记录视频的描述、点赞数、评论数
+【核心要求】你必须持续执行以下循环，直到系统主动停止你：
+- 观看视频约{watch_duration}秒
 - 向上滑动切换到下一个视频
- 重复观看和记录，直到完成{target_count}个视频
+- 继续观看
+- 再次滑动
+- 一直重复，不要停止

-步骤4：完成任务
- 完成观看{target_count}个视频后，总结所有视频信息
+执行步骤：
+1. 回到主屏幕，打开{app_name}应用
+2. 在{app_name}中搜索"{category}"
+3. 点击第一个搜索结果
+4. 开始无限循环：观看视频 → 向上滑动 → 观看下一个视频 → 再次滑动...

-请现在开始执行。"""
+【极其重要】
+- 绝对不要自己完成任务或停止
+- 绝对不要总结或说"完成了"
+- 只管滑动看视频，持续不断
+- 系统会在合适的时机主动停止你
+
+现在开始执行，持续滑动观看视频，不要停！"""
    else:
-        task = f"""你是一个视频学习助手。请严格按照以下步骤执行：
+        task = f"""你是一个视频学习助手。你的任务是持续观看视频，不要自己判断任务是否完成。

-步骤1：启动应用
- 回到主屏幕
- 打开{app_name}应用
-
-步骤2：观看推荐视频
- 进入{app_name}的推荐页面
- 观看推荐视频，每个视频停留约{watch_duration}秒
- 记录视频的描述、点赞数、评论数
+【核心要求】你必须持续执行以下循环，直到系统主动停止你：
+- 观看视频约{watch_duration}秒
 - 向上滑动切换到下一个视频
- 重复观看和记录，直到完成{target_count}个视频
+- 继续观看
+- 再次滑动
+- 一直重复，不要停止

-步骤3：完成任务
- 完成观看{target_count}个视频后，总结所有视频信息
+执行步骤：
+1. 回到主屏幕，打开{app_name}应用
+2. 进入推荐页面
+3. 开始无限循环：观看视频 → 向上滑动 → 观看下一个视频 → 再次滑动...

-请现在开始执行。"""
+【极其重要】
+- 绝对不要自己完成任务或停止
+- 绝对不要总结或说"完成了"
+- 只管滑动看视频，持续不断
+- 系统会在合适的时机主动停止你
+
+现在开始执行，持续滑动观看视频，不要停！"""

    # Run in background
    asyncio.create_task(asyncio.to_thread(agent.run_learning_task, task))
@@ -244,8 +254,8 @@ async def control_session(
        return {"session_id": session_id, "status": "resumed"}
    elif request.action == "stop":
        agent.stop_session()
-        # Remove from active sessions
-        del _active_sessions[session_id]
+        # Don't delete immediately - let status queries still work
+        # Session will be cleaned up when is_active becomes False
        return {"session_id": session_id, "status": "stopped"}
    else:
        raise HTTPException(status_code=400, detail=f"Invalid action: {request.action}")
--- a/dashboard/static/js/video-learning.js
+++ b/dashboard/static/js/video-learning.js
@@ -19,6 +19,7 @@ const VideoLearningModule = {
            targetCount = 10,
            category = null,
            watchDuration = 3.0,
+            enableAnalysis = true,
        } = options;

        try {
@@ -28,6 +29,7 @@ const VideoLearningModule = {
                target_count: targetCount,
                category: category,
                watch_duration: watchDuration,
+                enable_analysis: enableAnalysis,
            });

            this.currentSessionId = response.data.session_id;
--- a/dashboard/static/video-learning.html
+++ b/dashboard/static/video-learning.html
@@ -97,6 +97,14 @@
                        <small>Leave empty to watch recommended videos</small>
                    </div>

+                    <div class="form-group checkbox-group">
+                        <label>
+                            <input type="checkbox" v-model="config.enableAnalysis" :disabled="loading">
+                            <span>Enable Screenshot Analysis</span>
+                        </label>
+                        <small>Analyze video content using VLM to extract description, likes, comments, tags, etc.</small>
+                    </div>
+
                    <button @click="createAndStartSession" class="btn btn-primary" :disabled="loading || !config.deviceId">
                        <svg v-if="loading" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" class="spinning">
                            <path d="M21 12a9 9 0 1 1-6.219-8.56"></path>
@@ -269,6 +277,7 @@
                        targetCount: 10,
                        category: '',
                        watchDuration: 3.0,
+                        enableAnalysis: true,
                    },
                };
            },
@@ -304,6 +313,7 @@
                                targetCount: this.config.targetCount,
                                category: this.config.category || null,
                                watchDuration: this.config.watchDuration,
+                                enableAnalysis: this.config.enableAnalysis,
                            }
                        );

@@ -354,7 +364,12 @@

                    try {
                        await VideoLearningModule.controlSession(this.currentSessionId, 'stop');
-                        await this.updateSessionStatus();
+                        // Stop polling first
+                        VideoLearningModule.stopPolling();
+                        // Mark session as stopped in UI
+                        if (this.sessionStatus) {
+                            this.sessionStatus.is_active = false;
+                        }
                        this.showToast('Session stopped', 'info');
                    } catch (error) {
                        this.showToast('Failed to stop session', 'error');