Add Video Learning Agent for short video platforms

Features:
- VideoLearningAgent for automated video watching on Douyin/Kuaishou/TikTok
- Web dashboard UI for video learning sessions
- Real-time progress tracking with screenshot capture
- App detection using get_current_app() for accurate recording
- Session management with pause/resume/stop controls

Technical improvements:
- Simplified video detection logic using direct app detection
- Full base64 hash for sensitive screenshot change detection
- Immediate stop when target video count is reached
- Fixed circular import issues with ModelConfig

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
let5sne.win10
2026-01-09 22:54:57 +08:00
parent 3552df23d6
commit 5b3f214e20
15 changed files with 2317 additions and 1 deletions

View File

@@ -0,0 +1,161 @@
"""
Video Learning Agent Demo
This script demonstrates how to use the VideoLearningAgent to watch
and learn from short video platforms like Douyin.
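Usage:
    DEVICE_ID=<device_id> TARGET_COUNT=10 python examples/video_learning_demo.py
All settings are read from environment variables (no command-line flags are
parsed): MODEL_BASE_URL, MODEL_API_KEY, MODEL_NAME, DEVICE_ID, TARGET_COUNT,
WATCH_DURATION, CATEGORY.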
"""
import os
import sys
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from phone_agent.model.client import ModelConfig
from phone_agent.video_learning import VideoLearningAgent
def _build_task(category, watch_duration, target_count):
    """Compose the Chinese instruction prompt handed to the agent.

    Args:
        category: Search keyword to study; a falsy value selects the
            recommended-feed variant of the prompt instead.
        watch_duration: Seconds to stay on each video (interpolated as-is).
        target_count: Number of videos the prompt asks the agent to watch.

    Returns:
        The prompt text, wrapped in one leading and one trailing newline.
    """
    if category:
        steps = [
            f'请帮我学习抖音上的"{category}"类视频。具体任务如下:',
            "1. 打开抖音应用",
            f'2. 搜索"{category}"',
            # The unit "秒" keeps this step consistent with the closing line.
            f"3. 开始观看视频,每个视频观看约{watch_duration}秒",
            "4. 记录每个视频的描述、点赞数、评论数等信息",
            "5. 滑动到下一个视频",
            f"6. 重复步骤3-5直到观看完{target_count}个视频",
        ]
    else:
        steps = [
            "请帮我学习抖音上的推荐视频。具体任务如下:",
            "1. 打开抖音应用",
            f"2. 在推荐页开始观看视频,每个视频观看约{watch_duration}秒",
            "3. 记录每个视频的描述、点赞数、评论数等信息",
            "4. 向上滑动到下一个视频",
            # The loop must restart at step 2 (watch); steps 3-4 alone would
            # record and swipe without ever watching the video.
            f"5. 重复步骤2-4直到观看完{target_count}个视频",
        ]

    # Recording instructions are identical for both prompt variants.
    record_format = [
        "请按照以下格式记录每个视频:",
        "- 视频序号",
        "- 描述文案(屏幕上的文字)",
        "- 点赞数(如果有显示)",
        "- 评论数(如果有显示)",
        "- 截图",
        f"每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。",
    ]
    return "\n" + "\n".join(steps + record_format) + "\n"


def main():
    """Run a Douyin video-learning demo session configured via environment.

    Reads MODEL_BASE_URL, MODEL_API_KEY, MODEL_NAME, DEVICE_ID, TARGET_COUNT,
    WATCH_DURATION and CATEGORY (each falling back to a default), creates a
    VideoLearningAgent, registers console-printing callbacks, starts a
    session, runs the learning task, and on success exports the collected
    data as JSON and CSV. The session progress is printed at the end in
    either case.

    Raises:
        ValueError: If TARGET_COUNT or WATCH_DURATION is set to a value that
            cannot be parsed as a number.
    """
    # Model endpoint settings
    base_url = os.getenv("MODEL_BASE_URL", "http://localhost:8000/v1")
    api_key = os.getenv("MODEL_API_KEY", "your-api-key")
    model_name = os.getenv("MODEL_NAME", "autoglm-phone-9b")

    # Session settings
    device_id = os.getenv("DEVICE_ID", "emulator-5554")
    target_count = int(os.getenv("TARGET_COUNT", "10"))
    watch_duration = float(os.getenv("WATCH_DURATION", "3.0"))
    category = os.getenv("CATEGORY")  # e.g., "美食", "旅行", "搞笑"

    # Single source of truth for values that are both passed to the agent
    # and echoed back to the user.
    platform = "douyin"
    output_dir = "./video_learning_data"
    rule = "=" * 60

    print(rule)
    print("Video Learning Agent Demo")
    print(rule)
    print(f"Device: {device_id}")
    print(f"Platform: {platform.capitalize()}")
    print(f"Target videos: {target_count}")
    print(f"Watch duration: {watch_duration}s per video")
    if category:
        print(f"Category filter: {category}")
    print(rule)

    # Create agent
    model_config = ModelConfig(
        base_url=base_url,
        model_name=model_name,
        api_key=api_key,
        lang="cn",
    )
    agent = VideoLearningAgent(
        model_config=model_config,
        platform=platform,
        output_dir=output_dir,
    )

    # Callbacks: each one only reports to the console.
    def on_video_watched(record):
        print(f"\n[Video {record.sequence_id}] Watched!")
        if record.description:
            print(f" Description: {record.description}")
        if record.likes:
            print(f" Likes: {record.likes}")
        print(f" Screenshot: {record.screenshot_path}")

    def on_progress_update(current, total):
        # Guard against a zero/negative total to avoid ZeroDivisionError.
        percent = (current / total * 100) if total > 0 else 0
        print(f"\nProgress: {current}/{total} ({percent:.1f}%)")

    def on_session_complete(session):
        print("\n" + rule)
        print("Session Complete!")
        print(rule)
        print(f"Total videos watched: {session.total_videos}")
        print(f"Total duration: {session.total_duration:.1f}s")
        print(f"Data saved to: {output_dir}/{session.session_id}.json")

    agent.on_video_watched = on_video_watched
    agent.on_progress_update = on_progress_update
    agent.on_session_complete = on_session_complete

    # Start session
    session_id = agent.start_session(
        device_id=device_id,
        target_count=target_count,
        category=category,
        watch_duration=watch_duration,
    )
    print(f"\nSession started: {session_id}")
    print("Starting video watching task...\n")

    # Construct and run the task
    task = _build_task(category, watch_duration, target_count)
    success = agent.run_learning_task(task)

    if success:
        print("\n✓ Learning task completed successfully!")
        # Export data
        json_file = agent.export_data("json")
        print(f"✓ Data exported to: {json_file}")
        csv_file = agent.export_data("csv")
        print(f"✓ Data exported to: {csv_file}")
    else:
        print("\n✗ Learning task failed")

    print("\nSession progress:")
    progress = agent.get_session_progress()
    for key, value in progress.items():
        print(f" {key}: {value}")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()