Add Video Learning Agent for short video platforms
Features:
- VideoLearningAgent for automated video watching on Douyin/Kuaishou/TikTok
- Web dashboard UI for video learning sessions
- Real-time progress tracking with screenshot capture
- App detection using get_current_app() for accurate recording
- Session management with pause/resume/stop controls

Technical improvements:
- Simplified video detection logic using direct app detection
- Full base64 hash for sensitive screenshot change detection
- Immediate stop when target video count is reached
- Fixed circular import issues with ModelConfig

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
161
examples/video_learning_demo.py
Normal file
161
examples/video_learning_demo.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Video Learning Agent Demo
|
||||
|
||||
This script demonstrates how to use the VideoLearningAgent to watch
|
||||
and learn from short video platforms like Douyin.
|
||||
|
||||
Usage:
|
||||
    DEVICE_ID=<device_id> TARGET_COUNT=10 python examples/video_learning_demo.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from phone_agent.model.client import ModelConfig
|
||||
from phone_agent.video_learning import VideoLearningAgent
|
||||
|
||||
|
||||
def main():
    """Run the Douyin video-learning demo from start to finish.

    Settings are taken from command-line flags. Every flag falls back to an
    environment variable, and then to a built-in default:

        --device-id       DEVICE_ID        "emulator-5554"
        --count           TARGET_COUNT     10
        --watch-duration  WATCH_DURATION   3.0 (seconds per video)
        --category        CATEGORY         unset (watch the recommended feed)

    The model endpoint is configured through MODEL_BASE_URL, MODEL_API_KEY
    and MODEL_NAME only.

    The function prints a banner, builds a ``VideoLearningAgent``, attaches
    three printing callbacks, starts a session, hands the agent a
    natural-language task, and finally reports the outcome together with the
    session progress.
    """
    # Imported locally so this function is self-contained and the module's
    # import block does not need to change.
    import argparse

    # Model endpoint configuration (environment only).
    base_url = os.getenv("MODEL_BASE_URL", "http://localhost:8000/v1")
    api_key = os.getenv("MODEL_API_KEY", "your-api-key")
    model_name = os.getenv("MODEL_NAME", "autoglm-phone-9b")

    # Session configuration. The environment values are passed to argparse as
    # *string* defaults on purpose: argparse runs string defaults through
    # ``type``, so a malformed TARGET_COUNT / WATCH_DURATION is reported as a
    # usage error instead of an unhandled ValueError traceback.
    parser = argparse.ArgumentParser(description="Video Learning Agent Demo")
    parser.add_argument(
        "--device-id",
        default=os.getenv("DEVICE_ID", "emulator-5554"),
        help="device to drive (env: DEVICE_ID)",
    )
    parser.add_argument(
        "--count",
        type=int,
        default=os.getenv("TARGET_COUNT", "10"),
        help="number of videos to watch (env: TARGET_COUNT)",
    )
    parser.add_argument(
        "--watch-duration",
        type=float,
        default=os.getenv("WATCH_DURATION", "3.0"),
        help="seconds to watch each video (env: WATCH_DURATION)",
    )
    parser.add_argument(
        "--category",
        # e.g. food ("美食"), travel ("旅行"), comedy ("搞笑")
        default=os.getenv("CATEGORY"),
        help="optional search keyword; omit to use the recommended feed "
        "(env: CATEGORY)",
    )
    args = parser.parse_args()

    device_id = args.device_id
    target_count = args.count
    watch_duration = args.watch_duration
    category = args.category

    # Single source of truth for the output location: used both when creating
    # the agent and when telling the user where the session file was written.
    output_dir = "./video_learning_data"

    print("=" * 60)
    print("Video Learning Agent Demo")
    print("=" * 60)
    print(f"Device: {device_id}")
    print("Platform: Douyin")
    print(f"Target videos: {target_count}")
    print(f"Watch duration: {watch_duration}s per video")
    if category:
        print(f"Category filter: {category}")
    print("=" * 60)

    # Create agent
    model_config = ModelConfig(
        base_url=base_url,
        model_name=model_name,
        api_key=api_key,
        lang="cn",
    )

    agent = VideoLearningAgent(
        model_config=model_config,
        platform="douyin",
        output_dir=output_dir,
    )

    # Callbacks: each one only prints what it is given.
    def on_video_watched(record):
        """Print a short summary of a single watched-video record."""
        print(f"\n[Video {record.sequence_id}] Watched!")
        if record.description:
            print(f"  Description: {record.description}")
        if record.likes:
            print(f"  Likes: {record.likes}")
        print(f"  Screenshot: {record.screenshot_path}")

    def on_progress_update(current, total):
        """Print progress as ``current/total`` plus a percentage."""
        # Guard against a zero total so the demo never divides by zero.
        percent = (current / total * 100) if total > 0 else 0
        print(f"\nProgress: {current}/{total} ({percent:.1f}%)")

    def on_session_complete(session):
        """Print the totals of a finished session and where it was saved."""
        print("\n" + "=" * 60)
        print("Session Complete!")
        print("=" * 60)
        print(f"Total videos watched: {session.total_videos}")
        print(f"Total duration: {session.total_duration:.1f}s")
        print(f"Data saved to: {output_dir}/{session.session_id}.json")

    agent.on_video_watched = on_video_watched
    agent.on_progress_update = on_progress_update
    agent.on_session_complete = on_session_complete

    # Start session
    session_id = agent.start_session(
        device_id=device_id,
        target_count=target_count,
        category=category,
        watch_duration=watch_duration,
    )

    print(f"\nSession started: {session_id}")
    print("Starting video watching task...\n")

    # Build the natural-language task for the agent. The prompt text is kept
    # flush-left so that no source indentation ends up inside the prompt.
    if category:
        # Category given: search for it first, then watch the results.
        task = f"""
请帮我学习抖音上的"{category}"类视频。具体任务如下:

1. 打开抖音应用
2. 搜索"{category}"
3. 开始观看视频,每个视频观看约{watch_duration}秒
4. 记录每个视频的描述、点赞数、评论数等信息
5. 滑动到下一个视频
6. 重复步骤3-5,直到观看完{target_count}个视频

请按照以下格式记录每个视频:
- 视频序号
- 描述文案(屏幕上的文字)
- 点赞数(如果有显示)
- 评论数(如果有显示)
- 截图

每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。
"""
    else:
        # No category: watch whatever the recommended feed serves.
        task = f"""
请帮我学习抖音上的推荐视频。具体任务如下:

1. 打开抖音应用
2. 在推荐页开始观看视频,每个视频观看约{watch_duration}秒
3. 记录每个视频的描述、点赞数、评论数等信息
4. 向上滑动到下一个视频
5. 重复步骤3-4,直到观看完{target_count}个视频

请按照以下格式记录每个视频:
- 视频序号
- 描述文案(屏幕上的文字)
- 点赞数(如果有显示)
- 评论数(如果有显示)
- 截图

每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。
"""

    # Run the task
    success = agent.run_learning_task(task)

    if success:
        print("\n✓ Learning task completed successfully!")

        # Export the collected data in both supported demo formats.
        json_file = agent.export_data("json")
        print(f"✓ Data exported to: {json_file}")

        csv_file = agent.export_data("csv")
        print(f"✓ Data exported to: {csv_file}")
    else:
        print("\n✗ Learning task failed")

    # Always show the final progress snapshot, whether or not the task succeeded.
    print("\nSession progress:")
    progress = agent.get_session_progress()
    for key, value in progress.items():
        print(f"  {key}: {value}")


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user