Files
Open-AutoGLM/examples/video_learning_demo.py
let5sne.win10 5b3f214e20 Add Video Learning Agent for short video platforms
Features:
- VideoLearningAgent for automated video watching on Douyin/Kuaishou/TikTok
- Web dashboard UI for video learning sessions
- Real-time progress tracking with screenshot capture
- App detection using get_current_app() for accurate recording
- Session management with pause/resume/stop controls

Technical improvements:
- Simplified video detection logic using direct app detection
- Full base64 hash for sensitive screenshot change detection
- Immediate stop when target video count is reached
- Fixed circular import issues with ModelConfig

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-09 22:54:57 +08:00

162 lines
4.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
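Video Learning Agent Demo

This script demonstrates how to use the VideoLearningAgent to watch
and learn from short video platforms like Douyin.

All settings are read from environment variables; the script does not parse
command-line flags. Recognised variables: MODEL_BASE_URL, MODEL_API_KEY,
MODEL_NAME, DEVICE_ID, TARGET_COUNT, WATCH_DURATION, CATEGORY.

Usage (POSIX shell):
    DEVICE_ID=<device_id> TARGET_COUNT=10 python examples/video_learning_demo.py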
"""
import os
import sys
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from phone_agent.model.client import ModelConfig
from phone_agent.video_learning import VideoLearningAgent
def _env_number(name, default, cast):
    """Read environment variable *name* and convert it with *cast*.

    Args:
        name: Environment variable to read.
        default: String used when the variable is unset.
        cast: Conversion callable such as ``int`` or ``float``.

    Returns:
        The converted value.

    Raises:
        ValueError: If the value cannot be converted. The message names the
            offending variable so a bad setting is easy to locate.
    """
    raw = os.getenv(name, default)
    try:
        return cast(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be a valid {cast.__name__}, got {raw!r}"
        ) from None


def _build_task(category, watch_duration, target_count):
    """Compose the Chinese natural-language instruction handed to the agent.

    Args:
        category: Search keyword; when falsy the recommended-feed prompt is used.
        watch_duration: Seconds to stay on each video (interpolated as-is).
        target_count: Number of videos the prompt asks the agent to watch.

    Returns:
        The prompt text, starting and ending with a newline.
    """
    # Every step that mentions the duration carries the unit "秒" (seconds) so
    # the model is never given a bare number.
    if category:
        steps = [
            f'请帮我学习抖音上的"{category}"类视频。具体任务如下:',
            "1. 打开抖音应用",
            f'2. 搜索"{category}"',
            f"3. 开始观看视频,每个视频观看约{watch_duration}秒",
            "4. 记录每个视频的描述、点赞数、评论数等信息",
            "5. 滑动到下一个视频",
            f"6. 重复步骤3-5直到观看完{target_count}个视频",
        ]
    else:
        steps = [
            "请帮我学习抖音上的推荐视频。具体任务如下:",
            "1. 打开抖音应用",
            f"2. 在推荐页开始观看视频,每个视频观看约{watch_duration}秒",
            "3. 记录每个视频的描述、点赞数、评论数等信息",
            "4. 向上滑动到下一个视频",
            # The repeated range starts at step 2 so that watching is part of
            # the loop, mirroring the watch/record/swipe loop of the category
            # prompt.
            f"5. 重复步骤2-4直到观看完{target_count}个视频",
        ]

    # Recording format shared by both prompt variants.
    record_format = [
        "请按照以下格式记录每个视频:",
        "- 视频序号",
        "- 描述文案(屏幕上的文字)",
        "- 点赞数(如果有显示)",
        "- 评论数(如果有显示)",
        "- 截图",
        f"每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。",
    ]
    return "\n" + "\n".join(steps + record_format) + "\n"


def main():
    """Run the Douyin video-learning demo from start to finish.

    Reads settings from environment variables, creates a VideoLearningAgent
    for the "douyin" platform, attaches console-printing callbacks, starts a
    session, runs the learning task and, on success, exports the collected
    data as JSON and CSV.

    Environment variables (default in parentheses):
        MODEL_BASE_URL ("http://localhost:8000/v1"), MODEL_API_KEY
        ("your-api-key"), MODEL_NAME ("autoglm-phone-9b"), DEVICE_ID
        ("emulator-5554"), TARGET_COUNT ("10"), WATCH_DURATION ("3.0"),
        CATEGORY (unset).

    Returns:
        0 when ``run_learning_task`` reports success, otherwise 1.

    Raises:
        ValueError: If TARGET_COUNT or WATCH_DURATION is not numeric.
    """
    # Single source of truth for the directory given to the agent and echoed
    # in the completion message.
    output_dir = "./video_learning_data"

    # Model endpoint settings.
    base_url = os.getenv("MODEL_BASE_URL", "http://localhost:8000/v1")
    api_key = os.getenv("MODEL_API_KEY", "your-api-key")
    model_name = os.getenv("MODEL_NAME", "autoglm-phone-9b")

    # Session settings.
    device_id = os.getenv("DEVICE_ID", "emulator-5554")
    target_count = _env_number("TARGET_COUNT", "10", int)
    watch_duration = _env_number("WATCH_DURATION", "3.0", float)
    # Optional search keyword, e.g. "美食" (food), "旅行" (travel), "搞笑" (comedy).
    category = os.getenv("CATEGORY", None)

    banner = "=" * 60
    print(banner)
    print("Video Learning Agent Demo")
    print(banner)
    print(f"Device: {device_id}")
    print("Platform: Douyin")
    print(f"Target videos: {target_count}")
    print(f"Watch duration: {watch_duration}s per video")
    if category:
        print(f"Category filter: {category}")
    print(banner)

    model_config = ModelConfig(
        base_url=base_url,
        model_name=model_name,
        api_key=api_key,
        lang="cn",
    )
    agent = VideoLearningAgent(
        model_config=model_config,
        platform="douyin",
        output_dir=output_dir,
    )

    def on_video_watched(record):
        """Print the fields of one watched-video record."""
        print(f"\n[Video {record.sequence_id}] Watched!")
        if record.description:
            print(f" Description: {record.description}")
        if record.likes:
            print(f" Likes: {record.likes}")
        print(f" Screenshot: {record.screenshot_path}")

    def on_progress_update(current, total):
        """Print progress as a count and a percentage."""
        # Guard against division by zero when the target is 0.
        percent = (current / total * 100) if total > 0 else 0
        print(f"\nProgress: {current}/{total} ({percent:.1f}%)")

    def on_session_complete(session):
        """Print the end-of-session summary."""
        print("\n" + banner)
        print("Session Complete!")
        print(banner)
        print(f"Total videos watched: {session.total_videos}")
        print(f"Total duration: {session.total_duration:.1f}s")
        print(f"Data saved to: {output_dir}/{session.session_id}.json")

    agent.on_video_watched = on_video_watched
    agent.on_progress_update = on_progress_update
    agent.on_session_complete = on_session_complete

    session_id = agent.start_session(
        device_id=device_id,
        target_count=target_count,
        category=category,
        watch_duration=watch_duration,
    )
    print(f"\nSession started: {session_id}")
    print("Starting video watching task...\n")

    task = _build_task(category, watch_duration, target_count)
    success = agent.run_learning_task(task)

    if success:
        print("\n✓ Learning task completed successfully!")
        json_file = agent.export_data("json")
        print(f"✓ Data exported to: {json_file}")
        csv_file = agent.export_data("csv")
        print(f"✓ Data exported to: {csv_file}")
    else:
        print("\n✗ Learning task failed")
        # NOTE(review): the progress dump is placed in the failure branch as a
        # diagnostic; confirm this matches the intended structure.
        print("\nSession progress:")
        for key, value in agent.get_session_progress().items():
            print(f" {key}: {value}")

    return 0 if success else 1
if __name__ == "__main__":
    # Run the demo only when executed as a script, and hand main()'s return
    # value to sys.exit so it becomes the process exit status. sys.exit
    # treats a None return as success (status 0).
    sys.exit(main())