""" Video Learning Agent Demo This script demonstrates how to use the VideoLearningAgent to watch and learn from short video platforms like Douyin. Usage: python examples/video_learning_demo.py --device-id --count 10 """ import os import sys from pathlib import Path # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) from phone_agent.model.client import ModelConfig from phone_agent.video_learning import VideoLearningAgent def main(): """Main demo function.""" # Load configuration from environment base_url = os.getenv("MODEL_BASE_URL", "http://localhost:8000/v1") api_key = os.getenv("MODEL_API_KEY", "your-api-key") model_name = os.getenv("MODEL_NAME", "autoglm-phone-9b") # Configuration device_id = os.getenv("DEVICE_ID", "emulator-5554") target_count = int(os.getenv("TARGET_COUNT", "10")) watch_duration = float(os.getenv("WATCH_DURATION", "3.0")) category = os.getenv("CATEGORY", None) # e.g., "美食", "旅行", "搞笑" print("=" * 60) print("Video Learning Agent Demo") print("=" * 60) print(f"Device: {device_id}") print(f"Platform: Douyin") print(f"Target videos: {target_count}") print(f"Watch duration: {watch_duration}s per video") if category: print(f"Category filter: {category}") print("=" * 60) # Create agent model_config = ModelConfig( base_url=base_url, model_name=model_name, api_key=api_key, lang="cn", ) agent = VideoLearningAgent( model_config=model_config, platform="douyin", output_dir="./video_learning_data", ) # Setup callbacks def on_video_watched(record): print(f"\n[Video {record.sequence_id}] Watched!") if record.description: print(f" Description: {record.description}") if record.likes: print(f" Likes: {record.likes}") print(f" Screenshot: {record.screenshot_path}") def on_progress_update(current, total): percent = (current / total * 100) if total > 0 else 0 print(f"\nProgress: {current}/{total} ({percent:.1f}%)") def on_session_complete(session): print("\n" + "=" * 60) print("Session Complete!") print("=" * 60) print(f"Total videos watched: {session.total_videos}") print(f"Total duration: {session.total_duration:.1f}s") print(f"Data saved to: ./video_learning_data/{session.session_id}.json") agent.on_video_watched = on_video_watched agent.on_progress_update = on_progress_update agent.on_session_complete = on_session_complete # Start session session_id = agent.start_session( device_id=device_id, target_count=target_count, category=category, watch_duration=watch_duration, ) print(f"\nSession started: {session_id}") print("Starting video watching task...\n") # Construct the task if category: task = f""" 请帮我学习抖音上的"{category}"类视频。具体任务如下: 1. 打开抖音应用 2. 搜索"{category}" 3. 开始观看视频,每个视频观看约{watch_duration}秒 4. 记录每个视频的描述、点赞数、评论数等信息 5. 滑动到下一个视频 6. 重复步骤3-5,直到观看完{target_count}个视频 请按照以下格式记录每个视频: - 视频序号 - 描述文案(屏幕上的文字) - 点赞数(如果有显示) - 评论数(如果有显示) - 截图 每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。 """ else: task = f""" 请帮我学习抖音上的推荐视频。具体任务如下: 1. 打开抖音应用 2. 在推荐页开始观看视频,每个视频观看约{watch_duration}秒 3. 记录每个视频的描述、点赞数、评论数等信息 4. 向上滑动到下一个视频 5. 重复步骤3-4,直到观看完{target_count}个视频 请按照以下格式记录每个视频: - 视频序号 - 描述文案(屏幕上的文字) - 点赞数(如果有显示) - 评论数(如果有显示) - 截图 每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。 """ # Run the task success = agent.run_learning_task(task) if success: print("\n✓ Learning task completed successfully!") # Export data json_file = agent.export_data("json") print(f"✓ Data exported to: {json_file}") csv_file = agent.export_data("csv") print(f"✓ Data exported to: {csv_file}") else: print("\n✗ Learning task failed") print("\nSession progress:") progress = agent.get_session_progress() for key, value in progress.items(): print(f" {key}: {value}") if __name__ == "__main__": main()