Add Video Learning Agent for short video platforms
Features:
- VideoLearningAgent for automated video watching on Douyin/Kuaishou/TikTok
- Web dashboard UI for video learning sessions
- Real-time progress tracking with screenshot capture
- App detection using get_current_app() for accurate recording
- Session management with pause/resume/stop controls

Technical improvements:
- Simplified video detection logic using direct app detection
- Full base64 hash for sensitive screenshot change detection
- Immediate stop when the target video count is reached
- Fixed circular import issues with ModelConfig

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
13
.env.example
13
.env.example
@@ -108,3 +108,16 @@ SCREENSHOT_THROTTLE_MS=500
|
||||
|
||||
# Maximum task history to keep / 保留的最大任务历史数
|
||||
MAX_TASK_HISTORY=100
|
||||
|
||||
# ============================================================================
|
||||
# Video Learning Configuration / 视频学习配置
|
||||
# ============================================================================
|
||||
|
||||
# Output directory for video learning data / 视频学习数据输出目录
|
||||
VIDEO_LEARNING_OUTPUT_DIR=./video_learning_data
|
||||
|
||||
# Model parameters for video learning / 视频学习模型参数
|
||||
PHONE_AGENT_MAX_TOKENS=3000
|
||||
PHONE_AGENT_TEMPERATURE=0.0
|
||||
PHONE_AGENT_TOP_P=0.85
|
||||
PHONE_AGENT_FREQUENCY_PENALTY=0.2
|
||||
|
||||
@@ -5,9 +5,11 @@ API endpoints for the dashboard.
|
||||
from dashboard.api.devices import router as devices_router
|
||||
from dashboard.api.tasks import router as tasks_router
|
||||
from dashboard.api.websocket import router as websocket_router
|
||||
from dashboard.api.video_learning import router as video_learning_router
|
||||
|
||||
__all__ = [
|
||||
"devices_router",
|
||||
"tasks_router",
|
||||
"websocket_router",
|
||||
"video_learning_router",
|
||||
]
|
||||
|
||||
328
dashboard/api/video_learning.py
Normal file
328
dashboard/api/video_learning.py
Normal file
@@ -0,0 +1,328 @@
|
||||
"""
|
||||
Video Learning API endpoints for the dashboard.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from dashboard.config import config
|
||||
from dashboard.dependencies import get_device_manager
|
||||
from dashboard.services.device_manager import DeviceManager
|
||||
from phone_agent import VideoLearningAgent
|
||||
from phone_agent.model.client import ModelConfig
|
||||
|
||||
router = APIRouter(prefix="/api/video-learning", tags=["video-learning"])
|
||||
|
||||
|
||||
class SessionCreateRequest(BaseModel):
    """Body of the request that creates a new video learning session.

    Only ``device_id`` is mandatory; every other field falls back to the
    default declared on it.
    """

    # Identifier of the device the session will run on (required).
    device_id: str = Field(default=..., description="Target device ID")
    # Short-video platform to use.
    platform: str = Field(
        default="douyin",
        description="Platform name (douyin, kuaishou, tiktok)",
    )
    # Number of videos to watch; validated to lie within 1..100.
    target_count: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Number of videos to watch",
    )
    # Optional category filter.
    category: Optional[str] = Field(
        default=None,
        description="Target category filter",
    )
    # Seconds to stay on each video; validated to lie within 1.0..30.0.
    watch_duration: float = Field(
        default=3.0,
        ge=1.0,
        le=30.0,
        description="Watch duration per video (seconds)",
    )
|
||||
|
||||
|
||||
class SessionControlRequest(BaseModel):
    """Body of the request that pauses, resumes or stops a session."""

    # Name of the control action to apply (required).
    action: str = Field(default=..., description="Action: pause, resume, stop")
|
||||
|
||||
|
||||
class SessionStatus(BaseModel):
    """Response model describing the current state of a learning session."""

    # Identity and configuration of the session.
    session_id: str
    platform: str
    target_count: int

    # Progress counters.
    watched_count: int
    progress_percent: float

    # Lifecycle flags.
    is_active: bool
    is_paused: bool

    # Accumulated duration value reported for the session.
    total_duration: float

    # Summary of the most recent record, or None when nothing was recorded.
    current_video: Optional[Dict] = Field(default=None)
|
||||
|
||||
|
||||
class VideoInfo(BaseModel):
    """Response model for a single watched-video record."""

    # Position of the record within its session and when it was taken.
    sequence_id: int
    timestamp: str

    # Path of the captured screenshot, if one exists.
    screenshot_path: Optional[str] = Field(default=None)

    # How long the video was watched.
    watch_duration: float

    # Metadata extracted for the video; each item may be missing.
    description: Optional[str] = Field(default=None)
    likes: Optional[int] = Field(default=None)
    comments: Optional[int] = Field(default=None)
    tags: List[str] = []
    category: Optional[str] = Field(default=None)
|
||||
|
||||
|
||||
# Global session storage (in production, use database)
|
||||
_active_sessions: Dict[str, VideoLearningAgent] = {}
|
||||
|
||||
|
||||
@router.post("/sessions", response_model=Dict[str, str])
async def create_session(
    request: SessionCreateRequest,
    device_manager: DeviceManager = Depends(get_device_manager),
) -> Dict[str, str]:
    """Create a video learning session and register its agent.

    Args:
        request: Session parameters supplied by the client.
        device_manager: Injected manager used to look up the target device.

    Returns:
        A dict with the new ``session_id`` and ``status`` set to ``"created"``.

    Raises:
        HTTPException: 404 when the device is unknown, 400 when it is not
            connected, 409 when its status is ``"busy"``.
    """
    # Reject the request early if the device cannot take a session.
    target_device = await device_manager.get_device(request.device_id)
    if not target_device:
        raise HTTPException(status_code=404, detail="Device not found")
    if not target_device.is_connected:
        raise HTTPException(status_code=400, detail="Device not connected")
    if target_device.status == "busy":
        raise HTTPException(status_code=409, detail="Device is busy")

    # The model settings come from the dashboard configuration.
    learner = VideoLearningAgent(
        model_config=ModelConfig(
            base_url=config.MODEL_BASE_URL,
            model_name=config.MODEL_NAME,
            api_key=config.MODEL_API_KEY,
            max_tokens=config.MAX_TOKENS,
            temperature=config.TEMPERATURE,
            top_p=config.TOP_P,
            frequency_penalty=config.FREQUENCY_PENALTY,
            lang="cn",
        ),
        platform=request.platform,
        output_dir=config.VIDEO_LEARNING_OUTPUT_DIR,
    )

    # The callbacks below close over this name; it receives its real value
    # only after the agent has started the session further down.
    session_id = None

    def on_video_watched(record):
        """Hook for a newly watched video (broadcast not implemented yet)."""
        if not session_id:
            return
        # Placeholder: would hand the record to the WebSocket manager.

    def on_progress_update(current, total):
        """Hook for progress changes (broadcast not implemented yet)."""
        if not session_id:
            return
        # Placeholder: would broadcast the progress.

    def on_session_complete(session):
        """Drop the finished session from the active-session registry."""
        if not session_id or session_id not in _active_sessions:
            return
        del _active_sessions[session_id]

    learner.on_video_watched = on_video_watched
    learner.on_progress_update = on_progress_update
    learner.on_session_complete = on_session_complete

    session_id = learner.start_session(
        device_id=request.device_id,
        target_count=request.target_count,
        category=request.category,
        watch_duration=request.watch_duration,
        max_steps=500,
    )

    # Register the agent so the other endpoints can find it by session id.
    _active_sessions[session_id] = learner

    return {"session_id": session_id, "status": "created"}
|
||||
|
||||
|
||||
# Strong references to in-flight background learning runs. The event loop keeps
# only weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_background_runs: set = set()


@router.post("/sessions/{session_id}/start", response_model=Dict[str, str])
async def start_session(session_id: str) -> Dict[str, str]:
    """Start executing a learning session in the background.

    Builds the natural-language task for the agent from the session's
    parameters and runs ``agent.run_learning_task`` in a worker thread so the
    request returns immediately.

    Args:
        session_id: Identifier of a session registered in ``_active_sessions``.

    Returns:
        A dict with the ``session_id`` and ``status`` set to ``"started"``.

    Raises:
        HTTPException: 404 when the session is unknown, 400 when the agent has
            no current session.
    """
    if session_id not in _active_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    agent = _active_sessions[session_id]

    # Build task based on session parameters
    session = agent.current_session
    if not session:
        raise HTTPException(status_code=400, detail="Session not initialized")

    category = session.target_category
    target_count = session.target_count
    watch_duration = agent._watch_duration
    platform = agent.platform

    # Platform-specific app name and package. Only "name" is used below.
    platform_info = {
        "douyin": {
            "name": "抖音",
            "package": "com.ss.android.ugc.aweme",
        },
        "kuaishou": {
            "name": "快手",
            "package": "com.smile.gifmaker",
        },
        "tiktok": {
            "name": "TikTok",
            "package": "com.zhiliaoapp.musically",
        },
    }

    # Unknown platforms fall back to the Douyin entry.
    info = platform_info.get(platform, platform_info["douyin"])
    app_name = info["name"]

    # Build clear task instructions: search for the category when one is set,
    # otherwise watch the recommended feed.
    if category:
        task = f"""你是一个视频学习助手。请严格按照以下步骤执行：

步骤1：启动应用
- 回到主屏幕
- 打开{app_name}应用

步骤2：搜索内容
- 在{app_name}中搜索"{category}"
- 点击第一个搜索结果或进入相关页面

步骤3：观看视频
- 观看视频，每个视频停留约{watch_duration}秒
- 记录视频的描述、点赞数、评论数
- 向上滑动切换到下一个视频
- 重复观看和记录，直到完成{target_count}个视频

步骤4：完成任务
- 完成观看{target_count}个视频后，总结所有视频信息

请现在开始执行。"""
    else:
        task = f"""你是一个视频学习助手。请严格按照以下步骤执行：

步骤1：启动应用
- 回到主屏幕
- 打开{app_name}应用

步骤2：观看推荐视频
- 进入{app_name}的推荐页面
- 观看推荐视频，每个视频停留约{watch_duration}秒
- 记录视频的描述、点赞数、评论数
- 向上滑动切换到下一个视频
- 重复观看和记录，直到完成{target_count}个视频

步骤3：完成任务
- 完成观看{target_count}个视频后，总结所有视频信息

请现在开始执行。"""

    # Run in background. Keep a strong reference until the task finishes so it
    # cannot be garbage-collected mid-run; the done-callback releases it.
    background_run = asyncio.create_task(
        asyncio.to_thread(agent.run_learning_task, task)
    )
    _background_runs.add(background_run)
    background_run.add_done_callback(_background_runs.discard)

    return {"session_id": session_id, "status": "started"}
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/control", response_model=Dict[str, str])
async def control_session(
    session_id: str, request: SessionControlRequest
) -> Dict[str, str]:
    """Control a learning session (pause/resume/stop).

    Args:
        session_id: Identifier of a session registered in ``_active_sessions``.
        request: Carries the ``action`` to apply.

    Returns:
        A dict with the ``session_id`` and the resulting ``status``
        (``"paused"``, ``"resumed"`` or ``"stopped"``).

    Raises:
        HTTPException: 404 when the session is unknown, 400 when the action is
            not one of pause, resume or stop.
    """
    # Single lookup instead of check-then-index, so a session that is removed
    # concurrently yields a 404 rather than a KeyError.
    agent = _active_sessions.get(session_id)
    if agent is None:
        raise HTTPException(status_code=404, detail="Session not found")

    action = request.action

    if action == "pause":
        agent.pause_session()
        return {"session_id": session_id, "status": "paused"}

    if action == "resume":
        agent.resume_session()
        return {"session_id": session_id, "status": "resumed"}

    if action == "stop":
        agent.stop_session()
        # The entry may already be gone (a completion callback or the worker
        # thread can remove it), so removal must tolerate a missing key.
        _active_sessions.pop(session_id, None)
        return {"session_id": session_id, "status": "stopped"}

    raise HTTPException(status_code=400, detail=f"Invalid action: {request.action}")
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/status", response_model=SessionStatus)
async def get_session_status(session_id: str) -> SessionStatus:
    """Report progress for a session plus a summary of its latest record.

    Args:
        session_id: Identifier of a session registered in ``_active_sessions``.

    Returns:
        A ``SessionStatus`` built from the agent's progress dict;
        ``current_video`` is None when no record exists yet.

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    if session_id not in _active_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    learner = _active_sessions[session_id]
    snapshot = learner.get_session_progress()

    # Summarise the most recent record, if the session has one.
    latest_summary = None
    if learner.current_session and learner.current_session.records:
        newest = learner.current_session.records[-1]
        summary_fields = (
            "sequence_id",
            "timestamp",
            "screenshot_path",
            "description",
            "likes",
            "comments",
        )
        latest_summary = {name: getattr(newest, name) for name in summary_fields}

    # Copy the progress values field by field; a missing key raises KeyError.
    progress_fields = (
        "session_id",
        "platform",
        "target_count",
        "watched_count",
        "progress_percent",
        "is_active",
        "is_paused",
        "total_duration",
    )
    status_values = {name: snapshot[name] for name in progress_fields}
    return SessionStatus(current_video=latest_summary, **status_values)
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/videos", response_model=List[VideoInfo])
async def get_session_videos(session_id: str) -> List[VideoInfo]:
    """Return every record of a session as ``VideoInfo`` objects.

    Args:
        session_id: Identifier of a session registered in ``_active_sessions``.

    Returns:
        One ``VideoInfo`` per record, in record order; an empty list when the
        agent has no current session.

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    if session_id not in _active_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    learner = _active_sessions[session_id]
    if not learner.current_session:
        return []

    # Attributes copied verbatim from each record onto the response model.
    copied_fields = (
        "sequence_id",
        "timestamp",
        "screenshot_path",
        "watch_duration",
        "description",
        "likes",
        "comments",
        "tags",
        "category",
    )

    videos = []
    for record in learner.current_session.records:
        values = {name: getattr(record, name) for name in copied_fields}
        videos.append(VideoInfo(**values))
    return videos
|
||||
|
||||
|
||||
@router.get("/sessions", response_model=List[str])
async def list_sessions() -> List[str]:
    """Return the ids of all sessions currently in the registry."""
    # Iterating a dict yields its keys.
    return list(_active_sessions)
|
||||
|
||||
|
||||
@router.delete("/sessions/{session_id}", response_model=Dict[str, str])
async def delete_session(session_id: str) -> Dict[str, str]:
    """Stop a session's agent and remove the session from the registry.

    Args:
        session_id: Identifier of a session registered in ``_active_sessions``.

    Returns:
        A dict with the ``session_id`` and ``status`` set to ``"deleted"``.

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    # Remove and fetch in one step so a concurrent removal cannot turn the
    # lookup into a KeyError.
    agent = _active_sessions.pop(session_id, None)
    if agent is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Stop the agent as well: once the entry is gone no endpoint can reach it,
    # so a still-running agent could never be paused or stopped afterwards.
    agent.stop_session()

    return {"session_id": session_id, "status": "deleted"}
|
||||
@@ -39,6 +39,13 @@ class DashboardConfig:
|
||||
MODEL_BASE_URL: str = os.getenv("PHONE_AGENT_BASE_URL", "http://localhost:8000/v1")
|
||||
MODEL_NAME: str = os.getenv("PHONE_AGENT_MODEL", "autoglm-phone-9b")
|
||||
MODEL_API_KEY: str = os.getenv("PHONE_AGENT_API_KEY", "EMPTY")
|
||||
MAX_TOKENS: int = int(os.getenv("PHONE_AGENT_MAX_TOKENS", "3000"))
|
||||
TEMPERATURE: float = float(os.getenv("PHONE_AGENT_TEMPERATURE", "0.0"))
|
||||
TOP_P: float = float(os.getenv("PHONE_AGENT_TOP_P", "0.85"))
|
||||
FREQUENCY_PENALTY: float = float(os.getenv("PHONE_AGENT_FREQUENCY_PENALTY", "0.2"))
|
||||
|
||||
# Video learning settings
|
||||
VIDEO_LEARNING_OUTPUT_DIR: str = os.getenv("VIDEO_LEARNING_OUTPUT_DIR", "./video_learning_data")
|
||||
|
||||
# Task history
|
||||
MAX_TASK_HISTORY: int = int(os.getenv("MAX_TASK_HISTORY", "100"))
|
||||
|
||||
@@ -16,7 +16,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from dashboard.api import devices_router, tasks_router, websocket_router
|
||||
from dashboard.api import devices_router, tasks_router, websocket_router, video_learning_router
|
||||
from dashboard.config import config
|
||||
from dashboard.dependencies import (
|
||||
get_device_manager,
|
||||
@@ -104,6 +104,7 @@ async def global_exception_handler(request: Request, exc: Exception):
|
||||
app.include_router(devices_router, prefix="/api")
|
||||
app.include_router(tasks_router, prefix="/api")
|
||||
app.include_router(websocket_router)
|
||||
app.include_router(video_learning_router)
|
||||
|
||||
|
||||
# Health check
|
||||
@@ -163,6 +164,12 @@ if static_path.exists():
|
||||
app.mount("/static", StaticFiles(directory=str(static_path)), name="static")
|
||||
|
||||
|
||||
# Mount static files for video learning screenshots.
# Create the directory up front: if it is missing at startup the mount would be
# skipped for the whole process lifetime and screenshots written later would
# not be served.
video_learning_data_path = Path(config.VIDEO_LEARNING_OUTPUT_DIR)
try:
    video_learning_data_path.mkdir(parents=True, exist_ok=True)
except OSError:
    # Best effort only: if the directory cannot be created, fall through and
    # mount only when a usable directory already exists.
    pass
if video_learning_data_path.is_dir():
    app.mount(
        "/video-learning-data",
        StaticFiles(directory=str(video_learning_data_path)),
        name="video-learning-data",
    )
|
||||
|
||||
|
||||
# Run script entry point
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
283
dashboard/static/css/video-learning.css
Normal file
283
dashboard/static/css/video-learning.css
Normal file
@@ -0,0 +1,283 @@
|
||||
/* Video Learning Module Styles */
|
||||
|
||||
/* Header modifications */
|
||||
.header h1 {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
/* Configuration Section */
|
||||
.config-section {
|
||||
background-color: var(--card-bg);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 12px;
|
||||
padding: 2rem;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.config-form {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1.5rem;
|
||||
}
|
||||
|
||||
.form-group {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.form-group label {
|
||||
font-size: 0.875rem;
|
||||
font-weight: 500;
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.form-group select,
|
||||
.form-group input {
|
||||
padding: 0.75rem 1rem;
|
||||
background-color: var(--bg-color);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 8px;
|
||||
color: var(--text-primary);
|
||||
font-size: 0.95rem;
|
||||
}
|
||||
|
||||
.form-group select:focus,
|
||||
.form-group input:focus {
|
||||
outline: none;
|
||||
border-color: var(--primary-color);
|
||||
}
|
||||
|
||||
.form-group select:disabled,
|
||||
.form-group input:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.form-group small {
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.form-row {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
/* Session Section */
|
||||
.session-section {
|
||||
background-color: var(--card-bg);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 12px;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.session-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.session-header h2 {
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.session-controls {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
/* Progress Section */
|
||||
.progress-section {
|
||||
background-color: var(--bg-color);
|
||||
border-radius: 8px;
|
||||
padding: 1.5rem;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.progress-info {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.progress-bar-large {
|
||||
height: 8px;
|
||||
background-color: rgba(99, 102, 241, 0.2);
|
||||
border-radius: 4px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.progress-fill {
|
||||
height: 100%;
|
||||
background-color: var(--primary-color);
|
||||
transition: width 0.3s ease;
|
||||
}
|
||||
|
||||
.progress-stats {
|
||||
margin-top: 0.5rem;
|
||||
font-size: 0.8rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
/* Current Video */
|
||||
.current-video {
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.current-video h3 {
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
/* Video Cards */
|
||||
.video-card {
|
||||
background-color: var(--bg-color);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
transition: border-color 0.2s;
|
||||
}
|
||||
|
||||
.video-card:hover {
|
||||
border-color: var(--primary-color);
|
||||
}
|
||||
|
||||
.video-screenshot {
|
||||
width: 100%;
|
||||
aspect-ratio: 9/16;
|
||||
background-color: #000;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.video-screenshot img {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: contain;
|
||||
}
|
||||
|
||||
.video-placeholder {
|
||||
width: 100%;
|
||||
aspect-ratio: 9/16;
|
||||
background-color: var(--bg-color);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.video-info {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.video-id {
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
color: var(--primary-color);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.video-description {
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 0.5rem;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.video-stats {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.video-stats span {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
}
|
||||
|
||||
.video-stats svg {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Session Complete */
|
||||
.session-complete {
|
||||
text-align: center;
|
||||
padding: 3rem 2rem;
|
||||
}
|
||||
|
||||
.complete-icon {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
margin-bottom: 1rem;
|
||||
color: var(--success-color);
|
||||
}
|
||||
|
||||
.session-complete h3 {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.session-complete p {
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
/* Video Grid */
|
||||
.video-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.video-grid .video-card {
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.video-grid .video-screenshot,
|
||||
.video-grid .video-placeholder {
|
||||
aspect-ratio: 9/16;
|
||||
}
|
||||
|
||||
/* History Section */
|
||||
.history-section {
|
||||
margin-top: 2rem;
|
||||
}
|
||||
|
||||
.history-section h2 {
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
/* Responsive */
|
||||
@media (max-width: 768px) {
|
||||
.form-row {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.session-header {
|
||||
flex-direction: column;
|
||||
gap: 1rem;
|
||||
align-items: flex-start;
|
||||
}
|
||||
|
||||
.video-grid {
|
||||
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
|
||||
}
|
||||
}
|
||||
@@ -41,6 +41,13 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="header-actions">
|
||||
<a href="/static/video-learning.html" class="btn btn-primary">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polygon points="23 7 16 12 23 17 23 7"></polygon>
|
||||
<rect x="1" y="5" width="15" height="14" rx="2" ry="2"></rect>
|
||||
</svg>
|
||||
Video Learning
|
||||
</a>
|
||||
<button @click="refreshDevices" class="btn btn-secondary" :disabled="refreshing">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" :class="{ spinning: refreshing }">
|
||||
<polyline points="23 4 23 10 17 10"></polyline>
|
||||
|
||||
200
dashboard/static/js/video-learning.js
Normal file
200
dashboard/static/js/video-learning.js
Normal file
@@ -0,0 +1,200 @@
|
||||
/**
|
||||
* Video Learning Module for AutoGLM Dashboard
|
||||
*
|
||||
* This module provides UI and functionality for the Video Learning Agent,
|
||||
* allowing users to watch and learn from short video platforms.
|
||||
*/
|
||||
|
||||
const VideoLearningModule = {
    // Current session state
    currentSessionId: null,
    currentSessionStatus: null,
    videos: [],
    isPolling: false,
    // Handle returned by setInterval while polling is active.
    pollInterval: null,
    // True while a poll round-trip is outstanding; prevents overlapping polls.
    pollInFlight: false,

    /**
     * Create a new learning session and begin polling it.
     * @param {string} deviceId - Target device ID.
     * @param {Object} [options] - platform, targetCount, category, watchDuration.
     * @returns {Promise<Object>} Response body containing session_id.
     */
    async createSession(deviceId, options = {}) {
        const {
            platform = 'douyin',
            targetCount = 10,
            category = null,
            watchDuration = 3.0,
        } = options;

        try {
            const response = await axios.post('/api/video-learning/sessions', {
                device_id: deviceId,
                platform: platform,
                target_count: targetCount,
                category: category,
                watch_duration: watchDuration,
            });

            this.currentSessionId = response.data.session_id;
            this.startPolling();

            return response.data;
        } catch (error) {
            console.error('Error creating session:', error);
            throw error;
        }
    },

    /** Start executing a previously created session. */
    async startSession(sessionId) {
        try {
            const response = await axios.post(`/api/video-learning/sessions/${sessionId}/start`);
            return response.data;
        } catch (error) {
            console.error('Error starting session:', error);
            throw error;
        }
    },

    /** Control a session; action is one of pause, resume, stop. */
    async controlSession(sessionId, action) {
        try {
            const response = await axios.post(`/api/video-learning/sessions/${sessionId}/control`, {
                action: action,
            });
            return response.data;
        } catch (error) {
            console.error('Error controlling session:', error);
            throw error;
        }
    },

    /** Fetch the session status and cache it in currentSessionStatus. */
    async getSessionStatus(sessionId) {
        try {
            const response = await axios.get(`/api/video-learning/sessions/${sessionId}/status`);
            this.currentSessionStatus = response.data;
            return response.data;
        } catch (error) {
            console.error('Error getting session status:', error);
            throw error;
        }
    },

    /** Fetch the session's videos and cache them in videos. */
    async getSessionVideos(sessionId) {
        try {
            const response = await axios.get(`/api/video-learning/sessions/${sessionId}/videos`);
            this.videos = response.data;
            return response.data;
        } catch (error) {
            console.error('Error getting session videos:', error);
            throw error;
        }
    },

    /** List all active session IDs. */
    async listSessions() {
        try {
            const response = await axios.get('/api/video-learning/sessions');
            return response.data;
        } catch (error) {
            console.error('Error listing sessions:', error);
            throw error;
        }
    },

    /** Delete a session; clears local state when it is the current one. */
    async deleteSession(sessionId) {
        try {
            const response = await axios.delete(`/api/video-learning/sessions/${sessionId}`);
            if (this.currentSessionId === sessionId) {
                this.currentSessionId = null;
                this.currentSessionStatus = null;
                this.stopPolling();
            }
            return response.data;
        } catch (error) {
            console.error('Error deleting session:', error);
            throw error;
        }
    },

    /**
     * Start polling for session updates. Does nothing if already polling.
     * @param {number} [intervalMs=1000] - Delay between polls in milliseconds.
     */
    startPolling(intervalMs = 1000) {
        if (this.isPolling) return;

        this.isPolling = true;
        this.pollInterval = setInterval(() => this._pollOnce(), intervalMs);
        console.log(`[VideoLearning] Started polling with ${intervalMs}ms interval`);
    },

    /**
     * Run one poll: refresh status and videos, notify the UI through a
     * 'videoLearningUpdate' window event, and stop polling once the session
     * is no longer active or no longer exists on the server.
     */
    async _pollOnce() {
        const sessionId = this.currentSessionId;
        // Skip this tick if there is nothing to poll or the previous poll is
        // still waiting for the server; overlapping polls could apply stale data.
        if (!sessionId || this.pollInFlight) return;

        this.pollInFlight = true;
        try {
            await this.getSessionStatus(sessionId);
            await this.getSessionVideos(sessionId);

            // Trigger custom event for UI updates
            window.dispatchEvent(new CustomEvent('videoLearningUpdate', {
                detail: {
                    sessionId: sessionId,
                    status: this.currentSessionStatus,
                    videos: this.videos,
                }
            }));

            // The data just dispatched is already the latest, so stop here.
            // Fetching again could fail if the server has already dropped the
            // finished session, and that failure would skip stopPolling().
            if (this.currentSessionStatus && !this.currentSessionStatus.is_active) {
                console.log('[VideoLearning] Session completed, stopping poll');
                this.stopPolling();
            }
        } catch (error) {
            console.error('Error polling session status:', error);
            // Transient errors keep polling alive. A 404 means the server no
            // longer knows the session, so further polls can never succeed.
            if (error && error.response && error.response.status === 404) {
                console.log('[VideoLearning] Session no longer exists, stopping poll');
                this.stopPolling();
            }
        } finally {
            this.pollInFlight = false;
        }
    },

    /** Stop polling and clear the interval handle. */
    stopPolling() {
        if (this.pollInterval) {
            clearInterval(this.pollInterval);
            this.pollInterval = null;
            console.log('[VideoLearning] Stopped polling');
        }
        this.isPolling = false;
    },

    /** Format a duration in seconds as "12.3s" or "1m 15.0s". */
    formatDuration(seconds) {
        if (seconds < 60) {
            return `${seconds.toFixed(1)}s`;
        }
        const minutes = Math.floor(seconds / 60);
        const remainingSeconds = seconds % 60;
        return `${minutes}m ${remainingSeconds.toFixed(1)}s`;
    },

    /** Format a number with a K/M suffix; null or undefined gives 'N/A'. */
    formatNumber(num) {
        if (num === null || num === undefined) return 'N/A';
        if (num >= 1000000) {
            return `${(num / 1000000).toFixed(1)}M`;
        } else if (num >= 1000) {
            return `${(num / 1000).toFixed(1)}K`;
        }
        return num.toString();
    },
};

// Export for use in other modules
if (typeof module !== 'undefined' && module.exports) {
    module.exports = VideoLearningModule;
}
|
||||
412
dashboard/static/video-learning.html
Normal file
412
dashboard/static/video-learning.html
Normal file
@@ -0,0 +1,412 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Video Learning - AutoGLM Dashboard</title>
|
||||
<!-- Vue.js 3 -->
|
||||
<script src="https://unpkg.com/vue@3/dist/vue.global.js"></script>
|
||||
<!-- Axios for API requests -->
|
||||
<script src="https://unpkg.com/axios/dist/axios.min.js"></script>
|
||||
<!-- CSS -->
|
||||
<link rel="stylesheet" href="/static/css/dashboard.css">
|
||||
<link rel="stylesheet" href="/static/css/video-learning.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="app">
|
||||
<!-- Header -->
|
||||
<header class="header">
|
||||
<div class="header-content">
|
||||
<h1>
|
||||
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polygon points="23 7 16 12 23 17 23 7"></polygon>
|
||||
<rect x="1" y="5" width="15" height="14" rx="2" ry="2"></rect>
|
||||
</svg>
|
||||
Video Learning Agent
|
||||
</h1>
|
||||
<div class="stats">
|
||||
<span class="stat" title="Session Status">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<circle cx="12" cy="12" r="10"></circle>
|
||||
<polyline points="12 6 12 12 16 14"></polyline>
|
||||
</svg>
|
||||
{{ sessionStatus ? sessionStatus.status : 'No Session' }}
|
||||
</span>
|
||||
<span class="stat" v-if="sessionStatus" title="Progress">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<path d="M22 11.08V12a10 10 0 1 1-5.93-9.14"></path>
|
||||
<polyline points="22 4 12 14.01 9 11.01"></polyline>
|
||||
</svg>
|
||||
{{ sessionStatus.watched_count }} / {{ sessionStatus.target_count }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="header-actions">
|
||||
<button @click="goBack" class="btn btn-secondary">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<line x1="19" y1="12" x2="5" y2="12"></line>
|
||||
<polyline points="12 19 5 12 12 5"></polyline>
|
||||
</svg>
|
||||
Back
|
||||
</button>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<!-- Main Content -->
|
||||
<main class="main-content">
|
||||
<!-- Configuration Section -->
|
||||
<section class="config-section" v-if="!currentSessionId">
|
||||
<h2>Create Learning Session</h2>
|
||||
<div class="config-form">
|
||||
<div class="form-group">
|
||||
<label>Device</label>
|
||||
<select v-model="config.deviceId" :disabled="loading">
|
||||
<option value="">Select a device...</option>
|
||||
<option v-for="device in devices" :key="device.device_id" :value="device.device_id"
|
||||
:disabled="!device.is_connected || device.status === 'busy'">
|
||||
{{ device.device_id }}
|
||||
{{ !device.is_connected ? '(Disconnected)' : '' }}
|
||||
{{ device.status === 'busy' ? '(Busy)' : '' }}
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label>Platform</label>
|
||||
<select v-model="config.platform" :disabled="loading">
|
||||
<option value="douyin">Douyin (抖音)</option>
|
||||
<option value="kuaishou">Kuaishou (快手)</option>
|
||||
<option value="tiktok">TikTok</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-row">
|
||||
<div class="form-group">
|
||||
<label>Target Videos</label>
|
||||
<input type="number" v-model.number="config.targetCount" min="1" max="100" :disabled="loading">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Watch Duration (s)</label>
|
||||
<input type="number" v-model.number="config.watchDuration" min="1" max="30" step="0.5" :disabled="loading">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label>Category (Optional)</label>
|
||||
<input type="text" v-model="config.category" placeholder="e.g., 美食, 旅行, 搞笑" :disabled="loading">
|
||||
<small>Leave empty to watch recommended videos</small>
|
||||
</div>
|
||||
|
||||
<button @click="createAndStartSession" class="btn btn-primary" :disabled="loading || !config.deviceId">
|
||||
<svg v-if="loading" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" class="spinning">
|
||||
<path d="M21 12a9 9 0 1 1-6.219-8.56"></path>
|
||||
</svg>
|
||||
{{ loading ? 'Creating...' : 'Start Learning' }}
|
||||
</button>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Session Control Section -->
|
||||
<section class="session-section" v-if="currentSessionId && sessionStatus">
|
||||
<div class="session-header">
|
||||
<h2>Session: {{ currentSessionId }}</h2>
|
||||
<div class="session-controls">
|
||||
<button v-if="sessionStatus.is_paused" @click="resumeSession" class="btn btn-primary btn-sm">
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polygon points="5 3 19 12 5 21 5 3"></polygon>
|
||||
</svg>
|
||||
Resume
|
||||
</button>
|
||||
<button v-else-if="sessionStatus.is_active" @click="pauseSession" class="btn btn-secondary btn-sm">
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<rect x="6" y="4" width="4" height="16"></rect>
|
||||
<rect x="14" y="4" width="4" height="16"></rect>
|
||||
</svg>
|
||||
Pause
|
||||
</button>
|
||||
<button v-if="sessionStatus.is_active" @click="stopSession" class="btn btn-danger btn-sm">
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<rect x="6" y="6" width="12" height="12"></rect>
|
||||
</svg>
|
||||
Stop
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Progress Bar -->
|
||||
<div class="progress-section" v-if="sessionStatus.is_active || sessionStatus.is_paused">
|
||||
<div class="progress-info">
|
||||
<span>Progress: {{ sessionStatus.watched_count }} / {{ sessionStatus.target_count }}</span>
|
||||
<span>{{ Math.round(sessionStatus.progress_percent) }}%</span>
|
||||
</div>
|
||||
<div class="progress-bar-large">
|
||||
<div class="progress-fill" :style="{ width: sessionStatus.progress_percent + '%' }"></div>
|
||||
</div>
|
||||
<div class="progress-stats">
|
||||
<span>Total Duration: {{ formatDuration(sessionStatus.total_duration) }}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Current Video -->
|
||||
<div class="current-video" v-if="sessionStatus.current_video">
|
||||
<h3>Current Video</h3>
|
||||
<div class="video-card">
|
||||
<div class="video-screenshot" v-if="sessionStatus.current_video.screenshot_path">
|
||||
<img :src="sessionStatus.current_video.screenshot_path" alt="Current video">
|
||||
</div>
|
||||
<div class="video-info">
|
||||
<div class="video-id">#{{ sessionStatus.current_video.sequence_id }}</div>
|
||||
<div class="video-description" v-if="sessionStatus.current_video.description">
|
||||
{{ sessionStatus.current_video.description }}
|
||||
</div>
|
||||
<div class="video-stats">
|
||||
<span v-if="sessionStatus.current_video.likes">
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" stroke="none">
|
||||
<path d="M20.84 4.61a5.5 5.5 0 0 0-7.78 0L12 5.67l-1.06-1.06a5.5 5.5 0 0 0-7.78 7.78l1.06 1.06L12 21.23l7.78-7.78 1.06-1.06a5.5 5.5 0 0 0 0-7.78z"></path>
|
||||
</svg>
|
||||
{{ formatNumber(sessionStatus.current_video.likes) }}
|
||||
</span>
|
||||
<span v-if="sessionStatus.current_video.comments">
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<path d="M21 11.5a8.38 8.38 0 0 1-.9 3.8 8.5 8.5 0 0 1-7.6 4.7 8.38 8.38 0 0 1-3.8-.9L3 21l1.9-5.7a8.38 8.38 0 0 1-.9-3.8 8.5 8.5 0 0 1 4.7-7.6 8.38 8.38 0 0 1 3.8-.9h.5a8.48 8.48 0 0 1 8 8v.5z"></path>
|
||||
</svg>
|
||||
{{ formatNumber(sessionStatus.current_video.comments) }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Session Complete -->
|
||||
<div class="session-complete" v-if="!sessionStatus.is_active && currentSessionId">
|
||||
<div class="complete-icon">
|
||||
<svg width="64" height="64" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<path d="M22 11.08V12a10 10 0 1 1-5.93-9.14"></path>
|
||||
<polyline points="22 4 12 14.01 9 11.01"></polyline>
|
||||
</svg>
|
||||
</div>
|
||||
<h3>Session Complete!</h3>
|
||||
<p>Watched {{ sessionStatus.watched_count }} videos in {{ formatDuration(sessionStatus.total_duration) }}</p>
|
||||
<button @click="resetSession" class="btn btn-primary">Start New Session</button>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Video History -->
|
||||
<section class="history-section" v-if="videos.length > 0">
|
||||
<h2>Watched Videos</h2>
|
||||
<div class="video-grid">
|
||||
<div v-for="video in videos" :key="video.sequence_id" class="video-card">
|
||||
<div class="video-screenshot" v-if="video.screenshot_path">
|
||||
<img :src="video.screenshot_path" :alt="'Video ' + video.sequence_id">
|
||||
</div>
|
||||
<div class="video-placeholder" v-else>
|
||||
<svg width="32" height="32" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<rect x="2" y="2" width="20" height="20" rx="2.18" ry="2.18"></rect>
|
||||
<line x1="7" y1="2" x2="7" y2="22"></line>
|
||||
<line x1="17" y1="2" x2="17" y2="22"></line>
|
||||
<line x1="2" y1="12" x2="22" y2="12"></line>
|
||||
<line x1="2" y1="7" x2="7" y2="7"></line>
|
||||
<line x1="2" y1="17" x2="7" y2="17"></line>
|
||||
<line x1="17" y1="17" x2="22" y2="17"></line>
|
||||
<line x1="17" y1="7" x2="22" y2="7"></line>
|
||||
</svg>
|
||||
</div>
|
||||
<div class="video-info">
|
||||
<div class="video-id">#{{ video.sequence_id }}</div>
|
||||
<div class="video-description" v-if="video.description">{{ video.description }}</div>
|
||||
<div class="video-stats">
|
||||
<span v-if="video.likes">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="currentColor" stroke="none">
|
||||
<path d="M20.84 4.61a5.5 5.5 0 0 0-7.78 0L12 5.67l-1.06-1.06a5.5 5.5 0 0 0-7.78 7.78l1.06 1.06L12 21.23l7.78-7.78 1.06-1.06a5.5 5.5 0 0 0 0-7.78z"></path>
|
||||
</svg>
|
||||
{{ formatNumber(video.likes) }}
|
||||
</span>
|
||||
<span v-if="video.comments">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<path d="M21 11.5a8.38 8.38 0 0 1-.9 3.8 8.5 8.5 0 0 1-7.6 4.7 8.38 8.38 0 0 1-3.8-.9L3 21l1.9-5.7a8.38 8.38 0 0 1-.9-3.8 8.5 8.5 0 0 1 4.7-7.6 8.38 8.38 0 0 1 3.8-.9h.5a8.48 8.48 0 0 1 8 8v.5z"></path>
|
||||
</svg>
|
||||
{{ formatNumber(video.comments) }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</main>
|
||||
|
||||
<!-- Toast notifications -->
|
||||
<div class="toast-container">
|
||||
<div v-for="toast in toasts" :key="toast.id" class="toast" :class="toast.type">
|
||||
{{ toast.message }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/video-learning.js"></script>
|
||||
<script>
|
||||
const { createApp } = Vue;

// Root Vue application for the Video Learning page.
// Talks to the backend through VideoLearningModule and mirrors the session
// state it reports (via direct calls and 'videoLearningUpdate' window events).
createApp({
    data() {
        return {
            devices: [],
            currentSessionId: null,
            sessionStatus: null,
            videos: [],
            loading: false,
            toasts: [],
            toastIdCounter: 0,

            // Form model for the "Create Learning Session" section.
            config: {
                deviceId: '',
                platform: 'douyin',
                targetCount: 10,
                category: '',
                watchDuration: 3.0,
            },
        };
    },

    mounted() {
        this.loadDevices();
        this.setupVideoLearningEvents();
    },

    methods: {
        // Fetch the device list shown in the device selector.
        async loadDevices() {
            try {
                const response = await axios.get('/api/devices');
                this.devices = response.data;
            } catch (error) {
                this.showToast('Failed to load devices', 'error');
            }
        },

        // Create a session from the form values, start it, and load its
        // initial status.
        async createAndStartSession() {
            if (!this.config.deviceId) {
                this.showToast('Please select a device', 'error');
                return;
            }

            this.loading = true;
            // Tracks which step failed so the error toast is accurate.
            let stage = 'create';
            try {
                // Create session
                const createResult = await VideoLearningModule.createSession(
                    this.config.deviceId,
                    {
                        platform: this.config.platform,
                        targetCount: this.config.targetCount,
                        category: this.config.category || null,
                        watchDuration: this.config.watchDuration,
                    }
                );

                this.currentSessionId = createResult.session_id;
                this.showToast('Session created! Starting...', 'success');

                // Start session
                stage = 'start';
                await VideoLearningModule.startSession(this.currentSessionId);
                this.showToast('Learning session started!', 'success');

                // Initial status update
                await this.updateSessionStatus();
            } catch (error) {
                // With an id set but no status, the template hides both the
                // form and the session panel. Reset so the user can retry.
                if (!this.sessionStatus) {
                    this.resetSession();
                }
                this.showToast(`Failed to ${stage} session: ` + error.message, 'error');
            } finally {
                this.loading = false;
            }
        },

        async pauseSession() {
            if (!this.currentSessionId) return;

            try {
                await VideoLearningModule.controlSession(this.currentSessionId, 'pause');
                await this.updateSessionStatus();
                this.showToast('Session paused', 'info');
            } catch (error) {
                this.showToast('Failed to pause session', 'error');
            }
        },

        async resumeSession() {
            if (!this.currentSessionId) return;

            try {
                await VideoLearningModule.controlSession(this.currentSessionId, 'resume');
                await this.updateSessionStatus();
                this.showToast('Session resumed', 'info');
            } catch (error) {
                this.showToast('Failed to resume session', 'error');
            }
        },

        async stopSession() {
            if (!this.currentSessionId) return;

            if (!confirm('Are you sure you want to stop this session?')) return;

            try {
                await VideoLearningModule.controlSession(this.currentSessionId, 'stop');
                await this.updateSessionStatus();
                this.showToast('Session stopped', 'info');
            } catch (error) {
                this.showToast('Failed to stop session', 'error');
            }
        },

        // Pull the latest status and video list for the current session.
        // Failures are logged only, so a transient error does not interrupt the UI.
        async updateSessionStatus() {
            if (!this.currentSessionId) return;

            try {
                this.sessionStatus = await VideoLearningModule.getSessionStatus(this.currentSessionId);
                this.videos = await VideoLearningModule.getSessionVideos(this.currentSessionId);
            } catch (error) {
                console.error('Error updating session status:', error);
            }
        },

        // Handler for 'videoLearningUpdate' window events.
        onVideoLearningUpdate(event) {
            const { sessionId, status, videos } = event.detail || {};

            // Ignore updates that arrive after a reset or that belong to a
            // different session, so stale data cannot repopulate the page.
            if (!this.currentSessionId) return;
            if (sessionId && sessionId !== this.currentSessionId) return;

            // Only overwrite state with usable payloads; a missing status
            // would otherwise hide the whole session panel.
            if (status) this.sessionStatus = status;
            if (Array.isArray(videos)) this.videos = videos;
        },

        setupVideoLearningEvents() {
            // Component methods are bound once per instance, so the same
            // reference can be passed to removeEventListener later.
            window.addEventListener('videoLearningUpdate', this.onVideoLearningUpdate);
        },

        // Return to the configuration form and stop background polling.
        resetSession() {
            this.currentSessionId = null;
            this.sessionStatus = null;
            this.videos = [];
            VideoLearningModule.stopPolling();
        },

        goBack() {
            window.location.href = '/';
        },

        formatDuration(seconds) {
            return VideoLearningModule.formatDuration(seconds);
        },

        formatNumber(num) {
            return VideoLearningModule.formatNumber(num);
        },

        // Show a transient notification that removes itself after 3 seconds.
        showToast(message, type = 'info') {
            const id = this.toastIdCounter++;
            this.toasts.push({ id, message, type });

            setTimeout(() => {
                this.toasts = this.toasts.filter(t => t.id !== id);
            }, 3000);
        },
    },

    beforeUnmount() {
        // Detach the global listener registered in mounted() and stop polling.
        window.removeEventListener('videoLearningUpdate', this.onVideoLearningUpdate);
        VideoLearningModule.stopPolling();
    },
}).mount('#app');
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
253
docs/VIDEO_LEARNING.md
Normal file
253
docs/VIDEO_LEARNING.md
Normal file
@@ -0,0 +1,253 @@
|
||||
# Video Learning Agent
|
||||
|
||||
AI-powered agent for learning from short video platforms like Douyin (抖音), Kuaishou (快手), and TikTok.
|
||||
|
||||
## 功能特性
|
||||
|
||||
### MVP 功能
|
||||
- **自动滑动**: 自动在视频之间滑动切换
|
||||
- **播放控制**: 播放/暂停控制
|
||||
- **截图记录**: 为每个视频截图保存
|
||||
- **数据采集**: 采集视频描述、点赞数、评论数
|
||||
- **可视化管理**: 通过 Web Dashboard 可视化控制
|
||||
- **会话管理**: 创建、暂停、恢复、停止学习会话
|
||||
- **数据导出**: 导出学习数据(JSON/CSV)
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 启动 Dashboard
|
||||
|
||||
```bash
|
||||
# 使用脚本启动(推荐)
|
||||
scripts\run_video_learning_demo.bat  # Windows
|
||||
bash scripts/run_video_learning_demo.sh # Linux/Mac
|
||||
|
||||
# 或手动启动
|
||||
python -m uvicorn dashboard.main:app --host 0.0.0.0 --port 8080 --reload
|
||||
```
|
||||
|
||||
### 2. 访问 Video Learning 页面
|
||||
|
||||
打开浏览器访问: `http://localhost:8080/static/video-learning.html`
|
||||
|
||||
或从主 Dashboard 页面点击 "Video Learning" 按钮。
|
||||
|
||||
### 3. 创建学习会话
|
||||
|
||||
1. 选择设备
|
||||
2. 选择平台(抖音/快手/TikTok)
|
||||
3. 设置目标视频数量
|
||||
4. (可选)设置类别筛选
|
||||
5. 设置观看时长
|
||||
6. 点击 "Start Learning"
|
||||
|
||||
## 使用示例
|
||||
|
||||
### 独立运行
|
||||
|
||||
```bash
|
||||
DEVICE_ID=emulator-5554 \
TARGET_COUNT=10 \
CATEGORY="美食" \
WATCH_DURATION=3.0 \
python examples/video_learning_demo.py
|
||||
```
|
||||
|
||||
### 通过 Dashboard
|
||||
|
||||
1. 打开 Video Learning 页面
|
||||
2. 配置学习参数
|
||||
3. 点击启动
|
||||
4. 实时查看进度
|
||||
|
||||
### API 调用
|
||||
|
||||
```python
|
||||
from phone_agent import VideoLearningAgent
|
||||
from phone_agent.model.client import ModelConfig
|
||||
|
||||
# 创建模型配置
|
||||
model_config = ModelConfig(
|
||||
base_url="https://open.bigmodel.cn/api/paas/v4",
|
||||
model_name="autoglm-phone-9b",
|
||||
api_key="your-api-key",
|
||||
)
|
||||
|
||||
# 创建 Video Learning Agent
|
||||
agent = VideoLearningAgent(
|
||||
model_config=model_config,
|
||||
platform="douyin",
|
||||
output_dir="./video_learning_data",
|
||||
)
|
||||
|
||||
# 启动会话
|
||||
session_id = agent.start_session(
|
||||
device_id="emulator-5554",
|
||||
target_count=10,
|
||||
category="美食",
|
||||
watch_duration=3.0,
|
||||
)
|
||||
|
||||
# 运行任务
|
||||
task = """
|
||||
在抖音上学习"美食"类视频:
|
||||
1. 打开抖音并搜索"美食"
|
||||
2. 观看视频,每个视频约3秒
|
||||
3. 记录描述、点赞数、评论数
|
||||
4. 滑动到下一个视频
|
||||
5. 重复直到观看完10个视频
|
||||
"""
|
||||
|
||||
success = agent.run_learning_task(task)
|
||||
|
||||
# 导出数据
|
||||
agent.export_data("json")
|
||||
agent.export_data("csv")
|
||||
```
|
||||
|
||||
## API 端点
|
||||
|
||||
### 创建会话
|
||||
```http
|
||||
POST /api/video-learning/sessions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"device_id": "emulator-5554",
|
||||
"platform": "douyin",
|
||||
"target_count": 10,
|
||||
"category": "美食",
|
||||
"watch_duration": 3.0
|
||||
}
|
||||
```
|
||||
|
||||
### 启动会话
|
||||
```http
|
||||
POST /api/video-learning/sessions/{session_id}/start
|
||||
```
|
||||
|
||||
### 控制会话
|
||||
```http
|
||||
POST /api/video-learning/sessions/{session_id}/control
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"action": "pause" // pause, resume, stop
|
||||
}
|
||||
```
|
||||
|
||||
### 获取会话状态
|
||||
```http
|
||||
GET /api/video-learning/sessions/{session_id}/status
|
||||
```
|
||||
|
||||
### 获取会话视频列表
|
||||
```http
|
||||
GET /api/video-learning/sessions/{session_id}/videos
|
||||
```
|
||||
|
||||
## 数据结构
|
||||
|
||||
### VideoRecord
|
||||
```json
{
  "sequence_id": 1,
  "timestamp": "2024-01-09T10:00:00",
  "screenshot_path": "./video_learning_data/screenshots/...",
  "watch_duration": 3.0,
  "description": "视频描述文案",
  "likes": 1000,
  "comments": 50,
  "shares": 20,
  "tags": [],
  "category": "美食",
  "elements": [],
  "position_in_session": 1
}
```
|
||||
|
||||
### LearningSession
|
||||
```json
|
||||
{
|
||||
"session_id": "session_20240109_100000",
|
||||
"start_time": "2024-01-09T10:00:00",
|
||||
"platform": "douyin",
|
||||
"target_category": "美食",
|
||||
"target_count": 10,
|
||||
"is_active": true,
|
||||
"is_paused": false,
|
||||
"total_videos": 10,
|
||||
"total_duration": 30.0,
|
||||
"records": [...]
|
||||
}
|
||||
```
|
||||
|
||||
## 配置选项
|
||||
|
||||
在 `.env` 文件中配置:
|
||||
|
||||
```bash
|
||||
# 视频学习数据输出目录
|
||||
VIDEO_LEARNING_OUTPUT_DIR=./video_learning_data
|
||||
|
||||
# 模型参数
|
||||
PHONE_AGENT_MAX_TOKENS=3000
|
||||
PHONE_AGENT_TEMPERATURE=0.0
|
||||
PHONE_AGENT_TOP_P=0.85
|
||||
PHONE_AGENT_FREQUENCY_PENALTY=0.2
|
||||
```
|
||||
|
||||
## 后续扩展计划
|
||||
|
||||
### 阶段 2: 高级分析
|
||||
- [ ] 视频内容特征提取
|
||||
- [ ] 常见元素识别
|
||||
- [ ] 视频风格分析
|
||||
- [ ] BGM 识别
|
||||
|
||||
### 阶段 3: 模式学习
|
||||
- [ ] 同类视频模式归纳
|
||||
- [ ] 创作趋势分析
|
||||
- [ ] 热门元素统计
|
||||
- [ ] 最佳实践总结
|
||||
|
||||
### 阶段 4: 创作辅助
|
||||
- [ ] 脚本生成
|
||||
- [ ] 分镜头建议
|
||||
- [ ] 拍摄指导
|
||||
- [ ] 剪辑建议
|
||||
|
||||
## 技术架构
|
||||
|
||||
```
|
||||
VideoLearningAgent
|
||||
├── ModelConfig (VLM 配置)
|
||||
├── LearningSession (会话管理)
|
||||
│ └── VideoRecord[] (视频记录)
|
||||
├── Callbacks (回调函数)
|
||||
│ ├── on_video_watched
|
||||
│ ├── on_progress_update
|
||||
│ └── on_session_complete
|
||||
└── PhoneAgent (底层操作)
|
||||
├── 视觉理解 (VLM)
|
||||
├── 设备控制 (ADB/HDC/iOS)
|
||||
└── 任务执行
|
||||
```
|
||||
|
||||
## 故障排除
|
||||
|
||||
### 问题: 设备未连接
|
||||
- 确保 ADB/HDC 服务正在运行
|
||||
- 检查设备是否通过 USB 连接
|
||||
- 尝试点击 "Refresh" 按钮
|
||||
|
||||
### 问题: 任务无法启动
|
||||
- 检查模型 API 配置
|
||||
- 确保 `.env` 文件正确配置
|
||||
- 查看 Dashboard 控制台日志
|
||||
|
||||
### 问题: 视频信息未采集
|
||||
- 确保 VLM 模型正常工作
|
||||
- 检查网络连接
|
||||
- 增加观看时长
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT License
|
||||
161
examples/video_learning_demo.py
Normal file
161
examples/video_learning_demo.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Video Learning Agent Demo
|
||||
|
||||
This script demonstrates how to use the VideoLearningAgent to watch
|
||||
and learn from short video platforms like Douyin.
|
||||
|
||||
Usage:
|
||||
    DEVICE_ID=<device_id> TARGET_COUNT=10 python examples/video_learning_demo.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from phone_agent.model.client import ModelConfig
|
||||
from phone_agent.video_learning import VideoLearningAgent
|
||||
|
||||
|
||||
def main(argv=None):
    """Run the Video Learning Agent demo against Douyin.

    Settings are taken from command-line flags. Every flag defaults to the
    matching environment variable (``DEVICE_ID``, ``TARGET_COUNT``,
    ``WATCH_DURATION``, ``CATEGORY``), so an environment-only invocation
    behaves exactly as before. Model settings are read from
    ``MODEL_BASE_URL``, ``MODEL_API_KEY`` and ``MODEL_NAME``.

    Args:
        argv: Optional list of arguments for the parser. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.
    """
    # Function-scope import so the file's import block stays untouched.
    import argparse

    # Load model configuration from environment
    base_url = os.getenv("MODEL_BASE_URL", "http://localhost:8000/v1")
    api_key = os.getenv("MODEL_API_KEY", "your-api-key")
    model_name = os.getenv("MODEL_NAME", "autoglm-phone-9b")

    # Previously the --device-id/--count/... flags shown in the usage text
    # were silently ignored; parse them, falling back to the env vars.
    parser = argparse.ArgumentParser(description="Video Learning Agent Demo")
    parser.add_argument(
        "--device-id",
        default=os.getenv("DEVICE_ID", "emulator-5554"),
        help="target device ID (env: DEVICE_ID)",
    )
    parser.add_argument(
        "--count",
        type=int,
        default=int(os.getenv("TARGET_COUNT", "10")),
        help="number of videos to watch (env: TARGET_COUNT)",
    )
    parser.add_argument(
        "--watch-duration",
        type=float,
        default=float(os.getenv("WATCH_DURATION", "3.0")),
        help="seconds to watch each video (env: WATCH_DURATION)",
    )
    parser.add_argument(
        "--category",
        default=os.getenv("CATEGORY", None),
        help='optional category filter, e.g. "美食" (env: CATEGORY)',
    )
    args = parser.parse_args(argv)

    device_id = args.device_id
    target_count = args.count
    watch_duration = args.watch_duration
    category = args.category  # e.g., "美食", "旅行", "搞笑"

    # Named once so the agent and the completion message cannot drift apart.
    output_dir = "./video_learning_data"

    print("=" * 60)
    print("Video Learning Agent Demo")
    print("=" * 60)
    print(f"Device: {device_id}")
    print("Platform: Douyin")
    print(f"Target videos: {target_count}")
    print(f"Watch duration: {watch_duration}s per video")
    if category:
        print(f"Category filter: {category}")
    print("=" * 60)

    # Create agent
    model_config = ModelConfig(
        base_url=base_url,
        model_name=model_name,
        api_key=api_key,
        lang="cn",
    )

    agent = VideoLearningAgent(
        model_config=model_config,
        platform="douyin",
        output_dir=output_dir,
    )

    # Setup callbacks
    def on_video_watched(record):
        print(f"\n[Video {record.sequence_id}] Watched!")
        if record.description:
            print(f" Description: {record.description}")
        if record.likes:
            print(f" Likes: {record.likes}")
        print(f" Screenshot: {record.screenshot_path}")

    def on_progress_update(current, total):
        percent = (current / total * 100) if total > 0 else 0
        print(f"\nProgress: {current}/{total} ({percent:.1f}%)")

    def on_session_complete(session):
        print("\n" + "=" * 60)
        print("Session Complete!")
        print("=" * 60)
        print(f"Total videos watched: {session.total_videos}")
        print(f"Total duration: {session.total_duration:.1f}s")
        print(f"Data saved to: {output_dir}/{session.session_id}.json")

    agent.on_video_watched = on_video_watched
    agent.on_progress_update = on_progress_update
    agent.on_session_complete = on_session_complete

    # Start session
    session_id = agent.start_session(
        device_id=device_id,
        target_count=target_count,
        category=category,
        watch_duration=watch_duration,
    )

    print(f"\nSession started: {session_id}")
    print("Starting video watching task...\n")

    # Construct the task. The prompt text is kept flush-left because leading
    # whitespace inside the triple-quoted string is part of the prompt.
    if category:
        task = f"""
请帮我学习抖音上的"{category}"类视频。具体任务如下:

1. 打开抖音应用
2. 搜索"{category}"
3. 开始观看视频,每个视频观看约{watch_duration}秒
4. 记录每个视频的描述、点赞数、评论数等信息
5. 滑动到下一个视频
6. 重复步骤3-5,直到观看完{target_count}个视频

请按照以下格式记录每个视频:
- 视频序号
- 描述文案(屏幕上的文字)
- 点赞数(如果有显示)
- 评论数(如果有显示)
- 截图

每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。
"""
    else:
        task = f"""
请帮我学习抖音上的推荐视频。具体任务如下:

1. 打开抖音应用
2. 在推荐页开始观看视频,每个视频观看约{watch_duration}秒
3. 记录每个视频的描述、点赞数、评论数等信息
4. 向上滑动到下一个视频
5. 重复步骤3-4,直到观看完{target_count}个视频

请按照以下格式记录每个视频:
- 视频序号
- 描述文案(屏幕上的文字)
- 点赞数(如果有显示)
- 评论数(如果有显示)
- 截图

每个视频观看时,请等待{watch_duration}秒后再滑动到下一个。
"""

    # Run the task
    success = agent.run_learning_task(task)

    if success:
        print("\n✓ Learning task completed successfully!")

        # Export data
        json_file = agent.export_data("json")
        print(f"✓ Data exported to: {json_file}")

        csv_file = agent.export_data("csv")
        print(f"✓ Data exported to: {csv_file}")

    else:
        print("\n✗ Learning task failed")

    print("\nSession progress:")
    progress = agent.get_session_progress()
    for key, value in progress.items():
        print(f" {key}: {value}")
|
||||
|
||||
|
||||
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
@@ -7,6 +7,7 @@ using AI models for visual understanding and decision making.
|
||||
|
||||
from phone_agent.agent import AgentConfig, PhoneAgent, StepResult
|
||||
from phone_agent.agent_ios import IOSAgentConfig, IOSPhoneAgent
|
||||
from phone_agent.video_learning import VideoLearningAgent, VideoRecord, LearningSession
|
||||
|
||||
__version__ = "0.1.0"
|
||||
__all__ = [
|
||||
@@ -15,4 +16,7 @@ __all__ = [
|
||||
"AgentConfig",
|
||||
"IOSAgentConfig",
|
||||
"StepResult",
|
||||
"VideoLearningAgent",
|
||||
"VideoRecord",
|
||||
"LearningSession",
|
||||
]
|
||||
|
||||
561
phone_agent/video_learning.py
Normal file
561
phone_agent/video_learning.py
Normal file
@@ -0,0 +1,561 @@
|
||||
"""
|
||||
Video Learning Agent for AutoGLM
|
||||
|
||||
This agent learns from short video platforms (like Douyin/TikTok)
|
||||
by watching videos and collecting information.
|
||||
|
||||
MVP Features:
|
||||
- Automatic video scrolling
|
||||
- Play/Pause control
|
||||
- Screenshot capture for each video
|
||||
- Basic data collection (likes, comments, etc.)
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Any
|
||||
|
||||
from phone_agent import PhoneAgent, AgentConfig
|
||||
from phone_agent.agent import StepResult
|
||||
from phone_agent.model.client import ModelConfig
|
||||
from phone_agent.device_factory import get_device_factory
|
||||
|
||||
|
||||
@dataclass
class VideoRecord:
    """Data captured for a single video watched during a learning session."""

    # Identity and capture details
    sequence_id: int
    timestamp: str
    screenshot_path: Optional[str] = None
    watch_duration: float = 0.0  # seconds

    # Basic info (extracted via OCR/analysis)
    description: Optional[str] = None  # video caption/text
    likes: Optional[int] = None
    comments: Optional[int] = None
    shares: Optional[int] = None

    # Content analysis (for future expansion)
    tags: List[str] = field(default_factory=list)
    category: Optional[str] = None
    elements: List[str] = field(default_factory=list)

    # Metadata
    position_in_session: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict with one key per field.

        Keys follow field declaration order and values are the attribute
        objects themselves (lists are not copied).
        """
        return {name: getattr(self, name) for name in self.__dataclass_fields__}
|
||||
|
||||
|
||||
@dataclass
class LearningSession:
    """One learning run: its target, control flags, totals and video records."""

    session_id: str
    start_time: str
    platform: str  # "douyin", "tiktok", etc.
    target_category: Optional[str] = None
    target_count: int = 10
    records: List[VideoRecord] = field(default_factory=list)

    # Control flags
    is_active: bool = True
    is_paused: bool = False

    # Statistics
    total_videos: int = 0
    total_duration: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the session as a plain dict; records are serialized last.

        Each entry of ``records`` is converted through its own ``to_dict()``.
        """
        scalar_keys = (
            "session_id",
            "start_time",
            "platform",
            "target_category",
            "target_count",
            "is_active",
            "is_paused",
            "total_videos",
            "total_duration",
        )
        payload: Dict[str, Any] = {key: getattr(self, key) for key in scalar_keys}
        payload["records"] = [record.to_dict() for record in self.records]
        return payload
|
||||
|
||||
|
||||
class VideoLearningAgent:
|
||||
"""
|
||||
Agent for learning from short video platforms.
|
||||
|
||||
MVP Capabilities:
|
||||
- Navigate to video platform
|
||||
- Watch videos automatically
|
||||
- Capture screenshots
|
||||
- Collect basic information
|
||||
- Export learning data
|
||||
"""
|
||||
|
||||
# Platform-specific configurations, keyed by platform name
# ("douyin", "kuaishou", "tiktok").
# NOTE(review): the code that reads these entries is not visible here; field
# meanings below are taken from the key names and should be confirmed.
# NOTE(review): only "douyin" defines "comment_position"; a lookup of that key
# for the other platforms would need a default — verify against the consumers.
PLATFORM_CONFIGS = {
    "douyin": {
        "package_name": "com.ss.android.ugc.aweme",
        "activity_hint": "aweme",
        "scroll_gesture": "up",
        "like_position": {"x": 0.9, "y": 0.8},  # Relative coordinates
        "comment_position": {"x": 0.9, "y": 0.7},
    },
    "kuaishou": {
        "package_name": "com.smile.gifmaker",
        "activity_hint": "gifmaker",
        "scroll_gesture": "up",
        "like_position": {"x": 0.9, "y": 0.8},
    },
    "tiktok": {
        "package_name": "com.zhiliaoapp.musically",
        "activity_hint": "musically",
        "scroll_gesture": "up",
        "like_position": {"x": 0.9, "y": 0.8},
    },
}
|
||||
|
||||
def __init__(
    self,
    model_config: ModelConfig,
    platform: str = "douyin",
    output_dir: str = "./video_learning_data",
):
    """
    Set up the agent's output directories and empty session state.

    Creates ``output_dir`` (including parents) and a ``screenshots``
    subdirectory inside it. No phone agent is built here; that happens
    in ``start_session``.

    Args:
        model_config: Model configuration for VLM
        platform: Platform name (douyin, kuaishou, tiktok)
        output_dir: Directory to save screenshots and data
    """
    # Prepare the on-disk layout first.
    data_root = Path(output_dir)
    data_root.mkdir(parents=True, exist_ok=True)
    shots_dir = data_root / "screenshots"
    shots_dir.mkdir(exist_ok=True)

    self.model_config = model_config
    self.platform = platform
    self.output_dir = data_root
    self.screenshot_dir = shots_dir

    # No session is active until start_session() is called.
    self.current_session: Optional[LearningSession] = None
    self.video_counter = 0

    # Agent will be created when starting a session
    self.agent: Optional[PhoneAgent] = None

    # Optional hooks that callers may assign for external control.
    self.on_video_watched: Optional[Callable[[VideoRecord], None]] = None
    self.on_session_complete: Optional[Callable[[LearningSession], None]] = None
    self.on_progress_update: Optional[Callable[[int, int], None]] = None

    # Video detection: hash of the last screenshot, used to spot changes.
    self._last_screenshot_hash: Optional[str] = None
|
||||
|
||||
def start_session(
    self,
    device_id: str,
    target_count: int = 10,
    category: Optional[str] = None,
    watch_duration: float = 3.0,
    max_steps: int = 500,
) -> str:
    """
    Begin a new learning session and build the phone agent that will run it.

    Replaces any existing ``current_session`` and ``agent`` and resets the
    video counter and the screenshot-change tracker.

    Args:
        device_id: Target device ID
        target_count: Number of videos to watch
        category: Target category (e.g., "美食", "旅行")
        watch_duration: How long to watch each video (seconds)
        max_steps: Maximum execution steps

    Returns:
        Session ID (``session_<YYYYmmdd_HHMMSS>`` from the local clock)
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    session_id = f"session_{stamp}"

    self.current_session = LearningSession(
        session_id=session_id,
        start_time=datetime.now().isoformat(),
        platform=self.platform,
        target_category=category,
        target_count=target_count,
    )

    # The agent reports back through _on_step / _before_action.
    self.agent = PhoneAgent(
        model_config=self.model_config,
        agent_config=AgentConfig(
            device_id=device_id,
            max_steps=max_steps,
            lang="cn",
            step_callback=self._on_step,
            before_action_callback=self._before_action,
        ),
    )

    # Parameters the step callback and record_video() read later.
    self._watch_duration = watch_duration
    self._device_id = device_id

    # Fresh detection state for the new session.
    self._last_screenshot_hash = None
    self.video_counter = 0

    return session_id
|
||||
|
||||
def run_learning_task(self, task: str) -> bool:
    """
    Run the learning task.

    Blocks until the agent finishes. Whether the agent returns normally or
    raises, the session is marked inactive and written to disk, so videos
    recorded before a failure are not lost.

    Args:
        task: Natural language task description

    Returns:
        True if the agent ran without raising, returned a truthy result and
        the session was saved; False otherwise

    Raises:
        RuntimeError: If start_session() has not been called
    """
    if not self.agent or not self.current_session:
        raise RuntimeError("Session not started. Call start_session() first.")

    try:
        result = self.agent.run(task)
    except Exception as e:
        # Boundary of the whole task: report and fall through to the save below.
        print(f"Error during learning: {e}")
        completed = False
        succeeded = False
    else:
        completed = True
        succeeded = bool(result)

    # Mark the session inactive and persist it on both paths.
    if self.current_session:
        self.current_session.is_active = False
        try:
            self._save_session()
        except Exception as e:
            print(f"Error during learning: {e}")
            return False

    if completed:
        print(f"[VideoLearning] Session completed. Recorded {self.video_counter} videos.")
    return succeeded
|
||||
|
||||
def stop_session(self):
    """Request that the current session stop by clearing its active flag."""
    session = self.current_session
    if session:
        session.is_active = False

    if not self.agent:
        return
    # Nothing to do on the agent itself: the step callback checks the
    # session's active flag on its next invocation and ends the run.
|
||||
|
||||
def pause_session(self):
    """Set the paused flag on the current session; no-op without a session."""
    session = self.current_session
    if not session:
        return
    session.is_paused = True
|
||||
|
||||
def resume_session(self):
    """Clear the paused flag on the current session; no-op without a session."""
    session = self.current_session
    if not session:
        return
    session.is_paused = False
|
||||
|
||||
def _on_step(self, result: StepResult) -> Optional[str]:
    """
    Callback after each agent step; records a video when the screen changes.

    Logic:
    1. Stop if the session was deactivated or the target count is already met
    2. Do nothing while the session is paused
    3. Check the foreground app via get_current_app(); skip if it is not
       the target platform
    4. Hash the current screenshot and record a video when the hash differs
       from the last recorded one (the first screenshot always counts)
    5. Stop as soon as the target count is reached

    Every stop path saves the session and fires ``on_session_complete``.

    Args:
        result: Step execution result

    Returns:
        "stop" to end session, None to continue (this implementation never
        returns a replacement task)
    """
    session = self.current_session
    if not session:
        return None

    def finish() -> str:
        # Single exit path so that every way of stopping persists the
        # session and notifies the completion callback.
        session.is_active = False
        self._save_session()
        if self.on_session_complete:
            self.on_session_complete(session)
        return "stop"

    # Session was stopped externally (stop_session) or already completed.
    if not session.is_active:
        return finish()

    if session.is_paused:
        return None

    # Target may already be met, e.g. through direct record_video() calls.
    if self.video_counter >= session.target_count:
        return finish()

    try:
        factory = get_device_factory()

        # Guard against a missing app name so the substring test below
        # cannot fail on None.
        current_app = factory.get_current_app(self._device_id) or ""

        # Substrings identifying each platform in the reported app name.
        platform_keywords = {
            "douyin": ["aweme", "抖音", "douyin"],
            "kuaishou": ["gifmaker", "快手", "kuaishou"],
            "tiktok": ["musically", "tiktok"],
        }
        keywords = platform_keywords.get(self.platform, ["aweme"])

        app_lower = current_app.lower()
        if not any(keyword.lower() in app_lower for keyword in keywords):
            print(f"[VideoLearning] Not in target app: {current_app} (step {result.step_count})")
            return None

        screenshot = factory.get_screenshot(self._device_id)

        # Use full base64 data for hash (more sensitive)
        current_hash = hashlib.md5(screenshot.base64_data.encode()).hexdigest()

        # _last_screenshot_hash is None before the first recording, so the
        # first screenshot in the target app always counts as a new video.
        if current_hash != self._last_screenshot_hash:
            self._last_screenshot_hash = current_hash
            self._record_video_from_screenshot(screenshot)
            print(f"[VideoLearning] ✓ Recorded video {self.video_counter}/{session.target_count}")
    except Exception as e:
        # Best effort: a failed device query or recording must not abort the
        # agent run, so report it and let the next step retry.
        print(f"[VideoLearning] Warning: {e}")

    # Stop immediately once the target is reached instead of waiting for
    # another agent step.
    if self.video_counter >= session.target_count:
        print("[VideoLearning] ✓ Target reached! Stopping...")
        return finish()

    return None
|
||||
|
||||
def _record_video_from_screenshot(self, screenshot):
|
||||
"""Helper method to record video from screenshot."""
|
||||
import base64
|
||||
screenshot_bytes = base64.b64decode(screenshot.base64_data)
|
||||
self.record_video(
|
||||
screenshot=screenshot_bytes,
|
||||
description=f"Video #{self.video_counter + 1}",
|
||||
)
|
||||
|
||||
def _before_action(self, action: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Callback before executing an action.
|
||||
|
||||
Args:
|
||||
action: Action to execute
|
||||
|
||||
Returns:
|
||||
Modified action or None
|
||||
"""
|
||||
# Could be used for action logging or modification
|
||||
return None
|
||||
|
||||
def record_video(
    self,
    screenshot: Optional[bytes] = None,
    description: Optional[str] = None,
    likes: Optional[int] = None,
    comments: Optional[int] = None,
) -> VideoRecord:
    """
    Record a watched video.

    Writes the screenshot (if any) into the screenshots directory, appends a
    VideoRecord to the current session, updates the session totals and fires
    the ``on_video_watched`` and ``on_progress_update`` callbacks.

    The video counter and session totals are only advanced after the
    screenshot has been written and the record has been built, so a failure
    there leaves the counter in step with the stored records.

    Args:
        screenshot: Screenshot image data
        description: Video description/caption
        likes: Number of likes
        comments: Number of comments

    Returns:
        VideoRecord object

    Raises:
        RuntimeError: If no session has been started
    """
    session = self.current_session
    if session is None:
        raise RuntimeError("Session not started. Call start_session() first.")

    sequence_id = self.video_counter + 1

    # Save screenshot if provided
    screenshot_path = None
    if screenshot:
        screenshot_filename = f"{session.session_id}_video_{sequence_id}.png"
        (self.screenshot_dir / screenshot_filename).write_bytes(screenshot)
        # The record stores the web-facing path, not the filesystem path:
        # /video-learning-data/screenshots/filename.png
        screenshot_path = f"/video-learning-data/screenshots/{screenshot_filename}"

    record = VideoRecord(
        sequence_id=sequence_id,
        timestamp=datetime.now().isoformat(),
        screenshot_path=screenshot_path,
        watch_duration=self._watch_duration,
        description=description,
        likes=likes,
        comments=comments,
        position_in_session=sequence_id,
    )

    # Commit the new video to the counter and the session.
    self.video_counter = sequence_id
    session.records.append(record)
    session.total_videos = sequence_id
    session.total_duration += self._watch_duration

    # Notify callback
    if self.on_video_watched:
        self.on_video_watched(record)

    # Notify progress
    if self.on_progress_update:
        self.on_progress_update(sequence_id, session.target_count)

    return record
|
||||
|
||||
def _save_session(self):
|
||||
"""Save session data to JSON file."""
|
||||
if not self.current_session:
|
||||
return
|
||||
|
||||
session_file = self.output_dir / f"{self.current_session.session_id}.json"
|
||||
with open(session_file, "w", encoding="utf-8") as f:
|
||||
json.dump(self.current_session.to_dict(), f, ensure_ascii=False, indent=2)
|
||||
|
||||
print(f"Session saved to {session_file}")
|
||||
|
||||
def export_data(self, format: str = "json") -> str:
    """
    Export the current session in the requested format.

    Args:
        format: Export format (json, csv)

    Returns:
        Path to exported file

    Raises:
        RuntimeError: If there is no session to export
        ValueError: If ``format`` is neither "json" nor "csv"
    """
    if not self.current_session:
        raise RuntimeError("No session to export")

    if format == "json":
        return self._export_json()
    if format == "csv":
        return self._export_csv()
    raise ValueError(f"Unsupported format: {format}")
|
||||
|
||||
def _export_json(self) -> str:
|
||||
"""Export as JSON."""
|
||||
output_file = self.output_dir / f"{self.current_session.session_id}_export.json"
|
||||
with open(output_file, "w", encoding="utf-8") as f:
|
||||
json.dump(self.current_session.to_dict(), f, ensure_ascii=False, indent=2)
|
||||
return str(output_file)
|
||||
|
||||
def _export_csv(self) -> str:
|
||||
"""Export as CSV."""
|
||||
import csv
|
||||
|
||||
output_file = self.output_dir / f"{self.current_session.session_id}_export.csv"
|
||||
with open(output_file, "w", encoding="utf-8", newline="") as f:
|
||||
if not self.current_session.records:
|
||||
return str(output_file)
|
||||
|
||||
writer = csv.DictWriter(f, fieldnames=self.current_session.records[0].to_dict().keys())
|
||||
writer.writeheader()
|
||||
for record in self.current_session.records:
|
||||
writer.writerow(record.to_dict())
|
||||
|
||||
return str(output_file)
|
||||
|
||||
def get_session_progress(self) -> Dict[str, Any]:
    """Return a snapshot of the current session's progress.

    Returns:
        ``{"status": "no_session"}`` when no session exists; otherwise a
        dict with the session id, platform, target and watched counts,
        completion percentage (0 when the target is not positive), the
        active/paused flags and the accumulated watch duration.
    """
    session = self.current_session
    if not session:
        return {"status": "no_session"}

    watched = self.video_counter
    target = session.target_count
    if target > 0:
        percent = watched / target * 100
    else:
        percent = 0

    return {
        "session_id": session.session_id,
        "platform": session.platform,
        "target_count": target,
        "watched_count": watched,
        "progress_percent": percent,
        "is_active": session.is_active,
        "is_paused": session.is_paused,
        "total_duration": session.total_duration,
    }
|
||||
|
||||
|
||||
# Convenience function for standalone usage
|
||||
def create_video_learning_agent(
    base_url: str,
    api_key: str,
    model_name: str = "autoglm-phone-9b",
    platform: str = "douyin",
    output_dir: str = "./video_learning_data",
    **model_kwargs,
) -> VideoLearningAgent:
    """
    Build a VideoLearningAgent from plain connection settings.

    Wraps the model settings in a ModelConfig and hands it, together with
    the platform and output directory, to VideoLearningAgent.

    Args:
        base_url: Model API base URL
        api_key: API key
        model_name: Model name
        platform: Platform name
        output_dir: Output directory
        **model_kwargs: Additional model parameters, forwarded to ModelConfig

    Returns:
        VideoLearningAgent instance
    """
    return VideoLearningAgent(
        model_config=ModelConfig(
            base_url=base_url,
            model_name=model_name,
            api_key=api_key,
            **model_kwargs,
        ),
        platform=platform,
        output_dir=output_dir,
    )
|
||||
35
scripts/run_video_learning_demo.bat
Normal file
35
scripts/run_video_learning_demo.bat
Normal file
@@ -0,0 +1,35 @@
|
||||
@echo off
REM Video Learning Demo Script for Windows
REM Starts the dashboard (uvicorn) in a separate console window and opens the
REM video learning page in the default browser.

REM "dashboard.main:app" is imported relative to the working directory, so
REM switch to the repository root (the parent of this scripts\ folder) no
REM matter where the script was launched from.
cd /d "%~dp0.."

echo ============================================
echo AutoGLM Video Learning Demo
echo ============================================
echo.
echo Starting Dashboard...
echo.

REM Start the dashboard in its own window titled "AutoGLM Dashboard"
start "AutoGLM Dashboard" python -m uvicorn dashboard.main:app --host 0.0.0.0 --port 8080 --reload

REM Fixed delay to give the server time to bind the port before the browser opens
echo Waiting for dashboard to start...
timeout /t 3 /nobreak > nul

echo.
echo Dashboard starting at: http://localhost:8080
echo Opening Video Learning page in browser...
echo.

REM Open the video learning page
start http://localhost:8080/static/video-learning.html

echo.
echo ============================================
echo Video Learning Demo is ready!
echo ============================================
echo.
REM The server runs in the separate window opened above, so Ctrl+C in this
REM window would only end this script, not the dashboard.
echo Close the "AutoGLM Dashboard" window to stop the dashboard
echo.

REM Keep this window open until a key is pressed
pause
|
||||
43
scripts/run_video_learning_demo.sh
Normal file
43
scripts/run_video_learning_demo.sh
Normal file
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
# Video Learning Demo Script for Linux/Mac
# Starts the dashboard (uvicorn) in the background, opens the video learning
# page in the default browser, then waits until the dashboard exits.

# "dashboard.main:app" is imported relative to the working directory, so
# switch to the repository root (the parent of this scripts/ directory) no
# matter where the script was launched from.
cd "$(dirname "$0")/.." || exit 1

echo "============================================"
echo "AutoGLM Video Learning Demo"
echo "============================================"
echo ""
echo "Starting Dashboard..."
echo ""

# Start the dashboard in background
python -m uvicorn dashboard.main:app --host 0.0.0.0 --port 8080 --reload &
DASHBOARD_PID=$!

# Do not leave the background server running when this script ends,
# whether it exits normally or is interrupted/terminated.
cleanup() {
    kill "$DASHBOARD_PID" 2>/dev/null
}
trap cleanup EXIT
trap 'exit 130' INT TERM

# Fixed delay to give the server time to bind the port before the browser opens
echo "Waiting for dashboard to start..."
sleep 3

echo ""
echo "Dashboard starting at: http://localhost:8080"
echo "Opening Video Learning page in browser..."
echo ""

# Open the video learning page (xdg-open on Linux, open on macOS)
if command -v xdg-open > /dev/null; then
    xdg-open http://localhost:8080/static/video-learning.html
elif command -v open > /dev/null; then
    open http://localhost:8080/static/video-learning.html
else
    echo "Please open your browser and navigate to:"
    echo "http://localhost:8080/static/video-learning.html"
fi

echo ""
echo "============================================"
echo "Video Learning Demo is ready!"
echo "============================================"
echo ""
echo "Press Ctrl+C to stop the dashboard"
echo ""

# Wait for dashboard process
wait "$DASHBOARD_PID"
|
||||
Reference in New Issue
Block a user