diff --git a/.env.example b/.env.example index cbc3ed6..e984cd0 100644 --- a/.env.example +++ b/.env.example @@ -108,3 +108,16 @@ SCREENSHOT_THROTTLE_MS=500 # Maximum task history to keep / 保留的最大任务历史数 MAX_TASK_HISTORY=100 + +# ============================================================================ +# Video Learning Configuration / 视频学习配置 +# ============================================================================ + +# Output directory for video learning data / 视频学习数据输出目录 +VIDEO_LEARNING_OUTPUT_DIR=./video_learning_data + +# Model parameters for video learning / 视频学习模型参数 +PHONE_AGENT_MAX_TOKENS=3000 +PHONE_AGENT_TEMPERATURE=0.0 +PHONE_AGENT_TOP_P=0.85 +PHONE_AGENT_FREQUENCY_PENALTY=0.2 diff --git a/dashboard/api/__init__.py b/dashboard/api/__init__.py index 1689499..df1260c 100644 --- a/dashboard/api/__init__.py +++ b/dashboard/api/__init__.py @@ -5,9 +5,11 @@ API endpoints for the dashboard. from dashboard.api.devices import router as devices_router from dashboard.api.tasks import router as tasks_router from dashboard.api.websocket import router as websocket_router +from dashboard.api.video_learning import router as video_learning_router __all__ = [ "devices_router", "tasks_router", "websocket_router", + "video_learning_router", ] diff --git a/dashboard/api/video_learning.py b/dashboard/api/video_learning.py new file mode 100644 index 0000000..6f907c2 --- /dev/null +++ b/dashboard/api/video_learning.py @@ -0,0 +1,328 @@ +""" +Video Learning API endpoints for the dashboard. 
class SessionCreateRequest(BaseModel):
    """Request to create a new learning session."""

    # Device the session will run on. create_session resolves it through the
    # device manager and rejects unknown, disconnected or busy devices.
    device_id: str = Field(..., description="Target device ID")
    # Free-form platform key; the model does not validate it. start_session
    # only has entries for "douyin", "kuaishou" and "tiktok" and falls back
    # to the douyin entry for any other value.
    platform: str = Field("douyin", description="Platform name (douyin, kuaishou, tiktok)")
    # Number of videos to watch, constrained to 1..100 by ge/le.
    target_count: int = Field(10, description="Number of videos to watch", ge=1, le=100)
    # Optional search term. When set, the generated task searches for it;
    # otherwise the task watches the recommended feed.
    category: Optional[str] = Field(None, description="Target category filter")
    # Seconds to stay on each video, constrained to 1.0..30.0 by ge/le.
    watch_duration: float = Field(3.0, description="Watch duration per video (seconds)", ge=1.0, le=30.0)


class SessionControlRequest(BaseModel):
    """Request to control a session."""

    # Plain string, not validated by the model: control_session handles
    # "pause", "resume" and "stop" and answers HTTP 400 for anything else.
    action: str = Field(..., description="Action: pause, resume, stop")


class SessionStatus(BaseModel):
    """Session status response."""

    # Every field except current_video is copied by get_session_status from
    # the same-named key of the dict returned by agent.get_session_progress().
    session_id: str
    platform: str
    target_count: int
    watched_count: int
    progress_percent: float
    is_active: bool
    is_paused: bool
    total_duration: float
    # Summary of the most recent record (sequence_id, timestamp,
    # screenshot_path, description, likes, comments); None while the session
    # has no records.
    current_video: Optional[Dict] = None


class VideoInfo(BaseModel):
    """Information about a watched video."""

    # get_session_videos builds one instance per record of the agent's
    # current session, copying the same-named attributes from the record.
    sequence_id: int
    timestamp: str
    screenshot_path: Optional[str] = None
    watch_duration: float
    description: Optional[str] = None
    likes: Optional[int] = None
    comments: Optional[int] = None
    tags: List[str] = []
    category: Optional[str] = None
request: SessionCreateRequest, + device_manager: DeviceManager = Depends(get_device_manager), +) -> Dict[str, str]: + """Create a new video learning session.""" + # Check device availability + device = await device_manager.get_device(request.device_id) + if not device: + raise HTTPException(status_code=404, detail="Device not found") + + if not device.is_connected: + raise HTTPException(status_code=400, detail="Device not connected") + + if device.status == "busy": + raise HTTPException(status_code=409, detail="Device is busy") + + # Create model config from environment + model_config = ModelConfig( + base_url=config.MODEL_BASE_URL, + model_name=config.MODEL_NAME, + api_key=config.MODEL_API_KEY, + max_tokens=config.MAX_TOKENS, + temperature=config.TEMPERATURE, + top_p=config.TOP_P, + frequency_penalty=config.FREQUENCY_PENALTY, + lang="cn", + ) + + # Create video learning agent + agent = VideoLearningAgent( + model_config=model_config, + platform=request.platform, + output_dir=config.VIDEO_LEARNING_OUTPUT_DIR, + ) + + # Setup callbacks for real-time updates + session_id = None + + def on_video_watched(record): + """Callback when a video is watched.""" + # Broadcast via WebSocket + if session_id: + # This would be integrated with WebSocket manager + pass + + def on_progress_update(current, total): + """Callback for progress updates.""" + if session_id: + # Broadcast progress + pass + + def on_session_complete(session): + """Callback when session completes.""" + if session_id and session_id in _active_sessions: + del _active_sessions[session_id] + + agent.on_video_watched = on_video_watched + agent.on_progress_update = on_progress_update + agent.on_session_complete = on_session_complete + + # Start session + session_id = agent.start_session( + device_id=request.device_id, + target_count=request.target_count, + category=request.category, + watch_duration=request.watch_duration, + max_steps=500, + ) + + # Store session + _active_sessions[session_id] = agent + + return 
# Display name and Android package for each supported platform key.
_PLATFORM_APP_INFO = {
    "douyin": {
        "name": "抖音",
        "package": "com.ss.android.ugc.aweme",
    },
    "kuaishou": {
        "name": "快手",
        "package": "com.smile.gifmaker",
    },
    "tiktok": {
        "name": "TikTok",
        "package": "com.zhiliaoapp.musically",
    },
}

# Background learning runs keyed by session ID. The event loop keeps only a
# weak reference to tasks, so a task nobody else references can be garbage
# collected before it finishes; this dict is the strong reference.
_running_tasks: Dict[str, asyncio.Task] = {}


def _build_learning_task(app_name, category, watch_duration, target_count) -> str:
    """Return the step-by-step prompt handed to the agent.

    With a truthy ``category`` the prompt tells the agent to search for it;
    otherwise it tells the agent to watch the recommended feed.
    """
    if category:
        return f"""你是一个视频学习助手。请严格按照以下步骤执行:

步骤1:启动应用
- 回到主屏幕
- 打开{app_name}应用

步骤2:搜索内容
- 在{app_name}中搜索"{category}"
- 点击第一个搜索结果或进入相关页面

步骤3:观看视频
- 观看视频,每个视频停留约{watch_duration}秒
- 记录视频的描述、点赞数、评论数
- 向上滑动切换到下一个视频
- 重复观看和记录,直到完成{target_count}个视频

步骤4:完成任务
- 完成观看{target_count}个视频后,总结所有视频信息

请现在开始执行。"""

    return f"""你是一个视频学习助手。请严格按照以下步骤执行:

步骤1:启动应用
- 回到主屏幕
- 打开{app_name}应用

步骤2:观看推荐视频
- 进入{app_name}的推荐页面
- 观看推荐视频,每个视频停留约{watch_duration}秒
- 记录视频的描述、点赞数、评论数
- 向上滑动切换到下一个视频
- 重复观看和记录,直到完成{target_count}个视频

步骤3:完成任务
- 完成观看{target_count}个视频后,总结所有视频信息

请现在开始执行。"""


def _forget_learning_task(session_id: str, task: asyncio.Task) -> None:
    """Drop a finished background run and log the error it ended with, if any."""
    # Only remove the entry if it still points at this task; a later start of
    # the same session may already have replaced it.
    if _running_tasks.get(session_id) is task:
        del _running_tasks[session_id]

    if task.cancelled():
        return

    error = task.exception()
    if error is not None:
        import logging

        logging.getLogger(__name__).error(
            "Video learning session %s failed", session_id, exc_info=error
        )


@router.post("/sessions/{session_id}/start", response_model=Dict[str, str])
async def start_session(session_id: str) -> Dict[str, str]:
    """Start executing a learning session.

    Builds the task prompt from the session parameters and runs the agent in
    a worker thread, returning immediately.

    Raises:
        HTTPException: 404 if the session is not registered, 400 if the agent
            has no current session, 409 if a run for this session is still
            in progress.
    """
    # Single lookup instead of "in" + index: the completion callback removes
    # entries from another thread, so the key can vanish between two steps.
    agent = _active_sessions.get(session_id)
    if agent is None:
        raise HTTPException(status_code=404, detail="Session not found")

    session = agent.current_session
    if not session:
        raise HTTPException(status_code=400, detail="Session not initialized")

    # A second start would drive the same agent from two threads at once.
    running = _running_tasks.get(session_id)
    if running is not None and not running.done():
        raise HTTPException(status_code=409, detail="Session already started")

    # Unknown platform keys fall back to the douyin entry.
    info = _PLATFORM_APP_INFO.get(agent.platform, _PLATFORM_APP_INFO["douyin"])

    task = _build_learning_task(
        app_name=info["name"],
        category=session.target_category,
        # NOTE(review): reads a private attribute of the agent; switch to a
        # public accessor if VideoLearningAgent offers one.
        watch_duration=agent._watch_duration,
        target_count=session.target_count,
    )

    # Run in background, keeping a strong reference until the run finishes.
    background = asyncio.create_task(asyncio.to_thread(agent.run_learning_task, task))
    _running_tasks[session_id] = background
    background.add_done_callback(
        lambda finished: _forget_learning_task(session_id, finished)
    )

    return {"session_id": session_id, "status": "started"}
@router.get("/sessions/{session_id}/status", response_model=SessionStatus)
async def get_session_status(session_id: str) -> SessionStatus:
    """Report progress for a registered session, plus its latest video.

    Responds with HTTP 404 when the session ID is not registered.
    """
    if session_id not in _active_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    agent = _active_sessions[session_id]
    progress = agent.get_session_progress()

    # Summarise the newest record, if the session has recorded anything yet.
    latest_summary = None
    session = agent.current_session
    if session and session.records:
        newest = session.records[-1]
        summary_fields = (
            "sequence_id",
            "timestamp",
            "screenshot_path",
            "description",
            "likes",
            "comments",
        )
        latest_summary = {field: getattr(newest, field) for field in summary_fields}

    # These keys are copied one-to-one from the agent's progress dict.
    progress_fields = (
        "session_id",
        "platform",
        "target_count",
        "watched_count",
        "progress_percent",
        "is_active",
        "is_paused",
        "total_duration",
    )
    return SessionStatus(
        **{field: progress[field] for field in progress_fields},
        current_video=latest_summary,
    )
@router.get("/sessions", response_model=List[str])
async def list_sessions() -> List[str]:
    """List all active session IDs."""
    # Iterating the registry yields its keys; list() snapshots them.
    return list(_active_sessions)


@router.delete("/sessions/{session_id}", response_model=Dict[str, str])
async def delete_session(session_id: str) -> Dict[str, str]:
    """Stop a session and remove it from the registry.

    Raises:
        HTTPException: 404 if the session is not registered.
    """
    agent = _active_sessions.get(session_id)
    if agent is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Stop before forgetting the agent, as the "stop" control action does.
    # Otherwise a running session would continue on the device with no
    # endpoint left that can reach it.
    agent.stop_session()

    # The completion callback may already have removed the entry, so tolerate
    # a missing key instead of raising KeyError.
    _active_sessions.pop(session_id, None)
    return {"session_id": session_id, "status": "deleted"}
# Mount static files for video learning screenshots.
# The mount is registered once, when this module is imported. If the output
# directory is absent at that moment the route never exists, and screenshots
# written later are not served until a restart. Create the directory up
# front so the mount does not depend on a previous run having produced data.
video_learning_data_path = Path(config.VIDEO_LEARNING_OUTPUT_DIR)
try:
    video_learning_data_path.mkdir(parents=True, exist_ok=True)
except OSError as mkdir_error:
    # Stay best-effort: an unwritable location must not prevent startup.
    import logging

    logging.getLogger(__name__).warning(
        "Cannot create video learning output directory %s: %s",
        video_learning_data_path,
        mkdir_error,
    )
if video_learning_data_path.is_dir():
    app.mount(
        "/video-learning-data",
        StaticFiles(directory=str(video_learning_data_path)),
        name="video-learning-data",
    )
flex-direction: column; + gap: 1.5rem; +} + +.form-group { + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.form-group label { + font-size: 0.875rem; + font-weight: 500; + color: var(--text-primary); +} + +.form-group select, +.form-group input { + padding: 0.75rem 1rem; + background-color: var(--bg-color); + border: 1px solid var(--border-color); + border-radius: 8px; + color: var(--text-primary); + font-size: 0.95rem; +} + +.form-group select:focus, +.form-group input:focus { + outline: none; + border-color: var(--primary-color); +} + +.form-group select:disabled, +.form-group input:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.form-group small { + font-size: 0.75rem; + color: var(--text-secondary); +} + +.form-row { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 1rem; +} + +/* Session Section */ +.session-section { + background-color: var(--card-bg); + border: 1px solid var(--border-color); + border-radius: 12px; + padding: 2rem; +} + +.session-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 1.5rem; +} + +.session-header h2 { + font-size: 1.25rem; + font-weight: 600; + color: var(--text-primary); +} + +.session-controls { + display: flex; + gap: 0.5rem; +} + +/* Progress Section */ +.progress-section { + background-color: var(--bg-color); + border-radius: 8px; + padding: 1.5rem; + margin-bottom: 1.5rem; +} + +.progress-info { + display: flex; + justify-content: space-between; + margin-bottom: 0.5rem; + font-size: 0.875rem; + color: var(--text-secondary); +} + +.progress-bar-large { + height: 8px; + background-color: rgba(99, 102, 241, 0.2); + border-radius: 4px; + overflow: hidden; +} + +.progress-fill { + height: 100%; + background-color: var(--primary-color); + transition: width 0.3s ease; +} + +.progress-stats { + margin-top: 0.5rem; + font-size: 0.8rem; + color: var(--text-secondary); +} + +/* Current Video */ +.current-video { + margin-bottom: 2rem; +} + 
+.current-video h3 { + font-size: 1rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 1rem; +} + +/* Video Cards */ +.video-card { + background-color: var(--bg-color); + border: 1px solid var(--border-color); + border-radius: 8px; + overflow: hidden; + transition: border-color 0.2s; +} + +.video-card:hover { + border-color: var(--primary-color); +} + +.video-screenshot { + width: 100%; + aspect-ratio: 9/16; + background-color: #000; + overflow: hidden; +} + +.video-screenshot img { + width: 100%; + height: 100%; + object-fit: contain; +} + +.video-placeholder { + width: 100%; + aspect-ratio: 9/16; + background-color: var(--bg-color); + display: flex; + align-items: center; + justify-content: center; + color: var(--text-secondary); +} + +.video-info { + padding: 1rem; +} + +.video-id { + font-size: 0.75rem; + font-weight: 600; + color: var(--primary-color); + margin-bottom: 0.5rem; +} + +.video-description { + font-size: 0.875rem; + color: var(--text-primary); + margin-bottom: 0.5rem; + line-height: 1.4; +} + +.video-stats { + display: flex; + gap: 1rem; + font-size: 0.75rem; + color: var(--text-secondary); +} + +.video-stats span { + display: flex; + align-items: center; + gap: 0.25rem; +} + +.video-stats svg { + flex-shrink: 0; +} + +/* Session Complete */ +.session-complete { + text-align: center; + padding: 3rem 2rem; +} + +.complete-icon { + display: flex; + justify-content: center; + margin-bottom: 1rem; + color: var(--success-color); +} + +.session-complete h3 { + font-size: 1.5rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 0.5rem; +} + +.session-complete p { + color: var(--text-secondary); + margin-bottom: 1.5rem; +} + +/* Video Grid */ +.video-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); + gap: 1rem; +} + +.video-grid .video-card { + font-size: 0.875rem; +} + +.video-grid .video-screenshot, +.video-grid .video-placeholder { + aspect-ratio: 9/16; +} + +/* History Section 
*/ +.history-section { + margin-top: 2rem; +} + +.history-section h2 { + font-size: 1.25rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 1rem; +} + +/* Responsive */ +@media (max-width: 768px) { + .form-row { + grid-template-columns: 1fr; + } + + .session-header { + flex-direction: column; + gap: 1rem; + align-items: flex-start; + } + + .video-grid { + grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); + } +} diff --git a/dashboard/static/index.html b/dashboard/static/index.html index b927fbb..980ac81 100644 --- a/dashboard/static/index.html +++ b/dashboard/static/index.html @@ -41,6 +41,13 @@