Add screenshot content analysis using VLM

Features:
- ScreenshotAnalyzer class for VLM-based image analysis
- Real-time analysis during video recording
- Extract likes, comments, tags, category from screenshots
- Frontend display for category badges and tags
- Batch analysis API endpoint

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
let5sne.win10
2026-01-09 23:20:52 +08:00
parent 5b3f214e20
commit 195a93b7e0
5 changed files with 165 additions and 4 deletions

View File

@@ -4,7 +4,7 @@ Video Learning API endpoints for the dashboard.
import asyncio
from datetime import datetime
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
@@ -326,3 +326,28 @@ async def delete_session(session_id: str) -> Dict[str, str]:
del _active_sessions[session_id]
return {"session_id": session_id, "status": "deleted"}
@router.post("/sessions/{session_id}/analyze", response_model=Dict[str, Any])
async def analyze_session(session_id: str) -> Dict[str, Any]:
    """Report how many records in a session are still awaiting analysis.

    A record is counted when its ``likes`` attribute is ``None`` and it has
    a non-empty ``screenshot_path``.

    NOTE(review): this handler only counts the pending records; no analyzer
    is invoked here, even though the returned status is always
    ``"analysis_triggered"``. Presumably ``likes`` is filled in by the
    screenshot analysis step -- confirm against the record model.

    Args:
        session_id: Key of the session in the active-session registry.

    Returns:
        A dict with the session id, the total number of records, the number
        of records counted as pending, and a fixed status string.

    Raises:
        HTTPException: 404 if ``session_id`` is not an active session;
            400 if the session's agent has no current session data.
    """
    if session_id not in _active_sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    session = _active_sessions[session_id].current_session
    if not session:
        raise HTTPException(status_code=400, detail="No session data")

    # Records with a screenshot but no extracted like count are pending.
    pending = sum(
        1
        for rec in session.records
        if rec.likes is None and rec.screenshot_path
    )

    return {
        "session_id": session_id,
        "total_videos": len(session.records),
        "analyzed_count": pending,
        "status": "analysis_triggered",
    }