feat: Implement Character Memory V1 - VLM analysis and prompt injection

2026-01-07 03:08:29 +08:00
parent da98d0842a
commit b3cf9e64e5
3 changed files with 307 additions and 6 deletions
--- a/api/routers/quality.py
+++ b/api/routers/quality.py
@@ -45,6 +45,20 @@ class CharacterCreateRequest(BaseModel):
    clothing_description: str = Field("", description="Clothing description")
    distinctive_features: List[str] = Field(default_factory=list)
    character_type: str = Field("person")
+    reference_image_path: Optional[str] = Field(None, description="Reference image path for VLM analysis")
+
+
+class CharacterAnalyzeRequest(BaseModel):
+    """Request body for the analyze-image endpoint: names the reference image to analyze"""
+    image_path: str = Field(..., description="Path to the reference image")  # required; handed to the analyzer unchanged
+
+
+class CharacterAnalyzeResponse(BaseModel):
+    """Description fields returned by character image analysis; every field defaults to empty"""
+    appearance_description: str = ""
+    clothing_description: str = ""
+    distinctive_features: List[str] = []  # bare [] default: pydantic copies field defaults per instance, so it is not shared
+    prompt_description: str = ""  # Combined description for prompt injection (from the result's to_prompt_description())


 class ContentCheckRequest(BaseModel):
@@ -115,20 +129,49 @@ async def create_character(
    storyboard_id: str = Path(..., description="Storyboard ID"),
    request: CharacterCreateRequest = Body(...)
 ):
-    """Register a new character"""
+    """
+    Register a new character
+    
+    If reference_image_path is provided and appearance_description is empty,
+    VLM will analyze the image to extract appearance descriptions automatically.
+    """
    import uuid
    
    if storyboard_id not in _character_stores:
        _character_stores[storyboard_id] = {}
    
+    # Auto-analyze reference image if provided and no description
+    appearance_desc = request.appearance_description
+    clothing_desc = request.clothing_description
+    distinctive = request.distinctive_features
+    ref_image = request.reference_image_path
+    
+    if ref_image and not appearance_desc:
+        try:
+            from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer
+            analyzer = CharacterAnalyzer()
+            result = await analyzer.analyze_reference_image(ref_image)
+            
+            if result.appearance_description:
+                appearance_desc = result.appearance_description
+            if result.clothing_description:
+                clothing_desc = result.clothing_description
+            if result.distinctive_features:
+                distinctive = result.distinctive_features
+            
+            logger.info(f"Auto-analyzed character from image: {ref_image}")
+        except Exception as e:
+            logger.warning(f"Failed to auto-analyze character image: {e}")
+    
    char_id = f"char_{uuid.uuid4().hex[:8]}"
    character = CharacterSchema(
        id=char_id,
        name=request.name,
-        appearance_description=request.appearance_description,
-        clothing_description=request.clothing_description,
-        distinctive_features=request.distinctive_features,
+        appearance_description=appearance_desc,
+        clothing_description=clothing_desc,
+        distinctive_features=distinctive,
        character_type=request.character_type,
+        reference_image=ref_image,
    )
    
    _character_stores[storyboard_id][char_id] = character.model_dump()
@@ -184,6 +227,34 @@ async def delete_character(
    return {"deleted": True}


+@router.post(
+    "/characters/{storyboard_id}/analyze-image",
+    response_model=CharacterAnalyzeResponse
+)
+async def analyze_character_image(
+    storyboard_id: str = Path(..., description="Storyboard ID"),
+    request: CharacterAnalyzeRequest = Body(...)
+):
+    """
+    Run VLM analysis on a character reference image
+
+    Returns the appearance, clothing and distinctive-feature fields
+    reported by the analyzer, plus its combined prompt description.
+    """
+    from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer
+
+    # NOTE(review): image_path is taken from the request body as-is; confirm
+    # the analyzer restricts which paths it will read.
+    image_path = request.image_path
+    logger.info(f"Analyzing character image for storyboard {storyboard_id}: {image_path}")
+    analysis = await CharacterAnalyzer().analyze_reference_image(image_path)
+    return CharacterAnalyzeResponse(
+        appearance_description=analysis.appearance_description,
+        clothing_description=analysis.clothing_description,
+        distinctive_features=analysis.distinctive_features,
+        prompt_description=analysis.to_prompt_description(),
+    )
+
 # ============================================================
 # Content Filter Endpoints
 # ============================================================