feat: Implement Character Memory V1 - VLM analysis and prompt injection

2026-01-07 03:08:29 +08:00
parent da98d0842a
commit b3cf9e64e5
3 changed files with 307 additions and 6 deletions
--- a/api/routers/editor.py
+++ b/api/routers/editor.py
@@ -473,8 +473,35 @@ async def regenerate_frame_image(
        else:
            logger.warning(f"[REGEN-IMG] No style anchor found for {storyboard_id}")
        
-        # Apply style prefix to prompt
-        final_prompt = f"{style_prefix}, {prompt}" if style_prefix else prompt
+        # Get character descriptions for prompt injection
+        character_prefix = ""
+        from api.routers.quality import _character_stores
+        if storyboard_id in _character_stores:
+            char_descriptions = []
+            for char_data in _character_stores[storyboard_id].values():
+                appearance = char_data.get("appearance_description", "")
+                clothing = char_data.get("clothing_description", "")
+                name = char_data.get("name", "character")
+                
+                if appearance or clothing:
+                    parts = [f"{name}:"]
+                    if appearance:
+                        parts.append(appearance)
+                    if clothing:
+                        parts.append(f"wearing {clothing}")
+                    char_descriptions.append(" ".join(parts))
+            
+            if char_descriptions:
+                character_prefix = "Characters: " + "; ".join(char_descriptions) + ". "
+                logger.info(f"[REGEN-IMG] Injecting character descriptions: {character_prefix[:80]}...")
+        
+        # Apply style prefix and character descriptions to prompt
+        final_prompt = ""
+        if style_prefix:
+            final_prompt += f"{style_prefix}, "
+        if character_prefix:
+            final_prompt += character_prefix
+        final_prompt += prompt
        logger.info(f"[REGEN-IMG] Final prompt: {final_prompt[:120]}...")
        
        # Use MediaService to generate image via RunningHub workflow
--- a/api/routers/quality.py
+++ b/api/routers/quality.py
@@ -45,6 +45,20 @@ class CharacterCreateRequest(BaseModel):
    clothing_description: str = Field("", description="Clothing description")
    distinctive_features: List[str] = Field(default_factory=list)
    character_type: str = Field("person")
+    reference_image_path: Optional[str] = Field(None, description="Reference image path for VLM analysis")
+
+
+class CharacterAnalyzeRequest(BaseModel):
+    """Request body carrying the path of the character reference image to analyze"""
+    image_path: str = Field(..., description="Path to the reference image")
+
+
+class CharacterAnalyzeResponse(BaseModel):
+    """
+    Response from character image analysis
+    
+    Every field has an empty default, so a response can be built from a
+    partial analysis result.
+    """
+    appearance_description: str = ""
+    clothing_description: str = ""
+    # default_factory mirrors how CharacterCreateRequest declares this field
+    distinctive_features: List[str] = Field(default_factory=list)
+    # Combined description for prompt injection
+    prompt_description: str = ""


 class ContentCheckRequest(BaseModel):
@@ -115,20 +129,49 @@ async def create_character(
    storyboard_id: str = Path(..., description="Storyboard ID"),
    request: CharacterCreateRequest = Body(...)
 ):
-    """Register a new character"""
+    """
+    Register a new character
+    
+    If reference_image_path is provided and appearance_description is empty,
+    VLM will analyze the image to extract appearance descriptions automatically.
+    """
    import uuid
    
    if storyboard_id not in _character_stores:
        _character_stores[storyboard_id] = {}
    
+    # Auto-analyze reference image if provided and no description
+    appearance_desc = request.appearance_description
+    clothing_desc = request.clothing_description
+    distinctive = request.distinctive_features
+    ref_image = request.reference_image_path
+    
+    if ref_image and not appearance_desc:
+        try:
+            from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer
+            analyzer = CharacterAnalyzer()
+            result = await analyzer.analyze_reference_image(ref_image)
+            
+            if result.appearance_description:
+                appearance_desc = result.appearance_description
+            if result.clothing_description:
+                clothing_desc = result.clothing_description
+            if result.distinctive_features:
+                distinctive = result.distinctive_features
+            
+            logger.info(f"Auto-analyzed character from image: {ref_image}")
+        except Exception as e:
+            logger.warning(f"Failed to auto-analyze character image: {e}")
+    
    char_id = f"char_{uuid.uuid4().hex[:8]}"
    character = CharacterSchema(
        id=char_id,
        name=request.name,
-        appearance_description=request.appearance_description,
-        clothing_description=request.clothing_description,
-        distinctive_features=request.distinctive_features,
+        appearance_description=appearance_desc,
+        clothing_description=clothing_desc,
+        distinctive_features=distinctive,
        character_type=request.character_type,
+        reference_image=ref_image,
    )
    
    _character_stores[storyboard_id][char_id] = character.model_dump()
@@ -184,6 +227,34 @@ async def delete_character(
    return {"deleted": True}


+@router.post(
+    "/characters/{storyboard_id}/analyze-image",
+    response_model=CharacterAnalyzeResponse
+)
+async def analyze_character_image(
+    storyboard_id: str = Path(..., description="Storyboard ID"),
+    request: CharacterAnalyzeRequest = Body(...)
+):
+    """
+    Analyze a character reference image using VLM
+    
+    Runs CharacterAnalyzer on ``request.image_path`` and returns the
+    extracted appearance, clothing and distinctive-feature descriptions,
+    together with the combined prompt description. ``storyboard_id`` is
+    only used for logging here; nothing is written to the character store.
+    
+    Raises:
+        HTTPException: 500 when the analyzer fails for any reason.
+    """
+    from fastapi import HTTPException
+    from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer
+    
+    logger.info(f"Analyzing character image for storyboard {storyboard_id}: {request.image_path}")
+    
+    # NOTE(review): image_path comes straight from the request body and is not
+    # validated here - confirm CharacterAnalyzer restricts which paths it reads.
+    try:
+        analyzer = CharacterAnalyzer()
+        result = await analyzer.analyze_reference_image(request.image_path)
+    except HTTPException:
+        # Let deliberate HTTP errors pass through unchanged.
+        raise
+    except Exception as e:
+        # Log the failure with request context and return a stable error
+        # body instead of leaking the raw exception to the client.
+        logger.error(
+            f"Failed to analyze character image for storyboard {storyboard_id} "
+            f"({request.image_path}): {e}"
+        )
+        raise HTTPException(status_code=500, detail="Character image analysis failed") from e
+    
+    return CharacterAnalyzeResponse(
+        appearance_description=result.appearance_description,
+        clothing_description=result.clothing_description,
+        distinctive_features=result.distinctive_features,
+        prompt_description=result.to_prompt_description()
+    )
+
 # ============================================================
 # Content Filter Endpoints
 # ============================================================