feat: Implement Character Memory V1 - VLM analysis and prompt injection

This commit is contained in:
empty
2026-01-07 03:08:29 +08:00
parent da98d0842a
commit b3cf9e64e5
3 changed files with 307 additions and 6 deletions

View File

@@ -473,8 +473,35 @@ async def regenerate_frame_image(
else:
logger.warning(f"[REGEN-IMG] No style anchor found for {storyboard_id}")
# Apply style prefix to prompt
final_prompt = f"{style_prefix}, {prompt}" if style_prefix else prompt
# Get character descriptions for prompt injection
character_prefix = ""
from api.routers.quality import _character_stores
if storyboard_id in _character_stores:
char_descriptions = []
for char_data in _character_stores[storyboard_id].values():
appearance = char_data.get("appearance_description", "")
clothing = char_data.get("clothing_description", "")
name = char_data.get("name", "character")
if appearance or clothing:
parts = [f"{name}:"]
if appearance:
parts.append(appearance)
if clothing:
parts.append(f"wearing {clothing}")
char_descriptions.append(" ".join(parts))
if char_descriptions:
character_prefix = "Characters: " + "; ".join(char_descriptions) + ". "
logger.info(f"[REGEN-IMG] Injecting character descriptions: {character_prefix[:80]}...")
# Apply style prefix and character descriptions to prompt
final_prompt = ""
if style_prefix:
final_prompt += f"{style_prefix}, "
if character_prefix:
final_prompt += character_prefix
final_prompt += prompt
logger.info(f"[REGEN-IMG] Final prompt: {final_prompt[:120]}...")
# Use MediaService to generate image via RunningHub workflow

View File

@@ -45,6 +45,20 @@ class CharacterCreateRequest(BaseModel):
clothing_description: str = Field("", description="Clothing description")
distinctive_features: List[str] = Field(default_factory=list)
character_type: str = Field("person")
reference_image_path: Optional[str] = Field(None, description="Reference image path for VLM analysis")
class CharacterAnalyzeRequest(BaseModel):
    """Request to analyze a character image"""

    # Required (no default). The analyze-image endpoint passes this value
    # straight to CharacterAnalyzer.analyze_reference_image.
    image_path: str = Field(
        ...,
        description="Path to the reference image",
    )
class CharacterAnalyzeResponse(BaseModel):
    """Response from character image analysis"""

    # Every field has an empty default, so a response can be built from a
    # partially populated analysis result.
    appearance_description: str = ""
    clothing_description: str = ""
    # Use default_factory, matching CharacterCreateRequest.distinctive_features,
    # rather than declaring a mutable list literal as the default.
    distinctive_features: List[str] = Field(default_factory=list)
    prompt_description: str = ""  # Combined description for prompt injection
class ContentCheckRequest(BaseModel):
@@ -115,20 +129,49 @@ async def create_character(
storyboard_id: str = Path(..., description="Storyboard ID"),
request: CharacterCreateRequest = Body(...)
):
"""Register a new character"""
"""
Register a new character
If reference_image_path is provided and appearance_description is empty,
VLM will analyze the image to extract appearance descriptions automatically.
"""
import uuid
if storyboard_id not in _character_stores:
_character_stores[storyboard_id] = {}
# Auto-analyze reference image if provided and no description
appearance_desc = request.appearance_description
clothing_desc = request.clothing_description
distinctive = request.distinctive_features
ref_image = request.reference_image_path
if ref_image and not appearance_desc:
try:
from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer
analyzer = CharacterAnalyzer()
result = await analyzer.analyze_reference_image(ref_image)
if result.appearance_description:
appearance_desc = result.appearance_description
if result.clothing_description:
clothing_desc = result.clothing_description
if result.distinctive_features:
distinctive = result.distinctive_features
logger.info(f"Auto-analyzed character from image: {ref_image}")
except Exception as e:
logger.warning(f"Failed to auto-analyze character image: {e}")
char_id = f"char_{uuid.uuid4().hex[:8]}"
character = CharacterSchema(
id=char_id,
name=request.name,
appearance_description=request.appearance_description,
clothing_description=request.clothing_description,
distinctive_features=request.distinctive_features,
appearance_description=appearance_desc,
clothing_description=clothing_desc,
distinctive_features=distinctive,
character_type=request.character_type,
reference_image=ref_image,
)
_character_stores[storyboard_id][char_id] = character.model_dump()
@@ -184,6 +227,34 @@ async def delete_character(
return {"deleted": True}
@router.post(
    "/characters/{storyboard_id}/analyze-image",
    response_model=CharacterAnalyzeResponse,
)
async def analyze_character_image(
    storyboard_id: str = Path(..., description="Storyboard ID"),
    request: CharacterAnalyzeRequest = Body(...),
):
    """
    Run VLM analysis on a character reference image.

    The image at ``request.image_path`` is handed to ``CharacterAnalyzer`` and
    the extracted appearance, clothing and distinctive-feature descriptions
    are returned together with a combined prompt description.

    ``storyboard_id`` is only used for logging here; nothing is stored.
    Any exception raised by the analyzer propagates to the caller.
    """
    # Imported at call time rather than module import time.
    from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer

    logger.info(f"Analyzing character image for storyboard {storyboard_id}: {request.image_path}")

    # NOTE(review): image_path comes from the request body and is passed to
    # the analyzer as-is. Confirm the analyzer restricts which paths it reads.
    analysis = await CharacterAnalyzer().analyze_reference_image(request.image_path)

    response_fields = {
        "appearance_description": analysis.appearance_description,
        "clothing_description": analysis.clothing_description,
        "distinctive_features": analysis.distinctive_features,
        "prompt_description": analysis.to_prompt_description(),
    }
    return CharacterAnalyzeResponse(**response_fields)
# ============================================================
# Content Filter Endpoints
# ============================================================