# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
CharacterMemory - Character consistency and memory system

Maintains consistent character appearance across video frames by:
1. Detecting and registering characters from narrations
2. Extracting visual descriptions from first appearances
3. Injecting character consistency prompts into subsequent frames
4. Supporting reference images for ComfyUI IP-Adapter/ControlNet
"""

import json
import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Set

from loguru import logger


# Pronoun references. They only resolve to a character when that pronoun has
# been registered as the character's name or alias.
_PRONOUN_PATTERN = re.compile(r'(?:他|她|它)们?')

# Nickname-style names such as 小明 / 老王. Each pattern is wrapped in a
# lookahead so candidates may overlap: with a plain pattern, "小明小红" would
# be consumed as "小明小" and 小红 would never be seen.
_NAME_PATTERNS = (
    re.compile(r'(?=(小\w{1,2}))'),
    re.compile(r'(?=(老\w{1,2}))'),
)

# First '[' to last ']' in an LLM reply; used to cut the JSON array out of any
# surrounding prose or code fences.
_JSON_ARRAY_PATTERN = re.compile(r'\[.*\]', re.DOTALL)


def _as_text(value: Any) -> str:
    """Coerce a JSON value to stripped text, mapping ``None`` to ``""``."""
    if value is None:
        return ""
    return str(value).strip()


class CharacterType(Enum):
    """Type of character"""
    PERSON = "person"        # Human character
    ANIMAL = "animal"        # Animal character
    CREATURE = "creature"    # Fantasy/fictional creature
    OBJECT = "object"        # Personified object
    ABSTRACT = "abstract"    # Abstract entity


def _parse_character_type(raw: Any) -> CharacterType:
    """Map a free-form type string to a CharacterType, defaulting to PERSON."""
    try:
        return CharacterType(_as_text(raw).lower())
    except ValueError:
        return CharacterType.PERSON


@dataclass
class Character:
    """
    Represents a character in the video narrative

    Stores visual description, reference images, and appearance history
    to maintain consistency across frames.
    """
    # Identity
    id: str  # Unique identifier
    name: str  # Character name (e.g., "小明", "the hero")
    aliases: List[str] = field(default_factory=list)  # Alternative names
    character_type: CharacterType = CharacterType.PERSON

    # Visual description (for prompt injection)
    appearance_description: str = ""  # Detailed visual description
    clothing_description: str = ""  # Clothing/outfit description
    distinctive_features: List[str] = field(default_factory=list)  # Unique features

    # Reference images (for IP-Adapter/ControlNet)
    reference_images: List[str] = field(default_factory=list)  # Paths to reference images
    primary_reference: Optional[str] = None  # Primary reference image

    # Prompt elements
    prompt_prefix: str = ""  # Pre-built prompt prefix
    negative_prompt: str = ""  # Negative prompt additions

    # Metadata
    is_active: bool = True  # Whether this character is active for logic
    first_appearance_frame: int = 0  # Frame index of first appearance
    appearance_frames: List[int] = field(default_factory=list)  # All frames with this character
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in the creation timestamp and derive the prompt prefix if absent."""
        if self.created_at is None:
            self.created_at = datetime.now()
        # A caller-supplied prompt_prefix wins; otherwise derive one from the
        # visual description fields.
        if not self.prompt_prefix:
            self._build_prompt_prefix()

    def _build_prompt_prefix(self):
        """Build prompt prefix from visual descriptions"""
        elements = []

        if self.appearance_description:
            elements.append(self.appearance_description)

        if self.clothing_description:
            elements.append(f"wearing {self.clothing_description}")

        if self.distinctive_features:
            elements.append(", ".join(self.distinctive_features))

        self.prompt_prefix = ", ".join(elements) if elements else ""

    def get_prompt_injection(self) -> str:
        """Get the prompt text to inject for this character"""
        if self.prompt_prefix:
            return f"({self.name}: {self.prompt_prefix})"
        return f"({self.name})"

    def add_reference_image(self, image_path: str, set_as_primary: bool = False):
        """
        Add a reference image for this character

        Args:
            image_path: Path to the reference image
            set_as_primary: Make this image the primary reference. The first
                image added becomes primary regardless of this flag.
        """
        if image_path not in self.reference_images:
            self.reference_images.append(image_path)

        if set_as_primary or self.primary_reference is None:
            self.primary_reference = image_path

    def record_appearance(self, frame_index: int) -> None:
        """Record that this character appears in a frame (no duplicates)."""
        if frame_index not in self.appearance_frames:
            self.appearance_frames.append(frame_index)

    def matches_name(self, name: str) -> bool:
        """
        Check if a name matches this character

        The comparison is case-insensitive and ignores surrounding whitespace
        on both the query and the stored name/aliases.
        """
        name_lower = name.lower().strip()
        if self.name.lower().strip() == name_lower:
            return True
        return any(alias.lower().strip() == name_lower for alias in self.aliases)

    def to_dict(self) -> dict:
        """Return a dict of the identity, description, reference and prompt fields."""
        return {
            "id": self.id,
            "name": self.name,
            "aliases": self.aliases,
            "type": self.character_type.value,
            "appearance_description": self.appearance_description,
            "clothing_description": self.clothing_description,
            "distinctive_features": self.distinctive_features,
            "reference_images": self.reference_images,
            "primary_reference": self.primary_reference,
            "prompt_prefix": self.prompt_prefix,
            "first_appearance_frame": self.first_appearance_frame,
        }


@dataclass
class CharacterMemoryConfig:
    """
    Configuration for character memory system

    NOTE(review): ``extract_reference_from_first``, ``include_clothing`` and
    ``include_features`` are not read anywhere in this module; presumably
    they are consumed by callers - confirm.
    """

    # Detection settings
    auto_detect_characters: bool = True  # Automatically detect characters from narrations
    use_llm_detection: bool = True  # Use LLM to extract character info

    # Consistency settings
    inject_character_prompts: bool = True  # Inject character descriptions into prompts
    use_reference_images: bool = True  # Use reference images for generation

    # Reference image settings
    extract_reference_from_first: bool = True  # Extract reference from first appearance
    max_reference_images: int = 3  # Max reference images per character

    # Prompt injection settings
    prompt_injection_position: str = "start"  # "start" or "end"
    include_clothing: bool = True  # Include clothing in prompts
    include_features: bool = True  # Include distinctive features


class CharacterMemory:
    """
    Character memory and consistency manager

    Tracks characters across video frames and ensures visual consistency
    by injecting character descriptions and reference images into the
    generation pipeline.

    Example:
        >>> memory = CharacterMemory(llm_service)
        >>>
        >>> # Register a character
        >>> char = memory.register_character(
        ...     name="小明",
        ...     appearance_description="young man with short black hair",
        ...     clothing_description="blue t-shirt"
        ... )
        >>>
        >>> # Apply to prompt
        >>> enhanced_prompt = memory.apply_to_prompt(
        ...     prompt="A person walking in the park",
        ...     character_names=["小明"]
        ... )
    """

    def __init__(
        self,
        llm_service=None,
        config: Optional[CharacterMemoryConfig] = None
    ):
        """
        Initialize CharacterMemory

        Args:
            llm_service: Optional LLM service for character detection
            config: Character memory configuration
        """
        self.llm_service = llm_service
        self.config = config or CharacterMemoryConfig()
        self._characters: Dict[str, Character] = {}
        self._name_index: Dict[str, str] = {}  # name -> character_id mapping

    def register_character(
        self,
        name: str,
        appearance_description: str = "",
        clothing_description: str = "",
        distinctive_features: Optional[List[str]] = None,
        character_type: CharacterType = CharacterType.PERSON,
        first_frame: int = 0,
    ) -> Character:
        """
        Register a new character

        Args:
            name: Character name
            appearance_description: Visual appearance description
            clothing_description: Clothing/outfit description
            distinctive_features: List of distinctive features
            character_type: Type of character
            first_frame: Frame index of first appearance

        Returns:
            Created Character object
        """
        # Generate unique ID
        char_id = f"char_{len(self._characters)}_{name.replace(' ', '_').lower()}"

        character = Character(
            id=char_id,
            name=name,
            appearance_description=appearance_description,
            clothing_description=clothing_description,
            distinctive_features=distinctive_features or [],
            character_type=character_type,
            first_appearance_frame=first_frame,
            appearance_frames=[first_frame],
        )

        self._characters[char_id] = character
        # Normalise the key exactly as get_character() normalises its query,
        # otherwise a name with surrounding whitespace is never found.
        self._name_index[name.lower().strip()] = char_id

        logger.info(f"Registered character: {name} (id={char_id})")

        return character

    def get_character(self, name: str) -> Optional[Character]:
        """
        Get a character by name

        Looks up the primary-name index first, then falls back to scanning
        every character's name and aliases.

        Args:
            name: Character name or alias (case-insensitive, whitespace-trimmed)

        Returns:
            The matching Character, or None if nothing matches
        """
        name_lower = name.lower().strip()
        char_id = self._name_index.get(name_lower)
        if char_id:
            return self._characters.get(char_id)

        # Search aliases
        for char in self._characters.values():
            if char.matches_name(name):
                return char

        return None

    def get_character_by_id(self, char_id: str) -> Optional[Character]:
        """Get a character by ID"""
        return self._characters.get(char_id)

    @property
    def characters(self) -> List[Character]:
        """Get all registered characters"""
        return list(self._characters.values())

    async def detect_characters_from_narration(
        self,
        narration: str,
        frame_index: int = 0,
    ) -> List[Character]:
        """
        Detect and register characters mentioned in narration

        Uses the LLM when ``use_llm_detection`` is enabled and an LLM service
        was supplied; otherwise uses the regex-based basic detector.

        Args:
            narration: Narration text to analyze
            frame_index: Current frame index

        Returns:
            List of detected/registered characters (empty when auto-detection
            is disabled)
        """
        if not self.config.auto_detect_characters:
            return []

        if self.config.use_llm_detection and self.llm_service:
            return await self._detect_with_llm(narration, frame_index)
        return self._detect_basic(narration, frame_index)

    async def _detect_with_llm(
        self,
        narration: str,
        frame_index: int,
    ) -> List[Character]:
        """
        Detect characters using LLM

        Asks the LLM for a JSON array of characters. Known characters get the
        frame recorded; unknown ones are registered. Malformed entries
        (non-dict, missing or null name) are skipped individually.

        Args:
            narration: Narration text to analyze
            frame_index: Current frame index

        Returns:
            Detected/registered characters, without duplicates. If the LLM
            call or JSON parsing fails, the result of the basic detector.
        """
        if not self.llm_service:
            return []

        # Best-effort boundary around an external service: any failure is
        # logged and falls back to basic detection.
        try:
            prompt = f"""分析以下文案,提取其中提到的角色/人物。

文案: {narration}

请用 JSON 格式返回角色列表,每个角色包含:
- name: 角色名称或代称
- type: person/animal/creature/object
- appearance: 外貌描述(如有)
- clothing: 服装描述(如有)

如果没有明确角色,返回空列表 []。
只返回 JSON,不要其他解释。"""

            response = await self.llm_service(prompt, temperature=0.1)

            # Extract JSON from response
            json_match = _JSON_ARRAY_PATTERN.search(response)
            if not json_match:
                return []

            characters_data = json.loads(json_match.group())
            result: List[Character] = []

            for char_data in characters_data:
                if not isinstance(char_data, dict):
                    continue

                # JSON null comes back as None, so normalise before use.
                name = _as_text(char_data.get("name"))
                if not name:
                    continue

                # Check if already registered
                existing = self.get_character(name)
                if existing is not None:
                    existing.record_appearance(frame_index)
                    if existing not in result:
                        result.append(existing)
                    continue

                # Register new character
                char = self.register_character(
                    name=name,
                    appearance_description=_as_text(char_data.get("appearance")),
                    clothing_description=_as_text(char_data.get("clothing")),
                    character_type=_parse_character_type(char_data.get("type")),
                    first_frame=frame_index,
                )
                result.append(char)

            return result

        except Exception as e:
            logger.warning(f"LLM character detection failed: {e}")
            return self._detect_basic(narration, frame_index)

    def _detect_basic(
        self,
        narration: str,
        frame_index: int,
    ) -> List[Character]:
        """
        Basic character detection without LLM

        Scans the narration for pronouns and nickname-style names (小X / 老X)
        and resolves them against characters that are ALREADY registered; it
        never registers new characters.

        Args:
            narration: Narration text to analyze
            frame_index: Current frame index

        Returns:
            Registered characters referenced in the narration, without
            duplicates, ordered pronouns first, then 小-names, then 老-names
        """
        candidates: List[str] = list(_PRONOUN_PATTERN.findall(narration))

        for pattern in _NAME_PATTERNS:
            for match in pattern.findall(narration):
                candidates.append(match)
                # \w also matches CJK characters, so a two-character name
                # followed by more text ("小明去公园") is captured as
                # "小明去". Try the two-character prefix as well.
                if len(match) > 2:
                    candidates.append(match[:2])

        detected: List[Character] = []
        for candidate in candidates:
            existing = self.get_character(candidate)
            if existing is None:
                continue
            existing.record_appearance(frame_index)
            if existing not in detected:
                detected.append(existing)

        return detected

    def apply_to_prompt(
        self,
        prompt: str,
        character_names: Optional[List[str]] = None,
        frame_index: Optional[int] = None,
    ) -> str:
        """
        Apply character consistency to an image prompt

        Args:
            prompt: Original image prompt
            character_names: Specific characters to include (None or empty =
                all registered characters); unknown names are ignored
            frame_index: Current frame index for tracking

        Returns:
            Enhanced prompt with character consistency, or the original
            prompt when injection is disabled or no character applies
        """
        if not self.config.inject_character_prompts:
            return prompt

        characters_to_include = []

        if character_names:
            for name in character_names:
                char = self.get_character(name)
                if char:
                    characters_to_include.append(char)
        else:
            # Include all characters that have appeared
            characters_to_include = self.characters

        if not characters_to_include:
            return prompt

        # Build character injection
        injections = []
        for char in characters_to_include:
            injection = char.get_prompt_injection()
            if injection:
                injections.append(injection)

                # Track appearance
                if frame_index is not None:
                    char.record_appearance(frame_index)

        if not injections:
            return prompt

        character_prompt = ", ".join(injections)

        if self.config.prompt_injection_position == "start":
            return f"{character_prompt}, {prompt}"
        return f"{prompt}, {character_prompt}"

    def get_reference_images(
        self,
        character_names: Optional[List[str]] = None,
    ) -> List[str]:
        """
        Get reference images for specified characters

        Only each character's primary reference is returned, and the combined
        list is truncated to ``config.max_reference_images``.

        Args:
            character_names: Character names (None = all characters)

        Returns:
            List of reference image paths (empty when reference images are
            disabled in the config)
        """
        if not self.config.use_reference_images:
            return []

        images = []

        if character_names:
            for name in character_names:
                char = self.get_character(name)
                if char and char.primary_reference:
                    images.append(char.primary_reference)
        else:
            for char in self.characters:
                if char.primary_reference:
                    images.append(char.primary_reference)

        return images[:self.config.max_reference_images]

    def set_reference_image(
        self,
        character_name: str,
        image_path: str,
        set_as_primary: bool = True,
    ):
        """
        Set a reference image for a character

        Logs a warning and does nothing if the character is unknown.

        Args:
            character_name: Character name
            image_path: Path to reference image
            set_as_primary: Whether to set as primary reference
        """
        char = self.get_character(character_name)
        if char:
            char.add_reference_image(image_path, set_as_primary)
            logger.debug(f"Set reference image for {character_name}: {image_path}")
        else:
            logger.warning(f"Character not found: {character_name}")

    def update_character_appearance(
        self,
        character_name: str,
        appearance_description: Optional[str] = None,
        clothing_description: Optional[str] = None,
        distinctive_features: Optional[List[str]] = None,
    ):
        """
        Update a character's visual description

        Only truthy arguments are applied; None or empty values leave the
        existing field untouched. The prompt prefix is rebuilt afterwards.
        Logs a warning and does nothing if the character is unknown.

        Args:
            character_name: Character name
            appearance_description: New appearance description, if any
            clothing_description: New clothing description, if any
            distinctive_features: New list of distinctive features, if any
        """
        char = self.get_character(character_name)
        if not char:
            logger.warning(f"Character not found: {character_name}")
            return

        if appearance_description:
            char.appearance_description = appearance_description
        if clothing_description:
            char.clothing_description = clothing_description
        if distinctive_features:
            char.distinctive_features = distinctive_features
        char._build_prompt_prefix()

        logger.debug(f"Updated appearance for {character_name}")

    def get_consistency_summary(self) -> str:
        """Get a summary of character consistency for logging"""
        if not self._characters:
            return "No characters registered"

        lines = [f"Characters ({len(self._characters)}):"]
        for char in self.characters:
            lines.append(
                f"  - {char.name}: {len(char.appearance_frames)} appearances, "
                f"ref_images={len(char.reference_images)}"
            )

        return "\n".join(lines)

    def reset(self):
        """Clear all character memory"""
        self._characters.clear()
        self._name_index.clear()
        logger.info("Character memory cleared")