feat: Add comprehensive timeline editor with frame editing and regeneration capabilities

2026-01-05 14:48:43 +08:00
parent 7d78dcd078
commit ca018a9b1f
68 changed files with 14904 additions and 57 deletions
--- a/pixelle_video/services/quality/character_memory.py
+++ b/pixelle_video/services/quality/character_memory.py
@@ -0,0 +1,530 @@
+# Copyright (C) 2025 AIDC-AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+CharacterMemory - Character consistency and memory system
+
+Maintains consistent character appearance across video frames by:
+1. Detecting and registering characters from narrations
+2. Extracting visual descriptions from first appearances
+3. Injecting character consistency prompts into subsequent frames
+4. Supporting reference images for ComfyUI IP-Adapter/ControlNet
+"""
+
+from dataclasses import dataclass, field
+from typing import List, Dict, Optional, Set
+from datetime import datetime
+from enum import Enum
+
+from loguru import logger
+
+
+class CharacterType(Enum):
+    """Type of character"""
+    PERSON = "person"           # Human character
+    ANIMAL = "animal"           # Animal character
+    CREATURE = "creature"       # Fantasy/fictional creature
+    OBJECT = "object"           # Personified object
+    ABSTRACT = "abstract"       # Abstract entity
+
+
+@dataclass
+class Character:
+    """
+    Represents a character in the video narrative
+    
+    Stores visual description, reference images, and appearance history
+    to maintain consistency across frames.
+    """
+    
+    # Identity
+    id: str                                    # Unique identifier
+    name: str                                  # Character name (e.g., "小明", "the hero")
+    aliases: List[str] = field(default_factory=list)  # Alternative names
+    character_type: CharacterType = CharacterType.PERSON
+    
+    # Visual description (for prompt injection)
+    appearance_description: str = ""           # Detailed visual description
+    clothing_description: str = ""             # Clothing/outfit description
+    distinctive_features: List[str] = field(default_factory=list)  # Unique features
+    
+    # Reference images (for IP-Adapter/ControlNet)
+    reference_images: List[str] = field(default_factory=list)  # Paths to reference images
+    primary_reference: Optional[str] = None    # Primary reference image
+    
+    # Prompt elements
+    prompt_prefix: str = ""                    # Pre-built prompt prefix
+    negative_prompt: str = ""                  # Negative prompt additions
+    
+    # Metadata
+    is_active: bool = True                     # Whether this character is active for logic
+    first_appearance_frame: int = 0            # Frame index of first appearance
+    appearance_frames: List[int] = field(default_factory=list)  # All frames with this character
+    created_at: Optional[datetime] = None
+    
+    def __post_init__(self):
+        if self.created_at is None:
+            self.created_at = datetime.now()
+        if not hasattr(self, 'is_active'):
+            self.is_active = True
+        if not self.prompt_prefix:
+            self._build_prompt_prefix()
+    
+    def _build_prompt_prefix(self):
+        """Build prompt prefix from visual descriptions"""
+        elements = []
+        
+        if self.appearance_description:
+            elements.append(self.appearance_description)
+        if self.clothing_description:
+            elements.append(f"wearing {self.clothing_description}")
+        if self.distinctive_features:
+            elements.append(", ".join(self.distinctive_features))
+        
+        self.prompt_prefix = ", ".join(elements) if elements else ""
+    
+    def get_prompt_injection(self) -> str:
+        """Get the prompt text to inject for this character"""
+        if self.prompt_prefix:
+            return f"({self.name}: {self.prompt_prefix})"
+        return f"({self.name})"
+    
+    def add_reference_image(self, image_path: str, set_as_primary: bool = False):
+        """Add a reference image for this character"""
+        if image_path not in self.reference_images:
+            self.reference_images.append(image_path)
+        if set_as_primary or self.primary_reference is None:
+            self.primary_reference = image_path
+    
+    def matches_name(self, name: str) -> bool:
+        """Check if a name matches this character"""
+        name_lower = name.lower().strip()
+        if self.name.lower() == name_lower:
+            return True
+        return any(alias.lower() == name_lower for alias in self.aliases)
+    
+    def to_dict(self) -> dict:
+        return {
+            "id": self.id,
+            "name": self.name,
+            "aliases": self.aliases,
+            "type": self.character_type.value,
+            "appearance_description": self.appearance_description,
+            "clothing_description": self.clothing_description,
+            "distinctive_features": self.distinctive_features,
+            "reference_images": self.reference_images,
+            "primary_reference": self.primary_reference,
+            "prompt_prefix": self.prompt_prefix,
+            "first_appearance_frame": self.first_appearance_frame,
+        }
+
+
+@dataclass
+class CharacterMemoryConfig:
+    """Configuration for character memory system"""
+    
+    # Detection settings
+    auto_detect_characters: bool = True        # Automatically detect characters from narrations
+    use_llm_detection: bool = True             # Use LLM to extract character info
+    
+    # Consistency settings
+    inject_character_prompts: bool = True      # Inject character descriptions into prompts
+    use_reference_images: bool = True          # Use reference images for generation
+    
+    # Reference image settings
+    extract_reference_from_first: bool = True  # Extract reference from first appearance
+    max_reference_images: int = 3              # Max reference images per character
+    
+    # Prompt injection settings
+    prompt_injection_position: str = "start"   # "start" or "end"
+    include_clothing: bool = True              # Include clothing in prompts
+    include_features: bool = True              # Include distinctive features
+
+
+class CharacterMemory:
+    """
+    Character memory and consistency manager
+    
+    Tracks characters across video frames and ensures visual consistency
+    by injecting character descriptions and reference images into the
+    generation pipeline.
+    
+    Example:
+        >>> memory = CharacterMemory(llm_service)
+        >>> 
+        >>> # Register a character
+        >>> char = memory.register_character(
+        ...     name="小明",
+        ...     appearance_description="young man with short black hair",
+        ...     clothing_description="blue t-shirt"
+        ... )
+        >>> 
+        >>> # Apply to prompt
+        >>> enhanced_prompt = memory.apply_to_prompt(
+        ...     prompt="A person walking in the park",
+        ...     characters=["小明"]
+        ... )
+    """
+    
+    def __init__(
+        self,
+        llm_service=None,
+        config: Optional[CharacterMemoryConfig] = None
+    ):
+        """
+        Initialize CharacterMemory
+        
+        Args:
+            llm_service: Optional LLM service for character detection
+            config: Character memory configuration
+        """
+        self.llm_service = llm_service
+        self.config = config or CharacterMemoryConfig()
+        self._characters: Dict[str, Character] = {}
+        self._name_index: Dict[str, str] = {}  # name -> character_id mapping
+    
+    def register_character(
+        self,
+        name: str,
+        appearance_description: str = "",
+        clothing_description: str = "",
+        distinctive_features: Optional[List[str]] = None,
+        character_type: CharacterType = CharacterType.PERSON,
+        first_frame: int = 0,
+    ) -> Character:
+        """
+        Register a new character
+        
+        Args:
+            name: Character name
+            appearance_description: Visual appearance description
+            clothing_description: Clothing/outfit description
+            distinctive_features: List of distinctive features
+            character_type: Type of character
+            first_frame: Frame index of first appearance
+            
+        Returns:
+            Created Character object
+        """
+        # Generate unique ID
+        char_id = f"char_{len(self._characters)}_{name.replace(' ', '_').lower()}"
+        
+        character = Character(
+            id=char_id,
+            name=name,
+            appearance_description=appearance_description,
+            clothing_description=clothing_description,
+            distinctive_features=distinctive_features or [],
+            character_type=character_type,
+            first_appearance_frame=first_frame,
+            appearance_frames=[first_frame],
+        )
+        
+        self._characters[char_id] = character
+        self._name_index[name.lower()] = char_id
+        
+        logger.info(f"Registered character: {name} (id={char_id})")
+        
+        return character
+    
+    def get_character(self, name: str) -> Optional[Character]:
+        """Get a character by name"""
+        name_lower = name.lower().strip()
+        char_id = self._name_index.get(name_lower)
+        if char_id:
+            return self._characters.get(char_id)
+        
+        # Search aliases
+        for char in self._characters.values():
+            if char.matches_name(name):
+                return char
+        
+        return None
+    
+    def get_character_by_id(self, char_id: str) -> Optional[Character]:
+        """Get a character by ID"""
+        return self._characters.get(char_id)
+    
+    @property
+    def characters(self) -> List[Character]:
+        """Get all registered characters"""
+        return list(self._characters.values())
+    
+    async def detect_characters_from_narration(
+        self,
+        narration: str,
+        frame_index: int = 0,
+    ) -> List[Character]:
+        """
+        Detect and register characters mentioned in narration
+        
+        Args:
+            narration: Narration text to analyze
+            frame_index: Current frame index
+            
+        Returns:
+            List of detected/registered characters
+        """
+        if not self.config.auto_detect_characters:
+            return []
+        
+        detected = []
+        
+        if self.config.use_llm_detection and self.llm_service:
+            detected = await self._detect_with_llm(narration, frame_index)
+        else:
+            detected = self._detect_basic(narration, frame_index)
+        
+        return detected
+    
+    async def _detect_with_llm(
+        self,
+        narration: str,
+        frame_index: int,
+    ) -> List[Character]:
+        """Detect characters using LLM"""
+        if not self.llm_service:
+            return []
+        
+        try:
+            prompt = f"""分析以下文案，提取其中提到的角色/人物。
+
+文案: {narration}
+
+请用 JSON 格式返回角色列表，每个角色包含:
+- name: 角色名称或代称
+- type: person/animal/creature/object
+- appearance: 外貌描述（如有）
+- clothing: 服装描述（如有）
+
+如果没有明确角色，返回空列表 []。
+
+只返回 JSON，不要其他解释。"""
+
+            response = await self.llm_service(prompt, temperature=0.1)
+            
+            # Parse response
+            import json
+            import re
+            
+            # Extract JSON from response
+            json_match = re.search(r'\[.*\]', response, re.DOTALL)
+            if json_match:
+                characters_data = json.loads(json_match.group())
+                
+                result = []
+                for char_data in characters_data:
+                    name = char_data.get("name", "").strip()
+                    if not name:
+                        continue
+                    
+                    # Check if already registered
+                    existing = self.get_character(name)
+                    if existing:
+                        existing.appearance_frames.append(frame_index)
+                        result.append(existing)
+                    else:
+                        # Register new character
+                        char_type = CharacterType.PERSON
+                        type_str = char_data.get("type", "person").lower()
+                        if type_str == "animal":
+                            char_type = CharacterType.ANIMAL
+                        elif type_str == "creature":
+                            char_type = CharacterType.CREATURE
+                        
+                        char = self.register_character(
+                            name=name,
+                            appearance_description=char_data.get("appearance", ""),
+                            clothing_description=char_data.get("clothing", ""),
+                            character_type=char_type,
+                            first_frame=frame_index,
+                        )
+                        result.append(char)
+                
+                return result
+            
+            return []
+            
+        except Exception as e:
+            logger.warning(f"LLM character detection failed: {e}")
+            return self._detect_basic(narration, frame_index)
+    
+    def _detect_basic(
+        self,
+        narration: str,
+        frame_index: int,
+    ) -> List[Character]:
+        """Basic character detection without LLM"""
+        # Simple pattern matching for common character references
+        import re
+        
+        patterns = [
+            r'(?:他|她|它)们?',  # Chinese pronouns
+            r'(?:小\w{1,2})',    # Names like 小明, 小红
+            r'(?:老\w{1,2})',    # Names like 老王, 老李
+        ]
+        
+        detected = []
+        for pattern in patterns:
+            matches = re.findall(pattern, narration)
+            for match in matches:
+                existing = self.get_character(match)
+                if existing:
+                    existing.appearance_frames.append(frame_index)
+                    if existing not in detected:
+                        detected.append(existing)
+        
+        return detected
+    
+    def apply_to_prompt(
+        self,
+        prompt: str,
+        character_names: Optional[List[str]] = None,
+        frame_index: Optional[int] = None,
+    ) -> str:
+        """
+        Apply character consistency to an image prompt
+        
+        Args:
+            prompt: Original image prompt
+            character_names: Specific characters to include (None = auto-detect)
+            frame_index: Current frame index for tracking
+            
+        Returns:
+            Enhanced prompt with character consistency
+        """
+        if not self.config.inject_character_prompts:
+            return prompt
+        
+        characters_to_include = []
+        
+        if character_names:
+            for name in character_names:
+                char = self.get_character(name)
+                if char:
+                    characters_to_include.append(char)
+        else:
+            # Include all characters that have appeared
+            characters_to_include = self.characters
+        
+        if not characters_to_include:
+            return prompt
+        
+        # Build character injection
+        injections = []
+        for char in characters_to_include:
+            injection = char.get_prompt_injection()
+            if injection:
+                injections.append(injection)
+            
+            # Track appearance
+            if frame_index is not None and frame_index not in char.appearance_frames:
+                char.appearance_frames.append(frame_index)
+        
+        if not injections:
+            return prompt
+        
+        character_prompt = ", ".join(injections)
+        
+        if self.config.prompt_injection_position == "start":
+            return f"{character_prompt}, {prompt}"
+        else:
+            return f"{prompt}, {character_prompt}"
+    
+    def get_reference_images(
+        self,
+        character_names: Optional[List[str]] = None,
+    ) -> List[str]:
+        """
+        Get reference images for specified characters
+        
+        Args:
+            character_names: Character names (None = all characters)
+            
+        Returns:
+            List of reference image paths
+        """
+        if not self.config.use_reference_images:
+            return []
+        
+        images = []
+        
+        if character_names:
+            for name in character_names:
+                char = self.get_character(name)
+                if char and char.primary_reference:
+                    images.append(char.primary_reference)
+        else:
+            for char in self.characters:
+                if char.primary_reference:
+                    images.append(char.primary_reference)
+        
+        return images[:self.config.max_reference_images]
+    
+    def set_reference_image(
+        self,
+        character_name: str,
+        image_path: str,
+        set_as_primary: bool = True,
+    ):
+        """
+        Set a reference image for a character
+        
+        Args:
+            character_name: Character name
+            image_path: Path to reference image
+            set_as_primary: Whether to set as primary reference
+        """
+        char = self.get_character(character_name)
+        if char:
+            char.add_reference_image(image_path, set_as_primary)
+            logger.debug(f"Set reference image for {character_name}: {image_path}")
+        else:
+            logger.warning(f"Character not found: {character_name}")
+    
+    def update_character_appearance(
+        self,
+        character_name: str,
+        appearance_description: Optional[str] = None,
+        clothing_description: Optional[str] = None,
+        distinctive_features: Optional[List[str]] = None,
+    ):
+        """Update a character's visual description"""
+        char = self.get_character(character_name)
+        if char:
+            if appearance_description:
+                char.appearance_description = appearance_description
+            if clothing_description:
+                char.clothing_description = clothing_description
+            if distinctive_features:
+                char.distinctive_features = distinctive_features
+            char._build_prompt_prefix()
+            logger.debug(f"Updated appearance for {character_name}")
+    
+    def get_consistency_summary(self) -> str:
+        """Get a summary of character consistency for logging"""
+        if not self._characters:
+            return "No characters registered"
+        
+        lines = [f"Characters ({len(self._characters)}):"]
+        for char in self.characters:
+            lines.append(
+                f"  - {char.name}: {len(char.appearance_frames)} appearances, "
+                f"ref_images={len(char.reference_images)}"
+            )
+        return "\n".join(lines)
+    
+    def reset(self):
+        """Clear all character memory"""
+        self._characters.clear()
+        self._name_index.clear()
+        logger.info("Character memory cleared")