feat: Add comprehensive timeline editor with frame editing and regeneration capabilities
This commit is contained in:
530
pixelle_video/services/quality/character_memory.py
Normal file
530
pixelle_video/services/quality/character_memory.py
Normal file
@@ -0,0 +1,530 @@
|
||||
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
CharacterMemory - Character consistency and memory system

Maintains consistent character appearance across video frames by:
1. Detecting and registering characters from narrations
2. Extracting visual descriptions from first appearances
3. Injecting character consistency prompts into subsequent frames
4. Supporting reference images for ComfyUI IP-Adapter/ControlNet
"""

import json
import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import List, Dict, Optional, Set

from loguru import logger


class CharacterType(Enum):
    """Kind of entity a tracked character represents.

    The string values are what ``Character.to_dict`` emits under ``"type"``.
    """

    PERSON = "person"      # Human character
    ANIMAL = "animal"      # Animal character
    CREATURE = "creature"  # Fantasy/fictional creature
    OBJECT = "object"      # Personified object
    ABSTRACT = "abstract"  # Abstract entity


@dataclass
class Character:
    """
    Represents a character in the video narrative

    Stores visual description, reference images, and appearance history
    to maintain consistency across frames.

    If ``prompt_prefix`` is left empty it is derived from the appearance,
    clothing and distinctive-feature fields when the instance is created.
    """

    # Identity
    id: str  # Unique identifier
    name: str  # Character name (e.g., "小明", "the hero")
    aliases: List[str] = field(default_factory=list)  # Alternative names
    character_type: CharacterType = CharacterType.PERSON

    # Visual description (for prompt injection)
    appearance_description: str = ""  # Detailed visual description
    clothing_description: str = ""  # Clothing/outfit description
    distinctive_features: List[str] = field(default_factory=list)  # Unique features

    # Reference images (for IP-Adapter/ControlNet)
    reference_images: List[str] = field(default_factory=list)  # Paths to reference images
    primary_reference: Optional[str] = None  # Primary reference image

    # Prompt elements
    prompt_prefix: str = ""  # Pre-built prompt prefix
    negative_prompt: str = ""  # Negative prompt additions

    # Metadata
    is_active: bool = True  # Whether this character is active for logic
    first_appearance_frame: int = 0  # Frame index of first appearance
    appearance_frames: List[int] = field(default_factory=list)  # All frames with this character
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in the creation timestamp and, if missing, the prompt prefix."""
        if self.created_at is None:
            self.created_at = datetime.now()
        # An explicitly supplied prefix wins over the derived one.
        if not self.prompt_prefix:
            self._build_prompt_prefix()

    def _build_prompt_prefix(self):
        """Build prompt prefix from visual descriptions"""
        elements = []

        if self.appearance_description:
            elements.append(self.appearance_description)
        if self.clothing_description:
            elements.append(f"wearing {self.clothing_description}")
        if self.distinctive_features:
            elements.append(", ".join(self.distinctive_features))

        # Joining an empty list yields "", so no special case is needed.
        self.prompt_prefix = ", ".join(elements)

    def get_prompt_injection(self) -> str:
        """Get the prompt text to inject for this character

        Returns:
            ``"(name: prefix)"`` when a prompt prefix exists, otherwise
            ``"(name)"``.
        """
        if self.prompt_prefix:
            return f"({self.name}: {self.prompt_prefix})"
        return f"({self.name})"

    def add_reference_image(self, image_path: str, set_as_primary: bool = False):
        """Add a reference image for this character

        Args:
            image_path: Path to the reference image; duplicates are not
                stored twice.
            set_as_primary: Make this image the primary reference. The first
                image added becomes primary regardless of this flag.
        """
        if image_path not in self.reference_images:
            self.reference_images.append(image_path)
        if set_as_primary or self.primary_reference is None:
            self.primary_reference = image_path

    def matches_name(self, name: str) -> bool:
        """Check if a name matches this character

        The comparison ignores case and surrounding whitespace on both the
        query and the stored name/aliases.
        """
        target = name.strip().lower()
        if self.name.strip().lower() == target:
            return True
        return any(alias.strip().lower() == target for alias in self.aliases)

    def to_dict(self) -> dict:
        """Serialise the character's identity, description and references.

        Only the keys listed below are emitted; ``negative_prompt``,
        ``is_active``, ``appearance_frames`` and ``created_at`` are not
        included.
        """
        return {
            "id": self.id,
            "name": self.name,
            "aliases": self.aliases,
            "type": self.character_type.value,
            "appearance_description": self.appearance_description,
            "clothing_description": self.clothing_description,
            "distinctive_features": self.distinctive_features,
            "reference_images": self.reference_images,
            "primary_reference": self.primary_reference,
            "prompt_prefix": self.prompt_prefix,
            "first_appearance_frame": self.first_appearance_frame,
        }


@dataclass
class CharacterMemoryConfig:
    """Configuration for character memory system"""

    # Detection settings
    auto_detect_characters: bool = True  # Automatically detect characters from narrations
    use_llm_detection: bool = True  # Use LLM to extract character info

    # Consistency settings
    inject_character_prompts: bool = True  # Inject character descriptions into prompts
    use_reference_images: bool = True  # Use reference images for generation

    # Reference image settings
    extract_reference_from_first: bool = True  # Extract reference from first appearance
    max_reference_images: int = 3  # Max reference images per character

    # Prompt injection settings
    prompt_injection_position: str = "start"  # "start" or "end"
    include_clothing: bool = True  # Include clothing in prompts
    include_features: bool = True  # Include distinctive features


class CharacterMemory:
    """
    Character memory and consistency manager

    Tracks characters across video frames and ensures visual consistency
    by injecting character descriptions and reference images into the
    generation pipeline.

    Example:
        >>> memory = CharacterMemory(llm_service)
        >>>
        >>> # Register a character
        >>> char = memory.register_character(
        ...     name="小明",
        ...     appearance_description="young man with short black hair",
        ...     clothing_description="blue t-shirt"
        ... )
        >>>
        >>> # Apply to prompt
        >>> enhanced_prompt = memory.apply_to_prompt(
        ...     prompt="A person walking in the park",
        ...     character_names=["小明"]
        ... )
    """

    # Grabs everything from the first '[' to the last ']' of an LLM reply,
    # so a JSON array wrapped in extra prose can still be parsed.
    _JSON_ARRAY_RE = re.compile(r'\[.*\]', re.DOTALL)

    # Heuristic mention patterns used when LLM detection is unavailable.
    _BASIC_PATTERNS = (
        re.compile(r'(?:他|她|它)们?'),  # Chinese pronouns
        re.compile(r'(?:小\w{1,2})'),  # Names like 小明, 小红
        re.compile(r'(?:老\w{1,2})'),  # Names like 老王, 老李
    )

    def __init__(
        self,
        llm_service=None,
        config: Optional[CharacterMemoryConfig] = None
    ):
        """
        Initialize CharacterMemory

        Args:
            llm_service: Optional LLM service for character detection. It is
                awaited as ``llm_service(prompt, temperature=0.1)``.
            config: Character memory configuration
        """
        self.llm_service = llm_service
        self.config = config or CharacterMemoryConfig()
        self._characters: Dict[str, Character] = {}
        self._name_index: Dict[str, str] = {}  # name -> character_id mapping

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _record_appearance(character: Character, frame_index: int) -> None:
        """Note that ``character`` appears in ``frame_index`` (no duplicates)."""
        if frame_index not in character.appearance_frames:
            character.appearance_frames.append(frame_index)

    @staticmethod
    def _clean_text(value) -> str:
        """Return ``value`` stripped if it is a string, otherwise ``""``."""
        return value.strip() if isinstance(value, str) else ""

    @staticmethod
    def _resolve_character_type(raw) -> CharacterType:
        """Map an LLM-supplied type label to a CharacterType (default PERSON)."""
        if isinstance(raw, str):
            try:
                return CharacterType(raw.strip().lower())
            except ValueError:
                pass  # Unknown label: fall through to the default.
        return CharacterType.PERSON

    @staticmethod
    def _build_detection_prompt(narration: str) -> str:
        """Build the character-extraction prompt sent to the LLM."""
        return "\n".join([
            "分析以下文案,提取其中提到的角色/人物。",
            "",
            f"文案: {narration}",
            "",
            "请用 JSON 格式返回角色列表,每个角色包含:",
            "- name: 角色名称或代称",
            "- type: person/animal/creature/object",
            "- appearance: 外貌描述(如有)",
            "- clothing: 服装描述(如有)",
            "",
            "如果没有明确角色,返回空列表 []。",
            "",
            "只返回 JSON,不要其他解释。",
        ])

    @classmethod
    def _extract_character_entries(cls, response: str) -> List[dict]:
        """Pull the JSON array out of an LLM reply and keep its dict entries."""
        found = cls._JSON_ARRAY_RE.search(response)
        if not found:
            return []
        payload = json.loads(found.group())
        return [entry for entry in payload if isinstance(entry, dict)]

    def _lookup_mention(self, mention: str) -> Optional[Character]:
        """Resolve a regex mention to a registered character, if any.

        ``\\w{1,2}`` is greedy, so a name followed by another word character
        (e.g. "小明在") is captured with one character too many. The full
        match is tried first, then the two-character prefix.
        """
        shortest = min(len(mention), 2)
        for end in range(len(mention), shortest - 1, -1):
            character = self.get_character(mention[:end])
            if character:
                return character
        return None

    # ------------------------------------------------------------------
    # Registration and lookup
    # ------------------------------------------------------------------

    def register_character(
        self,
        name: str,
        appearance_description: str = "",
        clothing_description: str = "",
        distinctive_features: Optional[List[str]] = None,
        character_type: CharacterType = CharacterType.PERSON,
        first_frame: int = 0,
    ) -> Character:
        """
        Register a new character

        Args:
            name: Character name
            appearance_description: Visual appearance description
            clothing_description: Clothing/outfit description
            distinctive_features: List of distinctive features
            character_type: Type of character
            first_frame: Frame index of first appearance

        Returns:
            Created Character object
        """
        # Generate unique ID
        char_id = f"char_{len(self._characters)}_{name.replace(' ', '_').lower()}"

        character = Character(
            id=char_id,
            name=name,
            appearance_description=appearance_description,
            clothing_description=clothing_description,
            distinctive_features=distinctive_features or [],
            character_type=character_type,
            first_appearance_frame=first_frame,
            appearance_frames=[first_frame],
        )

        self._characters[char_id] = character
        # Use the same normalisation as get_character so lookups always hit.
        self._name_index[name.lower().strip()] = char_id

        logger.info(f"Registered character: {name} (id={char_id})")

        return character

    def get_character(self, name: str) -> Optional[Character]:
        """Get a character by name (case-insensitive), falling back to aliases"""
        char_id = self._name_index.get(name.lower().strip())
        if char_id:
            return self._characters.get(char_id)

        # Search aliases
        for char in self._characters.values():
            if char.matches_name(name):
                return char

        return None

    def get_character_by_id(self, char_id: str) -> Optional[Character]:
        """Get a character by ID"""
        return self._characters.get(char_id)

    @property
    def characters(self) -> List[Character]:
        """Get all registered characters"""
        return list(self._characters.values())

    # ------------------------------------------------------------------
    # Detection
    # ------------------------------------------------------------------

    async def detect_characters_from_narration(
        self,
        narration: str,
        frame_index: int = 0,
    ) -> List[Character]:
        """
        Detect and register characters mentioned in narration

        Args:
            narration: Narration text to analyze
            frame_index: Current frame index

        Returns:
            List of detected/registered characters
        """
        if not self.config.auto_detect_characters:
            return []

        if self.config.use_llm_detection and self.llm_service:
            return await self._detect_with_llm(narration, frame_index)
        return self._detect_basic(narration, frame_index)

    async def _detect_with_llm(
        self,
        narration: str,
        frame_index: int,
    ) -> List[Character]:
        """Detect characters using LLM

        New names are registered; known names only get the frame recorded.
        If the LLM call or the JSON parsing fails, the basic pattern
        detector is used instead.
        """
        if not self.llm_service:
            return []

        try:
            response = await self.llm_service(
                self._build_detection_prompt(narration), temperature=0.1
            )
            entries = self._extract_character_entries(response)
        except Exception as e:
            # Best-effort boundary around an external service: any failure
            # degrades to the heuristic detector rather than propagating.
            logger.warning(f"LLM character detection failed: {e}")
            return self._detect_basic(narration, frame_index)

        detected: List[Character] = []
        for entry in entries:
            name = self._clean_text(entry.get("name"))
            if not name:
                continue

            # Check if already registered
            existing = self.get_character(name)
            if existing:
                self._record_appearance(existing, frame_index)
                if existing not in detected:
                    detected.append(existing)
                continue

            # Register new character
            detected.append(
                self.register_character(
                    name=name,
                    appearance_description=self._clean_text(entry.get("appearance")),
                    clothing_description=self._clean_text(entry.get("clothing")),
                    character_type=self._resolve_character_type(entry.get("type")),
                    first_frame=frame_index,
                )
            )

        return detected

    def _detect_basic(
        self,
        narration: str,
        frame_index: int,
    ) -> List[Character]:
        """Basic character detection without LLM

        Only characters that are already registered can be returned; this
        method never registers new ones.
        """
        detected: List[Character] = []
        # Simple pattern matching for common character references
        for pattern in self._BASIC_PATTERNS:
            for mention in pattern.findall(narration):
                character = self._lookup_mention(mention)
                if character is None:
                    continue
                self._record_appearance(character, frame_index)
                if character not in detected:
                    detected.append(character)

        return detected

    # ------------------------------------------------------------------
    # Prompt and reference-image integration
    # ------------------------------------------------------------------

    def apply_to_prompt(
        self,
        prompt: str,
        character_names: Optional[List[str]] = None,
        frame_index: Optional[int] = None,
    ) -> str:
        """
        Apply character consistency to an image prompt

        Args:
            prompt: Original image prompt
            character_names: Specific characters to include. When omitted or
                empty, every registered character is included.
            frame_index: Current frame index for tracking

        Returns:
            Enhanced prompt with character consistency
        """
        if not self.config.inject_character_prompts:
            return prompt

        if character_names:
            # Unknown names are skipped silently.
            looked_up = (self.get_character(name) for name in character_names)
            characters_to_include = [char for char in looked_up if char]
        else:
            characters_to_include = self.characters

        if not characters_to_include:
            return prompt

        # Build character injection
        injections = []
        for char in characters_to_include:
            injections.append(char.get_prompt_injection())

            # Track appearance
            if frame_index is not None:
                self._record_appearance(char, frame_index)

        character_prompt = ", ".join(injections)

        # Any position other than "start" places the characters at the end.
        if self.config.prompt_injection_position == "start":
            return f"{character_prompt}, {prompt}"
        return f"{prompt}, {character_prompt}"

    def get_reference_images(
        self,
        character_names: Optional[List[str]] = None,
    ) -> List[str]:
        """
        Get reference images for specified characters

        Args:
            character_names: Character names (None = all characters)

        Returns:
            List of primary reference image paths, truncated to
            ``config.max_reference_images`` entries in total.
        """
        if not self.config.use_reference_images:
            return []

        if character_names:
            candidates = [self.get_character(name) for name in character_names]
        else:
            candidates = self.characters

        images = [
            char.primary_reference
            for char in candidates
            if char and char.primary_reference
        ]

        return images[:self.config.max_reference_images]

    def set_reference_image(
        self,
        character_name: str,
        image_path: str,
        set_as_primary: bool = True,
    ):
        """
        Set a reference image for a character

        Args:
            character_name: Character name
            image_path: Path to reference image
            set_as_primary: Whether to set as primary reference
        """
        char = self.get_character(character_name)
        if not char:
            logger.warning(f"Character not found: {character_name}")
            return

        char.add_reference_image(image_path, set_as_primary)
        logger.debug(f"Set reference image for {character_name}: {image_path}")

    def update_character_appearance(
        self,
        character_name: str,
        appearance_description: Optional[str] = None,
        clothing_description: Optional[str] = None,
        distinctive_features: Optional[List[str]] = None,
    ):
        """Update a character's visual description

        Only non-empty arguments are applied, so a field cannot be cleared
        through this method. The prompt prefix is rebuilt afterwards.

        Args:
            character_name: Character name
            appearance_description: New appearance description, if any
            clothing_description: New clothing description, if any
            distinctive_features: New list of distinctive features, if any
        """
        char = self.get_character(character_name)
        if not char:
            logger.warning(f"Character not found: {character_name}")
            return

        if appearance_description:
            char.appearance_description = appearance_description
        if clothing_description:
            char.clothing_description = clothing_description
        if distinctive_features:
            char.distinctive_features = distinctive_features
        char._build_prompt_prefix()
        logger.debug(f"Updated appearance for {character_name}")

    # ------------------------------------------------------------------
    # Housekeeping
    # ------------------------------------------------------------------

    def get_consistency_summary(self) -> str:
        """Get a summary of character consistency for logging"""
        if not self._characters:
            return "No characters registered"

        lines = [f"Characters ({len(self._characters)}):"]
        lines.extend(
            f"  - {char.name}: {len(char.appearance_frames)} appearances, "
            f"ref_images={len(char.reference_images)}"
            for char in self.characters
        )
        return "\n".join(lines)

    def reset(self):
        """Clear all character memory"""
        self._characters.clear()
        self._name_index.clear()
        logger.info("Character memory cleared")
Reference in New Issue
Block a user