feat: Add comprehensive timeline editor with frame editing and regeneration capabilities

2026-01-05 14:48:43 +08:00
parent 7d78dcd078
commit ca018a9b1f
68 changed files with 14904 additions and 57 deletions
--- a/pixelle_video/services/quality/style_guard.py
+++ b/pixelle_video/services/quality/style_guard.py
@@ -0,0 +1,276 @@
+# Copyright (C) 2025 AIDC-AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+StyleGuard - Visual style consistency engine
+
+Ensures consistent visual style across all frames in a video by:
+1. Extracting style anchor from the first generated frame
+2. Injecting style constraints into subsequent frame prompts
+3. (Optional) Using style reference techniques like IP-Adapter
+"""
+
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+from loguru import logger
+
+
+@dataclass
+class StyleAnchor:
+    """Visual style description captured from a reference frame."""
+
+    # Descriptive style elements; an empty string means "not set"
+    color_palette: str = ""           # e.g., "warm earth tones", "cool blues"
+    art_style: str = ""               # e.g., "minimalist", "realistic", "anime"
+    composition_style: str = ""       # e.g., "centered", "rule of thirds"
+    texture: str = ""                 # e.g., "smooth", "grainy", "watercolor"
+    lighting: str = ""                # e.g., "soft ambient", "dramatic shadows"
+
+    # Ready-made prefix; when non-empty it is used verbatim for prompts
+    style_prefix: str = ""
+
+    # Path of the reference image (for IP-Adapter style techniques)
+    reference_image: Optional[str] = None
+
+    def to_prompt_prefix(self) -> str:
+        """Build the style text to attach to an image prompt.
+
+        Returns ``style_prefix`` verbatim when it is set. Otherwise the
+        non-empty art style, color palette, lighting and texture are
+        joined with ", " in that order; ``composition_style`` is not
+        used. The result is "" when none of them is set.
+        """
+        if self.style_prefix:
+            return self.style_prefix
+
+        # Each pair is (field value, template applied when the value is set)
+        labelled = (
+            (self.art_style, "{} style"),
+            (self.color_palette, "{}"),
+            (self.lighting, "{} lighting"),
+            (self.texture, "{} texture"),
+        )
+        return ", ".join(fmt.format(val) for val, fmt in labelled if val)
+
+    def to_dict(self) -> dict:
+        """Return every field as a plain dict keyed by field name."""
+        names = (
+            "color_palette", "art_style", "composition_style",
+            "texture", "lighting", "style_prefix", "reference_image",
+        )
+        return {name: getattr(self, name) for name in names}
+
+
+@dataclass
+class StyleGuardConfig:
+    """Configuration for StyleGuard (every field has a default)"""
+    
+    # Extraction settings
+    extract_from_first_frame: bool = True  # NOTE(review): not read by StyleGuard in this file - confirm use
+    use_vlm_extraction: bool = True   # VLM path is taken only when an llm_service is also supplied
+    
+    # Application settings
+    apply_to_all_frames: bool = True  # NOTE(review): not read by StyleGuard in this file - confirm use
+    prefix_position: str = "start"    # "start" or "end" (any value other than "start" appends)
+    
+    # Optional external style reference
+    external_style_image: Optional[str] = None  # NOTE(review): not read by StyleGuard in this file - confirm use
+    custom_style_prefix: Optional[str] = None   # when set, used verbatim as the anchor's style prefix
+
+
+class StyleGuard:
+    """
+    Style consistency guardian for video generation
+
+    Ensures all frames in a video maintain visual consistency by:
+    1. Analyzing the first frame (or reference image) to extract style
+    2. Applying style constraints to all subsequent frame prompts
+
+    The most recently extracted anchor is remembered and is used by
+    ``apply_style`` / ``apply_style_to_batch`` when none is passed in.
+
+    Example:
+        >>> style_guard = StyleGuard(llm_service)
+        >>>
+        >>> # Extract style from first frame
+        >>> anchor = await style_guard.extract_style_anchor(
+        ...     image_path="output/frame_001.png"
+        ... )
+        >>>
+        >>> # Apply to subsequent prompts
+        >>> styled_prompt = style_guard.apply_style(
+        ...     prompt="A cat sitting on a windowsill",
+        ...     style_anchor=anchor
+        ... )
+    """
+
+    def __init__(self, llm_service=None, config: Optional[StyleGuardConfig] = None):
+        """
+        Initialize StyleGuard
+
+        Args:
+            llm_service: LLM service for VLM-based style extraction
+            config: StyleGuard configuration (defaults to StyleGuardConfig())
+        """
+        self.llm_service = llm_service
+        self.config = config or StyleGuardConfig()
+        self._current_anchor: Optional[StyleAnchor] = None
+
+    async def extract_style_anchor(self, image_path: str) -> StyleAnchor:
+        """
+        Extract style anchor from reference image
+
+        A configured ``custom_style_prefix`` is used verbatim. Otherwise the
+        VLM path runs when ``use_vlm_extraction`` is enabled and an LLM
+        service was supplied, else the generic anchor is used. The result
+        is stored as the current anchor.
+
+        Args:
+            image_path: Path to reference image
+
+        Returns:
+            StyleAnchor with extracted style elements
+        """
+        logger.info(f"Extracting style anchor from: {image_path}")
+
+        if self.config.custom_style_prefix:
+            anchor = StyleAnchor(
+                style_prefix=self.config.custom_style_prefix,
+                reference_image=image_path,
+            )
+        elif self.config.use_vlm_extraction and self.llm_service:
+            anchor = await self._extract_with_vlm(image_path)
+        else:
+            anchor = self._extract_basic(image_path)
+
+        # Single exit so every branch stores the anchor and logs the result
+        self._current_anchor = anchor
+        logger.info(f"Style anchor extracted: {anchor.to_prompt_prefix()}")
+        return anchor
+
+    async def _extract_with_vlm(self, image_path: str) -> StyleAnchor:
+        """
+        Extract style using Vision Language Model
+
+        Currently a placeholder: no model is called and a fixed anchor is
+        returned. Any exception falls back to ``_extract_basic``.
+        """
+        try:
+            # TODO: Implement VLM call when vision-capable LLM is integrated
+            logger.debug("VLM style extraction: using placeholder (VLM not yet integrated)")
+            # style_prefix is set, so to_prompt_prefix() returns it verbatim
+            return StyleAnchor(
+                art_style="consistent artistic",
+                color_palette="harmonious colors",
+                lighting="balanced",
+                style_prefix="maintaining visual consistency, same artistic style as previous frames",
+                reference_image=image_path,
+            )
+        except Exception as e:
+            # Best-effort fallback: log and use the generic anchor instead of raising
+            logger.warning(f"VLM style extraction failed: {e}")
+            return self._extract_basic(image_path)
+
+    def _extract_basic(self, image_path: str) -> StyleAnchor:
+        """Basic style extraction without VLM (returns a generic anchor)"""
+        return StyleAnchor(style_prefix="consistent visual style", reference_image=image_path)
+
+    def apply_style(self, prompt: str, style_anchor: Optional[StyleAnchor] = None) -> str:
+        """
+        Apply style constraints to an image prompt
+
+        The style text is placed before the prompt when
+        ``config.prefix_position`` is "start" and after it for any other
+        value. A prompt that already carries the style text at that
+        position is returned unchanged, so re-applying does not stack
+        duplicates, and a blank prompt yields the style text alone
+        instead of a dangling ", " separator.
+
+        Args:
+            prompt: Original image prompt
+            style_anchor: Style anchor to apply (uses current if not provided)
+
+        Returns:
+            Modified prompt with style constraints, or the prompt itself
+            when there is no anchor or the anchor has no style text
+        """
+        anchor = style_anchor or self._current_anchor
+        if not anchor:
+            return prompt
+
+        style_prefix = anchor.to_prompt_prefix()
+        if not style_prefix:
+            return prompt
+
+        if not prompt or not prompt.strip():
+            return style_prefix
+
+        if self.config.prefix_position == "start":
+            if prompt == style_prefix or prompt.startswith(f"{style_prefix}, "):
+                return prompt
+            return f"{style_prefix}, {prompt}"
+
+        if prompt == style_prefix or prompt.endswith(f", {style_prefix}"):
+            return prompt
+        return f"{prompt}, {style_prefix}"
+
+    def apply_style_to_batch(
+        self,
+        prompts: List[str],
+        style_anchor: Optional[StyleAnchor] = None,
+        skip_first: bool = True,
+    ) -> List[str]:
+        """
+        Apply style constraints to a batch of prompts
+
+        Always returns a new list, so mutating the result never touches
+        the caller's ``prompts`` (including when nothing is styled).
+
+        Args:
+            prompts: List of image prompts
+            style_anchor: Style anchor to apply (uses current if not provided)
+            skip_first: Skip first prompt (used as reference)
+
+        Returns:
+            List of styled prompts, same length and order as ``prompts``
+        """
+        anchor = style_anchor or self._current_anchor
+        if not prompts or not anchor:
+            return list(prompts) if prompts else []
+
+        return [
+            prompt if skip_first and index == 0 else self.apply_style(prompt, anchor)
+            for index, prompt in enumerate(prompts)
+        ]
+
+    def get_consistency_prompt_suffix(self) -> str:
+        """
+        Get a consistency prompt suffix for LLM prompt generation
+
+        This can be added to the LLM prompt when generating image prompts
+        to encourage consistent style descriptions.
+        """
+        return (
+            "Ensure all image prompts maintain consistent visual style, "
+            "including similar color palette, art style, lighting, and composition. "
+            "Each image should feel like it belongs to the same visual narrative."
+        )
+
+    @property
+    def current_anchor(self) -> Optional[StyleAnchor]:
+        """Get the current style anchor"""
+        return self._current_anchor
+
+    def reset(self):
+        """Reset the current style anchor"""
+        self._current_anchor = None