# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
StyleGuard - Visual style consistency engine

Ensures consistent visual style across all frames in a video by:
1. Extracting style anchor from the first generated frame
2. Injecting style constraints into subsequent frame prompts
3. (Optional) Using style reference techniques like IP-Adapter
"""

from dataclasses import asdict, dataclass, field
from typing import List, Optional

from loguru import logger


@dataclass
class StyleAnchor:
    """Style anchor extracted from a reference frame.

    Holds the individual style descriptors plus an optional pre-built
    ``style_prefix``. When ``style_prefix`` is set it takes precedence over
    the individual descriptors in :meth:`to_prompt_prefix`.
    """

    # Core style elements
    color_palette: str = ""  # e.g., "warm earth tones", "cool blues"
    art_style: str = ""  # e.g., "minimalist", "realistic", "anime"
    composition_style: str = ""  # e.g., "centered", "rule of thirds"
    texture: str = ""  # e.g., "smooth", "grainy", "watercolor"
    lighting: str = ""  # e.g., "soft ambient", "dramatic shadows"

    # Combined style prefix for prompts
    style_prefix: str = ""

    # Reference image path (for IP-Adapter style techniques)
    reference_image: Optional[str] = None

    def to_prompt_prefix(self) -> str:
        """Generate a style prefix for image prompts.

        Returns:
            ``style_prefix`` verbatim when it is non-empty. Otherwise the
            non-empty descriptors joined with ``", "`` in the order art
            style, color palette, lighting, texture. An empty string when
            nothing is set. ``composition_style`` is not part of the prefix.
        """
        if self.style_prefix:
            return self.style_prefix

        elements: List[str] = []
        if self.art_style:
            elements.append(f"{self.art_style} style")
        if self.color_palette:
            elements.append(self.color_palette)
        if self.lighting:
            elements.append(f"{self.lighting} lighting")
        if self.texture:
            elements.append(f"{self.texture} texture")

        # Joining an empty list already yields "", so no extra guard is needed.
        return ", ".join(elements)

    def to_dict(self) -> dict:
        """Return all fields as a plain dict keyed by field name.

        Keys appear in field declaration order. ``asdict`` is used so that
        the mapping cannot drift out of sync with the declared fields.
        """
        return asdict(self)


@dataclass
class StyleGuardConfig:
    """Configuration for StyleGuard.

    NOTE: only ``use_vlm_extraction``, ``prefix_position`` and
    ``custom_style_prefix`` are read by the ``StyleGuard`` methods in this
    module; the remaining fields are stored but not consulted here.
    """

    # Extraction settings
    extract_from_first_frame: bool = True
    use_vlm_extraction: bool = True

    # Application settings
    apply_to_all_frames: bool = True
    # "start" or "end"; any value other than "start" is treated as "end"
    prefix_position: str = "start"

    # Optional external style reference
    external_style_image: Optional[str] = None
    custom_style_prefix: Optional[str] = None


class StyleGuard:
    """
    Style consistency guardian for video generation

    Ensures all frames in a video maintain visual consistency by:
    1. Analyzing the first frame (or reference image) to extract style
    2. Applying style constraints to all subsequent frame prompts

    Example:
        >>> style_guard = StyleGuard(llm_service)
        >>>
        >>> # Extract style from first frame
        >>> anchor = await style_guard.extract_style_anchor(
        ...     image_path="output/frame_001.png"
        ... )
        >>>
        >>> # Apply to subsequent prompts
        >>> styled_prompt = style_guard.apply_style(
        ...     prompt="A cat sitting on a windowsill",
        ...     style_anchor=anchor
        ... )
    """

    def __init__(
        self,
        llm_service=None,
        config: Optional[StyleGuardConfig] = None
    ):
        """
        Initialize StyleGuard

        Args:
            llm_service: LLM service for VLM-based style extraction
            config: StyleGuard configuration; a default
                ``StyleGuardConfig()`` is created when omitted
        """
        self.llm_service = llm_service
        self.config = config or StyleGuardConfig()
        self._current_anchor: Optional[StyleAnchor] = None

    async def extract_style_anchor(
        self,
        image_path: str,
    ) -> StyleAnchor:
        """
        Extract style anchor from reference image

        The anchor source is chosen in this order:
        1. ``config.custom_style_prefix`` when set (no extraction is run)
        2. VLM extraction when ``config.use_vlm_extraction`` is true and an
           ``llm_service`` was supplied
        3. Basic (generic) extraction otherwise

        The resulting anchor is stored as the current anchor.

        Args:
            image_path: Path to reference image

        Returns:
            StyleAnchor with extracted style elements
        """
        logger.info(f"Extracting style anchor from: {image_path}")

        if self.config.custom_style_prefix:
            # Use custom style prefix if provided
            anchor = StyleAnchor(
                style_prefix=self.config.custom_style_prefix,
                reference_image=image_path,
            )
        elif self.config.use_vlm_extraction and self.llm_service:
            anchor = await self._extract_with_vlm(image_path)
        else:
            anchor = self._extract_basic(image_path)

        # Every path stores and logs the anchor the same way.
        self._current_anchor = anchor
        logger.info(f"Style anchor extracted: {anchor.to_prompt_prefix()}")
        return anchor

    async def _extract_with_vlm(self, image_path: str) -> StyleAnchor:
        """Extract style using Vision Language Model

        Currently returns a fixed placeholder anchor; no VLM call is made.
        Falls back to :meth:`_extract_basic` if anything in the body raises.
        """
        try:
            # TODO: Implement VLM call when vision-capable LLM is integrated
            # For now, return a placeholder
            logger.debug("VLM style extraction: using placeholder (VLM not yet integrated)")

            # Placeholder extraction based on common styles
            return StyleAnchor(
                art_style="consistent artistic",
                color_palette="harmonious colors",
                lighting="balanced",
                style_prefix="maintaining visual consistency, same artistic style as previous frames",
                reference_image=image_path,
            )
        except Exception as e:
            # Deliberate best-effort: extraction failure must not abort the
            # caller, so degrade to the generic anchor instead.
            logger.warning(f"VLM style extraction failed: {e}")
            return self._extract_basic(image_path)

    def _extract_basic(self, image_path: str) -> StyleAnchor:
        """Basic style extraction without VLM

        Does not inspect the image; returns a generic anchor that only
        records ``image_path`` as the reference image.
        """
        # Return generic style anchor
        return StyleAnchor(
            style_prefix="consistent visual style",
            reference_image=image_path,
        )

    def apply_style(
        self,
        prompt: str,
        style_anchor: Optional[StyleAnchor] = None,
    ) -> str:
        """
        Apply style constraints to an image prompt

        Args:
            prompt: Original image prompt
            style_anchor: Style anchor to apply (uses current if not provided)

        Returns:
            Modified prompt with style constraints. The prompt is returned
            unchanged when no anchor is available or the anchor produces an
            empty prefix. For an empty or whitespace-only prompt the style
            prefix alone is returned.
        """
        anchor = style_anchor if style_anchor is not None else self._current_anchor
        if anchor is None:
            return prompt

        style_prefix = anchor.to_prompt_prefix()
        if not style_prefix:
            return prompt

        # Avoid a dangling ", " separator when there is nothing to join with.
        if not prompt or not prompt.strip():
            return style_prefix

        if self.config.prefix_position == "start":
            return f"{style_prefix}, {prompt}"
        # Any other value is treated as "end".
        return f"{prompt}, {style_prefix}"

    def apply_style_to_batch(
        self,
        prompts: List[str],
        style_anchor: Optional[StyleAnchor] = None,
        skip_first: bool = True,
    ) -> List[str]:
        """
        Apply style constraints to a batch of prompts

        Args:
            prompts: List of image prompts
            style_anchor: Style anchor to apply (uses current if not provided)
            skip_first: Skip first prompt (used as reference)

        Returns:
            New list of styled prompts. The input list is never returned
            itself, so the caller may mutate the result freely.
        """
        anchor = style_anchor if style_anchor is not None else self._current_anchor
        if not prompts or anchor is None:
            # Copy so every code path hands back a fresh list.
            return list(prompts) if prompts else []

        return [
            prompt if (skip_first and index == 0) else self.apply_style(prompt, anchor)
            for index, prompt in enumerate(prompts)
        ]

    def get_consistency_prompt_suffix(self) -> str:
        """
        Get a consistency prompt suffix for LLM prompt generation

        This can be added to the LLM prompt when generating image prompts
        to encourage consistent style descriptions.
        """
        return (
            "Ensure all image prompts maintain consistent visual style, "
            "including similar color palette, art style, lighting, and composition. "
            "Each image should feel like it belongs to the same visual narrative."
        )

    @property
    def current_anchor(self) -> Optional[StyleAnchor]:
        """Get the current style anchor"""
        return self._current_anchor

    def reset(self):
        """Reset the current style anchor"""
        self._current_anchor = None