feat: Add comprehensive timeline editor with frame editing and regeneration capabilities
This commit is contained in:
276
pixelle_video/services/quality/style_guard.py
Normal file
276
pixelle_video/services/quality/style_guard.py
Normal file
@@ -0,0 +1,276 @@
|
||||
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
||||
|
||||
"""
|
||||
StyleGuard - Visual style consistency engine

Ensures a consistent visual style across all frames in a video by:
1. Extracting a style anchor from the first generated frame
2. Injecting style constraints into subsequent frame prompts
3. (Optional) Using style reference techniques like IP-Adapter
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
@dataclass
class StyleAnchor:
    """Style anchor extracted from a reference frame.

    Holds individual style descriptors plus an optional pre-built
    ``style_prefix``. When ``style_prefix`` is non-empty it takes precedence
    over the individual descriptors in :meth:`to_prompt_prefix`.
    """

    # Core style elements
    color_palette: str = ""  # e.g., "warm earth tones", "cool blues"
    art_style: str = ""  # e.g., "minimalist", "realistic", "anime"
    composition_style: str = ""  # e.g., "centered", "rule of thirds"
    texture: str = ""  # e.g., "smooth", "grainy", "watercolor"
    lighting: str = ""  # e.g., "soft ambient", "dramatic shadows"

    # Combined style prefix for prompts
    style_prefix: str = ""

    # Reference image path (for IP-Adapter style techniques)
    reference_image: Optional[str] = None

    def to_prompt_prefix(self) -> str:
        """Generate a style prefix for image prompts.

        Returns:
            ``style_prefix`` verbatim when it is set. Otherwise a
            comma-separated string built, in this order, from ``art_style``,
            ``color_palette``, ``lighting`` and ``texture`` (empty fields are
            skipped). Returns ``""`` when none of those fields is set.
        """
        if self.style_prefix:
            return self.style_prefix

        # NOTE: composition_style is stored on the anchor but is not part of
        # the generated prefix.
        elements = []
        if self.art_style:
            elements.append(f"{self.art_style} style")
        if self.color_palette:
            elements.append(f"{self.color_palette}")
        if self.lighting:
            elements.append(f"{self.lighting} lighting")
        if self.texture:
            elements.append(f"{self.texture} texture")

        # Joining an empty list already yields "", so no extra guard is needed.
        return ", ".join(elements)

    def to_dict(self) -> dict:
        """Return all anchor fields as a plain dictionary keyed by field name."""
        return {
            "color_palette": self.color_palette,
            "art_style": self.art_style,
            "composition_style": self.composition_style,
            "texture": self.texture,
            "lighting": self.lighting,
            "style_prefix": self.style_prefix,
            "reference_image": self.reference_image,
        }
|
||||
|
||||
|
||||
@dataclass
class StyleGuardConfig:
    """Configuration for StyleGuard.

    Only ``use_vlm_extraction``, ``prefix_position`` and
    ``custom_style_prefix`` are read by the StyleGuard methods in this
    module; the remaining options are declared here but not consulted by
    that class as written.
    """

    # Extraction settings
    extract_from_first_frame: bool = True
    # When True (and an LLM service is available) StyleGuard takes its
    # VLM-based extraction path instead of the basic one.
    use_vlm_extraction: bool = True

    # Application settings
    apply_to_all_frames: bool = True
    # "start" or "end". StyleGuard prepends the style text only for the exact
    # value "start"; any other value results in the style being appended.
    prefix_position: str = "start"

    # Optional external style reference
    external_style_image: Optional[str] = None
    # When set, StyleGuard uses this text as the style prefix and skips
    # extraction entirely.
    custom_style_prefix: Optional[str] = None
|
||||
|
||||
|
||||
class StyleGuard:
    """
    Style consistency guardian for video generation

    Ensures all frames in a video maintain visual consistency by:
    1. Analyzing the first frame (or reference image) to extract style
    2. Applying style constraints to all subsequent frame prompts

    Example:
        >>> style_guard = StyleGuard(llm_service)
        >>>
        >>> # Extract style from first frame
        >>> anchor = await style_guard.extract_style_anchor(
        ...     image_path="output/frame_001.png"
        ... )
        >>>
        >>> # Apply to subsequent prompts
        >>> styled_prompt = style_guard.apply_style(
        ...     prompt="A cat sitting on a windowsill",
        ...     style_anchor=anchor
        ... )
    """

    def __init__(
        self,
        llm_service=None,
        config: Optional[StyleGuardConfig] = None
    ):
        """
        Initialize StyleGuard

        Args:
            llm_service: LLM service for VLM-based style extraction
            config: StyleGuard configuration (a default StyleGuardConfig is
                created when omitted)
        """
        self.llm_service = llm_service
        self.config = config or StyleGuardConfig()
        # Most recently extracted anchor; used as the fallback by apply_style
        # and apply_style_to_batch when no anchor is passed explicitly.
        self._current_anchor: Optional[StyleAnchor] = None

    async def extract_style_anchor(
        self,
        image_path: str,
    ) -> StyleAnchor:
        """
        Extract style anchor from reference image

        The resulting anchor is also stored as the current anchor.

        Args:
            image_path: Path to reference image

        Returns:
            StyleAnchor with extracted style elements
        """
        logger.info(f"Extracting style anchor from: {image_path}")

        if self.config.custom_style_prefix:
            # A custom prefix overrides any extraction: wrap it in an anchor
            # and return immediately.
            anchor = StyleAnchor(
                style_prefix=self.config.custom_style_prefix,
                reference_image=image_path
            )
            self._current_anchor = anchor
            return anchor

        # The VLM path needs both the config flag and an LLM service.
        if self.config.use_vlm_extraction and self.llm_service:
            anchor = await self._extract_with_vlm(image_path)
        else:
            anchor = self._extract_basic(image_path)

        self._current_anchor = anchor
        logger.info(f"Style anchor extracted: {anchor.to_prompt_prefix()}")

        return anchor

    async def _extract_with_vlm(self, image_path: str) -> StyleAnchor:
        """Extract style using Vision Language Model

        Currently a placeholder: no model is called and a fixed generic
        anchor is returned. Falls back to basic extraction on any error.
        """
        try:
            # TODO: Implement VLM call when vision-capable LLM is integrated
            # For now, return a placeholder
            logger.debug("VLM style extraction: using placeholder (VLM not yet integrated)")

            # Placeholder extraction based on common styles
            return StyleAnchor(
                art_style="consistent artistic",
                color_palette="harmonious colors",
                lighting="balanced",
                style_prefix="maintaining visual consistency, same artistic style as previous frames",
                reference_image=image_path,
            )

        except Exception as e:
            # Best-effort: style extraction failing should not abort the
            # caller, so degrade to the generic anchor.
            logger.warning(f"VLM style extraction failed: {e}")
            return self._extract_basic(image_path)

    def _extract_basic(self, image_path: str) -> StyleAnchor:
        """Basic style extraction without VLM"""
        # Return generic style anchor (the image itself is not inspected)
        return StyleAnchor(
            style_prefix="consistent visual style",
            reference_image=image_path,
        )

    def apply_style(
        self,
        prompt: str,
        style_anchor: Optional[StyleAnchor] = None,
    ) -> str:
        """
        Apply style constraints to an image prompt

        Args:
            prompt: Original image prompt
            style_anchor: Style anchor to apply (uses current if not provided)

        Returns:
            Modified prompt with style constraints. The prompt is returned
            unchanged when no anchor is available or the anchor yields an
            empty prefix.
        """
        anchor = style_anchor if style_anchor is not None else self._current_anchor
        if anchor is None:
            return prompt

        style_prefix = anchor.to_prompt_prefix()
        if not style_prefix:
            return prompt

        # Only the exact value "start" prepends; anything else appends.
        if self.config.prefix_position == "start":
            return f"{style_prefix}, {prompt}"
        return f"{prompt}, {style_prefix}"

    def apply_style_to_batch(
        self,
        prompts: List[str],
        style_anchor: Optional[StyleAnchor] = None,
        skip_first: bool = True,
    ) -> List[str]:
        """
        Apply style constraints to a batch of prompts

        Args:
            prompts: List of image prompts
            style_anchor: Style anchor to apply (uses current if not provided)
            skip_first: Skip first prompt (used as reference)

        Returns:
            List of styled prompts. Always a new list, so mutating the result
            never affects the caller's input list.
        """
        if not prompts:
            return list(prompts or [])

        anchor = style_anchor if style_anchor is not None else self._current_anchor
        if anchor is None:
            # Nothing to apply; still hand back a copy for consistency with
            # the styled path.
            return list(prompts)

        return [
            prompt if skip_first and index == 0 else self.apply_style(prompt, anchor)
            for index, prompt in enumerate(prompts)
        ]

    def get_consistency_prompt_suffix(self) -> str:
        """
        Get a consistency prompt suffix for LLM prompt generation

        This can be added to the LLM prompt when generating image prompts
        to encourage consistent style descriptions.
        """
        return (
            "Ensure all image prompts maintain consistent visual style, "
            "including similar color palette, art style, lighting, and composition. "
            "Each image should feel like it belongs to the same visual narrative."
        )

    @property
    def current_anchor(self) -> Optional[StyleAnchor]:
        """Get the current style anchor"""
        return self._current_anchor

    def reset(self) -> None:
        """Reset the current style anchor"""
        self._current_anchor = None
|
||||
Reference in New Issue
Block a user