优化自定义逻辑

This commit is contained in:
puke
2025-10-26 17:29:42 +08:00
committed by puke
parent 30cb9d9c18
commit 5acf0a53b6
18 changed files with 277 additions and 730 deletions

View File

@@ -14,11 +14,6 @@ from reelforge.services.image_prompt_generator import ImagePromptGeneratorServic
from reelforge.services.frame_composer import FrameComposerService
from reelforge.services.storyboard_processor import StoryboardProcessorService
from reelforge.services.video_generator import VideoGeneratorService
from reelforge.services.final_image_prompt import (
FinalImagePromptService,
StylePreset,
PresetValue
)
__all__ = [
"BaseService",
@@ -31,8 +26,5 @@ __all__ = [
"FrameComposerService",
"StoryboardProcessorService",
"VideoGeneratorService",
"FinalImagePromptService",
"StylePreset",
"PresetValue",
]

View File

@@ -1,196 +0,0 @@
"""
Final Image Prompt Service
Generates final complete image prompts by converting style descriptions
and combining them with base prompts in consistent order.
"""
from collections import namedtuple
from enum import Enum
from typing import Optional
from loguru import logger
# Immutable (display_name, prompt) pair stored as the value of each preset.
PresetValue = namedtuple('PresetValue', ['display_name', 'prompt'])


class StylePreset(Enum):
    """Predefined style presets for image generation.

    Each member's ``value`` is a ``PresetValue`` holding a human-readable
    ``display_name`` and the English ``prompt`` fragment that describes the
    style. The fields can be read through ``member.value`` or through the
    ``display_name`` / ``prompt`` shortcut properties defined below.
    """

    STICK_FIGURE = PresetValue(
        display_name="Stick Figure",
        prompt=(
            "Pure white background, minimalist illustration, matchstick figure style, "
            "black and white line drawing, simple clean lines"
        ),
    )

    MINIMAL = PresetValue(
        display_name="Minimal",
        prompt=(
            "Simple and clean background, minimal design, soft colors, "
            "professional look, modern aesthetic, uncluttered composition"
        ),
    )

    FUTURISTIC = PresetValue(
        display_name="Futuristic",
        prompt=(
            "Futuristic sci-fi style, high-tech city background, "
            "blue and silver tones, technology sense, soft neon lights, "
            "cyberpunk aesthetics, digital art, advanced technology"
        ),
    )

    CINEMATIC = PresetValue(
        display_name="Cinematic",
        prompt=(
            "Cinematic lighting, dramatic composition, film grain, "
            "professional photography, depth of field, movie still quality"
        ),
    )

    @property
    def display_name(self):
        """Return the human-readable label of this preset."""
        return self.value.display_name

    @property
    def prompt(self):
        """Return the English style prompt fragment of this preset."""
        return self.value.prompt
class FinalImagePromptService:
    """Build the final image prompt from a base prompt and an optional style.

    The service does two things:

    1. Resolves the style part, either from a free-form custom description
       (converted through the LLM) or from a ``StylePreset`` member.
    2. Joins the style part and the base prompt in a fixed order
       (style first, then prompt), separated by ``", "``.

    Keeping both steps here gives every caller the same conversion logic
    and the same concatenation order.

    Usage:
        # With preset style
        final = await reelforge.generate_final_image_prompt(
            prompt="A peaceful mountain landscape",
            style_preset=StylePreset.FUTURISTIC
        )

        # With custom style (any language; here Chinese for
        # "cozy coffee shop, warm tones")
        final = await reelforge.generate_final_image_prompt(
            prompt="A coffee cup on table",
            custom_style_description="温馨的咖啡馆,暖色调"
        )

        # Only prompt (no style)
        final = await reelforge.generate_final_image_prompt(
            prompt="A sunset over the ocean"
        )
    """

    def __init__(self, reelforge_core):
        """Store the core object used to reach the LLM.

        Args:
            reelforge_core: Object exposing an awaitable ``llm(prompt)``
                callable (described in the original docstring as a
                ReelForgeCore instance).
        """
        self.core = reelforge_core

    async def __call__(
        self,
        prompt: str = "",
        style_preset: Optional[StylePreset] = None,
        custom_style_description: str = ""
    ) -> str:
        """Generate the final image prompt with the style applied.

        Priority of the style source:
            1. ``custom_style_description`` (if non-blank): converted via LLM
            2. ``style_preset`` (if provided): its predefined English prompt
            3. Neither: the base prompt is returned on its own

        Concatenation:
            The style part (if any) comes first and the base prompt (if any)
            second, joined as ``"{style_part}, {prompt}"``. Empty parts are
            dropped, so no dangling separator is produced.

        Args:
            prompt: Base prompt, e.g. "A peaceful landscape" (optional).
            style_preset: Member of ``StylePreset`` (optional).
            custom_style_description: Free-form style description in any
                language (optional). Overrides ``style_preset`` when given.

        Returns:
            The combined prompt, or an empty string when every part is
            empty (a warning is logged in that case).

        Examples:
            final = await service(
                prompt="A mountain landscape",
                style_preset=StylePreset.FUTURISTIC
            )
            # Returns: "Futuristic sci-fi style..., A mountain landscape"

            final = await service(prompt="A sunset scene")
            # Returns: "A sunset scene"

            final = await service(style_preset=StylePreset.MINIMAL)
            # Returns: "Simple and clean background..."
        """
        # Normalise the inputs up front: whitespace-only text carries no
        # information, must not trigger an LLM round-trip, and must not leave
        # a dangling ", " fragment in the result. None is tolerated as blank.
        base_prompt = (prompt or "").strip()
        custom_style = (custom_style_description or "").strip()

        # Step 1: determine the style part.
        style_part = ""
        if custom_style:
            # Priority 1: custom description, converted via LLM.
            logger.debug(f"Converting custom style description: {custom_style}")
            style_part = await self._convert_custom_style(custom_style)
        elif style_preset:
            # Priority 2: preset style, taken from the enum value.
            style_part = style_preset.value.prompt
            logger.debug(f"Using preset style: {style_preset.name}")

        # Step 2: join the non-empty parts, style first.
        final_prompt = ", ".join(
            part for part in (style_part, base_prompt) if part
        )

        if final_prompt:
            logger.debug(f"Final image prompt: {final_prompt}")
        else:
            logger.warning("Generated empty image prompt")

        return final_prompt

    async def _convert_custom_style(self, description: str) -> str:
        """Convert a custom style description into an image prompt via LLM.

        Args:
            description: The user's style description in any language.

        Returns:
            The LLM output collapsed onto a single line with single spaces,
            or an empty string when the LLM returned nothing.
        """
        # Imported here, as in the original code, rather than at module level
        # (presumably to avoid an import cycle; verify before hoisting).
        from reelforge.prompts import build_style_conversion_prompt

        # Build the LLM request from the project template and run it.
        llm_prompt = build_style_conversion_prompt(description)
        raw_style = await self.core.llm(llm_prompt)

        # Collapse newlines and runs of whitespace into single spaces.
        # ``or ""`` guards against an LLM call that yields None, which would
        # otherwise raise AttributeError here.
        style_prompt = " ".join((raw_style or "").split())

        if not style_prompt:
            logger.warning("Style conversion returned an empty result")

        logger.debug(f"Converted custom style to: {style_prompt}")
        return style_prompt

View File

@@ -28,8 +28,6 @@ class ImagePromptGeneratorService:
self,
narrations: List[str],
config: StoryboardConfig,
image_style_preset: str = None,
image_style_description: str = None,
batch_size: int = 10,
max_retries: int = 3,
progress_callback: Optional[Callable] = None
@@ -40,14 +38,12 @@ class ImagePromptGeneratorService:
Args:
narrations: List of narrations
config: Storyboard configuration
image_style_preset: Preset style name (e.g., "minimal", "futuristic")
image_style_description: Custom style description (overrides preset)
batch_size: Max narrations per batch (default: 10)
max_retries: Max retry attempts per batch (default: 3)
progress_callback: Optional callback(completed, total, message) for progress updates
Returns:
List of image prompts with style applied
List of image prompts with prompt_prefix applied (from config)
Raises:
ValueError: If batch fails after max_retries
@@ -117,28 +113,20 @@ class ImagePromptGeneratorService:
base_prompts = all_base_prompts
logger.info(f"✅ All batches completed. Total prompts: {len(base_prompts)}")
# 5. Apply style to each prompt using FinalImagePromptService
from reelforge.services.final_image_prompt import StylePreset
# 5. Apply prompt prefix to each prompt
from reelforge.utils.prompt_helper import build_image_prompt
# Convert style preset name to enum if provided
style_preset_enum = None
if image_style_preset:
try:
style_preset_enum = StylePreset[image_style_preset.upper()]
except KeyError:
logger.warning(f"Unknown style preset: {image_style_preset}")
# Get prompt prefix from config
image_config = self.core.config.get("image", {})
prompt_prefix = image_config.get("prompt_prefix", "")
# Apply style to each base prompt
# Apply prefix to each base prompt
final_prompts = []
for base_prompt in base_prompts:
final_prompt = await self.core.generate_final_image_prompt(
prompt=base_prompt,
style_preset=style_preset_enum,
custom_style_description=image_style_description or ""
)
final_prompt = build_image_prompt(base_prompt, prompt_prefix)
final_prompts.append(final_prompt)
logger.info(f"Generated {len(final_prompts)} final image prompts with style applied")
logger.info(f"Generated {len(final_prompts)} final image prompts with prefix applied")
return final_prompts
async def _generate_batch_prompts(
@@ -170,9 +158,7 @@ class ImagePromptGeneratorService:
prompt = build_image_prompt_prompt(
narrations=batch_narrations,
min_words=config.min_image_prompt_words,
max_words=config.max_image_prompt_words,
image_style_preset=None,
image_style_description=None
max_words=config.max_image_prompt_words
)
# 2. Call LLM

View File

@@ -140,9 +140,10 @@ class StoryboardProcessorService:
"""Step 2: Generate image using ComfyKit"""
logger.debug(f" 2/4: Generating image for frame {frame.index}...")
# Call Image generation (using default preset)
# Call Image generation (with optional preset)
image_url = await self.core.image(
prompt=frame.image_prompt,
preset=config.image_preset, # Pass preset from config (None = use default)
width=config.image_width,
height=config.image_height
)
@@ -201,19 +202,17 @@ class StoryboardProcessorService:
from pathlib import Path
# Resolve template path
template_name = config.frame_template
if not template_name.endswith('.html'):
template_name = f"{template_name}.html"
template_filename = config.frame_template
# Try templates/ directory first
template_path = Path(f"templates/{template_name}")
template_path = Path(f"templates/{template_filename}")
if not template_path.exists():
# Try as absolute path
template_path = Path(template_name)
# Try as absolute/relative path
template_path = Path(template_filename)
if not template_path.exists():
raise FileNotFoundError(
f"Template not found: {template_name}. "
f"Available templates: classic, modern, minimal"
f"Template not found: {template_filename}. "
f"Built-in templates: default.html, modern.html, neon.html"
)
# Get storyboard for content metadata

View File

@@ -101,7 +101,7 @@ class VideoService:
- "filter": Slower but handles different formats
bgm_path: Background music file path (optional)
- None: No BGM
- "default" or "happy": Use built-in BGM from bgm/ folder
- Filename (e.g., "default.mp3", "happy.mp3"): Use built-in BGM from bgm/ folder
- Custom path: Use custom BGM file
bgm_volume: BGM volume level (0.0-1.0), default 0.2
bgm_mode: BGM playback mode
@@ -504,11 +504,11 @@ class VideoService:
def _resolve_bgm_path(self, bgm_path: str) -> str:
"""
Resolve BGM path (preset name or custom path)
Resolve BGM path (filename or custom path)
Args:
bgm_path: Can be:
- Preset name (e.g., "default", "happy"): auto-resolved from bgm/ directory
- Filename with extension (e.g., "default.mp3", "happy.mp3"): auto-resolved from bgm/ directory
- Custom file path (absolute or relative)
Returns:
@@ -521,8 +521,8 @@ class VideoService:
if os.path.exists(bgm_path):
return os.path.abspath(bgm_path)
# Try as preset in bgm/ directory
preset_path = f"bgm/{bgm_path}.mp3"
# Try as filename in bgm/ directory
preset_path = f"bgm/{bgm_path}"
if os.path.exists(preset_path):
return os.path.abspath(preset_path)
@@ -532,9 +532,9 @@ class VideoService:
os.path.abspath(preset_path)
]
# List available presets
# List available BGM files
available_bgm = self._list_available_bgm()
available_msg = f"\n Available presets: {', '.join(available_bgm)}" if available_bgm else ""
available_msg = f"\n Available BGM files: {', '.join(available_bgm)}" if available_bgm else ""
raise FileNotFoundError(
f"BGM file not found: '{bgm_path}'\n"
@@ -546,10 +546,10 @@ class VideoService:
def _list_available_bgm(self) -> list[str]:
"""
List available preset BGM files
List available BGM files in bgm/ directory
Returns:
List of preset names (without .mp3 extension)
List of filenames (with extensions)
"""
bgm_dir = "bgm"
if not os.path.exists(bgm_dir):
@@ -557,7 +557,9 @@ class VideoService:
try:
files = os.listdir(bgm_dir)
return [f[:-4] for f in files if f.endswith('.mp3')]
# Return all audio files (mp3, wav, ogg, flac, etc.)
audio_extensions = ('.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac')
return [f for f in files if f.lower().endswith(audio_extensions)]
except Exception:
return []

View File

@@ -66,8 +66,7 @@ class VideoGeneratorService:
# === Image Parameters ===
image_width: int = 1024,
image_height: int = 1024,
image_style_preset: Optional[str] = None,
image_style_description: Optional[str] = None,
image_preset: Optional[str] = None,
# === Video Parameters ===
video_width: int = 1080,
@@ -77,6 +76,9 @@ class VideoGeneratorService:
# === Frame Template ===
frame_template: Optional[str] = None,
# === Image Style ===
prompt_prefix: Optional[str] = None,
# === BGM Parameters ===
bgm_path: Optional[str] = None,
bgm_volume: float = 0.2,
@@ -119,17 +121,19 @@ class VideoGeneratorService:
image_width: Generated image width (default 1024)
image_height: Generated image height (default 1024)
image_style_preset: Preset style name (e.g., "minimal", "concept", "cinematic")
image_style_description: Custom style description (overrides preset)
image_preset: Image workflow preset (e.g., "flux", "sdxl", None = use default)
video_width: Final video width (default 1080)
video_height: Final video height (default 1920)
video_fps: Video frame rate (default 30)
frame_template: HTML template name or path (None = use PIL)
e.g., "classic", "modern", "minimal", or custom path
frame_template: HTML template filename or path (None = use PIL)
e.g., "default.html", "modern.html", "neon.html", or custom path
bgm_path: BGM path ("default", "happy", custom path, or None)
prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
e.g., "anime style, vibrant colors" or "" for no prefix
bgm_path: BGM path (filename like "default.mp3", custom path, or None)
bgm_volume: BGM volume 0.0-1.0 (default 0.2)
bgm_mode: BGM mode "once" or "loop" (default "loop")
@@ -211,6 +215,7 @@ class VideoGeneratorService:
voice_id=voice_id,
image_width=image_width,
image_height=image_height,
image_preset=image_preset,
frame_template=frame_template
)
@@ -248,25 +253,36 @@ class VideoGeneratorService:
# Step 2: Generate image prompts
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
# Override prompt_prefix if provided (temporarily modify config)
original_prefix = None
if prompt_prefix is not None:
image_config = self.core.config.get("image", {})
original_prefix = image_config.get("prompt_prefix")
image_config["prompt_prefix"] = prompt_prefix
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
image_style_preset=image_style_preset,
image_style_description=image_style_description,
progress_callback=image_prompt_progress
)
try:
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
progress_callback=image_prompt_progress
)
finally:
# Restore original prompt_prefix
if original_prefix is not None:
image_config["prompt_prefix"] = original_prefix
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
# Step 3: Create frames