优化自定义逻辑

This commit is contained in:
puke
2025-10-26 17:29:42 +08:00
committed by puke
parent 30cb9d9c18
commit 5acf0a53b6
18 changed files with 277 additions and 730 deletions

View File

@@ -56,7 +56,12 @@
"voice.previewing": "Generating voice preview...",
"voice.preview_failed": "Preview failed: {error}",
"style.title": "🎨 Illustration Style",
"style.title": "🎨 Image Settings",
"style.workflow": "ComfyUI Workflow",
"style.workflow_help": "💡 Custom: Place image_xxx.json in workflows/ folder",
"style.prompt_prefix": "Style Prompt Prefix",
"style.prompt_prefix_placeholder": "Enter style prefix (leave empty for config default)",
"style.prompt_prefix_help": "This text will be automatically added before all image generation prompts. To permanently change, edit config.yaml",
"style.custom": "Custom",
"style.description": "Style Description",
"style.description_placeholder": "Describe the illustration style you want (any language)...",
@@ -67,9 +72,10 @@
"style.generated_prompt": "Generated prompt: {prompt}",
"template.title": "📐 Storyboard Template",
"template.classic": "Classic",
"template.default": "Default",
"template.modern": "Modern",
"template.neon": "Neon",
"template.custom_help": "💡 Custom: Place .html files in templates/ folder",
"video.title": "🎬 Video Settings",
"video.frames": "Scenes",
@@ -81,6 +87,7 @@
"bgm.none": "🔇 No BGM",
"bgm.preview": "▶ Preview Music",
"bgm.preview_failed": "❌ Music file not found: {file}",
"bgm.custom_help": "💡 Custom: Place audio files in bgm/ folder",
"btn.generate": "🎬 Generate Video",
"btn.save_config": "💾 Save Configuration",

View File

@@ -56,7 +56,12 @@
"voice.previewing": "正在生成语音预览...",
"voice.preview_failed": "预览失败:{error}",
"style.title": "🎨 插图风格",
"style.title": "🎨 插图设置",
"style.workflow": "生图工作流",
"style.workflow_help": "💡 自定义:将 image_xxx.json 放入 workflows/ 文件夹",
"style.prompt_prefix": "风格提示词前缀",
"style.prompt_prefix_placeholder": "输入风格前缀(留空则使用配置文件默认值)",
"style.prompt_prefix_help": "此文本将自动添加到所有图像生成提示词之前。要永久修改,请编辑 config.yaml",
"style.custom": "自定义",
"style.description": "风格描述",
"style.description_placeholder": "描述您想要的插图风格(任何语言)...",
@@ -67,9 +72,10 @@
"style.generated_prompt": "生成的提示词:{prompt}",
"template.title": "📐 分镜模板",
"template.classic": "Classic",
"template.modern": "Modern",
"template.neon": "Neon",
"template.default": "默认",
"template.modern": "现代",
"template.neon": "霓虹",
"template.custom_help": "💡 自定义:将 .html 文件放入 templates/ 文件夹",
"video.title": "🎬 视频设置",
"video.frames": "分镜数",
@@ -81,6 +87,7 @@
"bgm.none": "🔇 无背景音乐",
"bgm.preview": "▶ 试听音乐",
"bgm.preview_failed": "❌ 音乐文件未找到:{file}",
"bgm.custom_help": "💡 自定义:将音频文件放入 bgm/ 文件夹",
"btn.generate": "🎬 生成视频",
"btn.save_config": "💾 保存配置",

View File

@@ -27,6 +27,7 @@ class StoryboardConfig:
# Image parameters
image_width: int = 1024
image_height: int = 1024
image_preset: Optional[str] = None # Image workflow preset (None = use default)
# Frame template
frame_template: Optional[str] = None # HTML template name or path (None = use PIL)

View File

@@ -50,7 +50,6 @@ IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位
## 图像提示词规范
- 语言:**必须使用英文**(用于 AI 图像生成模型)
- 画面风格:{style_description}
- 描述结构scene + character action + emotion + symbolic elements
- 描述长度:确保描述清晰完整且富有创意(建议 50-100 个英文单词)
@@ -100,8 +99,7 @@ IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位
5. **图像提示词必须使用英文**for AI image generation models
6. 图像提示词必须准确反映对应旁白的具体内容和情感
7. 每个图像都要有创意性和视觉冲击力,避免千篇一律
8. 严格遵守上述指定的画面风格要求({style_description}
9. 确保视觉画面能增强文案的说服力和观众的理解度
8. 确保视觉画面能增强文案的说服力和观众的理解度
现在,请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**图像提示词。只输出JSON不要其他内容。
"""
@@ -110,54 +108,24 @@ IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位
def build_image_prompt_prompt(
narrations: List[str],
min_words: int,
max_words: int,
image_style_preset: Optional[str] = None,
image_style_description: Optional[str] = None
max_words: int
) -> str:
"""
Build image prompt generation prompt
Note: Style/prefix will be applied later via prompt_prefix in config.
Args:
narrations: List of narrations
min_words: Minimum word count
max_words: Maximum word count
image_style_preset: Preset style name (e.g., "minimal", "stick_figure", "concept")
Available presets: see IMAGE_STYLE_PRESETS
image_style_description: Custom style description (overrides preset if provided)
Example: "warm scenes, soft lighting, professional photography"
Returns:
Formatted prompt
Formatted prompt for LLM
Examples:
# Use preset style
>>> build_image_prompt_prompt(narrations, 50, 100, image_style_preset="minimal")
# Use custom style
>>> build_image_prompt_prompt(
... narrations, 50, 100,
... image_style_description="cyberpunk style, neon colors, futuristic"
... )
# Use default style (stick_figure)
Example:
>>> build_image_prompt_prompt(narrations, 50, 100)
"""
# Determine style description
if image_style_description:
# Custom description takes priority
style_desc = image_style_description
elif image_style_preset:
# Use preset
if image_style_preset not in IMAGE_STYLE_PRESETS:
raise ValueError(
f"Unknown preset '{image_style_preset}'. "
f"Available presets: {list(IMAGE_STYLE_PRESETS.keys())}"
)
style_desc = IMAGE_STYLE_PRESETS[image_style_preset]["description"]
else:
# Use default preset
style_desc = IMAGE_STYLE_PRESETS[DEFAULT_IMAGE_STYLE]["description"]
narrations_json = json.dumps(
{"narrations": narrations},
ensure_ascii=False,
@@ -168,7 +136,6 @@ def build_image_prompt_prompt(
narrations_json=narrations_json,
narrations_count=len(narrations),
min_words=min_words,
max_words=max_words,
style_description=style_desc
max_words=max_words
)

View File

@@ -66,7 +66,6 @@ class ReelForgeCore:
# Content generation services
self.narration_generator = None
self.image_prompt_generator = None
self.generate_final_image_prompt = None
# Frame processing services
self.frame_composer = None
@@ -111,11 +110,9 @@ class ReelForgeCore:
# 5. Initialize content generation services
from reelforge.services.narration_generator import NarrationGeneratorService
from reelforge.services.image_prompt_generator import ImagePromptGeneratorService
from reelforge.services.final_image_prompt import FinalImagePromptService
self.narration_generator = NarrationGeneratorService(self)
self.image_prompt_generator = ImagePromptGeneratorService(self)
self.generate_final_image_prompt = FinalImagePromptService(self)
# 6. Initialize frame processing services
from reelforge.services.frame_composer import FrameComposerService

View File

@@ -14,11 +14,6 @@ from reelforge.services.image_prompt_generator import ImagePromptGeneratorServic
from reelforge.services.frame_composer import FrameComposerService
from reelforge.services.storyboard_processor import StoryboardProcessorService
from reelforge.services.video_generator import VideoGeneratorService
from reelforge.services.final_image_prompt import (
FinalImagePromptService,
StylePreset,
PresetValue
)
__all__ = [
"BaseService",
@@ -31,8 +26,5 @@ __all__ = [
"FrameComposerService",
"StoryboardProcessorService",
"VideoGeneratorService",
"FinalImagePromptService",
"StylePreset",
"PresetValue",
]

View File

@@ -1,196 +0,0 @@
"""
Final Image Prompt Service
Generates final complete image prompts by converting style descriptions
and combining them with base prompts in consistent order.
"""
from collections import namedtuple
from enum import Enum
from typing import Optional
from loguru import logger
# Record type stored as the value of each StylePreset member:
#   display_name - label for the preset (presumably shown in the UI; confirm against callers)
#   prompt       - style text that FinalImagePromptService places before the base prompt
PresetValue = namedtuple('PresetValue', ['display_name', 'prompt'])
class StylePreset(Enum):
    """
    Built-in image styles.

    Each member's value is a PresetValue(display_name, prompt); the prompt
    is a ready-to-use English style description.
    """

    # Black-and-white matchstick drawings on a white canvas
    STICK_FIGURE = PresetValue(
        "Stick Figure",
        "Pure white background, minimalist illustration, "
        "matchstick figure style, black and white line drawing, "
        "simple clean lines",
    )

    # Clean, uncluttered, soft-colored look
    MINIMAL = PresetValue(
        "Minimal",
        "Simple and clean background, minimal design, "
        "soft colors, professional look, modern aesthetic, "
        "uncluttered composition",
    )

    # Sci-fi / cyberpunk look
    FUTURISTIC = PresetValue(
        "Futuristic",
        "Futuristic sci-fi style, high-tech city background, blue and silver tones, "
        "technology sense, soft neon lights, cyberpunk aesthetics, "
        "digital art, advanced technology",
    )

    # Movie-still look
    CINEMATIC = PresetValue(
        "Cinematic",
        "Cinematic lighting, dramatic composition, "
        "film grain, professional photography, "
        "depth of field, movie still quality",
    )
class FinalImagePromptService:
    """
    Build the final image prompt from an optional style and a base prompt.

    The style text is resolved first (a custom description is converted
    through the LLM; otherwise a preset's stored prompt is used), then it
    is placed before the base prompt and the two are joined with ", ".
    Keeping this in one callable gives every caller the same style
    resolution and the same concatenation order.

    Usage:
        # Preset style
        final = await reelforge.generate_final_image_prompt(
            prompt="A peaceful mountain landscape",
            style_preset=StylePreset.FUTURISTIC
        )

        # Custom style description (any language)
        final = await reelforge.generate_final_image_prompt(
            prompt="A coffee cup on table",
            custom_style_description="<style description in any language>"
        )

        # Base prompt only
        final = await reelforge.generate_final_image_prompt(
            prompt="A sunset over the ocean"
        )
    """

    def __init__(self, reelforge_core):
        """
        Store the core object.

        Args:
            reelforge_core: ReelForgeCore instance; its ``llm`` callable is
                awaited when a custom style description has to be converted
        """
        self.core = reelforge_core

    async def __call__(
        self,
        prompt: str = "",
        style_preset: Optional[StylePreset] = None,
        custom_style_description: str = ""
    ) -> str:
        """
        Produce the final image prompt.

        Style resolution order:
            1. custom_style_description, when non-empty: converted via the LLM
            2. style_preset, when given: its stored prompt text is used as-is
            3. otherwise: no style text

        Empty parts are dropped; the remaining parts are joined as
        "{style}, {prompt}" with the style first.

        Args:
            prompt: Base prompt (may be empty)
            style_preset: StylePreset member (optional)
            custom_style_description: Free-form style description; takes
                precedence over style_preset when non-empty

        Returns:
            The combined prompt, or an empty string when neither a style nor
            a base prompt is available (a warning is logged in that case)

        Examples:
            final = await service(
                prompt="A mountain landscape",
                style_preset=StylePreset.FUTURISTIC
            )
            # -> "Futuristic sci-fi style..., A mountain landscape"

            final = await service(prompt="A sunset scene")
            # -> "A sunset scene"

            final = await service(style_preset=StylePreset.MINIMAL)
            # -> "Simple and clean background..."
        """
        # Resolve the style text; the custom description wins over the preset
        if custom_style_description:
            logger.debug(f"Converting custom style description: {custom_style_description}")
            style_text = await self._convert_custom_style(custom_style_description)
        elif style_preset:
            style_text = style_preset.value.prompt
            logger.debug(f"Using preset style: {style_preset.name}")
        else:
            style_text = ""

        # Style first, base prompt second; falsy pieces are skipped
        combined = ", ".join(filter(None, (style_text, prompt)))

        if not combined:
            logger.warning("Generated empty image prompt")
        else:
            logger.debug(f"Final image prompt: {combined}")

        return combined

    async def _convert_custom_style(self, description: str) -> str:
        """
        Turn a free-form style description into a single-line style prompt.

        Args:
            description: Style description as entered by the user

        Returns:
            The LLM response with every run of whitespace collapsed to a
            single space and no leading/trailing whitespace
        """
        from reelforge.prompts import build_style_conversion_prompt

        request_text = build_style_conversion_prompt(description)
        raw_answer = await self.core.llm(request_text)

        # split() with no arguments already discards leading/trailing whitespace
        style_prompt = " ".join(raw_answer.split())

        logger.debug(f"Converted custom style to: {style_prompt}")
        return style_prompt

View File

@@ -28,8 +28,6 @@ class ImagePromptGeneratorService:
self,
narrations: List[str],
config: StoryboardConfig,
image_style_preset: str = None,
image_style_description: str = None,
batch_size: int = 10,
max_retries: int = 3,
progress_callback: Optional[Callable] = None
@@ -40,14 +38,12 @@ class ImagePromptGeneratorService:
Args:
narrations: List of narrations
config: Storyboard configuration
image_style_preset: Preset style name (e.g., "minimal", "futuristic")
image_style_description: Custom style description (overrides preset)
batch_size: Max narrations per batch (default: 10)
max_retries: Max retry attempts per batch (default: 3)
progress_callback: Optional callback(completed, total, message) for progress updates
Returns:
List of image prompts with style applied
List of image prompts with prompt_prefix applied (from config)
Raises:
ValueError: If batch fails after max_retries
@@ -117,28 +113,20 @@ class ImagePromptGeneratorService:
base_prompts = all_base_prompts
logger.info(f"✅ All batches completed. Total prompts: {len(base_prompts)}")
# 5. Apply style to each prompt using FinalImagePromptService
from reelforge.services.final_image_prompt import StylePreset
# 5. Apply prompt prefix to each prompt
from reelforge.utils.prompt_helper import build_image_prompt
# Convert style preset name to enum if provided
style_preset_enum = None
if image_style_preset:
try:
style_preset_enum = StylePreset[image_style_preset.upper()]
except KeyError:
logger.warning(f"Unknown style preset: {image_style_preset}")
# Get prompt prefix from config
image_config = self.core.config.get("image", {})
prompt_prefix = image_config.get("prompt_prefix", "")
# Apply style to each base prompt
# Apply prefix to each base prompt
final_prompts = []
for base_prompt in base_prompts:
final_prompt = await self.core.generate_final_image_prompt(
prompt=base_prompt,
style_preset=style_preset_enum,
custom_style_description=image_style_description or ""
)
final_prompt = build_image_prompt(base_prompt, prompt_prefix)
final_prompts.append(final_prompt)
logger.info(f"Generated {len(final_prompts)} final image prompts with style applied")
logger.info(f"Generated {len(final_prompts)} final image prompts with prefix applied")
return final_prompts
async def _generate_batch_prompts(
@@ -170,9 +158,7 @@ class ImagePromptGeneratorService:
prompt = build_image_prompt_prompt(
narrations=batch_narrations,
min_words=config.min_image_prompt_words,
max_words=config.max_image_prompt_words,
image_style_preset=None,
image_style_description=None
max_words=config.max_image_prompt_words
)
# 2. Call LLM

View File

@@ -140,9 +140,10 @@ class StoryboardProcessorService:
"""Step 2: Generate image using ComfyKit"""
logger.debug(f" 2/4: Generating image for frame {frame.index}...")
# Call Image generation (using default preset)
# Call Image generation (with optional preset)
image_url = await self.core.image(
prompt=frame.image_prompt,
preset=config.image_preset, # Pass preset from config (None = use default)
width=config.image_width,
height=config.image_height
)
@@ -201,19 +202,17 @@ class StoryboardProcessorService:
from pathlib import Path
# Resolve template path
template_name = config.frame_template
if not template_name.endswith('.html'):
template_name = f"{template_name}.html"
template_filename = config.frame_template
# Try templates/ directory first
template_path = Path(f"templates/{template_name}")
template_path = Path(f"templates/{template_filename}")
if not template_path.exists():
# Try as absolute path
template_path = Path(template_name)
# Try as absolute/relative path
template_path = Path(template_filename)
if not template_path.exists():
raise FileNotFoundError(
f"Template not found: {template_name}. "
f"Available templates: classic, modern, minimal"
f"Template not found: {template_filename}. "
f"Built-in templates: default.html, modern.html, neon.html"
)
# Get storyboard for content metadata

View File

@@ -101,7 +101,7 @@ class VideoService:
- "filter": Slower but handles different formats
bgm_path: Background music file path (optional)
- None: No BGM
- "default" or "happy": Use built-in BGM from bgm/ folder
- Filename (e.g., "default.mp3", "happy.mp3"): Use built-in BGM from bgm/ folder
- Custom path: Use custom BGM file
bgm_volume: BGM volume level (0.0-1.0), default 0.2
bgm_mode: BGM playback mode
@@ -504,11 +504,11 @@ class VideoService:
def _resolve_bgm_path(self, bgm_path: str) -> str:
"""
Resolve BGM path (preset name or custom path)
Resolve BGM path (filename or custom path)
Args:
bgm_path: Can be:
- Preset name (e.g., "default", "happy"): auto-resolved from bgm/ directory
- Filename with extension (e.g., "default.mp3", "happy.mp3"): auto-resolved from bgm/ directory
- Custom file path (absolute or relative)
Returns:
@@ -521,8 +521,8 @@ class VideoService:
if os.path.exists(bgm_path):
return os.path.abspath(bgm_path)
# Try as preset in bgm/ directory
preset_path = f"bgm/{bgm_path}.mp3"
# Try as filename in bgm/ directory
preset_path = f"bgm/{bgm_path}"
if os.path.exists(preset_path):
return os.path.abspath(preset_path)
@@ -532,9 +532,9 @@ class VideoService:
os.path.abspath(preset_path)
]
# List available presets
# List available BGM files
available_bgm = self._list_available_bgm()
available_msg = f"\n Available presets: {', '.join(available_bgm)}" if available_bgm else ""
available_msg = f"\n Available BGM files: {', '.join(available_bgm)}" if available_bgm else ""
raise FileNotFoundError(
f"BGM file not found: '{bgm_path}'\n"
@@ -546,10 +546,10 @@ class VideoService:
def _list_available_bgm(self) -> list[str]:
"""
List available preset BGM files
List available BGM files in bgm/ directory
Returns:
List of preset names (without .mp3 extension)
List of filenames (with extensions)
"""
bgm_dir = "bgm"
if not os.path.exists(bgm_dir):
@@ -557,7 +557,9 @@ class VideoService:
try:
files = os.listdir(bgm_dir)
return [f[:-4] for f in files if f.endswith('.mp3')]
# Return all audio files (mp3, wav, ogg, flac, etc.)
audio_extensions = ('.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac')
return [f for f in files if f.lower().endswith(audio_extensions)]
except Exception:
return []

View File

@@ -66,8 +66,7 @@ class VideoGeneratorService:
# === Image Parameters ===
image_width: int = 1024,
image_height: int = 1024,
image_style_preset: Optional[str] = None,
image_style_description: Optional[str] = None,
image_preset: Optional[str] = None,
# === Video Parameters ===
video_width: int = 1080,
@@ -77,6 +76,9 @@ class VideoGeneratorService:
# === Frame Template ===
frame_template: Optional[str] = None,
# === Image Style ===
prompt_prefix: Optional[str] = None,
# === BGM Parameters ===
bgm_path: Optional[str] = None,
bgm_volume: float = 0.2,
@@ -119,17 +121,19 @@ class VideoGeneratorService:
image_width: Generated image width (default 1024)
image_height: Generated image height (default 1024)
image_style_preset: Preset style name (e.g., "minimal", "concept", "cinematic")
image_style_description: Custom style description (overrides preset)
image_preset: Image workflow preset (e.g., "flux", "sdxl", None = use default)
video_width: Final video width (default 1080)
video_height: Final video height (default 1920)
video_fps: Video frame rate (default 30)
frame_template: HTML template name or path (None = use PIL)
e.g., "classic", "modern", "minimal", or custom path
frame_template: HTML template filename or path (None = use PIL)
e.g., "default.html", "modern.html", "neon.html", or custom path
bgm_path: BGM path ("default", "happy", custom path, or None)
prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
e.g., "anime style, vibrant colors" or "" for no prefix
bgm_path: BGM path (filename like "default.mp3", custom path, or None)
bgm_volume: BGM volume 0.0-1.0 (default 0.2)
bgm_mode: BGM mode "once" or "loop" (default "loop")
@@ -211,6 +215,7 @@ class VideoGeneratorService:
voice_id=voice_id,
image_width=image_width,
image_height=image_height,
image_preset=image_preset,
frame_template=frame_template
)
@@ -248,25 +253,36 @@ class VideoGeneratorService:
# Step 2: Generate image prompts
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
# Override prompt_prefix if provided (temporarily modify config)
original_prefix = None
if prompt_prefix is not None:
image_config = self.core.config.get("image", {})
original_prefix = image_config.get("prompt_prefix")
image_config["prompt_prefix"] = prompt_prefix
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
image_style_preset=image_style_preset,
image_style_description=image_style_description,
progress_callback=image_prompt_progress
)
try:
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
progress_callback=image_prompt_progress
)
finally:
# Restore original prompt_prefix
if original_prefix is not None:
image_config["prompt_prefix"] = original_prefix
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
# Step 3: Create frames

View File

@@ -0,0 +1,38 @@
"""
Prompt helper utilities
Simple utilities for building prompts with optional prefixes.
"""
def build_image_prompt(prompt: str, prefix: str = "") -> str:
    """
    Build the final image prompt with an optional prefix.

    Both parts are trimmed. Commas and whitespace left dangling at the
    junction (the end of the prefix, the start of the prompt) are removed
    so that a prefix typed as "anime style, " does not produce a doubled
    separator such as "anime style,, a cat".

    Args:
        prompt: Raw prompt text; None or empty is treated as "no prompt"
        prefix: Optional text placed before the prompt; None or empty is
            treated as "no prefix"

    Returns:
        "{prefix}, {prompt}" when both parts are non-empty, otherwise
        whichever part is non-empty, otherwise an empty string

    Examples:
        >>> build_image_prompt("a cat", "")
        'a cat'
        >>> build_image_prompt("a cat", "anime style")
        'anime style, a cat'
        >>> build_image_prompt("a cat", "  anime style  ")
        'anime style, a cat'
        >>> build_image_prompt("a cat", "anime style, ")
        'anime style, a cat'
    """
    # Characters that carry no meaning at the point where the parts meet
    junction_noise = ", \t\r\n"

    # strip() first removes any surrounding whitespace; the second pass
    # removes separators only on the side that faces the other part
    prefix = prefix.strip().rstrip(junction_noise) if prefix else ""
    prompt = prompt.strip().lstrip(junction_noise) if prompt else ""

    if prefix and prompt:
        return f"{prefix}, {prompt}"
    return prefix or prompt