Files
AI-Video/pixelle_video/prompts/image_generation.py
2025-11-07 16:59:47 +08:00

154 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Image prompt generation template
For generating image prompts from narrations.
"""
import json
from typing import List, Optional
# ==================== PRESET IMAGE STYLES ====================
# Built-in visual styles, keyed by a short preset identifier.
# Every preset carries three string fields:
#   name        - display name of the style
#   description - English style description text
#   use_case    - short note on where the style fits
IMAGE_STYLE_PRESETS = {
    "stick_figure": {
        "name": "火柴人简笔画",
        "description": (
            "stick figure style sketch, black and white lines, "
            "pure white background, minimalist hand-drawn feel"
        ),
        "use_case": "通用场景,简单直观",
    },
    "minimal": {
        "name": "极简抽象",
        "description": (
            "minimalist abstract art, geometric shapes, clean composition, "
            "modern design, soft pastel colors"
        ),
        "use_case": "现代感、艺术感",
    },
    "concept": {
        "name": "概念化视觉",
        "description": (
            "conceptual visual metaphors, symbolic elements, "
            "thought-provoking imagery, artistic interpretation"
        ),
        "use_case": "深度内容、哲学思考",
    },
}

# Key into IMAGE_STYLE_PRESETS that serves as the default preset.
DEFAULT_IMAGE_STYLE = "stick_figure"
# LLM prompt template (Chinese instructions, English output requested).
# It is rendered with str.format(), so:
#   {narrations_json}  - JSON document of the form {"narrations": [...]}
#   {narrations_count} - number of narrations / expected number of prompts
#   {min_words}, {max_words} - suggested word range for each image prompt
# Literal braces in the template are escaped as {{ and }}.
IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位
你是一个专业的视觉创意设计师,擅长为视频脚本创作富有表现力和象征性的图像提示词,将抽象概念转化为具象的视觉画面。
# 核心任务
基于已有的视频脚本,为每个分镜的"旁白内容"创作对应的**英文**图像提示词,确保视觉画面与叙述内容完美配合,增强观众的理解和记忆。
**重要:输入包含 {narrations_count} 个旁白,你必须为每个旁白都生成一个对应的图像提示词,总共输出 {narrations_count} 个图像提示词。**
# 输入内容
{narrations_json}
# 输出要求
## 图像提示词规范
- 语言:**必须使用英文**(用于 AI 图像生成模型)
- 描述结构scene + character action + emotion + symbolic elements
- 描述长度:确保描述清晰完整且富有创意(建议 {min_words}-{max_words} 个英文单词)
## 视觉创意要求
- 每个图像都要准确反映对应旁白的具体内容和情感
- 使用象征手法将抽象概念视觉化(如用路径代表人生选择,用锁链代表束缚等)
- 画面要表现出丰富的情感和动作,增强视觉冲击力
- 通过构图和元素安排突出主题,避免过于直白的表现方式
## 关键英文词汇参考
- 象征元素symbolic elements
- 表情expression / facial expression
- 动作action / gesture / movement
- 场景scene / setting
- 氛围atmosphere / mood
## 视觉与文案配合原则
- 图像要服务于文案,成为文案内容的视觉延伸
- 避免与文案内容无关或矛盾的视觉元素
- 选择最能增强文案说服力的视觉表现方式
- 确保观众能通过图像快速理解文案的核心观点
## 创意指导
1. **现象描述类文案**:用直观的场景表现社会现象
2. **原因分析类文案**:用因果关系的视觉比喻表现内在逻辑
3. **影响论证类文案**:用后果场景或对比手法表现影响程度
4. **深入探讨类文案**:用抽象概念的具象化表现深刻思考
5. **结论启发类文案**:用开放式场景或指引性元素表现启发性
# 输出格式
严格按照以下JSON格式输出**图像提示词必须是英文**
```json
{{
"image_prompts": [
"[detailed English image prompt following the style requirements]",
"[detailed English image prompt following the style requirements]"
]
}}
```
# 重要提醒
1. 只输出JSON格式内容不要添加任何解释说明
2. 确保JSON格式严格正确可以被程序直接解析
3. 输入是 {{"narrations": [旁白数组]}} 格式,输出是 {{"image_prompts": [图像提示词数组]}} 格式
4. **输出的image_prompts数组必须恰好包含 {narrations_count} 个元素与输入的narrations数组一一对应**
5. **图像提示词必须使用英文**for AI image generation models
6. 图像提示词必须准确反映对应旁白的具体内容和情感
7. 每个图像都要有创意性和视觉冲击力,避免千篇一律
8. 确保视觉画面能增强文案的说服力和观众的理解度
现在,请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**图像提示词。只输出JSON不要其他内容。
"""


def build_image_prompt_prompt(
    narrations: List[str],
    min_words: int,
    max_words: int
) -> str:
    """
    Build the LLM prompt that asks for one English image prompt per narration.

    The narrations are serialized as a JSON document of the form
    ``{"narrations": [...]}`` and substituted into
    IMAGE_PROMPT_GENERATION_PROMPT together with the narration count and
    the suggested word range.

    Note: Style/prefix will be applied later via prompt_prefix in config.

    Args:
        narrations: List of narrations
        min_words: Lower bound of the suggested word count per image prompt
        max_words: Upper bound of the suggested word count per image prompt

    Returns:
        Formatted prompt for LLM

    Example:
        >>> "30-60" in build_image_prompt_prompt(["Hello"], 30, 60)
        True
    """
    # ensure_ascii=False keeps non-ASCII narration text readable in the
    # prompt instead of turning it into \\uXXXX escape sequences.
    narrations_json = json.dumps(
        {"narrations": narrations},
        ensure_ascii=False,
        indent=2,
    )

    # min_words / max_words fill the length hint in the template; without
    # matching placeholders str.format() would silently ignore them.
    return IMAGE_PROMPT_GENERATION_PROMPT.format(
        narrations_json=narrations_json,
        narrations_count=len(narrations),
        min_words=min_words,
        max_words=max_words,
    )