Update prompts and content generators

2025-12-05 16:42:10 +08:00
parent 5739601afa
commit 1769b7faee
11 changed files with 518 additions and 260 deletions
--- a/pixelle_video/prompts/asset_script_generation.py
+++ b/pixelle_video/prompts/asset_script_generation.py
@@ -17,37 +17,38 @@ For generating video scripts based on user-provided assets.
 """


-ASSET_SCRIPT_GENERATION_PROMPT = """你是一位专业的视频脚本创作者。请基于用户提供的视频意图和可用素材，生成一个 {duration} 秒的视频脚本。
+ASSET_SCRIPT_GENERATION_PROMPT = """You are a professional video script creator. Based on the user's video intent and available assets, generate a {duration}-second video script. Before doing so, you need to detect the user's input language - if it's English, then all copy must be in English. Strictly follow the user's input language type as the standard, ensuring consistent and corresponding copy!

-## 需求信息
-{title_section}- 视频意图：{intent}
- 目标时长：{duration} 秒
+## Requirements
+{title_section}- Video Intent: {intent}
+- Target Duration: {duration} seconds

-## 可用素材（请在输出中使用精确路径）
+## Available Assets (use exact paths in output)
 {assets_text}

-## 创作指南
-1. 根据目标时长决定需要多少个场景（通常每个场景 5-15 秒）
-2. 为每个场景从可用素材中直接分配一个素材
-3. 每个场景可以包含 1-3 句旁白
-4. 尽量使用所有可用素材，但如有需要可以复用素材
-5. 所有场景的总时长应约等于 {duration} 秒
+## Creation Guidelines
+1. Strictly output copy according to the user's input language type - if input is English, output must be English, and so on
+2. Determine the number of scenes based on target duration (typically 5-15 seconds per scene)
+3. Assign one asset from available assets to each scene
+4. Each scene can contain 1-3 narration sentences
+5. Try to use all available assets, but assets can be reused if needed
+6. Total duration of all scenes should approximately equal {duration} seconds
 {title_instruction}

-## 语言一致性要求（非常重要）
- 旁白的语言必须与用户输入的视频意图保持一致
- 如果视频意图是中文，则旁白必须是中文
- 如果视频意图是英文，则旁白必须是英文
- 除非视频意图中明确指定了输出语言，否则严格遵循意图的原始语言
+## Language Consistency Requirements (Strictly Enforce)
+- Narration language must match the user's input video intent
+- If video intent is in Chinese, narration must be in Chinese
+- If video intent is in English, narration must be in English
+- Unless the video intent explicitly specifies an output language, strictly follow the original language of the intent

-## 输出要求
-为每个场景提供：
- scene_number: 场景编号（从 1 开始）
- asset_path: 从可用素材列表中选择的精确路径
- narrations: 包含 1-3 句旁白的数组
- duration: 预估时长（秒）
+## Output Requirements
+Provide for each scene:
+- scene_number: Scene number (starting from 1)
+- asset_path: Exact path selected from available assets list
+- narrations: Array containing 1-3 narration sentences
+- duration: Estimated duration (seconds)

-现在请开始生成视频脚本："""
+Now please begin generating the video script:"""


 def build_asset_script_prompt(
@@ -68,8 +69,8 @@ def build_asset_script_prompt(
    Returns:
        Formatted prompt
    """
-    title_section = f"- 视频标题：{title}\n" if title else ""
-    title_instruction = f"6. 旁白内容应与视频标题保持一致：{title}\n" if title else ""
+    title_section = f"- Video Title: {title}\n" if title else ""
+    title_instruction = f"7. Narration content should be consistent with the video title: {title}\n" if title else ""
    
    return ASSET_SCRIPT_GENERATION_PROMPT.format(
        duration=duration,
--- a/pixelle_video/prompts/content_narration.py
+++ b/pixelle_video/prompts/content_narration.py
@@ -17,61 +17,63 @@ For extracting/refining narrations from user-provided content.
 """


-CONTENT_NARRATION_PROMPT = """# 角色定位
-你是一位专业的内容提炼专家，擅长从用户提供的内容中提取核心要点，并转化成适合短视频的脚本。
+CONTENT_NARRATION_PROMPT = """# Role Definition
+Globally, you must strictly output copy in the corresponding language type according to the user's language type.
+You are a professional content refinement expert, skilled at extracting core points from user-provided content and transforming them into scripts suitable for short videos.

-# 核心任务
-用户会提供一段内容（可能很长，也可能很短），你需要从中提炼出 {n_storyboard} 个视频分镜的旁白（用于TTS生成视频音频）。
+# Core Task
+The user will provide content (which may be long or short), and you need to extract narrations for {n_storyboard} video storyboards (for TTS to generate video audio).

-# 用户提供的内容
+# User-Provided Content
 {content}

-# 输出要求
+# Output Requirements

-## 旁白规范
- 用途定位：用于TTS生成短视频音频
- 字数限制：严格控制在{min_words}~{max_words}个字（最低不少于{min_words}字）
- 结尾格式：结尾不要使用标点符号
- 提炼策略：
-  * 如果用户内容较长：提取{n_storyboard}个核心要点，去除冗余信息
-  * 如果用户内容较短：在保留核心观点的基础上适当扩展，增加例子或解释
-  * 如果用户内容刚好：优化表达，使其更适合口播
- 风格要求：保持用户内容的核心观点，但用更口语化、适合TTS的方式表达
- 开场建议：第一个分镜可以用提问或场景引入，吸引观众注意
- 核心内容：中间分镜展开用户内容的核心要点
- 结尾建议：最后一个分镜给出总结或启发
- 情绪与语气：温和、真诚、自然，像在跟朋友分享观点
- 禁止项：不出现网址、表情符号、数字编号、不说空话套话
- 字数检查：生成后必须自我验证每段不少于{min_words}个字
+## Narration Specifications
+- Language consistency requirement: Strictly output copy according to the user's input language type - if input is English, output must be English, and so on
+- Purpose: For TTS to generate short video audio
+- Word count limit: Strictly control to {min_words}~{max_words} words (minimum not less than {min_words} words)
+- Ending format: Do not use punctuation at the end
+- Refinement strategy:
+  * If user content is long: Extract {n_storyboard} core points, remove redundant information
+  * If user content is short: Appropriately expand while retaining core viewpoints, add examples or explanations
+  * If user content is just right: Optimize expression to make it more suitable for voice narration
+- Style requirement: Maintain the core viewpoint of user content, but express it in a more colloquial way suitable for TTS
+- Opening suggestion: The first storyboard can use a question or scene introduction to attract audience attention
+- Core content: Middle storyboards expand on the core points of user content
+- Ending suggestion: The last storyboard provides a summary or inspiration
+- Emotion and tone: Gentle, sincere, natural, like sharing viewpoints with a friend
+- Prohibitions: No URLs, emojis, numeric numbering, no empty talk or clichés
+- Word count check: After generation, must self-verify that each segment is not less than {min_words} words

-## 分镜连贯性要求
- {n_storyboard} 个分镜应基于用户内容的核心观点展开，形成完整表达
- 保持逻辑连贯，自然过渡
- 每个分镜像同一个人在讲述，语气一致
- 确保提炼的内容忠于用户原意，但更适合短视频呈现
+## Storyboard Coherence Requirements
+- {n_storyboard} storyboards should expand based on the core viewpoint of user content, forming a complete expression
+- Maintain logical coherence and natural transitions
+- Each storyboard should sound like the same person narrating, with consistent tone
+- Ensure the refined content is faithful to the user's original meaning, but more suitable for short video presentation

-# 输出格式
-严格按照以下JSON格式输出，不要添加任何额外的文字说明：
+# Output Format
+Strictly output in the following JSON format, do not add any additional text explanations:

 ```json
 {{
  "narrations": [
-    "第一段{min_words}~{max_words}字的旁白",
-    "第二段{min_words}~{max_words}字的旁白",
-    "第三段{min_words}~{max_words}字的旁白"
+    "First {min_words}~{max_words} word narration",
+    "Second {min_words}~{max_words} word narration",
+    "Third {min_words}~{max_words} word narration"
  ]
 }}
 ```

-# 重要提醒
-1. 只输出JSON格式内容，不要添加任何解释说明
-2. 确保JSON格式严格正确，可以被程序直接解析
-3. 旁白必须严格控制在{min_words}~{max_words}字之间
-4. 必须输出恰好 {n_storyboard} 个分镜的旁白
-5. 内容要忠于用户原意，但优化为更适合口播的表达
-6. 输出格式为 {{"narrations": [旁白数组]}} 的JSON对象
+# Important Reminders
+1. Only output JSON format content, do not add any explanations
+2. Ensure JSON format is strictly correct and can be directly parsed by the program
+3. Narrations must be strictly controlled between {min_words}~{max_words} words
+4. Must output exactly {n_storyboard} storyboard narrations
+5. Content must be faithful to the user's original meaning, but optimized for voice narration expression
+6. Output format is {{"narrations": [narration array]}} JSON object

-现在，请从上述内容中提炼出 {n_storyboard} 个分镜的旁白。只输出JSON，不要其他内容。
+Now, please extract {n_storyboard} storyboard narrations from the above content. Only output JSON, no other content.
 """


--- a/pixelle_video/prompts/image_generation.py
+++ b/pixelle_video/prompts/image_generation.py
@@ -25,21 +25,21 @@ from typing import List, Optional

 IMAGE_STYLE_PRESETS = {
    "stick_figure": {
-        "name": "火柴人简笔画",
+        "name": "Stick Figure Sketch",
        "description": "stick figure style sketch, black and white lines, pure white background, minimalist hand-drawn feel",
-        "use_case": "通用场景，简单直观"
+        "use_case": "General scenes, simple and intuitive"
    },
    
    "minimal": {
-        "name": "极简抽象",
+        "name": "Minimalist Abstract",
        "description": "minimalist abstract art, geometric shapes, clean composition, modern design, soft pastel colors",
-        "use_case": "现代感、艺术感"
+        "use_case": "Modern, artistic feel"
    },
    
    "concept": {
-        "name": "概念化视觉",
+        "name": "Conceptual Visual",
        "description": "conceptual visual metaphors, symbolic elements, thought-provoking imagery, artistic interpretation",
-        "use_case": "深度内容、哲学思考"
+        "use_case": "Deep content, philosophical thinking"
    },
 }

@@ -47,52 +47,52 @@ IMAGE_STYLE_PRESETS = {
 DEFAULT_IMAGE_STYLE = "stick_figure"


-IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位
-你是一个专业的视觉创意设计师，擅长为视频脚本创作富有表现力和象征性的图像提示词，将抽象概念转化为具象的视觉画面。
+IMAGE_PROMPT_GENERATION_PROMPT = """# Role Definition
+You are a professional visual creative designer, skilled at creating expressive and symbolic image prompts for video scripts, transforming abstract concepts into concrete visual scenes.

-# 核心任务
-基于已有的视频脚本，为每个分镜的"旁白内容"创作对应的**英文**图像提示词，确保视觉画面与叙述内容完美配合，增强观众的理解和记忆。
+# Core Task
+Based on the existing video script, create corresponding **English** image prompts for each storyboard's "narration content", ensuring visual scenes perfectly match the narrative content and enhance audience understanding and memory.

-**重要：输入包含 {narrations_count} 个旁白，你必须为每个旁白都生成一个对应的图像提示词，总共输出 {narrations_count} 个图像提示词。**
+**Important: The input contains {narrations_count} narrations. You must generate one corresponding image prompt for each narration, totaling {narrations_count} image prompts.**

-# 输入内容
+# Input Content
 {narrations_json}

-# 输出要求
+# Output Requirements

-## 图像提示词规范
- 语言：**必须使用英文**（用于 AI 图像生成模型）
- 描述结构：scene + character action + emotion + symbolic elements
- 描述长度：确保描述清晰完整且富有创意（建议 50-100 个英文单词）
+## Image Prompt Specifications
+- Language: **Must use English** (for AI image generation models)
+- Description structure: scene + character action + emotion + symbolic elements
+- Description length: Ensure clear, complete, and creative descriptions (recommended 50-100 English words)

-## 视觉创意要求
- 每个图像都要准确反映对应旁白的具体内容和情感
- 使用象征手法将抽象概念视觉化（如用路径代表人生选择，用锁链代表束缚等）
- 画面要表现出丰富的情感和动作，增强视觉冲击力
- 通过构图和元素安排突出主题，避免过于直白的表现方式
+## Visual Creative Requirements
+- Each image must accurately reflect the specific content and emotion of the corresponding narration
+- Use symbolic techniques to visualize abstract concepts (e.g., use paths to represent life choices, chains to represent constraints, etc.)
+- Scenes should express rich emotions and actions to enhance visual impact
+- Highlight themes through composition and element arrangement, avoid overly literal representations

-## 关键英文词汇参考
- 象征元素：symbolic elements
- 表情：expression / facial expression
- 动作：action / gesture / movement
- 场景：scene / setting
- 氛围：atmosphere / mood
+## Key English Vocabulary Reference
+- Symbolic elements: symbolic elements
+- Expression: expression / facial expression
+- Action: action / gesture / movement
+- Scene: scene / setting
+- Atmosphere: atmosphere / mood

-## 视觉与文案配合原则
- 图像要服务于文案，成为文案内容的视觉延伸
- 避免与文案内容无关或矛盾的视觉元素
- 选择最能增强文案说服力的视觉表现方式
- 确保观众能通过图像快速理解文案的核心观点
+## Visual and Copy Coordination Principles
+- Images should serve the copy, becoming a visual extension of the copy content
+- Avoid visual elements unrelated to or contradicting the copy content
+- Choose visual presentation methods that best enhance the persuasiveness of the copy
+- Ensure the audience can quickly understand the core viewpoint of the copy through images

-## 创意指导
-1. **现象描述类文案**：用直观的场景表现社会现象
-2. **原因分析类文案**：用因果关系的视觉比喻表现内在逻辑
-3. **影响论证类文案**：用后果场景或对比手法表现影响程度
-4. **深入探讨类文案**：用抽象概念的具象化表现深刻思考
-5. **结论启发类文案**：用开放式场景或指引性元素表现启发性
+## Creative Guidance
+1. **Phenomenon Description Copy**: Use intuitive scenes to represent social phenomena
+2. **Cause Analysis Copy**: Use visual metaphors of cause-and-effect relationships to represent internal logic
+3. **Impact Argumentation Copy**: Use consequence scenes or contrast techniques to represent the degree of impact
+4. **In-depth Discussion Copy**: Use concretization of abstract concepts to represent deep thinking
+5. **Conclusion Inspiration Copy**: Use open-ended scenes or guiding elements to represent inspiration

-# 输出格式
-严格按照以下JSON格式输出，**图像提示词必须是英文**：
+# Output Format
+Strictly output in the following JSON format, **image prompts must be in English**:

 ```json
 {{
@@ -103,17 +103,17 @@ IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位
 }}
 ```

-# 重要提醒
-1. 只输出JSON格式内容，不要添加任何解释说明
-2. 确保JSON格式严格正确，可以被程序直接解析
-3. 输入是 {{"narrations": [旁白数组]}} 格式，输出是 {{"image_prompts": [图像提示词数组]}} 格式
-4. **输出的image_prompts数组必须恰好包含 {narrations_count} 个元素，与输入的narrations数组一一对应**
-5. **图像提示词必须使用英文**（for AI image generation models）
-6. 图像提示词必须准确反映对应旁白的具体内容和情感
-7. 每个图像都要有创意性和视觉冲击力，避免千篇一律
-8. 确保视觉画面能增强文案的说服力和观众的理解度
+# Important Reminders
+1. Only output JSON format content, do not add any explanations
+2. Ensure JSON format is strictly correct and can be directly parsed by the program
+3. Input is {{"narrations": [narration array]}} format, output is {{"image_prompts": [image prompt array]}} format
+4. **The output image_prompts array must contain exactly {narrations_count} elements, corresponding one-to-one with the input narrations array**
+5. **Image prompts must use English** (for AI image generation models)
+6. Image prompts must accurately reflect the specific content and emotion of the corresponding narration
+7. Each image must be creative and visually impactful, avoid being monotonous
+8. Ensure visual scenes can enhance the persuasiveness of the copy and audience understanding

-现在，请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**图像提示词。只输出JSON，不要其他内容。
+Now, please create {narrations_count} corresponding **English** image prompts for the above {narrations_count} narrations. Only output JSON, no other content.
 """


--- a/pixelle_video/prompts/title_generation.py
+++ b/pixelle_video/prompts/title_generation.py
@@ -17,35 +17,69 @@ For generating video title from content.
 """


-TITLE_GENERATION_PROMPT = """Please generate a short, attractive title (within 10 characters) for the following content.
+TITLE_GENERATION_PROMPT = """Please generate a short, attractive title for the following content.

 Content:
 {content}

 Requirements:
-1. Brief and concise, within 10 characters
-2. Accurately summarize the core content
-3. Attractive, suitable as a video title
-4. Output only the title text, no other content
+1. **Language Consistency (CRITICAL)**: The title MUST be in the same language as the input content
+   - If the input content is in English, the title MUST be in English
+   - If the input content is in Chinese, the title MUST be in Chinese
+   - Strictly follow the language of the input content
+
+2. **Character Limit (CRITICAL)**: The title MUST NOT exceed {max_length} characters
+   - Count every character including spaces
+   - The title must be complete and meaningful within this limit
+   - Do NOT generate a title that would need to be cut off
+
+3. **Core Message (CRITICAL)**: The title MUST capture the MAIN POINT of the content
+   - Identify the central theme or key message
+   - Don't focus on just one aspect if the content has multiple important points
+   - Ensure the title accurately represents what the content is about
+
+4. **No Punctuation at End**: Do NOT include any punctuation marks at the end of the title
+   - No period (.), comma (,), exclamation mark (!), question mark (?), etc.
+   - The title should end with a word or number, not punctuation
+
+5. **Completeness**: Ensure the title is a complete, meaningful phrase
+   - Do not cut off in the middle of a word or number
+   - Do not create incomplete phrases like "Rise Early for" or "How to Make"
+   - Use abbreviations or shorter words if needed to fit the limit
+   
+6. **Abbreviation Examples** (use when needed to fit character limit):
+   - For English:
+     * "10,000" → "10K"
+     * "per month" → "monthly" or "a month"
+     * "early to bed and early to rise" → "Sleep Early" or "Early Habits"
+     * "makes you healthy" → "for Health" or "Stay Healthy"
+   - For Chinese:
+     * "10,000元" → "万元" or "1万"
+     * "每个月" → "月入" or "月收"
+
+7. Accurately summarize the core content
+8. Attractive and engaging, suitable as a video title
+9. Output only the title text, no quotes, no explanations

 Title:"""


-def build_title_generation_prompt(content: str, max_length: int = 500) -> str:
+def build_title_generation_prompt(content: str, max_length: int = 15) -> str:
    """
    Build title generation prompt
    
    Args:
        content: Content to generate title from
-        max_length: Maximum content length to use (default 500 chars)
+        max_length: Maximum title length in characters (default: 15)
    
    Returns:
-        Formatted prompt
+        Formatted prompt with character limit
    """
-    # Take first max_length chars to avoid overly long prompts
-    content_preview = content[:max_length]
+    # Take first 500 chars to avoid overly long prompts
+    content_preview = content[:500]
    
    return TITLE_GENERATION_PROMPT.format(
-        content=content_preview
+        content=content_preview,
+        max_length=max_length
    )

--- a/pixelle_video/prompts/topic_narration.py
+++ b/pixelle_video/prompts/topic_narration.py
@@ -17,110 +17,117 @@ For generating narrations from a topic/theme.
 """


-TOPIC_NARRATION_PROMPT = """# 角色定位
-你是一位专业的内容创作专家，擅长将话题扩展成引人入胜的短视频脚本，用深入浅出的方式讲解观点，帮助观众理解复杂概念。
+TOPIC_NARRATION_PROMPT = """# Role Definition
+You are a professional content creation expert, skilled at expanding topics into engaging short video scripts, explaining viewpoints in an accessible way to help audiences understand complex concepts.
+Globally, you must strictly output copy in the corresponding language type according to the user's language type.

-# 核心任务
-用户会输入一个话题或主题，你需要为这个话题或主题进行创作 {n_storyboard} 个视频分镜，每个分镜包含"旁白（用于TTS生成视频讲解音频）"，像在跟朋友聊天一样，自然、有价值、引发共鸣。
+# Core Task
+The user will input a topic or theme. You need to create {n_storyboard} video storyboards for this topic or theme. Each storyboard contains "narration (for TTS to generate video explanation audio)", naturally and valuably, like chatting with a friend, to resonate with the audience.
+- Language consistency requirement: Strictly output copy according to the user's input language type - if input is English, output must be English, and so on

-# 输入话题
+# Input Topic
 {topic}

-# 输出要求
+# Output Requirements

-## 旁白规范
- 输出语言要求：严格按照用户输入的话题或主题的语种输出，如：用户输入的是英文，则输出的文案必须为英文，中文也是一样。
- 用途定位：用于TTS生成短视频音频，通俗易懂地讲解话题
- 字数限制：严格控制在{min_words}~{max_words}个字（最低不少于{min_words}字）
- 结尾格式：每段旁白的结尾不要使用标点符号，若旁白中出现断句读法必须使用中文标点（，。？！……：“”）来表达语气和停顿，自动判断并插入合适的标点符号，保留自然口语节奏（比如“对吗？不对。”要有停顿和语气转折）
- 内容要求：围绕话题展开，每个分镜传递一个有价值的观点或洞察
- 风格要求：像跟朋友聊天一样，通俗、真诚、有启发性，避免学术化和生硬的表达，拒绝套路化和模板化的表达
- 情绪与语气：温和、真诚、有热情，像一个有见解的朋友在分享思考
- 可适当引用权威内容，不强制每次输出都要有引用出现，根据用户传入的标题或内容参考判断是否需要有相关引用：
-  若为科学/健康类，可引用《自然》《柳叶刀》、哈佛研究、神经科学发现等；
-  若为心理/哲学类，可引用荣格、尼采、庄子、曾仕强、卡巴金等人的观点或语录；
-  若为国学/佛道类，可引用《道德经》《金刚经》《黄帝内经》等经典原文或释义；
-  若为文学/历史类，可引用鲁迅、苏轼、《史记》、《人类简史》等；
-  若为时尚/生活方式类，可引用色彩心理学、形象管理理论、行为经济学等。
-  根据上述举例，若有其他类型的方向和赛道也可检索引用相关书籍，但也要遵循不强制引用的要求。
+## Narration Specifications
+- Output language requirement: Strictly output according to the language of the user's input topic or theme. For example: if the user's input is in English, the output copy must be in English, same for Chinese.
+- Purpose: For TTS to generate short video audio, explaining topics in an accessible way
+- Word count limit: Strictly control to {min_words}~{max_words} words (minimum not less than {min_words} words)
+- Ending format: Do not use punctuation at the end of each narration. If there are sentence breaks in the narration, punctuation must be used to express tone and pauses - Chinese punctuation (，。？！……：“”) when the narration is in Chinese, and the equivalent standard punctuation of the output language otherwise. Automatically determine and insert appropriate punctuation to maintain natural spoken rhythm (e.g., "Right? Wrong." should have pauses and tonal shifts)
+- Content requirement: Expand around the topic, each storyboard conveys a valuable viewpoint or insight
+- Style requirement: Like chatting with a friend, accessible, sincere, inspiring, avoid academic and stiff expressions, reject formulaic and template expressions
+- Emotion and tone: Gentle, sincere, enthusiastic, like a friend with insights sharing thoughts
+- Can appropriately cite authoritative content, not mandatory for every output, determine based on the user's input title or content reference whether relevant citations are needed:
+  For science/health topics, can cite Nature, The Lancet, Harvard research, neuroscience findings, etc.;
+  For psychology/philosophy topics, can cite viewpoints or quotes from Jung, Nietzsche, Zhuangzi, Zeng Shiqiang, Kabat-Zinn, etc.;
+  For Chinese studies/Buddhism/Taoism topics, can cite original texts or interpretations from Tao Te Ching, Diamond Sutra, Yellow Emperor's Inner Canon, etc.;
+  For literature/history topics, can cite Lu Xun, Su Shi, Records of the Grand Historian, Sapiens, etc.;
+  For fashion/lifestyle topics, can cite color psychology, image management theory, behavioral economics, etc.
+  Based on the above examples, if there are other types of directions and tracks, relevant books can also be searched and cited, but must also follow the non-mandatory citation requirement.

-  若有引用需自然融入，不生硬堆砌，不虚构出处。
+  If there are citations, integrate them naturally, do not pile them up stiffly, do not fabricate sources.

-## 开头多样性要求（最重要）
-【核心原则】每个分镜的开头必须根据内容本身自然表达，拒绝任何形式的固定套路和模板化表达。
+## Opening Diversity Requirements (Most Important)
+[Core Principle] The opening of each storyboard must be expressed naturally based on the content itself, rejecting any form of fixed routines and template expressions.

-【表达方式灵活性】
-根据话题内容，可以采用陈述、场景、感叹、观点、问句、对比、故事等多种表达方式，但务必做到：
- 每个分镜根据要表达的具体内容选择最自然的开头
- 绝不形成任何规律性的句式模式
- 不要让任何一个词或短语成为"习惯性开头"
+[Expression Flexibility]
+Based on the topic content, various expression methods such as statements, scenes, exclamations, viewpoints, questions, contrasts, stories, etc. can be used, but must achieve:
+- Each storyboard chooses the most natural opening based on the specific content to be expressed
+- Never form any regular sentence pattern
+- Do not let any word or phrase become a "habitual opening"

-【严禁固定模式】
-❌ 绝对禁止以下行为：
- 形成"第N句总用X开头"的任何规律
- 多次重复使用同一个连接词或句式作为开头
- 按照某种隐藏的模板顺序来组织分镜
+[Strictly Prohibit Fixed Patterns]
+❌ Absolutely prohibit the following behaviors:
+- Forming any pattern of "the Nth sentence always starts with X"
+- Repeatedly using the same conjunction or sentence pattern as an opening
+- Organizing storyboards according to some hidden template order

-【特别强调】
- 第一个分镜的开头要完全根据话题内容自然选择，不要有任何固定词汇倾向
- 整组旁白中，如果某个词（如"有时候"、"其实"、"你有没有"）出现超过1次作为开头，就是失败的创作
- 要像真人说话一样自然流畅，而不是套用任何句式模板
+[Special Emphasis]
+- The opening of the first storyboard should be completely naturally chosen based on the topic content, without any fixed vocabulary tendency
+- In the entire set of narrations, if any word (such as "sometimes", "actually", "have you ever") appears more than once as an opening, it is a failed creation
+- Should be as natural and fluent as a real person speaking, not applying any sentence pattern template
+## Language Consistency Requirements (Strictly Enforce)
+- Narration language must match the language of the user's input topic
+- If the input topic is in Chinese, narration must be in Chinese
+- If the input topic is in English, narration must be in English
+- Unless the input topic explicitly specifies an output language, strictly follow the original language of the topic

-## 自然表达要求
- 内容应该像真人在自然交流，而不是按照模板填空
- 每个分镜的开头要根据内容本身选择最合适的表达方式
- 同一个词作为开头在整个旁白中最多只能出现1次
- 优先用观点、场景、故事来串联内容，避免依赖连接词开头
+## Natural Expression Requirements
+- Content should be like real people communicating naturally, not filling in templates
+- The opening of each storyboard should choose the most appropriate expression method based on the content itself
+- The same word can appear as an opening at most once in the entire narration
+- Prioritize using viewpoints, scenes, stories to connect content, avoid relying on conjunctions as openings

-## 内容结构建议
- 开场方式：可以用场景、故事、观点、现象等多种方式引入，不固定套路
- 核心内容：中间分镜展开核心观点，用生活化的例子帮助理解
- 结尾方式：最后分镜给出行动建议或启发，让观众有收获感
- 整体逻辑：遵循"引发共鸣 → 提出观点 → 深入讲解 → 给出启发"的叙述逻辑
+## Content Structure Suggestions
+- Opening method: Can use scenes, stories, viewpoints, phenomena, and other methods to introduce, no fixed routine
+- Core content: Middle storyboards expand core viewpoints, use life examples to help understanding
+- Ending method: Last storyboard provides action suggestions or inspiration, giving the audience a sense of gain
+- Overall logic: Follow the narrative logic of "resonate → propose viewpoint → in-depth explanation → provide inspiration"

-## 其他规范
- 禁止项：不出现网址、表情符号、数字编号、不说空话套话、不过度煽情
- 字数检查：生成后必须自我验证不少于{min_words}个字，如不足则补充具体观点或例子
+## Other Specifications
+- Prohibitions: No URLs, emojis, numeric numbering, no empty talk or clichés, no excessive sentimentality
+- Word count check: After generation, must self-verify not less than {min_words} words. If insufficient, supplement with specific viewpoints or examples

-## 分镜连贯性要求
- {n_storyboard} 个分镜应围绕话题展开，形成完整的观点表达
- 遵循"吸引注意 → 提出观点 → 深入讲解 → 给出启发"的叙述逻辑
- 每个分镜像同一个人在连贯分享观点，语气一致、自然流畅
- 通过观点的递进自然过渡，形成完整的论述脉络
- 确保内容有价值、有启发，让观众觉得"这个视频值得看"
+## Storyboard Coherence Requirements
+- {n_storyboard} storyboards should expand around the topic, forming a complete viewpoint expression
+- Follow the narrative logic of "attract attention → propose viewpoint → in-depth explanation → provide inspiration"
+- Each storyboard should sound like the same person continuously sharing viewpoints, with consistent and natural tone
+- Naturally transition through the progression of viewpoints, forming a complete argumentative thread
+- Ensure content is valuable and inspiring, making the audience feel "this video is worth watching"

-# 输出格式
-严格按照以下JSON格式输出，不要添加任何额外的文字说明：
+# Output Format
+Strictly output in the following JSON format, do not add any additional text explanations:


 ```json
 {{
  "narrations": [
-    "第一段旁白内容",
-    "第二段旁白内容",
-    "第三段旁白内容"
+    "First narration content",
+    "Second narration content",
+    "Third narration content"
  ]
 }}
 ```

-# 重要提醒
-1. 只输出JSON格式内容，不要添加任何解释说明
-2. 确保JSON格式严格正确，可以被程序直接解析
-3. 旁白必须严格控制在{min_words}~{max_words}字之间，用通俗易懂的语言
-4. {n_storyboard} 个分镜要围绕话题展开，形成完整的观点表达
-5. 每个分镜都要有价值，提供洞察，避免空洞的陈述
-6. 输出格式为 {{"narrations": [旁白数组]}} 的JSON对象
+# Important Reminders
+1. Only output JSON format content, do not add any explanations
+2. Ensure JSON format is strictly correct and can be directly parsed by the program
+3. Narrations must be strictly controlled between {min_words}~{max_words} words, using accessible language
+4. {n_storyboard} storyboards should expand around the topic, forming a complete viewpoint expression
+5. Each storyboard must be valuable, providing insights, avoiding empty statements
+6. Output format is {{"narrations": [narration array]}} JSON object

-【多样性核心要求 - 必须严格执行】
-7. 第一句旁白不要固定用某个词开头，每次创作都要根据话题内容自然选择不同的开头
-8. 同一个词（如"有时候"、"你有没有"、"其实"、"想象一下"等）在所有旁白中作为开头最多只能出现1次
-9. 不要形成任何隐藏的句式规律，每个分镜的开头要真正做到独立思考、自然表达
-10. 检查你的输出：如果发现有任何词作为开头重复出现2次或以上，必须修改
-11. 输出语言要求：严格按照用户输入的话题或主题的语种输出，如：用户输入的是英文，则输出的文案必须为英文，中文也是一样。
+[Diversity Core Requirements - Must Strictly Execute]
+7. The first narration should not use a fixed word as an opening. Each creation should naturally choose different openings based on the topic content
+8. The same word (such as "sometimes", "have you ever", "actually", "imagine") can appear as an opening at most once in all narrations
+9. Do not form any hidden sentence pattern rules. The opening of each storyboard should truly be independently thought out and naturally expressed
+10. Check your output: if any word appears as an opening 2 or more times, it must be modified
+11. Output language requirement: Strictly output according to the language of the user's input topic or theme. For example: if the user's input is in English, the output copy must be in English, same for Chinese.

-现在，请为话题创作 {n_storyboard} 个分镜的旁白。
-⚠️ 特别注意：写完后自查所有分镜的开头，确保没有重复使用同一个词或短语作为开头。
-只输出JSON，不要其他内容。
+Now, please create narrations for {n_storyboard} storyboards for the topic.
+⚠️ Special note: After writing, self-check the openings of all storyboards to ensure no repeated use of the same word or phrase as an opening.
+Only output JSON, no other content.
 """


--- a/pixelle_video/prompts/video_generation.py
+++ b/pixelle_video/prompts/video_generation.py
@@ -20,60 +20,60 @@ import json
 from typing import List


-VIDEO_PROMPT_GENERATION_PROMPT = """# 角色定位
-你是一个专业的视频创意设计师，擅长为视频脚本创作富有动感和表现力的视频生成提示词，将叙述内容转化为生动的视频画面。
+VIDEO_PROMPT_GENERATION_PROMPT = """# Role Definition
+You are a professional video creative designer, skilled at creating dynamic and expressive video generation prompts for video scripts, transforming narrative content into vivid video scenes.

-# 核心任务
-基于已有的视频脚本，为每个分镜的"旁白内容"创作对应的**英文**视频生成提示词，确保视频画面与叙述内容完美配合，通过动态画面增强观众的理解和记忆。
+# Core Task
+Based on the existing video script, create corresponding **English** video generation prompts for each storyboard's "narration content", ensuring video scenes perfectly match the narrative content and enhance audience understanding and memory through dynamic visuals.

-**重要：输入包含 {narrations_count} 个旁白，你必须为每个旁白都生成一个对应的视频提示词，总共输出 {narrations_count} 个视频提示词。**
+**Important: The input contains {narrations_count} narrations. You must generate one corresponding video prompt for each narration, totaling {narrations_count} video prompts.**

-# 输入内容
+# Input Content
 {narrations_json}

-# 输出要求
+# Output Requirements

-## 视频提示词规范
- 语言：**必须使用英文**（用于 AI 视频生成模型）
- 描述结构：scene + character action + camera movement + emotion + atmosphere
- 描述长度：确保描述清晰完整且富有创意（建议 50-100 个英文单词）
- 动态元素：强调动作、运动、变化等动态效果
+## Video Prompt Specifications
+- Language: **Must use English** (for AI video generation models)
+- Description structure: scene + character action + camera movement + emotion + atmosphere
+- Description length: Ensure clear, complete, and creative descriptions (recommended 50-100 English words)
+- Dynamic elements: Emphasize actions, movements, changes, and other dynamic effects

-## 视觉创意要求
- 每个视频都要准确反映对应旁白的具体内容和情感
- 突出画面的动态性：角色动作、物体运动、镜头移动、场景转换等
- 使用象征手法将抽象概念视觉化（如用流动的水代表时间流逝，用上升的阶梯代表进步等）
- 画面要表现出丰富的情感和动作，增强视觉冲击力
- 通过镜头语言（推拉摇移）和剪辑节奏增强表现力
+## Visual Creative Requirements
+- Each video must accurately reflect the specific content and emotion of the corresponding narration
+- Highlight visual dynamics: character actions, object movements, camera movements, scene transitions, etc.
+- Use symbolic techniques to visualize abstract concepts (e.g., use flowing water to represent the passage of time, rising stairs to represent progress, etc.)
+- Scenes should express rich emotions and actions to enhance visual impact
+- Enhance expressiveness through camera language (push, pull, pan, tilt) and editing rhythm

-## 关键英文词汇参考
- 动作：moving, running, flowing, transforming, growing, falling
- 镜头：camera pan, zoom in, zoom out, tracking shot, aerial view
- 转场：transition, fade in, fade out, dissolve
- 氛围：dynamic, energetic, peaceful, dramatic, mysterious
- 光影：lighting changes, shadows moving, sunlight streaming
+## Key English Vocabulary Reference
+- Actions: moving, running, flowing, transforming, growing, falling
+- Camera: camera pan, zoom in, zoom out, tracking shot, aerial view
+- Transitions: transition, fade in, fade out, dissolve
+- Atmosphere: dynamic, energetic, peaceful, dramatic, mysterious
+- Lighting: lighting changes, shadows moving, sunlight streaming

-## 视频与文案配合原则
- 视频要服务于文案，成为文案内容的视觉延伸
- 避免与文案内容无关或矛盾的视觉元素
- 选择最能增强文案说服力的动态表现方式
- 确保观众能通过视频动态快速理解文案的核心观点
+## Video and Copy Coordination Principles
+- Videos should serve the copy, becoming a visual extension of the copy content
+- Avoid visual elements unrelated to or contradicting the copy content
+- Choose dynamic presentation methods that best enhance the persuasiveness of the copy
+- Ensure the audience can quickly understand the core viewpoint of the copy through video dynamics

-## 创意指导
-1. **现象描述类文案**：用动态场景表现社会现象的发生过程
-2. **原因分析类文案**：用因果关系的动态演变表现内在逻辑
-3. **影响论证类文案**：用后果场景的动态展开或对比表现影响程度
-4. **深入探讨类文案**：用抽象概念的动态具象化表现深刻思考
-5. **结论启发类文案**：用开放式动态场景或指引性运动表现启发性
+## Creative Guidance
+1. **Phenomenon Description Copy**: Use dynamic scenes to represent the occurrence process of social phenomena
+2. **Cause Analysis Copy**: Use dynamic evolution of cause-and-effect relationships to represent internal logic
+3. **Impact Argumentation Copy**: Use dynamic unfolding of consequence scenes or contrasts to represent the degree of impact
+4. **In-depth Discussion Copy**: Use dynamic concretization of abstract concepts to represent deep thinking
+5. **Conclusion Inspiration Copy**: Use open-ended dynamic scenes or guiding movements to represent inspiration

-## 视频特有注意事项
- 强调动态：每个视频都应该包含明显的动作或运动
- 镜头语言：适当使用推拉摇移等镜头技巧增强表现力
- 时长考虑：视频应该是连贯的动态过程，不是静态画面
- 流畅性：注意动作的流畅性和自然性
+## Video-Specific Considerations
+- Emphasize dynamics: Each video should include obvious actions or movements
+- Camera language: Appropriately use camera techniques such as push, pull, pan, tilt to enhance expressiveness
+- Duration consideration: Videos should be a coherent dynamic process, not static images
+- Fluidity: Pay attention to the fluidity and naturalness of actions

-# 输出格式
-严格按照以下JSON格式输出，**视频提示词必须是英文**：
+# Output Format
+Strictly output in the following JSON format, **video prompts must be in English**:

 ```json
 {{
@@ -84,18 +84,18 @@ VIDEO_PROMPT_GENERATION_PROMPT = """# 角色定位
 }}
 ```

-# 重要提醒
-1. 只输出JSON格式内容，不要添加任何解释说明
-2. 确保JSON格式严格正确，可以被程序直接解析
-3. 输入是 {{"narrations": [旁白数组]}} 格式，输出是 {{"video_prompts": [视频提示词数组]}} 格式
-4. **输出的video_prompts数组必须恰好包含 {narrations_count} 个元素，与输入的narrations数组一一对应**
-5. **视频提示词必须使用英文**（for AI video generation models）
-6. 视频提示词必须准确反映对应旁白的具体内容和情感
-7. 每个视频都要强调动态性和运动感，避免静态描述
-8. 适当使用镜头语言增强表现力
-9. 确保视频画面能增强文案的说服力和观众的理解度
+# Important Reminders
+1. Only output JSON format content, do not add any explanations
+2. Ensure JSON format is strictly correct and can be directly parsed by the program
+3. Input is {{"narrations": [narration array]}} format, output is {{"video_prompts": [video prompt array]}} format
+4. **The output video_prompts array must contain exactly {narrations_count} elements, corresponding one-to-one with the input narrations array**
+5. **Video prompts must use English** (for AI video generation models)
+6. Video prompts must accurately reflect the specific content and emotion of the corresponding narration
+7. Each video must emphasize dynamics and sense of movement, avoid static descriptions
+8. Appropriately use camera language to enhance expressiveness
+9. Ensure video scenes can enhance the persuasiveness of the copy and audience understanding

-现在，请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**视频提示词。只输出JSON，不要其他内容。
+Now, please create {narrations_count} corresponding **English** video prompts for the above {narrations_count} narrations. Only output JSON, no other content.
 """


--- a/pixelle_video/utils/content_generators.py
+++ b/pixelle_video/utils/content_generators.py
@@ -57,7 +57,8 @@ async def generate_title(
    # Use LLM to generate title
    from pixelle_video.prompts import build_title_generation_prompt
    
-    prompt = build_title_generation_prompt(content, max_length=500)
+    # Pass max_length to prompt so LLM knows the character limit
+    prompt = build_title_generation_prompt(content, max_length=max_length)
    response = await llm_service(prompt, temperature=0.7, max_tokens=50)
    
    # Clean up response
@@ -69,9 +70,23 @@ async def generate_title(
    if title.startswith("'") and title.endswith("'"):
        title = title[1:-1]
    
-    # Limit to max_length (safety)
+    # Remove trailing punctuation
+    title = title.rstrip('.,!?;:\'"')
+    
+    # Safety: if still over limit, truncate smartly
    if len(title) > max_length:
-        title = title[:max_length]
+        # Try to truncate at word boundary
+        truncated = title[:max_length]
+        last_space = truncated.rfind(' ')
+        
+        # Only cut at the word boundary if the last space lies beyond 60% of max_length (otherwise too much text would be lost)
+        if last_space > max_length * 0.6:
+            title = truncated[:last_space]
+        else:
+            title = truncated
+        
+        # Remove any trailing punctuation after truncation
+        title = title.rstrip('.,!?;:\'"')
    
    logger.debug(f"Generated title: '{title}' (length: {len(title)})")
    return title
--- a/workflows/runninghub/image_qwen_chinese_cartoon.json
+++ b/workflows/runninghub/image_qwen_chinese_cartoon.json
@@ -0,0 +1,5 @@
+{
+  "source": "runninghub",
+  "workflow_id": "1988434426705133569"
+}
+
--- a/workflows/runninghub/video_Z_image_wan2.2.json
+++ b/workflows/runninghub/video_Z_image_wan2.2.json
@@ -0,0 +1,5 @@
+{
+  "source": "runninghub",
+  "workflow_id": "1993931250872369154"
+}
+
--- a/workflows/runninghub/video_qwen_wan2.2.json
+++ b/workflows/runninghub/video_qwen_wan2.2.json
@@ -0,0 +1,5 @@
+{
+  "source": "runninghub",
+  "workflow_id": "1993608528969531394"
+}
+
--- a/workflows/selfhost/image_qwen.json
+++ b/workflows/selfhost/image_qwen.json
@@ -0,0 +1,184 @@
+{
+  "3": {
+    "inputs": {
+      "seed": 388600705609480,
+      "steps": 4,
+      "cfg": 1,
+      "sampler_name": "euler",
+      "scheduler": "beta",
+      "denoise": 1,
+      "model": [
+        "86",
+        0
+      ],
+      "positive": [
+        "6",
+        0
+      ],
+      "negative": [
+        "7",
+        0
+      ],
+      "latent_image": [
+        "58",
+        0
+      ]
+    },
+    "class_type": "KSampler",
+    "_meta": {
+      "title": "KSampler"
+    }
+  },
+  "6": {
+    "inputs": {
+      "text": "",
+      "clip": [
+        "67",
+        1
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "$prompt.text"
+    }
+  },
+  "7": {
+    "inputs": {
+      "text": "NSFW",
+      "clip": [
+        "67",
+        1
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "CLIP Text Encode (Negative Prompt)"
+    }
+  },
+  "8": {
+    "inputs": {
+      "samples": [
+        "3",
+        0
+      ],
+      "vae": [
+        "39",
+        0
+      ]
+    },
+    "class_type": "VAEDecode",
+    "_meta": {
+      "title": "VAE Decode"
+    }
+  },
+  "37": {
+    "inputs": {
+      "unet_name": "qwen_image_fp8_e4m3fn.safetensors",
+      "weight_dtype": "default"
+    },
+    "class_type": "UNETLoader",
+    "_meta": {
+      "title": "Load Diffusion Model"
+    }
+  },
+  "38": {
+    "inputs": {
+      "clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors",
+      "type": "qwen_image",
+      "device": "default"
+    },
+    "class_type": "CLIPLoader",
+    "_meta": {
+      "title": "Load CLIP"
+    }
+  },
+  "39": {
+    "inputs": {
+      "vae_name": "qwen_image_vae.safetensors"
+    },
+    "class_type": "VAELoader",
+    "_meta": {
+      "title": "Load VAE"
+    }
+  },
+  "58": {
+    "inputs": {
+      "width": [
+        "90",
+        0
+      ],
+      "height": [
+        "91",
+        0
+      ],
+      "batch_size": 1
+    },
+    "class_type": "EmptySD3LatentImage",
+    "_meta": {
+      "title": "EmptySD3LatentImage"
+    }
+  },
+  "60": {
+    "inputs": {
+      "filename_prefix": "ComfyUI",
+      "images": [
+        "8",
+        0
+      ]
+    },
+    "class_type": "SaveImage",
+    "_meta": {
+      "title": "Save Image"
+    }
+  },
+  "67": {
+    "inputs": {
+      "lora_name": "Qwen-Image-Lightning-4steps-V1.0.safetensors",
+      "strength_model": 1.0000000000000002,
+      "strength_clip": 1,
+      "model": [
+        "37",
+        0
+      ],
+      "clip": [
+        "38",
+        0
+      ]
+    },
+    "class_type": "LoraLoader",
+    "_meta": {
+      "title": "Load LoRA"
+    }
+  },
+  "86": {
+    "inputs": {
+      "shift": 3.1000000000000005,
+      "model": [
+        "67",
+        0
+      ]
+    },
+    "class_type": "ModelSamplingAuraFlow",
+    "_meta": {
+      "title": "ModelSamplingAuraFlow"
+    }
+  },
+  "90": {
+    "inputs": {
+      "value": 768
+    },
+    "class_type": "easy int",
+    "_meta": {
+      "title": "$width.value"
+    }
+  },
+  "91": {
+    "inputs": {
+      "value": 1024
+    },
+    "class_type": "easy int",
+    "_meta": {
+      "title": "$height.value"
+    }
+  }
+}