diff --git a/pixelle_video/prompts/asset_script_generation.py b/pixelle_video/prompts/asset_script_generation.py index 0aade20..d9db57a 100644 --- a/pixelle_video/prompts/asset_script_generation.py +++ b/pixelle_video/prompts/asset_script_generation.py @@ -17,37 +17,38 @@ For generating video scripts based on user-provided assets. """ -ASSET_SCRIPT_GENERATION_PROMPT = """你是一位专业的视频脚本创作者。请基于用户提供的视频意图和可用素材,生成一个 {duration} 秒的视频脚本。 +ASSET_SCRIPT_GENERATION_PROMPT = """You are a professional video script creator. Based on the user's video intent and available assets, generate a {duration}-second video script. Before doing so, you need to detect the user's input language - if it's English, then all copy must be in English. Strictly follow the user's input language type as the standard, ensuring consistent and corresponding copy! -## 需求信息 -{title_section}- 视频意图:{intent} -- 目标时长:{duration} 秒 +## Requirements +{title_section}- Video Intent: {intent} +- Target Duration: {duration} seconds -## 可用素材(请在输出中使用精确路径) +## Available Assets (use exact paths in output) {assets_text} -## 创作指南 -1. 根据目标时长决定需要多少个场景(通常每个场景 5-15 秒) -2. 为每个场景从可用素材中直接分配一个素材 -3. 每个场景可以包含 1-3 句旁白 -4. 尽量使用所有可用素材,但如有需要可以复用素材 -5. 所有场景的总时长应约等于 {duration} 秒 +## Creation Guidelines +1. Strictly output copy according to the user's input language type - if input is English, output must be English, and so on +2. Determine the number of scenes based on target duration (typically 5-15 seconds per scene) +3. Assign one asset from available assets to each scene +4. Each scene can contain 1-3 narration sentences +5. Try to use all available assets, but assets can be reused if needed +6. 
Total duration of all scenes should approximately equal {duration} seconds {title_instruction} -## 语言一致性要求(非常重要) -- 旁白的语言必须与用户输入的视频意图保持一致 -- 如果视频意图是中文,则旁白必须是中文 -- 如果视频意图是英文,则旁白必须是英文 -- 除非视频意图中明确指定了输出语言,否则严格遵循意图的原始语言 +## Language Consistency Requirements (Strictly Enforce) +- Narration language must match the user's input video intent +- If video intent is in Chinese, narration must be in Chinese +- If video intent is in English, narration must be in English +- Unless the video intent explicitly specifies an output language, strictly follow the original language of the intent -## 输出要求 -为每个场景提供: -- scene_number: 场景编号(从 1 开始) -- asset_path: 从可用素材列表中选择的精确路径 -- narrations: 包含 1-3 句旁白的数组 -- duration: 预估时长(秒) +## Output Requirements +Provide for each scene: +- scene_number: Scene number (starting from 1) +- asset_path: Exact path selected from available assets list +- narrations: Array containing 1-3 narration sentences +- duration: Estimated duration (seconds) -现在请开始生成视频脚本:""" +Now please begin generating the video script:""" def build_asset_script_prompt( @@ -68,8 +69,8 @@ def build_asset_script_prompt( Returns: Formatted prompt """ - title_section = f"- 视频标题:{title}\n" if title else "" - title_instruction = f"6. 旁白内容应与视频标题保持一致:{title}\n" if title else "" + title_section = f"- Video Title: {title}\n" if title else "" + title_instruction = f"7. Narration content should be consistent with the video title: {title}\n" if title else "" return ASSET_SCRIPT_GENERATION_PROMPT.format( duration=duration, diff --git a/pixelle_video/prompts/content_narration.py b/pixelle_video/prompts/content_narration.py index 6271bee..4c078cd 100644 --- a/pixelle_video/prompts/content_narration.py +++ b/pixelle_video/prompts/content_narration.py @@ -17,61 +17,63 @@ For extracting/refining narrations from user-provided content. 
""" -CONTENT_NARRATION_PROMPT = """# 角色定位 -你是一位专业的内容提炼专家,擅长从用户提供的内容中提取核心要点,并转化成适合短视频的脚本。 +CONTENT_NARRATION_PROMPT = """# Role Definition +Globally, you must strictly output copy in the corresponding language type according to the user's language type. +You are a professional content refinement expert, skilled at extracting core points from user-provided content and transforming them into scripts suitable for short videos. -# 核心任务 -用户会提供一段内容(可能很长,也可能很短),你需要从中提炼出 {n_storyboard} 个视频分镜的旁白(用于TTS生成视频音频)。 +# Core Task +The user will provide content (which may be long or short), and you need to extract narrations for {n_storyboard} video storyboards (for TTS to generate video audio). -# 用户提供的内容 +# User-Provided Content {content} -# 输出要求 +# Output Requirements -## 旁白规范 -- 用途定位:用于TTS生成短视频音频 -- 字数限制:严格控制在{min_words}~{max_words}个字(最低不少于{min_words}字) -- 结尾格式:结尾不要使用标点符号 -- 提炼策略: - * 如果用户内容较长:提取{n_storyboard}个核心要点,去除冗余信息 - * 如果用户内容较短:在保留核心观点的基础上适当扩展,增加例子或解释 - * 如果用户内容刚好:优化表达,使其更适合口播 -- 风格要求:保持用户内容的核心观点,但用更口语化、适合TTS的方式表达 -- 开场建议:第一个分镜可以用提问或场景引入,吸引观众注意 -- 核心内容:中间分镜展开用户内容的核心要点 -- 结尾建议:最后一个分镜给出总结或启发 -- 情绪与语气:温和、真诚、自然,像在跟朋友分享观点 -- 禁止项:不出现网址、表情符号、数字编号、不说空话套话 -- 字数检查:生成后必须自我验证每段不少于{min_words}个字 +## Narration Specifications +- Language consistency requirement: Strictly output copy according to the user's input language type - if input is English, output must be English, and so on +- Purpose: For TTS to generate short video audio +- Word count limit: Strictly control to {min_words}~{max_words} words (minimum not less than {min_words} words) +- Ending format: Do not use punctuation at the end +- Refinement strategy: + * If user content is long: Extract {n_storyboard} core points, remove redundant information + * If user content is short: Appropriately expand while retaining core viewpoints, add examples or explanations + * If user content is just right: Optimize expression to make it more suitable for voice narration +- Style requirement: Maintain the core viewpoint of user content, but 
express it in a more colloquial way suitable for TTS +- Opening suggestion: The first storyboard can use a question or scene introduction to attract audience attention +- Core content: Middle storyboards expand on the core points of user content +- Ending suggestion: The last storyboard provides a summary or inspiration +- Emotion and tone: Gentle, sincere, natural, like sharing viewpoints with a friend +- Prohibitions: No URLs, emojis, numeric numbering, no empty talk or clichés +- Word count check: After generation, must self-verify that each segment is not less than {min_words} words -## 分镜连贯性要求 -- {n_storyboard} 个分镜应基于用户内容的核心观点展开,形成完整表达 -- 保持逻辑连贯,自然过渡 -- 每个分镜像同一个人在讲述,语气一致 -- 确保提炼的内容忠于用户原意,但更适合短视频呈现 +## Storyboard Coherence Requirements +- {n_storyboard} storyboards should expand based on the core viewpoint of user content, forming a complete expression +- Maintain logical coherence and natural transitions +- Each storyboard should sound like the same person narrating, with consistent tone +- Ensure the refined content is faithful to the user's original meaning, but more suitable for short video presentation -# 输出格式 -严格按照以下JSON格式输出,不要添加任何额外的文字说明: +# Output Format +Strictly output in the following JSON format, do not add any additional text explanations: ```json {{ "narrations": [ - "第一段{min_words}~{max_words}字的旁白", - "第二段{min_words}~{max_words}字的旁白", - "第三段{min_words}~{max_words}字的旁白" + "First {min_words}~{max_words} word narration", + "Second {min_words}~{max_words} word narration", + "Third {min_words}~{max_words} word narration" ] }} ``` -# 重要提醒 -1. 只输出JSON格式内容,不要添加任何解释说明 -2. 确保JSON格式严格正确,可以被程序直接解析 -3. 旁白必须严格控制在{min_words}~{max_words}字之间 -4. 必须输出恰好 {n_storyboard} 个分镜的旁白 -5. 内容要忠于用户原意,但优化为更适合口播的表达 -6. 输出格式为 {{"narrations": [旁白数组]}} 的JSON对象 +# Important Reminders +1. Only output JSON format content, do not add any explanations +2. Ensure JSON format is strictly correct and can be directly parsed by the program +3. 
Narrations must be strictly controlled between {min_words}~{max_words} words +4. Must output exactly {n_storyboard} storyboard narrations +5. Content must be faithful to the user's original meaning, but optimized for voice narration expression +6. Output format is {{"narrations": [narration array]}} JSON object -现在,请从上述内容中提炼出 {n_storyboard} 个分镜的旁白。只输出JSON,不要其他内容。 +Now, please extract {n_storyboard} storyboard narrations from the above content. Only output JSON, no other content. """ diff --git a/pixelle_video/prompts/image_generation.py b/pixelle_video/prompts/image_generation.py index 9e763d4..0890666 100644 --- a/pixelle_video/prompts/image_generation.py +++ b/pixelle_video/prompts/image_generation.py @@ -25,21 +25,21 @@ from typing import List, Optional IMAGE_STYLE_PRESETS = { "stick_figure": { - "name": "火柴人简笔画", + "name": "Stick Figure Sketch", "description": "stick figure style sketch, black and white lines, pure white background, minimalist hand-drawn feel", - "use_case": "通用场景,简单直观" + "use_case": "General scenes, simple and intuitive" }, "minimal": { - "name": "极简抽象", + "name": "Minimalist Abstract", "description": "minimalist abstract art, geometric shapes, clean composition, modern design, soft pastel colors", - "use_case": "现代感、艺术感" + "use_case": "Modern, artistic feel" }, "concept": { - "name": "概念化视觉", + "name": "Conceptual Visual", "description": "conceptual visual metaphors, symbolic elements, thought-provoking imagery, artistic interpretation", - "use_case": "深度内容、哲学思考" + "use_case": "Deep content, philosophical thinking" }, } @@ -47,52 +47,52 @@ IMAGE_STYLE_PRESETS = { DEFAULT_IMAGE_STYLE = "stick_figure" -IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位 -你是一个专业的视觉创意设计师,擅长为视频脚本创作富有表现力和象征性的图像提示词,将抽象概念转化为具象的视觉画面。 +IMAGE_PROMPT_GENERATION_PROMPT = """# Role Definition +You are a professional visual creative designer, skilled at creating expressive and symbolic image prompts for video scripts, transforming abstract concepts into concrete visual scenes. 
-# 核心任务 -基于已有的视频脚本,为每个分镜的"旁白内容"创作对应的**英文**图像提示词,确保视觉画面与叙述内容完美配合,增强观众的理解和记忆。 +# Core Task +Based on the existing video script, create corresponding **English** image prompts for each storyboard's "narration content", ensuring visual scenes perfectly match the narrative content and enhance audience understanding and memory. -**重要:输入包含 {narrations_count} 个旁白,你必须为每个旁白都生成一个对应的图像提示词,总共输出 {narrations_count} 个图像提示词。** +**Important: The input contains {narrations_count} narrations. You must generate one corresponding image prompt for each narration, totaling {narrations_count} image prompts.** -# 输入内容 +# Input Content {narrations_json} -# 输出要求 +# Output Requirements -## 图像提示词规范 -- 语言:**必须使用英文**(用于 AI 图像生成模型) -- 描述结构:scene + character action + emotion + symbolic elements -- 描述长度:确保描述清晰完整且富有创意(建议 50-100 个英文单词) +## Image Prompt Specifications +- Language: **Must use English** (for AI image generation models) +- Description structure: scene + character action + emotion + symbolic elements +- Description length: Ensure clear, complete, and creative descriptions (recommended 50-100 English words) -## 视觉创意要求 -- 每个图像都要准确反映对应旁白的具体内容和情感 -- 使用象征手法将抽象概念视觉化(如用路径代表人生选择,用锁链代表束缚等) -- 画面要表现出丰富的情感和动作,增强视觉冲击力 -- 通过构图和元素安排突出主题,避免过于直白的表现方式 +## Visual Creative Requirements +- Each image must accurately reflect the specific content and emotion of the corresponding narration +- Use symbolic techniques to visualize abstract concepts (e.g., use paths to represent life choices, chains to represent constraints, etc.) 
+- Scenes should express rich emotions and actions to enhance visual impact +- Highlight themes through composition and element arrangement, avoid overly literal representations -## 关键英文词汇参考 -- 象征元素:symbolic elements -- 表情:expression / facial expression -- 动作:action / gesture / movement -- 场景:scene / setting -- 氛围:atmosphere / mood +## Key English Vocabulary Reference +- Symbolic elements: symbolic elements +- Expression: expression / facial expression +- Action: action / gesture / movement +- Scene: scene / setting +- Atmosphere: atmosphere / mood -## 视觉与文案配合原则 -- 图像要服务于文案,成为文案内容的视觉延伸 -- 避免与文案内容无关或矛盾的视觉元素 -- 选择最能增强文案说服力的视觉表现方式 -- 确保观众能通过图像快速理解文案的核心观点 +## Visual and Copy Coordination Principles +- Images should serve the copy, becoming a visual extension of the copy content +- Avoid visual elements unrelated to or contradicting the copy content +- Choose visual presentation methods that best enhance the persuasiveness of the copy +- Ensure the audience can quickly understand the core viewpoint of the copy through images -## 创意指导 -1. **现象描述类文案**:用直观的场景表现社会现象 -2. **原因分析类文案**:用因果关系的视觉比喻表现内在逻辑 -3. **影响论证类文案**:用后果场景或对比手法表现影响程度 -4. **深入探讨类文案**:用抽象概念的具象化表现深刻思考 -5. **结论启发类文案**:用开放式场景或指引性元素表现启发性 +## Creative Guidance +1. **Phenomenon Description Copy**: Use intuitive scenes to represent social phenomena +2. **Cause Analysis Copy**: Use visual metaphors of cause-and-effect relationships to represent internal logic +3. **Impact Argumentation Copy**: Use consequence scenes or contrast techniques to represent the degree of impact +4. **In-depth Discussion Copy**: Use concretization of abstract concepts to represent deep thinking +5. **Conclusion Inspiration Copy**: Use open-ended scenes or guiding elements to represent inspiration -# 输出格式 -严格按照以下JSON格式输出,**图像提示词必须是英文**: +# Output Format +Strictly output in the following JSON format, **image prompts must be in English**: ```json {{ @@ -103,17 +103,17 @@ IMAGE_PROMPT_GENERATION_PROMPT = """# 角色定位 }} ``` -# 重要提醒 -1. 
只输出JSON格式内容,不要添加任何解释说明 -2. 确保JSON格式严格正确,可以被程序直接解析 -3. 输入是 {{"narrations": [旁白数组]}} 格式,输出是 {{"image_prompts": [图像提示词数组]}} 格式 -4. **输出的image_prompts数组必须恰好包含 {narrations_count} 个元素,与输入的narrations数组一一对应** -5. **图像提示词必须使用英文**(for AI image generation models) -6. 图像提示词必须准确反映对应旁白的具体内容和情感 -7. 每个图像都要有创意性和视觉冲击力,避免千篇一律 -8. 确保视觉画面能增强文案的说服力和观众的理解度 +# Important Reminders +1. Only output JSON format content, do not add any explanations +2. Ensure JSON format is strictly correct and can be directly parsed by the program +3. Input is {{"narrations": [narration array]}} format, output is {{"image_prompts": [image prompt array]}} format +4. **The output image_prompts array must contain exactly {narrations_count} elements, corresponding one-to-one with the input narrations array** +5. **Image prompts must use English** (for AI image generation models) +6. Image prompts must accurately reflect the specific content and emotion of the corresponding narration +7. Each image must be creative and visually impactful, avoid being monotonous +8. Ensure visual scenes can enhance the persuasiveness of the copy and audience understanding -现在,请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**图像提示词。只输出JSON,不要其他内容。 +Now, please create {narrations_count} corresponding **English** image prompts for the above {narrations_count} narrations. Only output JSON, no other content. """ diff --git a/pixelle_video/prompts/title_generation.py b/pixelle_video/prompts/title_generation.py index 751681d..839c38f 100644 --- a/pixelle_video/prompts/title_generation.py +++ b/pixelle_video/prompts/title_generation.py @@ -17,35 +17,69 @@ For generating video title from content. """ -TITLE_GENERATION_PROMPT = """Please generate a short, attractive title (within 10 characters) for the following content. +TITLE_GENERATION_PROMPT = """Please generate a short, attractive title for the following content. Content: {content} Requirements: -1. Brief and concise, within 10 characters -2. 
Accurately summarize the core content -3. Attractive, suitable as a video title -4. Output only the title text, no other content +1. **Language Consistency (CRITICAL)**: The title MUST be in the same language as the input content + - If the input content is in English, the title MUST be in English + - If the input content is in Chinese, the title MUST be in Chinese + - Strictly follow the language of the input content + +2. **Character Limit (CRITICAL)**: The title MUST NOT exceed {max_length} characters + - Count every character including spaces + - The title must be complete and meaningful within this limit + - Do NOT generate a title that would need to be cut off + +3. **Core Message (CRITICAL)**: The title MUST capture the MAIN POINT of the content + - Identify the central theme or key message + - Don't focus on just one aspect if the content has multiple important points + - Ensure the title accurately represents what the content is about + +4. **No Punctuation at End**: Do NOT include any punctuation marks at the end of the title + - No period (.), comma (,), exclamation mark (!), question mark (?), etc. + - The title should end with a word or number, not punctuation + +5. **Completeness**: Ensure the title is a complete, meaningful phrase + - Do not cut off in the middle of a word or number + - Do not create incomplete phrases like "Rise Early for" or "How to Make" + - Use abbreviations or shorter words if needed to fit the limit + +6. **Abbreviation Examples** (use when needed to fit character limit): + - For English: + * "10,000" → "10K" + * "per month" → "monthly" or "a month" + * "early to bed and early to rise" → "Sleep Early" or "Early Habits" + * "makes you healthy" → "for Health" or "Stay Healthy" + - For Chinese: + * "10,000元" → "万元" or "1万" + * "每个月" → "月入" or "月收" + +7. Accurately summarize the core content +8. Attractive and engaging, suitable as a video title +9. 
Output only the title text, no quotes, no explanations Title:""" -def build_title_generation_prompt(content: str, max_length: int = 500) -> str: +def build_title_generation_prompt(content: str, max_length: int = 15) -> str: """ Build title generation prompt Args: content: Content to generate title from - max_length: Maximum content length to use (default 500 chars) + max_length: Maximum title length in characters (default: 15) Returns: - Formatted prompt + Formatted prompt with character limit """ - # Take first max_length chars to avoid overly long prompts - content_preview = content[:max_length] + # Take first 500 chars to avoid overly long prompts + content_preview = content[:500] return TITLE_GENERATION_PROMPT.format( - content=content_preview + content=content_preview, + max_length=max_length ) diff --git a/pixelle_video/prompts/topic_narration.py b/pixelle_video/prompts/topic_narration.py index 9e23663..a6992fb 100644 --- a/pixelle_video/prompts/topic_narration.py +++ b/pixelle_video/prompts/topic_narration.py @@ -17,110 +17,117 @@ For generating narrations from a topic/theme. """ -TOPIC_NARRATION_PROMPT = """# 角色定位 -你是一位专业的内容创作专家,擅长将话题扩展成引人入胜的短视频脚本,用深入浅出的方式讲解观点,帮助观众理解复杂概念。 +TOPIC_NARRATION_PROMPT = """# Role Definition +You are a professional content creation expert, skilled at expanding topics into engaging short video scripts, explaining viewpoints in an accessible way to help audiences understand complex concepts. +Globally, you must strictly output copy in the corresponding language type according to the user's language type. -# 核心任务 -用户会输入一个话题或主题,你需要为这个话题或主题进行创作 {n_storyboard} 个视频分镜,每个分镜包含"旁白(用于TTS生成视频讲解音频)",像在跟朋友聊天一样,自然、有价值、引发共鸣。 +# Core Task +The user will input a topic or theme. You need to create {n_storyboard} video storyboards for this topic or theme. Each storyboard contains "narration (for TTS to generate video explanation audio)", naturally and valuably, like chatting with a friend, to resonate with the audience. 
+- Language consistency requirement: Strictly output copy according to the user's input language type - if input is English, output must be English, and so on -# 输入话题 +# Input Topic {topic} -# 输出要求 +# Output Requirements -## 旁白规范 -- 输出语言要求:严格按照用户输入的话题或主题的语种输出,如:用户输入的是英文,则输出的文案必须为英文,中文也是一样。 -- 用途定位:用于TTS生成短视频音频,通俗易懂地讲解话题 -- 字数限制:严格控制在{min_words}~{max_words}个字(最低不少于{min_words}字) -- 结尾格式:每段旁白的结尾不要使用标点符号,若旁白中出现断句读法必须使用中文标点(,。?!……:“”)来表达语气和停顿,自动判断并插入合适的标点符号,保留自然口语节奏(比如“对吗?不对。”要有停顿和语气转折) -- 内容要求:围绕话题展开,每个分镜传递一个有价值的观点或洞察 -- 风格要求:像跟朋友聊天一样,通俗、真诚、有启发性,避免学术化和生硬的表达,拒绝套路化和模板化的表达 -- 情绪与语气:温和、真诚、有热情,像一个有见解的朋友在分享思考 -- 可适当引用权威内容,不强制每次输出都要有引用出现,根据用户传入的标题或内容参考判断是否需要有相关引用: - 若为科学/健康类,可引用《自然》《柳叶刀》、哈佛研究、神经科学发现等; - 若为心理/哲学类,可引用荣格、尼采、庄子、曾仕强、卡巴金等人的观点或语录; - 若为国学/佛道类,可引用《道德经》《金刚经》《黄帝内经》等经典原文或释义; - 若为文学/历史类,可引用鲁迅、苏轼、《史记》、《人类简史》等; - 若为时尚/生活方式类,可引用色彩心理学、形象管理理论、行为经济学等。 - 根据上述举例,若有其他类型的方向和赛道也可检索引用相关书籍,但也要遵循不强制引用的要求。 +## Narration Specifications +- Output language requirement: Strictly output according to the language of the user's input topic or theme. For example: if the user's input is in English, the output copy must be in English, same for Chinese. +- Purpose: For TTS to generate short video audio, explaining topics in an accessible way +- Word count limit: Strictly control to {min_words}~{max_words} words (minimum not less than {min_words} words) +- Ending format: Do not use punctuation at the end of each narration. If there are sentence breaks in the narration, punctuation matching the output language (e.g., ,。?!……:“” for Chinese; , . ? ! … : for English) must be used to express tone and pauses. Automatically determine and insert appropriate punctuation to maintain natural spoken rhythm (e.g., "Right? Wrong." 
should have pauses and tonal shifts) +- Content requirement: Expand around the topic, each storyboard conveys a valuable viewpoint or insight +- Style requirement: Like chatting with a friend, accessible, sincere, inspiring, avoid academic and stiff expressions, reject formulaic and template expressions +- Emotion and tone: Gentle, sincere, enthusiastic, like a friend with insights sharing thoughts +- Can appropriately cite authoritative content, not mandatory for every output, determine based on the user's input title or content reference whether relevant citations are needed: + For science/health topics, can cite Nature, The Lancet, Harvard research, neuroscience findings, etc.; + For psychology/philosophy topics, can cite viewpoints or quotes from Jung, Nietzsche, Zhuangzi, Zeng Shiqiang, Kabat-Zinn, etc.; + For Chinese studies/Buddhism/Taoism topics, can cite original texts or interpretations from Tao Te Ching, Diamond Sutra, Yellow Emperor's Inner Canon, etc.; + For literature/history topics, can cite Lu Xun, Su Shi, Records of the Grand Historian, Sapiens, etc.; + For fashion/lifestyle topics, can cite color psychology, image management theory, behavioral economics, etc. + Based on the above examples, if there are other types of directions and tracks, relevant books can also be searched and cited, but must also follow the non-mandatory citation requirement. - 若有引用需自然融入,不生硬堆砌,不虚构出处。 + If there are citations, integrate them naturally, do not pile them up stiffly, do not fabricate sources. -## 开头多样性要求(最重要) -【核心原则】每个分镜的开头必须根据内容本身自然表达,拒绝任何形式的固定套路和模板化表达。 +## Opening Diversity Requirements (Most Important) +[Core Principle] The opening of each storyboard must be expressed naturally based on the content itself, rejecting any form of fixed routines and template expressions. 
-【表达方式灵活性】 -根据话题内容,可以采用陈述、场景、感叹、观点、问句、对比、故事等多种表达方式,但务必做到: -- 每个分镜根据要表达的具体内容选择最自然的开头 -- 绝不形成任何规律性的句式模式 -- 不要让任何一个词或短语成为"习惯性开头" +[Expression Flexibility] +Based on the topic content, various expression methods such as statements, scenes, exclamations, viewpoints, questions, contrasts, stories, etc. can be used, but must achieve: +- Each storyboard chooses the most natural opening based on the specific content to be expressed +- Never form any regular sentence pattern +- Do not let any word or phrase become a "habitual opening" -【严禁固定模式】 -❌ 绝对禁止以下行为: -- 形成"第N句总用X开头"的任何规律 -- 多次重复使用同一个连接词或句式作为开头 -- 按照某种隐藏的模板顺序来组织分镜 +[Strictly Prohibit Fixed Patterns] +❌ Absolutely prohibit the following behaviors: +- Forming any pattern of "the Nth sentence always starts with X" +- Repeatedly using the same conjunction or sentence pattern as an opening +- Organizing storyboards according to some hidden template order -【特别强调】 -- 第一个分镜的开头要完全根据话题内容自然选择,不要有任何固定词汇倾向 -- 整组旁白中,如果某个词(如"有时候"、"其实"、"你有没有")出现超过1次作为开头,就是失败的创作 -- 要像真人说话一样自然流畅,而不是套用任何句式模板 +[Special Emphasis] +- The opening of the first storyboard should be completely naturally chosen based on the topic content, without any fixed vocabulary tendency +- In the entire set of narrations, if any word (such as "sometimes", "actually", "have you ever") appears more than once as an opening, it is a failed creation +- Should be as natural and fluent as a real person speaking, not applying any sentence pattern template +## Language Consistency Requirements (Strictly Enforce) +- Narration language must match the language of the user's input topic +- If the topic is in Chinese, narration must be in Chinese +- If the topic is in English, narration must be in English +- Unless the topic explicitly specifies an output language, strictly follow the original language of the topic -## 自然表达要求 -- 内容应该像真人在自然交流,而不是按照模板填空 -- 每个分镜的开头要根据内容本身选择最合适的表达方式 -- 同一个词作为开头在整个旁白中最多只能出现1次 -- 优先用观点、场景、故事来串联内容,避免依赖连接词开头 +## Natural Expression Requirements +- Content 
should be like real people communicating naturally, not filling in templates +- The opening of each storyboard should choose the most appropriate expression method based on the content itself +- The same word can appear as an opening at most once in the entire narration +- Prioritize using viewpoints, scenes, stories to connect content, avoid relying on conjunctions as openings -## 内容结构建议 -- 开场方式:可以用场景、故事、观点、现象等多种方式引入,不固定套路 -- 核心内容:中间分镜展开核心观点,用生活化的例子帮助理解 -- 结尾方式:最后分镜给出行动建议或启发,让观众有收获感 -- 整体逻辑:遵循"引发共鸣 → 提出观点 → 深入讲解 → 给出启发"的叙述逻辑 +## Content Structure Suggestions +- Opening method: Can use scenes, stories, viewpoints, phenomena, and other methods to introduce, no fixed routine +- Core content: Middle storyboards expand core viewpoints, use life examples to help understanding +- Ending method: Last storyboard provides action suggestions or inspiration, giving the audience a sense of gain +- Overall logic: Follow the narrative logic of "resonate → propose viewpoint → in-depth explanation → provide inspiration" -## 其他规范 -- 禁止项:不出现网址、表情符号、数字编号、不说空话套话、不过度煽情 -- 字数检查:生成后必须自我验证不少于{min_words}个字,如不足则补充具体观点或例子 +## Other Specifications +- Prohibitions: No URLs, emojis, numeric numbering, no empty talk or clichés, no excessive sentimentality +- Word count check: After generation, must self-verify not less than {min_words} words. 
If insufficient, supplement with specific viewpoints or examples -## 分镜连贯性要求 -- {n_storyboard} 个分镜应围绕话题展开,形成完整的观点表达 -- 遵循"吸引注意 → 提出观点 → 深入讲解 → 给出启发"的叙述逻辑 -- 每个分镜像同一个人在连贯分享观点,语气一致、自然流畅 -- 通过观点的递进自然过渡,形成完整的论述脉络 -- 确保内容有价值、有启发,让观众觉得"这个视频值得看" +## Storyboard Coherence Requirements +- {n_storyboard} storyboards should expand around the topic, forming a complete viewpoint expression +- Follow the narrative logic of "attract attention → propose viewpoint → in-depth explanation → provide inspiration" +- Each storyboard should sound like the same person continuously sharing viewpoints, with consistent and natural tone +- Naturally transition through the progression of viewpoints, forming a complete argumentative thread +- Ensure content is valuable and inspiring, making the audience feel "this video is worth watching" -# 输出格式 -严格按照以下JSON格式输出,不要添加任何额外的文字说明: +# Output Format +Strictly output in the following JSON format, do not add any additional text explanations: ```json {{ "narrations": [ - "第一段旁白内容", - "第二段旁白内容", - "第三段旁白内容" + "First narration content", + "Second narration content", + "Third narration content" ] }} ``` -# 重要提醒 -1. 只输出JSON格式内容,不要添加任何解释说明 -2. 确保JSON格式严格正确,可以被程序直接解析 -3. 旁白必须严格控制在{min_words}~{max_words}字之间,用通俗易懂的语言 -4. {n_storyboard} 个分镜要围绕话题展开,形成完整的观点表达 -5. 每个分镜都要有价值,提供洞察,避免空洞的陈述 -6. 输出格式为 {{"narrations": [旁白数组]}} 的JSON对象 +# Important Reminders +1. Only output JSON format content, do not add any explanations +2. Ensure JSON format is strictly correct and can be directly parsed by the program +3. Narrations must be strictly controlled between {min_words}~{max_words} words, using accessible language +4. {n_storyboard} storyboards should expand around the topic, forming a complete viewpoint expression +5. Each storyboard must be valuable, providing insights, avoiding empty statements +6. Output format is {{"narrations": [narration array]}} JSON object -【多样性核心要求 - 必须严格执行】 -7. 第一句旁白不要固定用某个词开头,每次创作都要根据话题内容自然选择不同的开头 -8. 
同一个词(如"有时候"、"你有没有"、"其实"、"想象一下"等)在所有旁白中作为开头最多只能出现1次 -9. 不要形成任何隐藏的句式规律,每个分镜的开头要真正做到独立思考、自然表达 -10. 检查你的输出:如果发现有任何词作为开头重复出现2次或以上,必须修改 -11. 输出语言要求:严格按照用户输入的话题或主题的语种输出,如:用户输入的是英文,则输出的文案必须为英文,中文也是一样。 +[Diversity Core Requirements - Must Strictly Execute] +7. The first narration should not use a fixed word as an opening. Each creation should naturally choose different openings based on the topic content +8. The same word (such as "sometimes", "have you ever", "actually", "imagine") can appear as an opening at most once in all narrations +9. Do not form any hidden sentence pattern rules. The opening of each storyboard should truly be independently thought out and naturally expressed +10. Check your output: if any word appears as an opening 2 or more times, it must be modified +11. Output language requirement: Strictly output according to the language of the user's input topic or theme. For example: if the user's input is in English, the output copy must be in English, same for Chinese. -现在,请为话题创作 {n_storyboard} 个分镜的旁白。 -⚠️ 特别注意:写完后自查所有分镜的开头,确保没有重复使用同一个词或短语作为开头。 -只输出JSON,不要其他内容。 +Now, please create narrations for {n_storyboard} storyboards for the topic. +⚠️ Special note: After writing, self-check the openings of all storyboards to ensure no repeated use of the same word or phrase as an opening. +Only output JSON, no other content. """ diff --git a/pixelle_video/prompts/video_generation.py b/pixelle_video/prompts/video_generation.py index f795012..3b42e0a 100644 --- a/pixelle_video/prompts/video_generation.py +++ b/pixelle_video/prompts/video_generation.py @@ -20,60 +20,60 @@ import json from typing import List -VIDEO_PROMPT_GENERATION_PROMPT = """# 角色定位 -你是一个专业的视频创意设计师,擅长为视频脚本创作富有动感和表现力的视频生成提示词,将叙述内容转化为生动的视频画面。 +VIDEO_PROMPT_GENERATION_PROMPT = """# Role Definition +You are a professional video creative designer, skilled at creating dynamic and expressive video generation prompts for video scripts, transforming narrative content into vivid video scenes. 
-# 核心任务 -基于已有的视频脚本,为每个分镜的"旁白内容"创作对应的**英文**视频生成提示词,确保视频画面与叙述内容完美配合,通过动态画面增强观众的理解和记忆。 +# Core Task +Based on the existing video script, create corresponding **English** video generation prompts for each storyboard's "narration content", ensuring video scenes perfectly match the narrative content and enhance audience understanding and memory through dynamic visuals. -**重要:输入包含 {narrations_count} 个旁白,你必须为每个旁白都生成一个对应的视频提示词,总共输出 {narrations_count} 个视频提示词。** +**Important: The input contains {narrations_count} narrations. You must generate one corresponding video prompt for each narration, totaling {narrations_count} video prompts.** -# 输入内容 +# Input Content {narrations_json} -# 输出要求 +# Output Requirements -## 视频提示词规范 -- 语言:**必须使用英文**(用于 AI 视频生成模型) -- 描述结构:scene + character action + camera movement + emotion + atmosphere -- 描述长度:确保描述清晰完整且富有创意(建议 50-100 个英文单词) -- 动态元素:强调动作、运动、变化等动态效果 +## Video Prompt Specifications +- Language: **Must use English** (for AI video generation models) +- Description structure: scene + character action + camera movement + emotion + atmosphere +- Description length: Ensure clear, complete, and creative descriptions (recommended 50-100 English words) +- Dynamic elements: Emphasize actions, movements, changes, and other dynamic effects -## 视觉创意要求 -- 每个视频都要准确反映对应旁白的具体内容和情感 -- 突出画面的动态性:角色动作、物体运动、镜头移动、场景转换等 -- 使用象征手法将抽象概念视觉化(如用流动的水代表时间流逝,用上升的阶梯代表进步等) -- 画面要表现出丰富的情感和动作,增强视觉冲击力 -- 通过镜头语言(推拉摇移)和剪辑节奏增强表现力 +## Visual Creative Requirements +- Each video must accurately reflect the specific content and emotion of the corresponding narration +- Highlight visual dynamics: character actions, object movements, camera movements, scene transitions, etc. +- Use symbolic techniques to visualize abstract concepts (e.g., use flowing water to represent the passage of time, rising stairs to represent progress, etc.) 
+- Scenes should express rich emotions and actions to enhance visual impact +- Enhance expressiveness through camera language (push, pull, pan, tilt) and editing rhythm -## 关键英文词汇参考 -- 动作:moving, running, flowing, transforming, growing, falling -- 镜头:camera pan, zoom in, zoom out, tracking shot, aerial view -- 转场:transition, fade in, fade out, dissolve -- 氛围:dynamic, energetic, peaceful, dramatic, mysterious -- 光影:lighting changes, shadows moving, sunlight streaming +## Key English Vocabulary Reference +- Actions: moving, running, flowing, transforming, growing, falling +- Camera: camera pan, zoom in, zoom out, tracking shot, aerial view +- Transitions: transition, fade in, fade out, dissolve +- Atmosphere: dynamic, energetic, peaceful, dramatic, mysterious +- Lighting: lighting changes, shadows moving, sunlight streaming -## 视频与文案配合原则 -- 视频要服务于文案,成为文案内容的视觉延伸 -- 避免与文案内容无关或矛盾的视觉元素 -- 选择最能增强文案说服力的动态表现方式 -- 确保观众能通过视频动态快速理解文案的核心观点 +## Video and Copy Coordination Principles +- Videos should serve the copy, becoming a visual extension of the copy content +- Avoid visual elements unrelated to or contradicting the copy content +- Choose dynamic presentation methods that best enhance the persuasiveness of the copy +- Ensure the audience can quickly understand the core viewpoint of the copy through video dynamics -## 创意指导 -1. **现象描述类文案**:用动态场景表现社会现象的发生过程 -2. **原因分析类文案**:用因果关系的动态演变表现内在逻辑 -3. **影响论证类文案**:用后果场景的动态展开或对比表现影响程度 -4. **深入探讨类文案**:用抽象概念的动态具象化表现深刻思考 -5. **结论启发类文案**:用开放式动态场景或指引性运动表现启发性 +## Creative Guidance +1. **Phenomenon Description Copy**: Use dynamic scenes to represent the occurrence process of social phenomena +2. **Cause Analysis Copy**: Use dynamic evolution of cause-and-effect relationships to represent internal logic +3. **Impact Argumentation Copy**: Use dynamic unfolding of consequence scenes or contrasts to represent the degree of impact +4. **In-depth Discussion Copy**: Use dynamic concretization of abstract concepts to represent deep thinking +5. 
**Conclusion Inspiration Copy**: Use open-ended dynamic scenes or guiding movements to represent inspiration -## 视频特有注意事项 -- 强调动态:每个视频都应该包含明显的动作或运动 -- 镜头语言:适当使用推拉摇移等镜头技巧增强表现力 -- 时长考虑:视频应该是连贯的动态过程,不是静态画面 -- 流畅性:注意动作的流畅性和自然性 +## Video-Specific Considerations +- Emphasize dynamics: Each video should include obvious actions or movements +- Camera language: Appropriately use camera techniques such as push, pull, pan, tilt to enhance expressiveness +- Duration consideration: Videos should be a coherent dynamic process, not static images +- Fluidity: Pay attention to the fluidity and naturalness of actions -# 输出格式 -严格按照以下JSON格式输出,**视频提示词必须是英文**: +# Output Format +Strictly output in the following JSON format, **video prompts must be in English**: ```json {{ @@ -84,18 +84,18 @@ VIDEO_PROMPT_GENERATION_PROMPT = """# 角色定位 }} ``` -# 重要提醒 -1. 只输出JSON格式内容,不要添加任何解释说明 -2. 确保JSON格式严格正确,可以被程序直接解析 -3. 输入是 {{"narrations": [旁白数组]}} 格式,输出是 {{"video_prompts": [视频提示词数组]}} 格式 -4. **输出的video_prompts数组必须恰好包含 {narrations_count} 个元素,与输入的narrations数组一一对应** -5. **视频提示词必须使用英文**(for AI video generation models) -6. 视频提示词必须准确反映对应旁白的具体内容和情感 -7. 每个视频都要强调动态性和运动感,避免静态描述 -8. 适当使用镜头语言增强表现力 -9. 确保视频画面能增强文案的说服力和观众的理解度 +# Important Reminders +1. Only output JSON format content, do not add any explanations +2. Ensure JSON format is strictly correct and can be directly parsed by the program +3. Input is {{"narrations": [narration array]}} format, output is {{"video_prompts": [video prompt array]}} format +4. **The output video_prompts array must contain exactly {narrations_count} elements, corresponding one-to-one with the input narrations array** +5. **Video prompts must use English** (for AI video generation models) +6. Video prompts must accurately reflect the specific content and emotion of the corresponding narration +7. Each video must emphasize dynamics and sense of movement, avoid static descriptions +8. Appropriately use camera language to enhance expressiveness +9. 
Ensure video scenes can enhance the persuasiveness of the copy and audience understanding -现在,请为上述 {narrations_count} 个旁白创作对应的 {narrations_count} 个**英文**视频提示词。只输出JSON,不要其他内容。 +Now, please create {narrations_count} corresponding **English** video prompts for the above {narrations_count} narrations. Only output JSON, no other content. """ diff --git a/pixelle_video/utils/content_generators.py b/pixelle_video/utils/content_generators.py index 21b4d59..02c1471 100644 --- a/pixelle_video/utils/content_generators.py +++ b/pixelle_video/utils/content_generators.py @@ -57,7 +57,8 @@ async def generate_title( # Use LLM to generate title from pixelle_video.prompts import build_title_generation_prompt - prompt = build_title_generation_prompt(content, max_length=500) + # Pass max_length to prompt so LLM knows the character limit + prompt = build_title_generation_prompt(content, max_length=max_length) response = await llm_service(prompt, temperature=0.7, max_tokens=50) # Clean up response @@ -69,9 +70,23 @@ async def generate_title( if title.startswith("'") and title.endswith("'"): title = title[1:-1] - # Limit to max_length (safety) + # Remove trailing punctuation + title = title.rstrip('.,!?;:\'"') + + # Safety: if still over limit, truncate smartly if len(title) > max_length: - title = title[:max_length] + # Try to truncate at word boundary + truncated = title[:max_length] + last_space = truncated.rfind(' ') + + # Only use word boundary if it's not too far back (at least 60% of max_length) + if last_space > max_length * 0.6: + title = truncated[:last_space] + else: + title = truncated + + # Remove any trailing punctuation after truncation + title = title.rstrip('.,!?;:\'"') logger.debug(f"Generated title: '{title}' (length: {len(title)})") return title diff --git a/workflows/runninghub/image_qwen_chinese_cartoon.json b/workflows/runninghub/image_qwen_chinese_cartoon.json new file mode 100644 index 0000000..299ad8a --- /dev/null +++ 
b/workflows/runninghub/image_qwen_chinese_cartoon.json @@ -0,0 +1,5 @@ +{ + "source": "runninghub", + "workflow_id": "1988434426705133569" +} + diff --git a/workflows/runninghub/video_Z_image_wan2.2.json b/workflows/runninghub/video_Z_image_wan2.2.json new file mode 100644 index 0000000..941ed97 --- /dev/null +++ b/workflows/runninghub/video_Z_image_wan2.2.json @@ -0,0 +1,5 @@ +{ + "source": "runninghub", + "workflow_id": "1993931250872369154" +} + diff --git a/workflows/runninghub/video_qwen_wan2.2.json b/workflows/runninghub/video_qwen_wan2.2.json new file mode 100644 index 0000000..8dff197 --- /dev/null +++ b/workflows/runninghub/video_qwen_wan2.2.json @@ -0,0 +1,5 @@ +{ + "source": "runninghub", + "workflow_id": "1993608528969531394" +} + diff --git a/workflows/selfhost/image_qwen.json b/workflows/selfhost/image_qwen.json new file mode 100644 index 0000000..378a86c --- /dev/null +++ b/workflows/selfhost/image_qwen.json @@ -0,0 +1,184 @@ +{ + "3": { + "inputs": { + "seed": 388600705609480, + "steps": 4, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "beta", + "denoise": 1, + "model": [ + "86", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "7", + 0 + ], + "latent_image": [ + "58", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "6": { + "inputs": { + "text": "", + "clip": [ + "67", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "$prompt.text" + } + }, + "7": { + "inputs": { + "text": "NSFW", + "clip": [ + "67", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Negative Prompt)" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "39", + 0 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "37": { + "inputs": { + "unet_name": "qwen_image_fp8_e4m3fn.safetensors", + "weight_dtype": "default" + }, + "class_type": "UNETLoader", + "_meta": { + "title": "Load Diffusion Model" + } + }, + 
"38": { + "inputs": { + "clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "type": "qwen_image", + "device": "default" + }, + "class_type": "CLIPLoader", + "_meta": { + "title": "Load CLIP" + } + }, + "39": { + "inputs": { + "vae_name": "qwen_image_vae.safetensors" + }, + "class_type": "VAELoader", + "_meta": { + "title": "Load VAE" + } + }, + "58": { + "inputs": { + "width": [ + "90", + 0 + ], + "height": [ + "91", + 0 + ], + "batch_size": 1 + }, + "class_type": "EmptySD3LatentImage", + "_meta": { + "title": "EmptySD3LatentImage" + } + }, + "60": { + "inputs": { + "filename_prefix": "ComfyUI", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "67": { + "inputs": { + "lora_name": "Qwen-Image-Lightning-4steps-V1.0.safetensors", + "strength_model": 1.0000000000000002, + "strength_clip": 1, + "model": [ + "37", + 0 + ], + "clip": [ + "38", + 0 + ] + }, + "class_type": "LoraLoader", + "_meta": { + "title": "Load LoRA" + } + }, + "86": { + "inputs": { + "shift": 3.1000000000000005, + "model": [ + "67", + 0 + ] + }, + "class_type": "ModelSamplingAuraFlow", + "_meta": { + "title": "ModelSamplingAuraFlow" + } + }, + "90": { + "inputs": { + "value": 768 + }, + "class_type": "easy int", + "_meta": { + "title": "$width.value" + } + }, + "91": { + "inputs": { + "value": 1024 + }, + "class_type": "easy int", + "_meta": { + "title": "$height.value" + } + } +} \ No newline at end of file