Files
AI-Video/pixelle_video/prompts/paragraph_merging.py
empty 3d3aba3670
Some checks failed
Deploy Documentation / deploy (push) Has been cancelled
feat: Add smart paragraph merging mode with AI grouping
- Add "smart" split mode that uses LLM to intelligently merge related paragraphs
- Implement two-step approach: analyze text structure, then group by semantic relevance
- Add paragraph_merging.py with analysis and grouping prompts
- Update UI to support smart mode selection with auto-detect hint
- Add i18n translations for smart mode (en_US, zh_CN)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 00:19:46 +08:00

203 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Paragraph merging prompt
For intelligently merging short paragraphs into longer segments suitable for video storyboards.
Uses a two-step approach: first analyze, then group.
"""
import json
from typing import List, Optional
# Step 1: Analyze text and recommend segment count.
#
# This template is rendered with str.format(). Placeholders:
#   {total_paragraphs}, {total_chars}, {estimated_duration}, {paragraphs_preview}
# Braces belonging to the JSON example are doubled ({{ / }}) so that
# str.format() emits them as literal single braces.
PARAGRAPH_ANALYSIS_PROMPT = """# 任务定义
你是一个专业的视频分镜规划师。请分析以下文本,推荐最佳分镜数量。
# 核心任务
分析文本结构,根据以下原则推荐分镜数量:
## 分析原则
1. **语义边界**:识别场景切换、话题转换、情绪变化点
2. **叙事完整性**:保持对话回合完整(问-答不拆分)
3. **时长控制**:每个分镜语音时长建议 15-45 秒(约 60-180 字)
4. **视觉多样性**:确保分镜之间有足够的画面变化
## 文本信息
- 总段落数:{total_paragraphs}
- 预估总字数:{total_chars}
- 预估总时长:{estimated_duration}
## 输入段落预览
{paragraphs_preview}
# 输出格式
返回 JSON 格式的分析结果:
```json
{{
"recommended_segments": 8,
"reasoning": "文本包含开场设定、分手对话、争吵升级、离别等多个场景切换点...",
"scene_boundaries": [
{{"after_paragraph": 3, "reason": "场景从背景介绍转入对话"}},
{{"after_paragraph": 7, "reason": "对话情绪升级"}},
...
]
}}
```
# 重要提醒
1. recommended_segments 应该在 3-15 之间
2. 每个分镜平均字数建议 80-200 字
3. scene_boundaries 标记主要的场景切换点,用于后续分组参考
4. 只输出 JSON,不要添加其他解释
"""
# Step 2: Group paragraphs based on analysis.
#
# This template is rendered with str.format(). Placeholders:
#   {total_paragraphs}, {max_index}, {target_segments}, {analysis_hint},
#   {paragraphs_preview}
# Braces belonging to the JSON example are doubled ({{ / }}) so that
# str.format() emits them as literal single braces.
PARAGRAPH_GROUPING_PROMPT = """# 任务定义
你是一个专业的文本分段专家。根据分析结果,将段落分组。
# 核心任务
{total_paragraphs} 个段落(编号 0 到 {max_index})分成 **{target_segments}** 个分组。
# 分析建议
{analysis_hint}
# 分组原则
1. **语义关联**:将描述同一场景、同一对话回合的段落放在一起
2. **对话完整**:一轮完整的对话(问与答)应该在同一分组
3. **场景统一**:同一时间、地点发生的事件应该在同一分组
4. **长度均衡**:每个分组的字数尽量均衡(目标 80-200 字/分组)
5. **顺序保持**:分组内段落必须连续
# 输入段落
{paragraphs_preview}
# 输出格式
返回 JSON 格式,包含每个分组的起始和结束索引(包含)。
```json
{{
"groups": [
{{"start": 0, "end": 3}},
{{"start": 4, "end": 7}},
{{"start": 8, "end": 12}}
]
}}
```
# 重要提醒
1. 必须输出正好 {target_segments} 个分组
2. 分组必须覆盖所有段落(从 0 到 {max_index})
3. 每个分组的 start 必须等于上一个 end + 1
4. 只输出 JSON,不要添加其他解释
"""
def build_paragraph_analysis_prompt(
    paragraphs: List[str],
) -> str:
    """
    Build the step-1 prompt that asks the LLM to recommend a segment count.

    The prompt carries aggregate statistics (paragraph count, total character
    count, estimated speech duration) and a one-line preview per paragraph.

    Args:
        paragraphs: List of original paragraphs

    Returns:
        PARAGRAPH_ANALYSIS_PROMPT with all placeholders filled in
    """
    total_chars = sum(len(para) for para in paragraphs)

    # Estimate: ~250 chars/minute for Chinese speech, converted to seconds.
    estimated_seconds = int(total_chars / 250 * 60)

    # One preview line per paragraph: index, full length, first 50 chars.
    preview_lines = []
    for index, para in enumerate(paragraphs):
        snippet = para[:50].replace('\n', ' ')
        if len(para) > 50:
            # Mark that the paragraph continues past the preview window.
            snippet += "..."
        preview_lines.append(f"[{index}] ({len(para)}字) {snippet}")

    return PARAGRAPH_ANALYSIS_PROMPT.format(
        paragraphs_preview="\n".join(preview_lines),
        total_paragraphs=len(paragraphs),
        total_chars=total_chars,
        # Spell out the unit: a bare number is ambiguous (seconds vs. minutes)
        # next to the per-segment guidance that the template gives in seconds.
        estimated_duration=f"约 {estimated_seconds} 秒",
    )
def build_paragraph_grouping_prompt(
    paragraphs: List[str],
    target_segments: int,
    analysis_result: Optional[dict] = None,
) -> str:
    """
    Build the step-2 prompt that asks the LLM to group paragraphs.

    Args:
        paragraphs: List of original paragraphs
        target_segments: Target number of segments (from analysis)
        analysis_result: Optional analysis result for context. The keys
            "reasoning" and "scene_boundaries" are read when present;
            missing, empty, or malformed values are skipped instead of
            raising, so an imperfect analysis result cannot break prompt
            construction.

    Returns:
        PARAGRAPH_GROUPING_PROMPT with all placeholders filled in
    """
    # One preview line per paragraph: index, full length, first 50 chars.
    preview_lines = []
    for index, para in enumerate(paragraphs):
        snippet = para[:50].replace('\n', ' ')
        if len(para) > 50:
            # Mark that the paragraph continues past the preview window.
            snippet += "..."
        preview_lines.append(f"[{index}] ({len(para)}字) {snippet}")

    # Collect hint lines from the analysis result, if one was supplied.
    hint_lines = []
    if isinstance(analysis_result, dict):
        reasoning = analysis_result.get("reasoning")
        if reasoning:
            hint_lines.append(f"分析理由:{reasoning}")

        boundaries = analysis_result.get("scene_boundaries")
        if isinstance(boundaries, (list, tuple)):
            boundary_labels = []
            for boundary in boundaries:
                # Expected shape is {"after_paragraph": N, ...}; a bare
                # index is tolerated as well.
                if isinstance(boundary, dict):
                    boundary = boundary.get("after_paragraph")
                if isinstance(boundary, bool) or not isinstance(boundary, (int, str)):
                    continue
                label = str(boundary).strip()
                if label:
                    boundary_labels.append(label)
            # Emit the line only when there is something to list, so the
            # prompt never contains a dangling label or empty items.
            if boundary_labels:
                hint_lines.append(
                    f"建议场景切换点(段落后):{', '.join(boundary_labels)}"
                )

    analysis_hint = "\n".join(hint_lines) if hint_lines else "无额外分析信息"

    return PARAGRAPH_GROUPING_PROMPT.format(
        paragraphs_preview="\n".join(preview_lines),
        target_segments=target_segments,
        total_paragraphs=len(paragraphs),
        max_index=len(paragraphs) - 1,
        analysis_hint=analysis_hint,
    )
# Legacy entry point - the original function name is kept so existing
# callers continue to work.
def build_paragraph_merging_prompt(
    paragraphs: List[str],
    target_segments: int = 8,
) -> str:
    """
    Backward-compatible wrapper around build_paragraph_grouping_prompt.

    Args:
        paragraphs: List of original paragraphs
        target_segments: Target number of segments (defaults to 8)

    Returns:
        The grouping prompt built without any analysis result
    """
    grouping_prompt = build_paragraph_grouping_prompt(
        paragraphs,
        target_segments,
    )
    return grouping_prompt