feat: Add smart paragraph merging mode with AI grouping
Some checks failed
Deploy Documentation / deploy (push) Has been cancelled
Some checks failed
Deploy Documentation / deploy (push) Has been cancelled
- Add "smart" split mode that uses an LLM to intelligently merge related paragraphs
- Implement two-step approach: analyze text structure, then group by semantic relevance
- Add paragraph_merging.py with analysis and grouping prompts
- Update UI to support smart mode selection with auto-detect hint
- Add i18n translations for smart mode (en_US, zh_CN)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
202
pixelle_video/prompts/paragraph_merging.py
Normal file
202
pixelle_video/prompts/paragraph_merging.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# Copyright (C) 2025 AIDC-AI
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Paragraph merging prompt
|
||||
|
||||
For intelligently merging short paragraphs into longer segments suitable for video storyboards.
|
||||
Uses a two-step approach: first analyze, then group.
|
||||
"""
|
||||
|
||||
import json
from typing import List, Optional
|
||||
|
||||
|
||||
# Step 1: Analyze text and recommend segment count
#
# Template for str.format(). Placeholders: {total_paragraphs}, {total_chars},
# {estimated_duration}, {paragraphs_preview}. Braces of the JSON example are
# doubled ({{ }}) so that format() emits them literally.
PARAGRAPH_ANALYSIS_PROMPT = """# 任务定义
你是一个专业的视频分镜规划师。请分析以下文本,推荐最佳分镜数量。

# 核心任务
分析文本结构,根据以下原则推荐分镜数量:

## 分析原则
1. **语义边界**:识别场景切换、话题转换、情绪变化点
2. **叙事完整性**:保持对话回合完整(问-答不拆分)
3. **时长控制**:每个分镜语音时长建议 15-45 秒(约 60-180 字)
4. **视觉多样性**:确保分镜之间有足够的画面变化

## 文本信息
- 总段落数:{total_paragraphs}
- 预估总字数:{total_chars} 字
- 预估总时长:{estimated_duration} 秒

## 输入段落预览
{paragraphs_preview}

# 输出格式
返回 JSON 格式的分析结果:

```json
{{
"recommended_segments": 8,
"reasoning": "文本包含开场设定、分手对话、争吵升级、离别等多个场景切换点...",
"scene_boundaries": [
{{"after_paragraph": 3, "reason": "场景从背景介绍转入对话"}},
{{"after_paragraph": 7, "reason": "对话情绪升级"}},
...
]
}}
```

# 重要提醒
1. recommended_segments 应该在 3-15 之间
2. 每个分镜平均字数建议 80-200 字
3. scene_boundaries 标记主要的场景切换点,用于后续分组参考
4. 只输出 JSON,不要添加其他解释
"""
|
||||
|
||||
|
||||
# Step 2: Group paragraphs based on analysis
#
# Template for str.format(). Placeholders: {total_paragraphs}, {max_index},
# {target_segments}, {analysis_hint}, {paragraphs_preview}. Braces of the JSON
# example are doubled ({{ }}) so that format() emits them literally.
PARAGRAPH_GROUPING_PROMPT = """# 任务定义
你是一个专业的文本分段专家。根据分析结果,将段落分组。

# 核心任务
将 {total_paragraphs} 个段落(编号 0 到 {max_index})分成 **{target_segments}** 个分组。

# 分析建议
{analysis_hint}

# 分组原则
1. **语义关联**:将描述同一场景、同一对话回合的段落放在一起
2. **对话完整**:一轮完整的对话(问与答)应该在同一分组
3. **场景统一**:同一时间、地点发生的事件应该在同一分组
4. **长度均衡**:每个分组的字数尽量均衡(目标 80-200 字/分组)
5. **顺序保持**:分组内段落必须连续

# 输入段落
{paragraphs_preview}

# 输出格式
返回 JSON 格式,包含每个分组的起始和结束索引(包含)。

```json
{{
"groups": [
{{"start": 0, "end": 3}},
{{"start": 4, "end": 7}},
{{"start": 8, "end": 12}}
]
}}
```

# 重要提醒
1. 必须输出正好 {target_segments} 个分组
2. 分组必须覆盖所有段落(从 0 到 {max_index})
3. 每个分组的 start 必须等于上一个 end + 1
4. 只输出 JSON,不要添加其他解释
"""
|
||||
|
||||
|
||||
def build_paragraph_analysis_prompt(
    paragraphs: List[str],
) -> str:
    """
    Render the step-1 (analysis) prompt for a list of paragraphs

    The rendered prompt carries the paragraph count, the total character
    count, an estimated speech duration in seconds, and one preview line
    per paragraph.

    Args:
        paragraphs: Original paragraphs, in reading order

    Returns:
        PARAGRAPH_ANALYSIS_PROMPT with every placeholder filled in
    """
    preview_limit = 50

    def describe(index: int, text: str) -> str:
        # One line per paragraph: "[index] (length字) head", where head is the
        # first `preview_limit` characters with newlines flattened to spaces
        # and "..." appended when the paragraph was cut short.
        head = text[:preview_limit].replace('\n', ' ')
        suffix = "..." if len(text) > preview_limit else ""
        return f"[{index}] ({len(text)}字) {head}{suffix}"

    preview_lines = [describe(idx, text) for idx, text in enumerate(paragraphs)]

    char_total = sum(map(len, paragraphs))
    # Speech-rate estimate: ~250 characters per minute, converted to seconds
    duration_seconds = int(char_total / 250 * 60)

    return PARAGRAPH_ANALYSIS_PROMPT.format(
        total_paragraphs=len(paragraphs),
        total_chars=char_total,
        estimated_duration=duration_seconds,
        paragraphs_preview="\n".join(preview_lines),
    )
|
||||
|
||||
|
||||
def build_paragraph_grouping_prompt(
    paragraphs: List[str],
    target_segments: int,
    analysis_result: Optional[dict] = None,
) -> str:
    """
    Build prompt for grouping paragraphs based on analysis

    Args:
        paragraphs: List of original paragraphs
        target_segments: Target number of segments (from analysis)
        analysis_result: Optional analysis result for context. Only the
            "reasoning" and "scene_boundaries" keys are read. Because this
            is typically parsed model output, malformed pieces (a non-dict
            result, a null or non-list "scene_boundaries", entries that are
            not dicts or lack "after_paragraph") are skipped instead of
            raising.

    Returns:
        Formatted prompt for grouping
    """
    # One preview line per paragraph: index, full length, first 50 characters
    previews = []
    for index, para in enumerate(paragraphs):
        preview = para[:50].replace('\n', ' ')
        if len(para) > 50:
            preview += "..."
        previews.append(f"[{index}] ({len(para)}字) {preview}")
    paragraphs_preview = "\n".join(previews)

    # Collect hint lines separately and join them at the end so the hint never
    # carries a trailing newline or an empty "label with no content" line.
    hint_lines = []
    if isinstance(analysis_result, dict):
        reasoning = analysis_result.get("reasoning")
        if reasoning:
            hint_lines.append(f"分析理由:{reasoning}")

        raw_boundaries = analysis_result.get("scene_boundaries")
        if isinstance(raw_boundaries, (list, tuple)):
            # Keep only entries that actually name a paragraph index;
            # 0 is a valid index, so filter on None/"" rather than truthiness.
            boundaries = [
                str(entry["after_paragraph"])
                for entry in raw_boundaries
                if isinstance(entry, dict)
                and entry.get("after_paragraph") not in (None, "")
            ]
            if boundaries:
                hint_lines.append(f"建议场景切换点(段落后):{', '.join(boundaries)}")

    analysis_hint = "\n".join(hint_lines) or "无额外分析信息"

    return PARAGRAPH_GROUPING_PROMPT.format(
        paragraphs_preview=paragraphs_preview,
        target_segments=target_segments,
        total_paragraphs=len(paragraphs),
        max_index=len(paragraphs) - 1,
        analysis_hint=analysis_hint,
    )
|
||||
|
||||
|
||||
# Backward-compatibility shim: the original entry-point name is kept for existing callers
def build_paragraph_merging_prompt(
    paragraphs: List[str],
    target_segments: int = 8,
) -> str:
    """
    Legacy entry point, kept for backward compatibility.

    Forwards both arguments unchanged to build_paragraph_grouping_prompt
    (no analysis result is supplied) and returns its result.

    Args:
        paragraphs: List of original paragraphs
        target_segments: Target number of segments (defaults to 8)

    Returns:
        Formatted prompt for grouping
    """
    grouping_prompt = build_paragraph_grouping_prompt(paragraphs, target_segments)
    return grouping_prompt
|
||||
Reference in New Issue
Block a user