feat: Add smart paragraph merging mode with AI grouping

- Add "smart" split mode that uses an LLM to intelligently merge related paragraphs
- Implement a two-step approach: analyze text structure, then group by semantic relevance
- Add paragraph_merging.py with analysis and grouping prompts
- Update UI to support smart mode selection with auto-detect hint
- Add i18n translations for smart mode (en_US, zh_CN)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 00:19:46 +08:00
parent 3a8ec576ee
commit 3d3aba3670
8 changed files with 427 additions and 6 deletions
--- a/pixelle_video/prompts/paragraph_merging.py
+++ b/pixelle_video/prompts/paragraph_merging.py
@@ -0,0 +1,202 @@
+# Copyright (C) 2025 AIDC-AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Paragraph merging prompt
+
+For intelligently merging short paragraphs into longer segments suitable for video storyboards.
+Uses a two-step approach: first analyze, then group.
+"""
+
+import json
+from typing import List, Optional
+
+
+# Step 1: Analyze text and recommend segment count
+#
+# Chinese-language prompt template, filled in with str.format().
+# Placeholders: {total_paragraphs}, {total_chars}, {estimated_duration},
+# {paragraphs_preview}. The doubled braces ({{ and }}) in the JSON example are
+# str.format() escapes that render as literal "{" and "}".
+# The prompt asks for a JSON-only reply containing "recommended_segments",
+# "reasoning" and "scene_boundaries" (objects with "after_paragraph" and
+# "reason").
+# NOTE(review): the prompt states two different per-segment length ranges
+# (60-180 chars under the duration rule, 80-200 chars in the reminders) -
+# confirm which one is intended.
+# NOTE(review): the JSON example contains a literal "..." entry, so the example
+# itself is not valid JSON - confirm the model does not echo it back.
+PARAGRAPH_ANALYSIS_PROMPT = """# 任务定义
+你是一个专业的视频分镜规划师。请分析以下文本，推荐最佳分镜数量。
+
+# 核心任务
+分析文本结构，根据以下原则推荐分镜数量：
+
+## 分析原则
+1. **语义边界**：识别场景切换、话题转换、情绪变化点
+2. **叙事完整性**：保持对话回合完整（问-答不拆分）
+3. **时长控制**：每个分镜语音时长建议 15-45 秒（约 60-180 字）
+4. **视觉多样性**：确保分镜之间有足够的画面变化
+
+## 文本信息
+- 总段落数：{total_paragraphs}
+- 预估总字数：{total_chars} 字
+- 预估总时长：{estimated_duration} 秒
+
+## 输入段落预览
+{paragraphs_preview}
+
+# 输出格式
+返回 JSON 格式的分析结果：
+
+```json
+{{
+  "recommended_segments": 8,
+  "reasoning": "文本包含开场设定、分手对话、争吵升级、离别等多个场景切换点...",
+  "scene_boundaries": [
+    {{"after_paragraph": 3, "reason": "场景从背景介绍转入对话"}},
+    {{"after_paragraph": 7, "reason": "对话情绪升级"}},
+    ...
+  ]
+}}
+```
+
+# 重要提醒
+1. recommended_segments 应该在 3-15 之间
+2. 每个分镜平均字数建议 80-200 字
+3. scene_boundaries 标记主要的场景切换点，用于后续分组参考
+4. 只输出 JSON，不要添加其他解释
+"""
+
+
+# Step 2: Group paragraphs based on analysis
+#
+# Chinese-language prompt template, filled in with str.format().
+# Placeholders: {total_paragraphs}, {max_index}, {target_segments} (used twice),
+# {analysis_hint}, {paragraphs_preview}. The doubled braces ({{ and }}) in the
+# JSON example are str.format() escapes that render as literal "{" and "}".
+# The prompt asks for a JSON-only reply of the form {"groups": [...]}, where
+# each group has inclusive "start" and "end" paragraph indices, groups are
+# contiguous (each start equals the previous end + 1) and together cover
+# indices 0..{max_index}.
+PARAGRAPH_GROUPING_PROMPT = """# 任务定义
+你是一个专业的文本分段专家。根据分析结果，将段落分组。
+
+# 核心任务
+将 {total_paragraphs} 个段落（编号 0 到 {max_index}）分成 **{target_segments}** 个分组。
+
+# 分析建议
+{analysis_hint}
+
+# 分组原则
+1. **语义关联**：将描述同一场景、同一对话回合的段落放在一起
+2. **对话完整**：一轮完整的对话（问与答）应该在同一分组
+3. **场景统一**：同一时间、地点发生的事件应该在同一分组
+4. **长度均衡**：每个分组的字数尽量均衡（目标 80-200 字/分组）
+5. **顺序保持**：分组内段落必须连续
+
+# 输入段落
+{paragraphs_preview}
+
+# 输出格式
+返回 JSON 格式，包含每个分组的起始和结束索引（包含）。
+
+```json
+{{
+  "groups": [
+    {{"start": 0, "end": 3}},
+    {{"start": 4, "end": 7}},
+    {{"start": 8, "end": 12}}
+  ]
+}}
+```
+
+# 重要提醒
+1. 必须输出正好 {target_segments} 个分组
+2. 分组必须覆盖所有段落（从 0 到 {max_index}）
+3. 每个分组的 start 必须等于上一个 end + 1
+4. 只输出 JSON，不要添加其他解释
+"""
+
+
+def build_paragraph_analysis_prompt(
+    paragraphs: List[str],
+) -> str:
+    """
+    Render the step-1 (analysis) prompt for a list of paragraphs
+
+    Each paragraph is summarised as one preview line of the form
+    "[index] (N字) first-50-chars", with newlines flattened to spaces and
+    "..." appended when the paragraph is longer than 50 characters.
+
+    Args:
+        paragraphs: List of original paragraphs
+
+    Returns:
+        PARAGRAPH_ANALYSIS_PROMPT filled in with the paragraph count, total
+        character count, estimated speech duration in seconds, and previews
+    """
+    preview_lines = []
+    char_total = 0
+    for index, text in enumerate(paragraphs):
+        length = len(text)
+        char_total += length
+        snippet = text[:50].replace('\n', ' ')
+        # Mark truncated paragraphs so the model knows the preview is partial
+        suffix = "..." if length > 50 else ""
+        preview_lines.append(f"[{index}] ({length}字) {snippet}{suffix}")
+
+    # Speech-rate estimate: ~250 chars/minute for Chinese speech
+    duration_seconds = int(char_total / 250 * 60)
+
+    return PARAGRAPH_ANALYSIS_PROMPT.format(
+        paragraphs_preview="\n".join(preview_lines),
+        total_paragraphs=len(paragraphs),
+        total_chars=char_total,
+        estimated_duration=duration_seconds,
+    )
+
+
+def build_paragraph_grouping_prompt(
+    paragraphs: List[str],
+    target_segments: int,
+    analysis_result: Optional[dict] = None,
+) -> str:
+    """
+    Build prompt for grouping paragraphs based on analysis
+
+    Each paragraph is summarised as one preview line of the form
+    "[index] (N字) first-50-chars", with newlines flattened to spaces and
+    "..." appended when the paragraph is longer than 50 characters.
+
+    The analysis hint is assembled defensively because analysis_result is
+    expected to come from parsed model output: a missing, empty or malformed
+    "reasoning" / "scene_boundaries" entry is skipped instead of raising or
+    leaving a dangling label in the prompt. When nothing usable remains, the
+    hint falls back to "无额外分析信息".
+
+    Args:
+        paragraphs: List of original paragraphs
+        target_segments: Target number of segments (from analysis)
+        analysis_result: Optional analysis result for context; only the
+            "reasoning" and "scene_boundaries" keys are read
+
+    Returns:
+        Formatted prompt for grouping
+    """
+    # Create preview with char counts
+    previews = []
+    for i, para in enumerate(paragraphs):
+        char_count = len(para)
+        preview = para[:50].replace('\n', ' ')
+        if char_count > 50:
+            preview += "..."
+        previews.append(f"[{i}] ({char_count}字) {preview}")
+
+    paragraphs_preview = "\n".join(previews)
+
+    # Build analysis hint from whichever parts are actually usable
+    hint_parts = []
+    if isinstance(analysis_result, dict):
+        reasoning = analysis_result.get("reasoning")
+        if reasoning:
+            hint_parts.append(f"分析理由：{reasoning}")
+
+        raw_boundaries = analysis_result.get("scene_boundaries")
+        if isinstance(raw_boundaries, (list, tuple)):
+            # Keep only well-formed entries that carry a paragraph index
+            boundaries = [
+                str(b["after_paragraph"])
+                for b in raw_boundaries
+                if isinstance(b, dict)
+                and b.get("after_paragraph") not in (None, "")
+            ]
+            if boundaries:
+                hint_parts.append(
+                    f"建议场景切换点（段落后）：{', '.join(boundaries)}"
+                )
+
+    analysis_hint = "\n".join(hint_parts) if hint_parts else "无额外分析信息"
+
+    # NOTE(review): target_segments is not checked against len(paragraphs);
+    # a target larger than the paragraph count asks the model for a grouping
+    # that cannot exist. Confirm whether callers clamp it before calling.
+    return PARAGRAPH_GROUPING_PROMPT.format(
+        paragraphs_preview=paragraphs_preview,
+        target_segments=target_segments,
+        total_paragraphs=len(paragraphs),
+        max_index=len(paragraphs) - 1,
+        analysis_hint=analysis_hint,
+    )
+
+
+# Legacy entry point - original function name kept so existing callers keep working
+def build_paragraph_merging_prompt(
+    paragraphs: List[str],
+    target_segments: int = 8,
+) -> str:
+    """
+    Backward-compatible alias for build_paragraph_grouping_prompt.
+
+    Args:
+        paragraphs: List of original paragraphs
+        target_segments: Target number of segments (defaults to 8)
+
+    Returns:
+        The grouping prompt, built without any analysis result
+    """
+    return build_paragraph_grouping_prompt(
+        paragraphs=paragraphs,
+        target_segments=target_segments,
+    )