feat: Add smart paragraph merging mode with AI grouping

- Add "smart" split mode that uses LLM to intelligently merge related paragraphs
- Implement two-step approach: analyze text structure, then group by semantic relevance
- Add paragraph_merging.py with analysis and grouping prompts
- Update UI to support smart mode selection with auto-detect hint
- Add i18n translations for smart mode (en_US, zh_CN)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
empty
2026-01-17 00:19:46 +08:00
parent 3a8ec576ee
commit 3d3aba3670
8 changed files with 427 additions and 6 deletions
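
For illustration only, invoking the new mode might look like the sketch below. The module path and the llm_service signature are assumptions inferred from the diff (an async callable taking prompt, temperature, and max_tokens, returning the raw model text):

    import asyncio

    # Hypothetical import path - the diff below only shows the function bodies.
    from pixelle_video.narration import split_narration_script

    async def canned_llm(prompt: str, temperature: float = 0.3, max_tokens: int = 2000) -> str:
        # Stand-in for a real LLM call: always returns a fixed grouping payload.
        return '{"groups": [{"start": 0, "end": 1}, {"start": 2, "end": 2}]}'

    script = "Scene one.\n\nScene one, continued.\n\nScene two."
    segments = asyncio.run(split_narration_script(
        script,
        split_mode="smart",
        llm_service=canned_llm,
        target_segments=2,  # 3 paragraphs > 2, so merge_paragraphs_smart runs
    ))
    # -> ["Scene one.\n\nScene one, continued.", "Scene two."]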


@@ -208,7 +208,9 @@ async def generate_narrations_from_content(
 async def split_narration_script(
     script: str,
-    split_mode: Literal["paragraph", "line", "sentence"] = "paragraph",
+    split_mode: Literal["paragraph", "line", "sentence", "smart"] = "paragraph",
+    llm_service = None,
+    target_segments: int = 8,
 ) -> List[str]:
     """
     Split user-provided narration script into segments
@@ -219,6 +221,9 @@ async def split_narration_script(
         - "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs
         - "line": Split by single newline (\\n), each line is a segment
         - "sentence": Split by sentence-ending punctuation (。.!?)
+        - "smart": First split by paragraph, then use LLM to intelligently merge related paragraphs
+        llm_service: LLM service instance (required for "smart" mode)
+        target_segments: Target number of segments for "smart" mode (default: 8)
 
     Returns:
         List of narration segments
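
For comparison, a sketch of what the three non-LLM modes yield on the same input; the "paragraph" regex is taken from the hunk below, while the "sentence" split is an assumption reconstructed from the docstring's 。.!? list (that branch is not part of this diff):

    import re

    script = "Intro line one.\nIntro line two.\n\nSecond paragraph."

    # "paragraph": split on blank lines, single newlines stay inside the segment
    paragraphs = [p.strip() for p in re.split(r'\n\s*\n', script) if p.strip()]
    # -> ["Intro line one.\nIntro line two.", "Second paragraph."]

    # "line": every newline starts a new segment
    lines = [l.strip() for l in script.split("\n") if l.strip()]
    # -> ["Intro line one.", "Intro line two.", "Second paragraph."]

    # "sentence" (assumed): cut after each sentence-ending punctuation mark
    sentences = [s.strip() for s in re.findall(r'[^。.!?]+[。.!?]?', script) if s.strip()]
    # -> ["Intro line one.", "Intro line two.", "Second paragraph."]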
@@ -227,7 +232,31 @@ async def split_narration_script(
     narrations = []
 
-    if split_mode == "paragraph":
+    if split_mode == "smart":
+        # Smart mode: first split by paragraph, then merge intelligently
+        if llm_service is None:
+            raise ValueError("llm_service is required for 'smart' split mode")
+
+        # Step 1: Split by paragraph first
+        paragraphs = re.split(r'\n\s*\n', script)
+        paragraphs = [p.strip() for p in paragraphs if p.strip()]
+        logger.info(f" Initial split: {len(paragraphs)} paragraphs")
+
+        # Step 2: Merge intelligently using LLM
+        # If target_segments is None, merge_paragraphs_smart will auto-analyze
+        if target_segments is not None and len(paragraphs) <= target_segments:
+            # No need to merge if already within target
+            logger.info(f" Paragraphs count ({len(paragraphs)}) <= target ({target_segments}), no merge needed")
+            narrations = paragraphs
+        else:
+            narrations = await merge_paragraphs_smart(
+                llm_service=llm_service,
+                paragraphs=paragraphs,
+                target_segments=target_segments  # Can be None for auto-analysis
+            )
+
+        logger.info(f"✅ Smart split: {len(paragraphs)} paragraphs -> {len(narrations)} segments")
+    elif split_mode == "paragraph":
         # Split by double newline (paragraph mode)
         # Preserve single newlines within paragraphs
         paragraphs = re.split(r'\n\s*\n', script)
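
Worth noting: the `\n\s*\n` pattern treats lines containing only spaces or tabs as paragraph breaks, not just truly empty lines. A quick stdlib check:

    import re

    script = "First paragraph.\n   \nSecond paragraph."
    parts = [p.strip() for p in re.split(r'\n\s*\n', script) if p.strip()]
    assert parts == ["First paragraph.", "Second paragraph."]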
@@ -266,6 +295,150 @@ async def split_narration_script(
 
     return narrations
 
+
+async def merge_paragraphs_smart(
+    llm_service,
+    paragraphs: List[str],
+    target_segments: int = None,  # Now optional - auto-analyze if not provided
+    max_retries: int = 3,
+) -> List[str]:
+    """
+    Use LLM to intelligently merge paragraphs based on semantic relevance.
+
+    Two-step approach:
+    1. If target_segments is not provided, first analyze text to recommend optimal count
+    2. Then group paragraphs based on the target count
+
+    Args:
+        llm_service: LLM service instance
+        paragraphs: List of original paragraphs
+        target_segments: Target number of merged segments (auto-analyzed if None)
+        max_retries: Maximum retry attempts for each step
+
+    Returns:
+        List of merged paragraphs
+    """
+    from pixelle_video.prompts import (
+        build_paragraph_analysis_prompt,
+        build_paragraph_grouping_prompt
+    )
+
+    # ========================================
+    # Step 1: Analyze and recommend segment count (if not provided)
+    # ========================================
+    if target_segments is None:
+        logger.info(f"Analyzing {len(paragraphs)} paragraphs to recommend segment count...")
+        analysis_prompt = build_paragraph_analysis_prompt(paragraphs)
+
+        analysis_result = None
+        for attempt in range(1, max_retries + 1):
+            try:
+                response = await llm_service(
+                    prompt=analysis_prompt,
+                    temperature=0.3,
+                    max_tokens=1500
+                )
+                logger.debug(f"Analysis response length: {len(response)} chars")
+
+                result = _parse_json(response)
+                if "recommended_segments" not in result:
+                    raise KeyError("Missing 'recommended_segments' in analysis")
+
+                target_segments = result["recommended_segments"]
+                analysis_result = result
+
+                # Validate range
+                if target_segments < 3:
+                    target_segments = 3
+                elif target_segments > 15:
+                    target_segments = 15
+
+                reasoning = result.get("reasoning", "N/A")
+                logger.info(f"✅ Analysis complete: recommended {target_segments} segments")
+                logger.info(f" Reasoning: {reasoning[:100]}...")
+                break
+            except Exception as e:
+                logger.error(f"Analysis attempt {attempt} failed: {e}")
+                if attempt >= max_retries:
+                    # Fallback: use simple heuristic
+                    target_segments = max(3, min(12, len(paragraphs) // 3))
+                    logger.warning(f"Using fallback: {target_segments} segments (paragraphs/3)")
+                    analysis_result = None
+                    break
+                logger.info("Retrying analysis...")
+    else:
+        analysis_result = None
+        logger.info(f"Using provided target: {target_segments} segments")
+
+    # ========================================
+    # Step 2: Group paragraphs
+    # ========================================
+    logger.info(f"Grouping {len(paragraphs)} paragraphs into {target_segments} segments...")
+    grouping_prompt = build_paragraph_grouping_prompt(
+        paragraphs=paragraphs,
+        target_segments=target_segments,
+        analysis_result=analysis_result
+    )
+
+    for attempt in range(1, max_retries + 1):
+        try:
+            response = await llm_service(
+                prompt=grouping_prompt,
+                temperature=0.3,
+                max_tokens=2000
+            )
+            logger.debug(f"Grouping response length: {len(response)} chars")
+
+            result = _parse_json(response)
+            if "groups" not in result:
+                raise KeyError("Invalid response format: missing 'groups'")
+
+            groups = result["groups"]
+
+            # Validate count
+            if len(groups) != target_segments:
+                logger.warning(
+                    f"Grouping attempt {attempt}: expected {target_segments} groups, got {len(groups)}"
+                )
+                if attempt < max_retries:
+                    continue
+                logger.warning(f"Accepting {len(groups)} groups after {max_retries} attempts")
+
+            # Validate group boundaries
+            for i, group in enumerate(groups):
+                if "start" not in group or "end" not in group:
+                    raise ValueError(f"Group {i} missing 'start' or 'end'")
+                if group["start"] > group["end"]:
+                    raise ValueError(f"Group {i} has invalid range: start > end")
+                if group["start"] < 0 or group["end"] >= len(paragraphs):
+                    raise ValueError(f"Group {i} has out-of-bounds indices")
+
+            # Merge paragraphs based on groups
+            merged = []
+            for group in groups:
+                start, end = group["start"], group["end"]
+                merged_text = "\n\n".join(paragraphs[start:end + 1])
+                merged.append(merged_text)
+
+            logger.info(f"✅ Successfully merged into {len(merged)} segments")
+            return merged
+        except Exception as e:
+            logger.error(f"Grouping attempt {attempt} failed: {e}")
+            if attempt >= max_retries:
+                raise
+            logger.info("Retrying grouping...")
+
+    # Fallback: should not reach here
+    return paragraphs
+
+
 async def generate_image_prompts(
     llm_service,
     narrations: List[str],
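
For reference, the grouping step above expects JSON of roughly this shape. The field names ("groups", "start", "end", "recommended_segments", "reasoning") come straight from the validation code; everything else about the payload is an assumption about the prompt contract defined in paragraph_merging.py:

    # Hypothetical grouping response for 6 paragraphs with target_segments=3.
    # "start"/"end" are inclusive, zero-based paragraph indices.
    grouping_response = {
        "groups": [
            {"start": 0, "end": 1},  # paragraphs 0-1 -> segment 1
            {"start": 2, "end": 4},  # paragraphs 2-4 -> segment 2
            {"start": 5, "end": 5},  # paragraph 5 stands alone
        ]
    }

    # The analysis step (when target_segments is None) must return at least
    # "recommended_segments"; "reasoning" is read with .get(), so it is optional.
    analysis_response = {"recommended_segments": 6, "reasoning": "Six distinct scenes."}

    paragraphs = [f"Paragraph {i}" for i in range(6)]
    merged = ["\n\n".join(paragraphs[g["start"]:g["end"] + 1])
              for g in grouping_response["groups"]]
    assert len(merged) == 3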
@@ -489,8 +662,8 @@ def _parse_json(text: str) -> dict:
     except json.JSONDecodeError:
         pass
 
-    # Try to find any JSON object in the text
-    json_pattern = r'\{[^{}]*(?:"narrations"|"image_prompts")\s*:\s*\[[^\]]*\][^{}]*\}'
+    # Try to find any JSON object with known keys (including analysis keys)
+    json_pattern = r'\{[^{}]*(?:"narrations"|"image_prompts"|"video_prompts"|"merged_paragraphs"|"groups"|"recommended_segments"|"scene_boundaries")\s*:\s*[^{}]*\}'
     match = re.search(json_pattern, text, re.DOTALL)
     if match:
         try:
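
One property of the widened pattern worth noting: `[^{}]*` cannot cross a brace, so this branch only catches flat, single-level objects; a nested payload such as a "groups" array of objects falls through to the fallback added below. A quick check of that reading:

    import re

    json_pattern = (
        r'\{[^{}]*(?:"narrations"|"image_prompts"|"video_prompts"|"merged_paragraphs"'
        r'|"groups"|"recommended_segments"|"scene_boundaries")\s*:\s*[^{}]*\}'
    )

    flat = 'Model says: {"recommended_segments": 6, "reasoning": "six scenes"}'
    nested = 'Model says: {"groups": [{"start": 0, "end": 1}]}'

    assert re.search(json_pattern, flat, re.DOTALL) is not None
    assert re.search(json_pattern, nested, re.DOTALL) is None  # nested braces stop [^{}]*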
@@ -498,6 +671,17 @@ def _parse_json(text: str) -> dict:
         except json.JSONDecodeError:
             pass
 
+    # Try to find any JSON object that looks like it contains an array
+    # This is a more aggressive fallback for complex nested arrays
+    json_start = text.find('{')
+    json_end = text.rfind('}')
+    if json_start != -1 and json_end != -1 and json_end > json_start:
+        potential_json = text[json_start:json_end + 1]
+        try:
+            return json.loads(potential_json)
+        except json.JSONDecodeError:
+            pass
+
     # If all fails, raise error
     raise json.JSONDecodeError("No valid JSON found", text, 0)
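
The final fallback simply brackets everything between the first '{' and the last '}', which is what rescues chatty replies that wrap valid (possibly nested) JSON in prose. A small check of that behavior:

    import json

    reply = 'Here is the grouping you asked for: {"groups": [{"start": 0, "end": 2}]} Let me know!'
    start, end = reply.find('{'), reply.rfind('}')
    candidate = reply[start:end + 1]
    assert json.loads(candidate) == {"groups": [{"start": 0, "end": 2}]}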