feat: Add smart paragraph merging mode with AI grouping
Some checks failed
Deploy Documentation / deploy (push) Has been cancelled
Some checks failed
Deploy Documentation / deploy (push) Has been cancelled
- Add "smart" split mode that uses LLM to intelligently merge related paragraphs - Implement two-step approach: analyze text structure, then group by semantic relevance - Add paragraph_merging.py with analysis and grouping prompts - Update UI to support smart mode selection with auto-detect hint - Add i18n translations for smart mode (en_US, zh_CN) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -208,7 +208,9 @@ async def generate_narrations_from_content(
|
||||
|
||||
async def split_narration_script(
|
||||
script: str,
|
||||
split_mode: Literal["paragraph", "line", "sentence"] = "paragraph",
|
||||
split_mode: Literal["paragraph", "line", "sentence", "smart"] = "paragraph",
|
||||
llm_service = None,
|
||||
target_segments: int = 8,
|
||||
) -> List[str]:
|
||||
"""
|
||||
Split user-provided narration script into segments
|
||||
@@ -219,6 +221,9 @@ async def split_narration_script(
|
||||
- "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs
|
||||
- "line": Split by single newline (\\n), each line is a segment
|
||||
- "sentence": Split by sentence-ending punctuation (。.!?!?)
|
||||
- "smart": First split by paragraph, then use LLM to intelligently merge related paragraphs
|
||||
llm_service: LLM service instance (required for "smart" mode)
|
||||
target_segments: Target number of segments for "smart" mode (default: 8)
|
||||
|
||||
Returns:
|
||||
List of narration segments
|
||||
@@ -227,7 +232,31 @@ async def split_narration_script(
|
||||
|
||||
narrations = []
|
||||
|
||||
if split_mode == "paragraph":
|
||||
if split_mode == "smart":
|
||||
# Smart mode: first split by paragraph, then merge intelligently
|
||||
if llm_service is None:
|
||||
raise ValueError("llm_service is required for 'smart' split mode")
|
||||
|
||||
# Step 1: Split by paragraph first
|
||||
paragraphs = re.split(r'\n\s*\n', script)
|
||||
paragraphs = [p.strip() for p in paragraphs if p.strip()]
|
||||
logger.info(f" Initial split: {len(paragraphs)} paragraphs")
|
||||
|
||||
# Step 2: Merge intelligently using LLM
|
||||
# If target_segments is None, merge_paragraphs_smart will auto-analyze
|
||||
if target_segments is not None and len(paragraphs) <= target_segments:
|
||||
# No need to merge if already within target
|
||||
logger.info(f" Paragraphs count ({len(paragraphs)}) <= target ({target_segments}), no merge needed")
|
||||
narrations = paragraphs
|
||||
else:
|
||||
narrations = await merge_paragraphs_smart(
|
||||
llm_service=llm_service,
|
||||
paragraphs=paragraphs,
|
||||
target_segments=target_segments # Can be None for auto-analysis
|
||||
)
|
||||
logger.info(f"✅ Smart split: {len(paragraphs)} paragraphs -> {len(narrations)} segments")
|
||||
|
||||
elif split_mode == "paragraph":
|
||||
# Split by double newline (paragraph mode)
|
||||
# Preserve single newlines within paragraphs
|
||||
paragraphs = re.split(r'\n\s*\n', script)
|
||||
@@ -266,6 +295,150 @@ async def split_narration_script(
|
||||
return narrations
|
||||
|
||||
|
||||
async def merge_paragraphs_smart(
    llm_service,
    paragraphs: List[str],
    target_segments: "int | None" = None,  # None => auto-analyze optimal count
    max_retries: int = 3,
) -> List[str]:
    """
    Use LLM to intelligently merge paragraphs based on semantic relevance.

    Two-step approach:
        1. If target_segments is not provided, first analyze the text to
           recommend an optimal segment count (clamped to [3, 15]).
        2. Group paragraphs by semantic relevance into that many segments.

    Args:
        llm_service: LLM service instance — an async callable invoked as
            ``await llm_service(prompt=..., temperature=..., max_tokens=...)``
            returning the raw response text.
        paragraphs: List of original paragraphs.
        target_segments: Target number of merged segments (auto-analyzed if None).
        max_retries: Maximum retry attempts for each LLM step.

    Returns:
        List of merged paragraphs; each element is one or more consecutive
        original paragraphs joined with a blank line.

    Raises:
        Exception: Re-raises the last grouping error once retries are exhausted
            (analysis errors fall back to a heuristic instead of raising).
    """
    # Imported lazily to avoid a module-level import cycle with the prompts package.
    from pixelle_video.prompts import (
        build_paragraph_analysis_prompt,
        build_paragraph_grouping_prompt
    )

    # ========================================
    # Step 1: Analyze and recommend segment count (if not provided)
    # ========================================
    if target_segments is None:
        logger.info(f"Analyzing {len(paragraphs)} paragraphs to recommend segment count...")

        analysis_prompt = build_paragraph_analysis_prompt(paragraphs)
        analysis_result = None

        for attempt in range(1, max_retries + 1):
            try:
                response = await llm_service(
                    prompt=analysis_prompt,
                    temperature=0.3,
                    max_tokens=1500
                )

                logger.debug(f"Analysis response length: {len(response)} chars")

                result = _parse_json(response)

                if "recommended_segments" not in result:
                    raise KeyError("Missing 'recommended_segments' in analysis")

                # Coerce to int: the model may emit a numeric string ("8").
                # A non-numeric value raises here and is handled by the
                # retry/fallback path below, like any other malformed response.
                target_segments = int(result["recommended_segments"])
                analysis_result = result

                # Clamp to the supported range [3, 15]
                target_segments = max(3, min(15, target_segments))

                reasoning = result.get("reasoning", "N/A")
                logger.info(f"✅ Analysis complete: recommended {target_segments} segments")
                logger.info(f" Reasoning: {reasoning[:100]}...")
                break

            except Exception as e:
                logger.error(f"Analysis attempt {attempt} failed: {e}")
                if attempt >= max_retries:
                    # Fallback heuristic: roughly 3 paragraphs per segment,
                    # clamped to [3, 12] so grouping stays reasonable.
                    target_segments = max(3, min(12, len(paragraphs) // 3))
                    logger.warning(f"Using fallback: {target_segments} segments (paragraphs/3)")
                    analysis_result = None
                    break
                logger.info("Retrying analysis...")
    else:
        analysis_result = None
        logger.info(f"Using provided target: {target_segments} segments")

    # ========================================
    # Step 2: Group paragraphs
    # ========================================
    logger.info(f"Grouping {len(paragraphs)} paragraphs into {target_segments} segments...")

    grouping_prompt = build_paragraph_grouping_prompt(
        paragraphs=paragraphs,
        target_segments=target_segments,
        analysis_result=analysis_result
    )

    for attempt in range(1, max_retries + 1):
        try:
            response = await llm_service(
                prompt=grouping_prompt,
                temperature=0.3,
                max_tokens=2000
            )

            logger.debug(f"Grouping response length: {len(response)} chars")

            result = _parse_json(response)

            if "groups" not in result:
                raise KeyError("Invalid response format: missing 'groups'")

            groups = result["groups"]

            # Validate count: retry on mismatch, but accept whatever the model
            # produced on the final attempt rather than failing outright.
            if len(groups) != target_segments:
                logger.warning(
                    f"Grouping attempt {attempt}: expected {target_segments} groups, got {len(groups)}"
                )
                if attempt < max_retries:
                    continue
                logger.warning(f"Accepting {len(groups)} groups after {max_retries} attempts")

            # Validate group boundaries.
            # NOTE(review): only per-group bounds are checked; groups are not
            # verified to be contiguous, ordered, or to cover every paragraph,
            # so a malformed response could drop or duplicate paragraphs —
            # consider adding a coverage check.
            for i, group in enumerate(groups):
                if "start" not in group or "end" not in group:
                    raise ValueError(f"Group {i} missing 'start' or 'end'")
                if group["start"] > group["end"]:
                    raise ValueError(f"Group {i} has invalid range: start > end")
                if group["start"] < 0 or group["end"] >= len(paragraphs):
                    raise ValueError(f"Group {i} has out-of-bounds indices")

            # Merge paragraphs based on groups (indices are inclusive on both ends)
            merged = []
            for group in groups:
                start, end = group["start"], group["end"]
                merged_text = "\n\n".join(paragraphs[start:end + 1])
                merged.append(merged_text)

            logger.info(f"✅ Successfully merged into {len(merged)} segments")
            return merged

        except Exception as e:
            logger.error(f"Grouping attempt {attempt} failed: {e}")
            if attempt >= max_retries:
                raise
            logger.info("Retrying grouping...")

    # Fallback: unreachable (the loop either returns or re-raises on the last
    # attempt), kept as a defensive safety net.
    return paragraphs
|
||||
|
||||
|
||||
async def generate_image_prompts(
|
||||
llm_service,
|
||||
narrations: List[str],
|
||||
@@ -489,8 +662,8 @@ def _parse_json(text: str) -> dict:
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try to find any JSON object in the text
|
||||
json_pattern = r'\{[^{}]*(?:"narrations"|"image_prompts")\s*:\s*\[[^\]]*\][^{}]*\}'
|
||||
# Try to find any JSON object with known keys (including analysis keys)
|
||||
json_pattern = r'\{[^{}]*(?:"narrations"|"image_prompts"|"video_prompts"|"merged_paragraphs"|"groups"|"recommended_segments"|"scene_boundaries")\s*:\s*[^{}]*\}'
|
||||
match = re.search(json_pattern, text, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
@@ -498,6 +671,17 @@ def _parse_json(text: str) -> dict:
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try to find any JSON object that looks like it contains an array
|
||||
# This is a more aggressive fallback for complex nested arrays
|
||||
json_start = text.find('{')
|
||||
json_end = text.rfind('}')
|
||||
if json_start != -1 and json_end != -1 and json_end > json_start:
|
||||
potential_json = text[json_start:json_end + 1]
|
||||
try:
|
||||
return json.loads(potential_json)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# If all fails, raise error
|
||||
raise json.JSONDecodeError("No valid JSON found", text, 0)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user