支持固定脚本多种分割方式(段落/行/句子)，修复Edge TTS和模板切换问题

2025-12-08 16:59:02 +08:00
parent ea48c4838c
commit 3cf6628022
9 changed files with 100 additions and 15 deletions
--- a/pixelle_video/utils/content_generators.py
+++ b/pixelle_video/utils/content_generators.py
@@ -208,22 +208,55 @@ async def generate_narrations_from_content(

 async def split_narration_script(
    script: str,
+    split_mode: Literal["paragraph", "line", "sentence"] = "paragraph",
 ) -> List[str]:
    """
-    Split user-provided narration script into segments by lines
+    Split user-provided narration script into segments
    
    Args:
-        script: Fixed narration script (each line is a narration)
+        script: Fixed narration script
+        split_mode: Splitting strategy
+            - "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs
+            - "line": Split by single newline (\\n), each line is a segment
+            - "sentence": Split by sentence-ending punctuation (。.!?！？)
    
    Returns:
        List of narration segments
    """
-    logger.info(f"Splitting script by lines (length: {len(script)} chars)")
+    logger.info(f"Splitting script (mode={split_mode}, length={len(script)} chars)")
    
-    # Split by newline, filter empty lines
-    narrations = [line.strip() for line in script.split('\n') if line.strip()]
+    narrations = []
    
-    logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
+    if split_mode == "paragraph":
+        # Split by double newline (paragraph mode)
+        # Preserve single newlines within paragraphs
+        paragraphs = re.split(r'\n\s*\n', script)
+        for para in paragraphs:
+            # Only strip leading/trailing whitespace, preserve internal newlines
+            cleaned = para.strip()
+            if cleaned:
+                narrations.append(cleaned)
+        logger.info(f"✅ Split script into {len(narrations)} segments (by paragraph)")
+    
+    elif split_mode == "line":
+        # Split by single newline (original behavior)
+        narrations = [line.strip() for line in script.split('\n') if line.strip()]
+        logger.info(f"✅ Split script into {len(narrations)} segments (by line)")
+    
+    elif split_mode == "sentence":
+        # Split by sentence-ending punctuation
+        # Supports Chinese (。！？) and English (.!?)
+        # Use regex to split while keeping sentences intact
+        cleaned = re.sub(r'\s+', ' ', script.strip())
+        # Split on sentence-ending punctuation, keeping the punctuation with the sentence
+        sentences = re.split(r'(?<=[。.!?！？])\s*', cleaned)
+        narrations = [s.strip() for s in sentences if s.strip()]
+        logger.info(f"✅ Split script into {len(narrations)} segments (by sentence)")
+    
+    else:
+        # Fallback to line mode
+        logger.warning(f"Unknown split_mode '{split_mode}', falling back to 'line'")
+        narrations = [line.strip() for line in script.split('\n') if line.strip()]
    
    # Log statistics
    if narrations:
--- a/pixelle_video/utils/tts_util.py
+++ b/pixelle_video/utils/tts_util.py
@@ -22,6 +22,7 @@ import ssl
 import random
 import certifi
 import edge_tts as edge_tts_sdk
+from edge_tts.exceptions import NoAudioReceived
 from loguru import logger
 from aiohttp import WSServerHandshakeError, ClientResponseError

@@ -29,8 +30,8 @@ from aiohttp import WSServerHandshakeError, ClientResponseError
 # Use certifi bundle for SSL verification instead of disabling it
 _USE_CERTIFI_SSL = True

-# Retry configuration for Edge TTS (to handle 401 errors)
-_RETRY_COUNT = 10       # Default retry count (increased from 3 to 5)
+# Retry configuration for Edge TTS (to handle 401 errors and NoAudioReceived)
+_RETRY_COUNT = 5           # Default retry count
 _RETRY_BASE_DELAY = 1.0     # Base retry delay in seconds (for exponential backoff)
 _MAX_RETRY_DELAY = 10.0     # Maximum retry delay in seconds

@@ -199,6 +200,18 @@ async def edge_tts(
                    raise
                # Otherwise, continue to next retry
            
+            except NoAudioReceived as e:
+                # NoAudioReceived is often a temporary issue - retry with longer delay
+                last_error = e
+                logger.warning(f"⚠️  Edge TTS NoAudioReceived (attempt {attempt + 1}/{retry_count + 1})")
+                
+                if attempt >= retry_count:
+                    logger.error(f"❌ All {retry_count + 1} attempts failed due to NoAudioReceived")
+                    raise
+                # Attempts remain: note the retry, then add extra delay for NoAudioReceived errors
+                logger.debug("This is usually a temporary Microsoft service issue. Will retry with longer delay...")
+                await asyncio.sleep(2.0)
+            
            except Exception as e:
                # Other errors - don't retry, raise immediately
                logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}")