From 3cf662802209184c0e6fd9c5385f4b24edb89851 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Mon, 8 Dec 2025 16:59:02 +0800
Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E5=9B=BA=E5=AE=9A=E8=84=9A?=
 =?UTF-8?q?=E6=9C=AC=E5=A4=9A=E7=A7=8D=E5=88=86=E5=89=B2=E6=96=B9=E5=BC=8F?=
 =?UTF-8?q?(=E6=AE=B5=E8=90=BD/=E8=A1=8C/=E5=8F=A5=E5=AD=90)=EF=BC=8C?=
 =?UTF-8?q?=E4=BF=AE=E5=A4=8DEdge=20TTS=E5=92=8C=E6=A8=A1=E6=9D=BF?=
 =?UTF-8?q?=E5=88=87=E6=8D=A2=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pixelle_video/pipelines/standard.py       |  5 ++-
 pixelle_video/utils/content_generators.py | 45 ++++++++++++++++++++---
 pixelle_video/utils/tts_util.py           | 17 ++++++++-
 templates/1080x1920/static_default.html   |  1 +
 web/components/content_input.py           | 20 +++++++++-
 web/components/output_preview.py          |  2 +
 web/components/style_config.py            |  7 ++++
 web/i18n/locales/en_US.json               |  9 ++++-
 web/i18n/locales/zh_CN.json               |  9 ++++-
 9 files changed, 100 insertions(+), 15 deletions(-)

diff --git a/pixelle_video/pipelines/standard.py b/pixelle_video/pipelines/standard.py
index f8f80f9..f165d66 100644
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -125,8 +125,9 @@ class StandardPipeline(LinearVideoPipeline):
             logger.info(f"✅ Generated {len(ctx.narrations)} narrations")
         else:  # fixed
             self._report_progress(ctx.progress_callback, "splitting_script", 0.05)
-            ctx.narrations = await split_narration_script(text)
-            logger.info(f"✅ Split script into {len(ctx.narrations)} segments (by lines)")
+            split_mode = ctx.params.get("split_mode", "paragraph")
+            ctx.narrations = await split_narration_script(text, split_mode=split_mode)
+            logger.info(f"✅ Split script into {len(ctx.narrations)} segments (mode={split_mode})")
             logger.info(f"   Note: n_scenes={n_scenes} is ignored in fixed mode")
 
     async def determine_title(self, ctx: PipelineContext):
diff --git a/pixelle_video/utils/content_generators.py b/pixelle_video/utils/content_generators.py
index 02c1471..8a821f7 100644
--- a/pixelle_video/utils/content_generators.py
+++ b/pixelle_video/utils/content_generators.py
@@ -208,22 +208,55 @@ async def generate_narrations_from_content(
 
 async def split_narration_script(
     script: str,
+    split_mode: Literal["paragraph", "line", "sentence"] = "paragraph",
 ) -> List[str]:
     """
-    Split user-provided narration script into segments by lines
+    Split user-provided narration script into segments
     
     Args:
-        script: Fixed narration script (each line is a narration)
+        script: Fixed narration script
+        split_mode: Splitting strategy
+            - "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs
+            - "line": Split by single newline (\\n), each line is a segment
+            - "sentence": Split by sentence-ending punctuation (。.!?！？)
     
     Returns:
         List of narration segments
     """
-    logger.info(f"Splitting script by lines (length: {len(script)} chars)")
+    logger.info(f"Splitting script (mode={split_mode}, length={len(script)} chars)")
     
-    # Split by newline, filter empty lines
-    narrations = [line.strip() for line in script.split('\n') if line.strip()]
+    narrations = []
     
-    logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
+    if split_mode == "paragraph":
+        # Paragraphs are separated by one or more blank (or whitespace-only)
+        # lines; single newlines inside a paragraph are kept as-is.
+        paragraphs = re.split(r'\n\s*\n', script)
+        for para in paragraphs:
+            # Strip only the outer whitespace so internal newlines survive
+            cleaned = para.strip()
+            if cleaned:
+                narrations.append(cleaned)  # store the stripped text, not the raw chunk
+        logger.info(f"✅ Split script into {len(narrations)} segments (by paragraph)")
+    
+    elif split_mode == "line":
+        # Split by single newline (original behavior)
+        narrations = [line.strip() for line in script.split('\n') if line.strip()]
+        logger.info(f"✅ Split script into {len(narrations)} segments (by line)")
+    
+    elif split_mode == "sentence":
+        # Collapse all whitespace first so line breaks do not matter, then cut
+        # after Chinese (。！？) or English (.!?) terminators. The lookaheads keep
+        # terminator runs ("...", "?!") and decimals ("3.14") in one segment.
+        cleaned = re.sub(r'\s+', ' ', script.strip())
+        # Zero-width lookbehind keeps the punctuation attached to its sentence
+        sentences = re.split(r'(?<=[。.!?！？])(?![。.!?！？])(?!(?<=\d\.)\d)\s*', cleaned)
+        narrations = [s.strip() for s in sentences if s.strip()]
+        logger.info(f"✅ Split script into {len(narrations)} segments (by sentence)")
+    
+    else:
+        # Unknown mode: warn and use the pre-existing line-based behavior
+        logger.warning(f"Unknown split_mode '{split_mode}', falling back to 'line'")
+        narrations = [line.strip() for line in script.split('\n') if line.strip()]
     
     # Log statistics
     if narrations:
diff --git a/pixelle_video/utils/tts_util.py b/pixelle_video/utils/tts_util.py
index b57652c..5c32832 100644
--- a/pixelle_video/utils/tts_util.py
+++ b/pixelle_video/utils/tts_util.py
@@ -22,6 +22,7 @@ import ssl
 import random
 import certifi
 import edge_tts as edge_tts_sdk
+from edge_tts.exceptions import NoAudioReceived
 from loguru import logger
 from aiohttp import WSServerHandshakeError, ClientResponseError
 
@@ -29,8 +30,8 @@ from aiohttp import WSServerHandshakeError, ClientResponseError
 # Use certifi bundle for SSL verification instead of disabling it
 _USE_CERTIFI_SSL = True
 
-# Retry configuration for Edge TTS (to handle 401 errors)
-_RETRY_COUNT = 10       # Default retry count (increased from 3 to 5)
+# Retry configuration for Edge TTS (to handle 401 errors and NoAudioReceived)
+_RETRY_COUNT = 5           # Default retry count
 _RETRY_BASE_DELAY = 1.0     # Base retry delay in seconds (for exponential backoff)
 _MAX_RETRY_DELAY = 10.0     # Maximum retry delay in seconds
 
@@ -199,6 +200,18 @@ async def edge_tts(
                     raise
                 # Otherwise, continue to next retry
             
+            except NoAudioReceived as e:
+                # NoAudioReceived is often a temporary issue - retry with longer delay
+                last_error = e
+                logger.warning(f"⚠️  Edge TTS NoAudioReceived (attempt {attempt + 1}/{retry_count + 1})")
+                logger.debug(f"This is usually a temporary Microsoft service issue. Will retry with longer delay...")
+                
+                if attempt >= retry_count:
+                    logger.error(f"❌ All {retry_count + 1} attempts failed due to NoAudioReceived")
+                    raise
+                # Add extra delay for NoAudioReceived errors
+                await asyncio.sleep(2.0)
+            
             except Exception as e:
                 # Other errors - don't retry, raise immediately
                 logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}")
diff --git a/templates/1080x1920/static_default.html b/templates/1080x1920/static_default.html
index 5d8f496..3f61011 100644
--- a/templates/1080x1920/static_default.html
+++ b/templates/1080x1920/static_default.html
@@ -158,6 +158,7 @@
             display: flex;
             align-items: center;
             justify-content: center;
+            white-space: pre-line;  /* Preserve line breaks from \n */
         }
         
         /* Quote marks */
diff --git a/web/components/content_input.py b/web/components/content_input.py
index a283c90..7076451 100644
--- a/web/components/content_input.py
+++ b/web/components/content_input.py
@@ -59,6 +59,23 @@ def render_content_input():
                 help=text_help
             )
             
+            # Split mode selector (only show in fixed mode)
+            if mode == "fixed":
+                split_mode_options = {
+                    "paragraph": tr("split.mode_paragraph"),
+                    "line": tr("split.mode_line"),
+                    "sentence": tr("split.mode_sentence"),
+                }
+                split_mode = st.selectbox(
+                    tr("split.mode_label"),
+                    options=list(split_mode_options.keys()),
+                    format_func=lambda x: split_mode_options[x],
+                    index=0,  # Default to paragraph mode
+                    help=tr("split.mode_help")
+                )
+            else:
+                split_mode = "paragraph"  # Default for generate mode (not used)
+            
             # Title input (optional for both modes)
             title = st.text_input(
                 tr("input.title"),
@@ -87,7 +104,8 @@ def render_content_input():
                 "mode": mode,
                 "text": text,
                 "title": title,
-                "n_scenes": n_scenes
+                "n_scenes": n_scenes,
+                "split_mode": split_mode
             }
         
         else:
diff --git a/web/components/output_preview.py b/web/components/output_preview.py
index 6b0b8cc..19d328a 100644
--- a/web/components/output_preview.py
+++ b/web/components/output_preview.py
@@ -47,6 +47,7 @@ def render_single_output(pixelle_video, video_params):
     mode = video_params.get("mode", "generate")
     title = video_params.get("title")
     n_scenes = video_params.get("n_scenes", 5)
+    split_mode = video_params.get("split_mode", "paragraph")
     bgm_path = video_params.get("bgm_path")
     bgm_volume = video_params.get("bgm_volume", 0.2)
     
@@ -129,6 +130,7 @@ def render_single_output(pixelle_video, video_params):
                     "mode": mode,
                     "title": title if title else None,
                     "n_scenes": n_scenes,
+                    "split_mode": split_mode,
                     "media_workflow": workflow_key,
                     "frame_template": frame_template,
                     "prompt_prefix": prompt_prefix,
diff --git a/web/components/style_config.py b/web/components/style_config.py
index c73737e..00614f1 100644
--- a/web/components/style_config.py
+++ b/web/components/style_config.py
@@ -345,6 +345,13 @@ def render_style_config(pixelle_video):
         if 'selected_template' not in st.session_state:
             st.session_state['selected_template'] = type_specific_default
         
+        # Track last selected template type to detect type changes
+        last_template_type = st.session_state.get('last_template_type', None)
+        if last_template_type != selected_template_type:
+            # Template type changed, reset to type-specific default
+            st.session_state['selected_template'] = type_specific_default
+            st.session_state['last_template_type'] = selected_template_type
+        
         # Collect size groups and prepare tabs
         size_groups = []
         size_labels = []
diff --git a/web/i18n/locales/en_US.json b/web/i18n/locales/en_US.json
index ec64f67..35535a7 100644
--- a/web/i18n/locales/en_US.json
+++ b/web/i18n/locales/en_US.json
@@ -20,9 +20,14 @@
     "input.topic_help": "Enter a topic, AI will generate content based on it",
     "input.text": "Text Input",
     "input.text_help_generate": "Enter topic or theme (AI will create narrations)",
-    "input.text_help_fixed": "Enter complete narration script (used directly without modification, one narration per line)",
+    "input.text_help_fixed": "Enter complete narration script (used directly without modification)",
+    "split.mode_label": "Split Strategy",
+    "split.mode_help": "Choose how to split the text into video segments",
+    "split.mode_paragraph": "📄 By Paragraph (\\n\\n)",
+    "split.mode_line": "📝 By Line (\\n)",
+    "split.mode_sentence": "✂️ By Sentence (。.!?)",
     "input.content": "Content",
-    "input.content_placeholder": "Used directly without modification, one narration per line\nExample:\nHello everyone, today I'll share three study tips\nThe first tip is focus training, meditate for 10 minutes daily\nThe second tip is active recall, review immediately after learning",
+    "input.content_placeholder": "Used directly without modification (split by strategy below)\nExample:\nHello everyone, today I'll share three study tips.\n\nThe first tip is focus training, meditate for 10 minutes daily.\n\nThe second tip is active recall, review immediately after learning.",
     "input.content_help": "Provide your own content for video generation",
     "input.title": "Title (Optional)",
     "input.title_placeholder": "Video title (auto-generated if empty)",
diff --git a/web/i18n/locales/zh_CN.json b/web/i18n/locales/zh_CN.json
index a979bc6..3c1feba 100644
--- a/web/i18n/locales/zh_CN.json
+++ b/web/i18n/locales/zh_CN.json
@@ -20,9 +20,14 @@
     "input.topic_help": "输入一个主题，AI 将根据主题生成内容",
     "input.text": "文本输入",
     "input.text_help_generate": "输入主题或话题（AI 将创作旁白）",
-    "input.text_help_fixed": "输入完整的旁白脚本（直接使用，不做改写，每行一个旁白）",
+    "input.text_help_fixed": "输入完整的旁白脚本（直接使用，不做改写）",
+    "split.mode_label": "分割方式",
+    "split.mode_help": "选择如何将文本分割为视频片段",
+    "split.mode_paragraph": "📄 按段落（\\n\\n）",
+    "split.mode_line": "📝 按行（\\n）",
+    "split.mode_sentence": "✂️ 按句子（。.!?）",
     "input.content": "内容",
-    "input.content_placeholder": "直接使用，不做改写，每行一个旁白\n例如：\n大家好，今天跟你分享三个学习技巧\n第一个技巧是专注力训练，每天冥想10分钟\n第二个技巧是主动回忆，学完立即复述",
+    "input.content_placeholder": "直接使用，不做改写（根据下方分割方式切分）\n例如：\n大家好，今天跟你分享三个学习技巧。\n\n第一个技巧是专注力训练，每天冥想10分钟。\n\n第二个技巧是主动回忆，学完立即复述。",
     "input.content_help": "提供您自己的内容用于视频生成",
     "input.title": "标题（可选）",
     "input.title_placeholder": "视频标题（留空则自动生成）",