支持固定脚本多种分割方式(段落/行/句子)，修复Edge TTS和模板切换问题

2025-12-08 16:59:02 +08:00
parent ea48c4838c
commit 3cf6628022
9 changed files with 100 additions and 15 deletions
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -125,8 +125,9 @@ class StandardPipeline(LinearVideoPipeline):
            logger.info(f"✅ Generated {len(ctx.narrations)} narrations")
        else:  # fixed
            self._report_progress(ctx.progress_callback, "splitting_script", 0.05)
-            ctx.narrations = await split_narration_script(text)
+            split_mode = ctx.params.get("split_mode", "paragraph")
-            logger.info(f"✅ Split script into {len(ctx.narrations)} segments (by lines)")
+            ctx.narrations = await split_narration_script(text, split_mode=split_mode)
            logger.info(f"✅ Split script into {len(ctx.narrations)} segments (mode={split_mode})")
            logger.info(f"   Note: n_scenes={n_scenes} is ignored in fixed mode")
    async def determine_title(self, ctx: PipelineContext):
--- a/pixelle_video/utils/content_generators.py
+++ b/pixelle_video/utils/content_generators.py
@@ -208,22 +208,55 @@ async def generate_narrations_from_content(
 async def split_narration_script(
    script: str,
    split_mode: Literal["paragraph", "line", "sentence"] = "paragraph",
 ) -> List[str]:
    """
-    Split user-provided narration script into segments by lines
+    Split user-provided narration script into segments
    Args:
-        script: Fixed narration script (each line is a narration)
+        script: Fixed narration script
        split_mode: Splitting strategy
            - "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs
            - "line": Split by single newline (\\n), each line is a segment
            - "sentence": Split by sentence-ending punctuation (。.!?！？)
    Returns:
        List of narration segments
    """
-    logger.info(f"Splitting script by lines (length: {len(script)} chars)")
+    logger.info(f"Splitting script (mode={split_mode}, length={len(script)} chars)")
-    # Split by newline, filter empty lines
+    narrations = []
    narrations = [line.strip() for line in script.split('\n') if line.strip()]
-    logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
+    if split_mode == "paragraph":
        # Paragraph mode: a blank line (which may itself contain only
        # whitespace) separates segments.
        # Each segment is trimmed at its edges only, so single newlines
        # inside a paragraph are preserved.
        # The result is assigned rather than appended, mirroring the "line"
        # and "sentence" branches, so it does not depend on whatever
        # `narrations` held before this branch ran.
        paragraphs = re.split(r'\n\s*\n', script)
        narrations = [para.strip() for para in paragraphs if para.strip()]
        logger.info(f"✅ Split script into {len(narrations)} segments (by paragraph)")
    elif split_mode == "line":
        # Split by single newline (original behavior)
        narrations = [line.strip() for line in script.split('\n') if line.strip()]
        logger.info(f"✅ Split script into {len(narrations)} segments (by line)")
    elif split_mode == "sentence":
        # Split by sentence-ending punctuation
        # Supports Chinese (。！？) and English (.!?)
        # Use regex to split while keeping sentences intact
        cleaned = re.sub(r'\s+', ' ', script.strip())
        # Split on sentence-ending punctuation, keeping the punctuation with the sentence
        sentences = re.split(r'(?<=[。.!?！？])\s*', cleaned)
        narrations = [s.strip() for s in sentences if s.strip()]
        logger.info(f"✅ Split script into {len(narrations)} segments (by sentence)")
    else:
        # Fallback to line mode
        logger.warning(f"Unknown split_mode '{split_mode}', falling back to 'line'")
        narrations = [line.strip() for line in script.split('\n') if line.strip()]
    # Log statistics
    if narrations:
--- a/pixelle_video/utils/tts_util.py
+++ b/pixelle_video/utils/tts_util.py
@@ -22,6 +22,7 @@ import ssl
 import random
 import certifi
 import edge_tts as edge_tts_sdk
 from edge_tts.exceptions import NoAudioReceived
 from loguru import logger
 from aiohttp import WSServerHandshakeError, ClientResponseError
@@ -29,8 +30,8 @@ from aiohttp import WSServerHandshakeError, ClientResponseError
 # Use certifi bundle for SSL verification instead of disabling it
 _USE_CERTIFI_SSL = True
-# Retry configuration for Edge TTS (to handle 401 errors)
+# Retry configuration for Edge TTS (to handle 401 errors and NoAudioReceived)
-_RETRY_COUNT = 10       # Default retry count (increased from 3 to 5)
+_RETRY_COUNT = 5           # Default retry count
 _RETRY_BASE_DELAY = 1.0     # Base retry delay in seconds (for exponential backoff)
 _MAX_RETRY_DELAY = 10.0     # Maximum retry delay in seconds
@@ -199,6 +200,18 @@ async def edge_tts(
                    raise
                # Otherwise, continue to next retry
            except NoAudioReceived as e:
                # NoAudioReceived is often a temporary issue - retry with longer delay
                last_error = e
                logger.warning(f"⚠️  Edge TTS NoAudioReceived (attempt {attempt + 1}/{retry_count + 1})")
                logger.debug(f"This is usually a temporary Microsoft service issue. Will retry with longer delay...")
                if attempt >= retry_count:
                    logger.error(f"❌ All {retry_count + 1} attempts failed due to NoAudioReceived")
                    raise
                # Add extra delay for NoAudioReceived errors
                await asyncio.sleep(2.0)
            except Exception as e:
                # Other errors - don't retry, raise immediately
                logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}")
--- a/templates/1080x1920/static_default.html
+++ b/templates/1080x1920/static_default.html
@@ -158,6 +158,7 @@
            display: flex;
            align-items: center;
            justify-content: center;
            white-space: pre-line;  /* Preserve line breaks from \n */
        }
        /* Quote marks */
--- a/web/components/content_input.py
+++ b/web/components/content_input.py
@@ -59,6 +59,23 @@ def render_content_input():
                help=text_help
            )
            # Split mode selector (only show in fixed mode)
            if mode == "fixed":
                split_mode_options = {
                    "paragraph": tr("split.mode_paragraph"),
                    "line": tr("split.mode_line"),
                    "sentence": tr("split.mode_sentence"),
                }
                split_mode = st.selectbox(
                    tr("split.mode_label"),
                    options=list(split_mode_options.keys()),
                    format_func=lambda x: split_mode_options[x],
                    index=0,  # Default to paragraph mode
                    help=tr("split.mode_help")
                )
            else:
                split_mode = "paragraph"  # Default for generate mode (not used)
            # Title input (optional for both modes)
            title = st.text_input(
                tr("input.title"),
@@ -87,7 +104,8 @@ def render_content_input():
                "mode": mode,
                "text": text,
                "title": title,
-                "n_scenes": n_scenes
+                "n_scenes": n_scenes,
                "split_mode": split_mode
            }
        else:
--- a/web/components/output_preview.py
+++ b/web/components/output_preview.py
@@ -47,6 +47,7 @@ def render_single_output(pixelle_video, video_params):
    mode = video_params.get("mode", "generate")
    title = video_params.get("title")
    n_scenes = video_params.get("n_scenes", 5)
    split_mode = video_params.get("split_mode", "paragraph")
    bgm_path = video_params.get("bgm_path")
    bgm_volume = video_params.get("bgm_volume", 0.2)
@@ -129,6 +130,7 @@ def render_single_output(pixelle_video, video_params):
                    "mode": mode,
                    "title": title if title else None,
                    "n_scenes": n_scenes,
                    "split_mode": split_mode,
                    "media_workflow": workflow_key,
                    "frame_template": frame_template,
                    "prompt_prefix": prompt_prefix,
--- a/web/components/style_config.py
+++ b/web/components/style_config.py
@@ -345,6 +345,13 @@ def render_style_config(pixelle_video):
        if 'selected_template' not in st.session_state:
            st.session_state['selected_template'] = type_specific_default
        # Track last selected template type to detect type changes
        last_template_type = st.session_state.get('last_template_type', None)
        if last_template_type != selected_template_type:
            # Template type changed, reset to type-specific default
            st.session_state['selected_template'] = type_specific_default
            st.session_state['last_template_type'] = selected_template_type
        # Collect size groups and prepare tabs
        size_groups = []
        size_labels = []
--- a/web/i18n/locales/en_US.json
+++ b/web/i18n/locales/en_US.json
@@ -20,9 +20,14 @@
    "input.topic_help": "Enter a topic, AI will generate content based on it",
    "input.text": "Text Input",
    "input.text_help_generate": "Enter topic or theme (AI will create narrations)",
-    "input.text_help_fixed": "Enter complete narration script (used directly without modification, one narration per line)",
+    "input.text_help_fixed": "Enter complete narration script (used directly without modification)",
    "split.mode_label": "Split Strategy",
    "split.mode_help": "Choose how to split the text into video segments",
    "split.mode_paragraph": "📄 By Paragraph (\\n\\n)",
    "split.mode_line": "📝 By Line (\\n)",
    "split.mode_sentence": "✂️ By Sentence (。.!?)",
    "input.content": "Content",
-    "input.content_placeholder": "Used directly without modification, one narration per line\nExample:\nHello everyone, today I'll share three study tips\nThe first tip is focus training, meditate for 10 minutes daily\nThe second tip is active recall, review immediately after learning",
+    "input.content_placeholder": "Used directly without modification (split by strategy below)\nExample:\nHello everyone, today I'll share three study tips.\n\nThe first tip is focus training, meditate for 10 minutes daily.\n\nThe second tip is active recall, review immediately after learning.",
    "input.content_help": "Provide your own content for video generation",
    "input.title": "Title (Optional)",
    "input.title_placeholder": "Video title (auto-generated if empty)",
--- a/web/i18n/locales/zh_CN.json
+++ b/web/i18n/locales/zh_CN.json
@@ -20,9 +20,14 @@
    "input.topic_help": "输入一个主题，AI 将根据主题生成内容",
    "input.text": "文本输入",
    "input.text_help_generate": "输入主题或话题（AI 将创作旁白）",
-    "input.text_help_fixed": "输入完整的旁白脚本（直接使用，不做改写，每行一个旁白）",
+    "input.text_help_fixed": "输入完整的旁白脚本（直接使用，不做改写）",
    "split.mode_label": "分割方式",
    "split.mode_help": "选择如何将文本分割为视频片段",
    "split.mode_paragraph": "📄 按段落（\\n\\n）",
    "split.mode_line": "📝 按行（\\n）",
    "split.mode_sentence": "✂️ 按句子（。.!?）",
    "input.content": "内容",
-    "input.content_placeholder": "直接使用，不做改写，每行一个旁白\n例如：\n大家好，今天跟你分享三个学习技巧\n第一个技巧是专注力训练，每天冥想10分钟\n第二个技巧是主动回忆，学完立即复述",
+    "input.content_placeholder": "直接使用，不做改写（根据下方分割方式切分）\n例如：\n大家好，今天跟你分享三个学习技巧。\n\n第一个技巧是专注力训练，每天冥想10分钟。\n\n第二个技巧是主动回忆，学完立即复述。",
    "input.content_help": "提供您自己的内容用于视频生成",
    "input.title": "标题（可选）",
    "input.title_placeholder": "视频标题（留空则自动生成）",