From 3cf662802209184c0e6fd9c5385f4b24edb89851 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Mon, 8 Dec 2025 16:59:02 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E5=9B=BA=E5=AE=9A=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E5=A4=9A=E7=A7=8D=E5=88=86=E5=89=B2=E6=96=B9=E5=BC=8F?= =?UTF-8?q?(=E6=AE=B5=E8=90=BD/=E8=A1=8C/=E5=8F=A5=E5=AD=90)=EF=BC=8C?= =?UTF-8?q?=E4=BF=AE=E5=A4=8DEdge=20TTS=E5=92=8C=E6=A8=A1=E6=9D=BF?= =?UTF-8?q?=E5=88=87=E6=8D=A2=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pixelle_video/pipelines/standard.py | 5 ++- pixelle_video/utils/content_generators.py | 45 ++++++++++++++++++++--- pixelle_video/utils/tts_util.py | 17 ++++++++- templates/1080x1920/static_default.html | 1 + web/components/content_input.py | 20 +++++++++- web/components/output_preview.py | 2 + web/components/style_config.py | 7 ++++ web/i18n/locales/en_US.json | 9 ++++- web/i18n/locales/zh_CN.json | 9 ++++- 9 files changed, 100 insertions(+), 15 deletions(-) diff --git a/pixelle_video/pipelines/standard.py b/pixelle_video/pipelines/standard.py index f8f80f9..f165d66 100644 --- a/pixelle_video/pipelines/standard.py +++ b/pixelle_video/pipelines/standard.py @@ -125,8 +125,9 @@ class StandardPipeline(LinearVideoPipeline): logger.info(f"✅ Generated {len(ctx.narrations)} narrations") else: # fixed self._report_progress(ctx.progress_callback, "splitting_script", 0.05) - ctx.narrations = await split_narration_script(text) - logger.info(f"✅ Split script into {len(ctx.narrations)} segments (by lines)") + split_mode = ctx.params.get("split_mode", "paragraph") + ctx.narrations = await split_narration_script(text, split_mode=split_mode) + logger.info(f"✅ Split script into {len(ctx.narrations)} segments (mode={split_mode})") logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode") async def determine_title(self, ctx: PipelineContext): diff --git a/pixelle_video/utils/content_generators.py b/pixelle_video/utils/content_generators.py index 02c1471..8a821f7 100644 --- a/pixelle_video/utils/content_generators.py +++ b/pixelle_video/utils/content_generators.py @@ -208,22 +208,55 @@ async def generate_narrations_from_content( async def split_narration_script( script: str, + split_mode: Literal["paragraph", "line", "sentence"] = "paragraph", ) -> List[str]: """ - Split user-provided narration script into segments by lines + Split user-provided narration script into segments Args: - script: Fixed narration script (each line is a narration) + script: Fixed narration script + split_mode: Splitting strategy + - "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs + - "line": Split by single newline (\\n), each line is a segment + - "sentence": Split by sentence-ending punctuation (。.!?!?) Returns: List of narration segments """ - logger.info(f"Splitting script by lines (length: {len(script)} chars)") + logger.info(f"Splitting script (mode={split_mode}, length={len(script)} chars)") - # Split by newline, filter empty lines - narrations = [line.strip() for line in script.split('\n') if line.strip()] + narrations = [] - logger.info(f"✅ Split script into {len(narrations)} segments (by lines)") + if split_mode == "paragraph": + # Split by double newline (paragraph mode) + # Preserve single newlines within paragraphs + paragraphs = re.split(r'\n\s*\n', script) + for para in paragraphs: + # Only strip leading/trailing whitespace, preserve internal newlines + cleaned = para.strip() + if cleaned: + narrations.append(para) + logger.info(f"✅ Split script into {len(narrations)} segments (by paragraph)") + + elif split_mode == "line": + # Split by single newline (original behavior) + narrations = [line.strip() for line in script.split('\n') if line.strip()] + logger.info(f"✅ Split script into {len(narrations)} segments (by line)") + + elif split_mode == "sentence": + # Split by sentence-ending punctuation + # Supports Chinese (。!?) and English (.!?) + # Use regex to split while keeping sentences intact + cleaned = re.sub(r'\s+', ' ', script.strip()) + # Split on sentence-ending punctuation, keeping the punctuation with the sentence + sentences = re.split(r'(?<=[。.!?!?])\s*', cleaned) + narrations = [s.strip() for s in sentences if s.strip()] + logger.info(f"✅ Split script into {len(narrations)} segments (by sentence)") + + else: + # Fallback to line mode + logger.warning(f"Unknown split_mode '{split_mode}', falling back to 'line'") + narrations = [line.strip() for line in script.split('\n') if line.strip()] # Log statistics if narrations: diff --git a/pixelle_video/utils/tts_util.py b/pixelle_video/utils/tts_util.py index b57652c..5c32832 100644 --- a/pixelle_video/utils/tts_util.py +++ b/pixelle_video/utils/tts_util.py @@ -22,6 +22,7 @@ import ssl import random import certifi import edge_tts as edge_tts_sdk +from edge_tts.exceptions import NoAudioReceived from loguru import logger from aiohttp import WSServerHandshakeError, ClientResponseError @@ -29,8 +30,8 @@ from aiohttp import WSServerHandshakeError, ClientResponseError # Use certifi bundle for SSL verification instead of disabling it _USE_CERTIFI_SSL = True -# Retry configuration for Edge TTS (to handle 401 errors) -_RETRY_COUNT = 10 # Default retry count (increased from 3 to 5) +# Retry configuration for Edge TTS (to handle 401 errors and NoAudioReceived) +_RETRY_COUNT = 5 # Default retry count _RETRY_BASE_DELAY = 1.0 # Base retry delay in seconds (for exponential backoff) _MAX_RETRY_DELAY = 10.0 # Maximum retry delay in seconds @@ -199,6 +200,18 @@ async def edge_tts( raise # Otherwise, continue to next retry + except NoAudioReceived as e: + # NoAudioReceived is often a temporary issue - retry with longer delay + last_error = e + logger.warning(f"⚠️ Edge TTS NoAudioReceived (attempt {attempt + 1}/{retry_count + 1})") + logger.debug(f"This is usually a temporary Microsoft service issue. Will retry with longer delay...") + + if attempt >= retry_count: + logger.error(f"❌ All {retry_count + 1} attempts failed due to NoAudioReceived") + raise + # Add extra delay for NoAudioReceived errors + await asyncio.sleep(2.0) + except Exception as e: # Other errors - don't retry, raise immediately logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}") diff --git a/templates/1080x1920/static_default.html b/templates/1080x1920/static_default.html index 5d8f496..3f61011 100644 --- a/templates/1080x1920/static_default.html +++ b/templates/1080x1920/static_default.html @@ -158,6 +158,7 @@ display: flex; align-items: center; justify-content: center; + white-space: pre-line; /* Preserve line breaks from \n */ } /* Quote marks */ diff --git a/web/components/content_input.py b/web/components/content_input.py index a283c90..7076451 100644 --- a/web/components/content_input.py +++ b/web/components/content_input.py @@ -59,6 +59,23 @@ def render_content_input(): help=text_help ) + # Split mode selector (only show in fixed mode) + if mode == "fixed": + split_mode_options = { + "paragraph": tr("split.mode_paragraph"), + "line": tr("split.mode_line"), + "sentence": tr("split.mode_sentence"), + } + split_mode = st.selectbox( + tr("split.mode_label"), + options=list(split_mode_options.keys()), + format_func=lambda x: split_mode_options[x], + index=0, # Default to paragraph mode + help=tr("split.mode_help") + ) + else: + split_mode = "paragraph" # Default for generate mode (not used) + # Title input (optional for both modes) title = st.text_input( tr("input.title"), @@ -87,7 +104,8 @@ def render_content_input(): "mode": mode, "text": text, "title": title, - "n_scenes": n_scenes + "n_scenes": n_scenes, + "split_mode": split_mode } else: diff --git a/web/components/output_preview.py b/web/components/output_preview.py index 6b0b8cc..19d328a 100644 --- a/web/components/output_preview.py +++ b/web/components/output_preview.py @@ -47,6 +47,7 @@ def render_single_output(pixelle_video, video_params): mode = video_params.get("mode", "generate") title = video_params.get("title") n_scenes = video_params.get("n_scenes", 5) + split_mode = video_params.get("split_mode", "paragraph") bgm_path = video_params.get("bgm_path") bgm_volume = video_params.get("bgm_volume", 0.2) @@ -129,6 +130,7 @@ def render_single_output(pixelle_video, video_params): "mode": mode, "title": title if title else None, "n_scenes": n_scenes, + "split_mode": split_mode, "media_workflow": workflow_key, "frame_template": frame_template, "prompt_prefix": prompt_prefix, diff --git a/web/components/style_config.py b/web/components/style_config.py index c73737e..00614f1 100644 --- a/web/components/style_config.py +++ b/web/components/style_config.py @@ -345,6 +345,13 @@ def render_style_config(pixelle_video): if 'selected_template' not in st.session_state: st.session_state['selected_template'] = type_specific_default + # Track last selected template type to detect type changes + last_template_type = st.session_state.get('last_template_type', None) + if last_template_type != selected_template_type: + # Template type changed, reset to type-specific default + st.session_state['selected_template'] = type_specific_default + st.session_state['last_template_type'] = selected_template_type + # Collect size groups and prepare tabs size_groups = [] size_labels = [] diff --git a/web/i18n/locales/en_US.json b/web/i18n/locales/en_US.json index ec64f67..35535a7 100644 --- a/web/i18n/locales/en_US.json +++ b/web/i18n/locales/en_US.json @@ -20,9 +20,14 @@ "input.topic_help": "Enter a topic, AI will generate content based on it", "input.text": "Text Input", "input.text_help_generate": "Enter topic or theme (AI will create narrations)", - "input.text_help_fixed": "Enter complete narration script (used directly without modification, one narration per line)", + "input.text_help_fixed": "Enter complete narration script (used directly without modification)", + "split.mode_label": "Split Strategy", + "split.mode_help": "Choose how to split the text into video segments", + "split.mode_paragraph": "📄 By Paragraph (\\n\\n)", + "split.mode_line": "📝 By Line (\\n)", + "split.mode_sentence": "✂️ By Sentence (。.!?)", "input.content": "Content", - "input.content_placeholder": "Used directly without modification, one narration per line\nExample:\nHello everyone, today I'll share three study tips\nThe first tip is focus training, meditate for 10 minutes daily\nThe second tip is active recall, review immediately after learning", + "input.content_placeholder": "Used directly without modification (split by strategy below)\nExample:\nHello everyone, today I'll share three study tips.\n\nThe first tip is focus training, meditate for 10 minutes daily.\n\nThe second tip is active recall, review immediately after learning.", "input.content_help": "Provide your own content for video generation", "input.title": "Title (Optional)", "input.title_placeholder": "Video title (auto-generated if empty)", diff --git a/web/i18n/locales/zh_CN.json b/web/i18n/locales/zh_CN.json index a979bc6..3c1feba 100644 --- a/web/i18n/locales/zh_CN.json +++ b/web/i18n/locales/zh_CN.json @@ -20,9 +20,14 @@ "input.topic_help": "输入一个主题,AI 将根据主题生成内容", "input.text": "文本输入", "input.text_help_generate": "输入主题或话题(AI 将创作旁白)", - "input.text_help_fixed": "输入完整的旁白脚本(直接使用,不做改写,每行一个旁白)", + "input.text_help_fixed": "输入完整的旁白脚本(直接使用,不做改写)", + "split.mode_label": "分割方式", + "split.mode_help": "选择如何将文本分割为视频片段", + "split.mode_paragraph": "📄 按段落(\\n\\n)", + "split.mode_line": "📝 按行(\\n)", + "split.mode_sentence": "✂️ 按句号(。.!?)", "input.content": "内容", - "input.content_placeholder": "直接使用,不做改写,每行一个旁白\n例如:\n大家好,今天跟你分享三个学习技巧\n第一个技巧是专注力训练,每天冥想10分钟\n第二个技巧是主动回忆,学完立即复述", + "input.content_placeholder": "直接使用,不做改写(根据下方分割方式切分)\n例如:\n大家好,今天跟你分享三个学习技巧。\n\n第一个技巧是专注力训练,每天冥想10分钟。\n\n第二个技巧是主动回忆,学完立即复述。", "input.content_help": "提供您自己的内容用于视频生成", "input.title": "标题(可选)", "input.title_placeholder": "视频标题(留空则自动生成)",