支持固定脚本多种分割方式(段落/行/句子),修复Edge TTS和模板切换问题

This commit is contained in:
puke
2025-12-08 16:59:02 +08:00
parent ea48c4838c
commit 3cf6628022
9 changed files with 100 additions and 15 deletions

View File

@@ -125,8 +125,9 @@ class StandardPipeline(LinearVideoPipeline):
logger.info(f"✅ Generated {len(ctx.narrations)} narrations") logger.info(f"✅ Generated {len(ctx.narrations)} narrations")
else: # fixed else: # fixed
self._report_progress(ctx.progress_callback, "splitting_script", 0.05) self._report_progress(ctx.progress_callback, "splitting_script", 0.05)
ctx.narrations = await split_narration_script(text) split_mode = ctx.params.get("split_mode", "paragraph")
logger.info(f"✅ Split script into {len(ctx.narrations)} segments (by lines)") ctx.narrations = await split_narration_script(text, split_mode=split_mode)
logger.info(f"✅ Split script into {len(ctx.narrations)} segments (mode={split_mode})")
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode") logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
async def determine_title(self, ctx: PipelineContext): async def determine_title(self, ctx: PipelineContext):

View File

@@ -208,22 +208,55 @@ async def generate_narrations_from_content(
async def split_narration_script( async def split_narration_script(
script: str, script: str,
split_mode: Literal["paragraph", "line", "sentence"] = "paragraph",
) -> List[str]: ) -> List[str]:
""" """
Split user-provided narration script into segments by lines Split user-provided narration script into segments
Args: Args:
script: Fixed narration script (each line is a narration) script: Fixed narration script
split_mode: Splitting strategy
- "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs
- "line": Split by single newline (\\n), each line is a segment
- "sentence": Split by sentence-ending punctuation (。.!?)
Returns: Returns:
List of narration segments List of narration segments
""" """
logger.info(f"Splitting script by lines (length: {len(script)} chars)") logger.info(f"Splitting script (mode={split_mode}, length={len(script)} chars)")
# Split by newline, filter empty lines narrations = []
narrations = [line.strip() for line in script.split('\n') if line.strip()]
logger.info(f"✅ Split script into {len(narrations)} segments (by lines)") if split_mode == "paragraph":
# Split by double newline (paragraph mode)
# Preserve single newlines within paragraphs
paragraphs = re.split(r'\n\s*\n', script)
for para in paragraphs:
# Only strip leading/trailing whitespace, preserve internal newlines
cleaned = para.strip()
if cleaned:
narrations.append(para)
logger.info(f"✅ Split script into {len(narrations)} segments (by paragraph)")
elif split_mode == "line":
# Split by single newline (original behavior)
narrations = [line.strip() for line in script.split('\n') if line.strip()]
logger.info(f"✅ Split script into {len(narrations)} segments (by line)")
elif split_mode == "sentence":
# Split by sentence-ending punctuation
# Supports Chinese (。!?) and English (.!?)
# Use regex to split while keeping sentences intact
cleaned = re.sub(r'\s+', ' ', script.strip())
# Split on sentence-ending punctuation, keeping the punctuation with the sentence
sentences = re.split(r'(?<=[。.!?])\s*', cleaned)
narrations = [s.strip() for s in sentences if s.strip()]
logger.info(f"✅ Split script into {len(narrations)} segments (by sentence)")
else:
# Fallback to line mode
logger.warning(f"Unknown split_mode '{split_mode}', falling back to 'line'")
narrations = [line.strip() for line in script.split('\n') if line.strip()]
# Log statistics # Log statistics
if narrations: if narrations:

View File

@@ -22,6 +22,7 @@ import ssl
import random import random
import certifi import certifi
import edge_tts as edge_tts_sdk import edge_tts as edge_tts_sdk
from edge_tts.exceptions import NoAudioReceived
from loguru import logger from loguru import logger
from aiohttp import WSServerHandshakeError, ClientResponseError from aiohttp import WSServerHandshakeError, ClientResponseError
@@ -29,8 +30,8 @@ from aiohttp import WSServerHandshakeError, ClientResponseError
# Use certifi bundle for SSL verification instead of disabling it # Use certifi bundle for SSL verification instead of disabling it
_USE_CERTIFI_SSL = True _USE_CERTIFI_SSL = True
# Retry configuration for Edge TTS (to handle 401 errors) # Retry configuration for Edge TTS (to handle 401 errors and NoAudioReceived)
_RETRY_COUNT = 10 # Default retry count (increased from 3 to 5) _RETRY_COUNT = 5 # Default retry count
_RETRY_BASE_DELAY = 1.0 # Base retry delay in seconds (for exponential backoff) _RETRY_BASE_DELAY = 1.0 # Base retry delay in seconds (for exponential backoff)
_MAX_RETRY_DELAY = 10.0 # Maximum retry delay in seconds _MAX_RETRY_DELAY = 10.0 # Maximum retry delay in seconds
@@ -199,6 +200,18 @@ async def edge_tts(
raise raise
# Otherwise, continue to next retry # Otherwise, continue to next retry
except NoAudioReceived as e:
# NoAudioReceived is often a temporary issue - retry with longer delay
last_error = e
logger.warning(f"⚠️ Edge TTS NoAudioReceived (attempt {attempt + 1}/{retry_count + 1})")
logger.debug(f"This is usually a temporary Microsoft service issue. Will retry with longer delay...")
if attempt >= retry_count:
logger.error(f"❌ All {retry_count + 1} attempts failed due to NoAudioReceived")
raise
# Add extra delay for NoAudioReceived errors
await asyncio.sleep(2.0)
except Exception as e: except Exception as e:
# Other errors - don't retry, raise immediately # Other errors - don't retry, raise immediately
logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}") logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}")

View File

@@ -158,6 +158,7 @@
display: flex; display: flex;
align-items: center; align-items: center;
justify-content: center; justify-content: center;
white-space: pre-line; /* Preserve line breaks from \n */
} }
/* Quote marks */ /* Quote marks */

View File

@@ -59,6 +59,23 @@ def render_content_input():
help=text_help help=text_help
) )
# Split mode selector (only show in fixed mode)
if mode == "fixed":
split_mode_options = {
"paragraph": tr("split.mode_paragraph"),
"line": tr("split.mode_line"),
"sentence": tr("split.mode_sentence"),
}
split_mode = st.selectbox(
tr("split.mode_label"),
options=list(split_mode_options.keys()),
format_func=lambda x: split_mode_options[x],
index=0, # Default to paragraph mode
help=tr("split.mode_help")
)
else:
split_mode = "paragraph" # Default for generate mode (not used)
# Title input (optional for both modes) # Title input (optional for both modes)
title = st.text_input( title = st.text_input(
tr("input.title"), tr("input.title"),
@@ -87,7 +104,8 @@ def render_content_input():
"mode": mode, "mode": mode,
"text": text, "text": text,
"title": title, "title": title,
"n_scenes": n_scenes "n_scenes": n_scenes,
"split_mode": split_mode
} }
else: else:

View File

@@ -47,6 +47,7 @@ def render_single_output(pixelle_video, video_params):
mode = video_params.get("mode", "generate") mode = video_params.get("mode", "generate")
title = video_params.get("title") title = video_params.get("title")
n_scenes = video_params.get("n_scenes", 5) n_scenes = video_params.get("n_scenes", 5)
split_mode = video_params.get("split_mode", "paragraph")
bgm_path = video_params.get("bgm_path") bgm_path = video_params.get("bgm_path")
bgm_volume = video_params.get("bgm_volume", 0.2) bgm_volume = video_params.get("bgm_volume", 0.2)
@@ -129,6 +130,7 @@ def render_single_output(pixelle_video, video_params):
"mode": mode, "mode": mode,
"title": title if title else None, "title": title if title else None,
"n_scenes": n_scenes, "n_scenes": n_scenes,
"split_mode": split_mode,
"media_workflow": workflow_key, "media_workflow": workflow_key,
"frame_template": frame_template, "frame_template": frame_template,
"prompt_prefix": prompt_prefix, "prompt_prefix": prompt_prefix,

View File

@@ -345,6 +345,13 @@ def render_style_config(pixelle_video):
if 'selected_template' not in st.session_state: if 'selected_template' not in st.session_state:
st.session_state['selected_template'] = type_specific_default st.session_state['selected_template'] = type_specific_default
# Track last selected template type to detect type changes
last_template_type = st.session_state.get('last_template_type', None)
if last_template_type != selected_template_type:
# Template type changed, reset to type-specific default
st.session_state['selected_template'] = type_specific_default
st.session_state['last_template_type'] = selected_template_type
# Collect size groups and prepare tabs # Collect size groups and prepare tabs
size_groups = [] size_groups = []
size_labels = [] size_labels = []

View File

@@ -20,9 +20,14 @@
"input.topic_help": "Enter a topic, AI will generate content based on it", "input.topic_help": "Enter a topic, AI will generate content based on it",
"input.text": "Text Input", "input.text": "Text Input",
"input.text_help_generate": "Enter topic or theme (AI will create narrations)", "input.text_help_generate": "Enter topic or theme (AI will create narrations)",
"input.text_help_fixed": "Enter complete narration script (used directly without modification, one narration per line)", "input.text_help_fixed": "Enter complete narration script (used directly without modification)",
"split.mode_label": "Split Strategy",
"split.mode_help": "Choose how to split the text into video segments",
"split.mode_paragraph": "📄 By Paragraph (\\n\\n)",
"split.mode_line": "📝 By Line (\\n)",
"split.mode_sentence": "✂️ By Sentence (。.!?)",
"input.content": "Content", "input.content": "Content",
"input.content_placeholder": "Used directly without modification, one narration per line\nExample:\nHello everyone, today I'll share three study tips\nThe first tip is focus training, meditate for 10 minutes daily\nThe second tip is active recall, review immediately after learning", "input.content_placeholder": "Used directly without modification (split by strategy below)\nExample:\nHello everyone, today I'll share three study tips.\n\nThe first tip is focus training, meditate for 10 minutes daily.\n\nThe second tip is active recall, review immediately after learning.",
"input.content_help": "Provide your own content for video generation", "input.content_help": "Provide your own content for video generation",
"input.title": "Title (Optional)", "input.title": "Title (Optional)",
"input.title_placeholder": "Video title (auto-generated if empty)", "input.title_placeholder": "Video title (auto-generated if empty)",

View File

@@ -20,9 +20,14 @@
"input.topic_help": "输入一个主题,AI 将根据主题生成内容", "input.topic_help": "输入一个主题,AI 将根据主题生成内容",
"input.text": "文本输入", "input.text": "文本输入",
"input.text_help_generate": "输入主题或话题(AI 将创作旁白)", "input.text_help_generate": "输入主题或话题(AI 将创作旁白)",
"input.text_help_fixed": "输入完整的旁白脚本(直接使用,不做改写,每行一个旁白)", "input.text_help_fixed": "输入完整的旁白脚本(直接使用,不做改写)",
"split.mode_label": "分割方式",
"split.mode_help": "选择如何将文本分割为视频片段",
"split.mode_paragraph": "📄 按段落(\\n\\n)",
"split.mode_line": "📝 按行(\\n)",
"split.mode_sentence": "✂️ 按句号(。.!?)",
"input.content": "内容", "input.content": "内容",
"input.content_placeholder": "直接使用,不做改写,每行一个旁白\n例如:\n大家好,今天跟你分享三个学习技巧\n第一个技巧是专注力训练,每天冥想10分钟\n第二个技巧是主动回忆,学完立即复述", "input.content_placeholder": "直接使用,不做改写(根据下方分割方式切分)\n例如:\n大家好,今天跟你分享三个学习技巧。\n\n第一个技巧是专注力训练,每天冥想10分钟。\n\n第二个技巧是主动回忆,学完立即复述。",
"input.content_help": "提供您自己的内容用于视频生成", "input.content_help": "提供您自己的内容用于视频生成",
"input.title": "标题(可选)", "input.title": "标题(可选)",
"input.title_placeholder": "视频标题(留空则自动生成)", "input.title_placeholder": "视频标题(留空则自动生成)",