TTS支持参考音频逻辑

This commit is contained in:
puke
2025-10-31 15:50:35 +08:00
parent 7c3a49f55b
commit 2fe5e7c0fa
8 changed files with 435 additions and 316 deletions

View File

@@ -471,6 +471,28 @@ def main():
else:
tts_workflow_key = "selfhost/tts_edge.json" # fallback
# Reference audio upload (optional, for voice cloning)
ref_audio_file = st.file_uploader(
tr("tts.ref_audio"),
type=["mp3", "wav", "flac", "m4a", "aac", "ogg"],
help=tr("tts.ref_audio_help"),
key="ref_audio_upload"
)
# Save uploaded ref_audio to temp file if provided
ref_audio_path = None
if ref_audio_file is not None:
# Audio preview player (directly play uploaded file)
st.audio(ref_audio_file)
# Save to temp directory
import tempfile
temp_dir = Path("temp")
temp_dir.mkdir(exist_ok=True)
ref_audio_path = temp_dir / f"ref_audio_{ref_audio_file.name}"
with open(ref_audio_path, "wb") as f:
f.write(ref_audio_file.getbuffer())
# TTS preview expander (simplified, uses default voice and speed)
with st.expander(tr("tts.preview_title"), expanded=False):
# Preview text input
@@ -486,10 +508,15 @@ def main():
with st.spinner(tr("tts.previewing")):
try:
# Generate preview audio using selected workflow (use default voice and speed)
audio_path = run_async(pixelle_video.tts(
text=preview_text,
workflow=tts_workflow_key
))
# Pass ref_audio if uploaded
tts_params = {
"text": preview_text,
"workflow": tts_workflow_key
}
if ref_audio_path:
tts_params["ref_audio"] = str(ref_audio_path)
audio_path = run_async(pixelle_video.tts(**tts_params))
# Play the audio
if audio_path:
@@ -801,18 +828,24 @@ def main():
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
# Generate video (directly pass parameters)
result = run_async(pixelle_video.generate_video(
text=text,
mode=mode,
title=title if title else None,
n_scenes=n_scenes,
tts_workflow=tts_workflow_key, # Pass TTS workflow key
image_workflow=workflow_key, # Pass workflow key (e.g., "runninghub/image_flux.json")
frame_template=frame_template,
prompt_prefix=prompt_prefix, # Pass prompt_prefix
bgm_path=bgm_path,
progress_callback=update_progress,
))
video_params = {
"text": text,
"mode": mode,
"title": title if title else None,
"n_scenes": n_scenes,
"tts_workflow": tts_workflow_key,
"image_workflow": workflow_key,
"frame_template": frame_template,
"prompt_prefix": prompt_prefix,
"bgm_path": bgm_path,
"progress_callback": update_progress,
}
# Add ref_audio if uploaded
if ref_audio_path:
video_params["ref_audio"] = str(ref_audio_path)
result = run_async(pixelle_video.generate_video(**video_params))
progress_bar.progress(100)
status_text.text(tr("status.success"))

View File

@@ -164,8 +164,10 @@
"settings.comfyui.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key",
"tts.selector": "Workflow Selection",
"tts.what": "Converts narration text to natural human-like speech",
"tts.what": "Converts narration text to natural human-like speech (some workflows support reference audio for voice cloning)",
"tts.how": "Place tts_xxx.json workflow files in workflows/selfhost/ (local ComfyUI) or workflows/runninghub/ (cloud) folder",
"tts.ref_audio": "Reference Audio",
"tts.ref_audio_help": "Upload audio file for voice cloning (only supported by some workflows)",
"tts.preview_title": "Preview TTS",
"tts.preview_text": "Preview Text",
"tts.preview_text_placeholder": "Enter text to preview...",

View File

@@ -164,8 +164,10 @@
"settings.comfyui.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key",
"tts.selector": "工作流选择",
"tts.what": "将旁白文本转换为真人般的自然语音",
"tts.what": "将旁白文本转换为真人般的自然语音(部分工作流支持参考音频克隆声音)",
"tts.how": "将 tts_xxx.json 工作流文件放入 workflows/selfhost/(本地 ComfyUI)或 workflows/runninghub/(云端)文件夹",
"tts.ref_audio": "参考音频",
"tts.ref_audio_help": "上传音频文件用于声音克隆(仅部分工作流支持)",
"tts.preview_title": "预览 TTS",
"tts.preview_text": "预览文本",
"tts.preview_text_placeholder": "输入要试听的文本...",