TTS支持参考音频逻辑
This commit is contained in:
65
web/app.py
65
web/app.py
@@ -471,6 +471,28 @@ def main():
|
||||
else:
|
||||
tts_workflow_key = "selfhost/tts_edge.json" # fallback
|
||||
|
||||
# Reference audio upload (optional, for voice cloning)
|
||||
ref_audio_file = st.file_uploader(
|
||||
tr("tts.ref_audio"),
|
||||
type=["mp3", "wav", "flac", "m4a", "aac", "ogg"],
|
||||
help=tr("tts.ref_audio_help"),
|
||||
key="ref_audio_upload"
|
||||
)
|
||||
|
||||
# Save uploaded ref_audio to temp file if provided
|
||||
ref_audio_path = None
|
||||
if ref_audio_file is not None:
|
||||
# Audio preview player (directly play uploaded file)
|
||||
st.audio(ref_audio_file)
|
||||
|
||||
# Save to the local "temp" folder (relative to the working directory, not the system temp dir)
|
||||
import tempfile
|
||||
temp_dir = Path("temp")
|
||||
temp_dir.mkdir(exist_ok=True)
|
||||
ref_audio_path = temp_dir / f"ref_audio_{ref_audio_file.name}"
|
||||
with open(ref_audio_path, "wb") as f:
|
||||
f.write(ref_audio_file.getbuffer())
|
||||
|
||||
# TTS preview expander (simplified, uses default voice and speed)
|
||||
with st.expander(tr("tts.preview_title"), expanded=False):
|
||||
# Preview text input
|
||||
@@ -486,10 +508,15 @@ def main():
|
||||
with st.spinner(tr("tts.previewing")):
|
||||
try:
|
||||
# Generate preview audio using selected workflow (use default voice and speed)
|
||||
audio_path = run_async(pixelle_video.tts(
|
||||
text=preview_text,
|
||||
workflow=tts_workflow_key
|
||||
))
|
||||
# Pass ref_audio if uploaded
|
||||
tts_params = {
|
||||
"text": preview_text,
|
||||
"workflow": tts_workflow_key
|
||||
}
|
||||
if ref_audio_path:
|
||||
tts_params["ref_audio"] = str(ref_audio_path)
|
||||
|
||||
audio_path = run_async(pixelle_video.tts(**tts_params))
|
||||
|
||||
# Play the audio
|
||||
if audio_path:
|
||||
@@ -801,18 +828,24 @@ def main():
|
||||
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
|
||||
|
||||
# Generate video (directly pass parameters)
|
||||
result = run_async(pixelle_video.generate_video(
|
||||
text=text,
|
||||
mode=mode,
|
||||
title=title if title else None,
|
||||
n_scenes=n_scenes,
|
||||
tts_workflow=tts_workflow_key, # Pass TTS workflow key
|
||||
image_workflow=workflow_key, # Pass workflow key (e.g., "runninghub/image_flux.json")
|
||||
frame_template=frame_template,
|
||||
prompt_prefix=prompt_prefix, # Pass prompt_prefix
|
||||
bgm_path=bgm_path,
|
||||
progress_callback=update_progress,
|
||||
))
|
||||
video_params = {
|
||||
"text": text,
|
||||
"mode": mode,
|
||||
"title": title if title else None,
|
||||
"n_scenes": n_scenes,
|
||||
"tts_workflow": tts_workflow_key,
|
||||
"image_workflow": workflow_key,
|
||||
"frame_template": frame_template,
|
||||
"prompt_prefix": prompt_prefix,
|
||||
"bgm_path": bgm_path,
|
||||
"progress_callback": update_progress,
|
||||
}
|
||||
|
||||
# Add ref_audio if uploaded
|
||||
if ref_audio_path:
|
||||
video_params["ref_audio"] = str(ref_audio_path)
|
||||
|
||||
result = run_async(pixelle_video.generate_video(**video_params))
|
||||
|
||||
progress_bar.progress(100)
|
||||
status_text.text(tr("status.success"))
|
||||
|
||||
@@ -164,8 +164,10 @@
|
||||
"settings.comfyui.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key",
|
||||
|
||||
"tts.selector": "Workflow Selection",
|
||||
"tts.what": "Converts narration text to natural human-like speech",
|
||||
"tts.what": "Converts narration text to natural human-like speech (some workflows support reference audio for voice cloning)",
|
||||
"tts.how": "Place tts_xxx.json workflow files in workflows/selfhost/ (local ComfyUI) or workflows/runninghub/ (cloud) folder",
|
||||
"tts.ref_audio": "Reference Audio",
|
||||
"tts.ref_audio_help": "Upload audio file for voice cloning (only supported by some workflows)",
|
||||
"tts.preview_title": "Preview TTS",
|
||||
"tts.preview_text": "Preview Text",
|
||||
"tts.preview_text_placeholder": "Enter text to preview...",
|
||||
|
||||
@@ -164,8 +164,10 @@
|
||||
"settings.comfyui.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key",
|
||||
|
||||
"tts.selector": "工作流选择",
|
||||
"tts.what": "将旁白文本转换为真人般的自然语音",
|
||||
"tts.what": "将旁白文本转换为真人般的自然语音(部分工作流支持参考音频克隆声音)",
|
||||
"tts.how": "将 tts_xxx.json 工作流文件放入 workflows/selfhost/(本地 ComfyUI)或 workflows/runninghub/(云端)文件夹",
|
||||
"tts.ref_audio": "参考音频",
|
||||
"tts.ref_audio_help": "上传音频文件用于声音克隆(仅部分工作流支持)",
|
||||
"tts.preview_title": "预览 TTS",
|
||||
"tts.preview_text": "预览文本",
|
||||
"tts.preview_text_placeholder": "输入要试听的文本...",
|
||||
|
||||
Reference in New Issue
Block a user