TTS支持参考音频逻辑

This commit is contained in:
puke
2025-10-31 15:50:35 +08:00
parent 7c3a49f55b
commit 2fe5e7c0fa
8 changed files with 435 additions and 316 deletions

View File

@@ -29,6 +29,7 @@ class StoryboardConfig:
voice_id: str = "[Chinese] zh-CN Yunjian" # Default voice
tts_workflow: Optional[str] = None # TTS workflow filename (None = use default)
tts_speed: float = 1.2 # TTS speed multiplier (1.0 = normal, >1.0 = faster)
ref_audio: Optional[str] = None # Reference audio for voice cloning (only some workflows support this)
# Image parameters
image_width: int = 1024

View File

@@ -125,13 +125,19 @@ class FrameProcessor:
output_path = get_task_frame_path(config.task_id, frame.index, "audio")
# Call TTS with specific output path and workflow
audio_path = await self.core.tts(
text=frame.narration,
workflow=config.tts_workflow, # Use workflow from config
voice=config.voice_id,
speed=config.tts_speed, # Use speed (not rate) from config
output_path=output_path,
)
tts_params = {
"text": frame.narration,
"workflow": config.tts_workflow,
"voice": config.voice_id,
"speed": config.tts_speed,
"output_path": output_path,
}
# Add ref_audio if provided
if config.ref_audio:
tts_params["ref_audio"] = config.ref_audio
audio_path = await self.core.tts(**tts_params)
frame.audio_path = audio_path

View File

@@ -57,6 +57,7 @@ class VideoGeneratorService:
voice_id: str = "[Chinese] zh-CN Yunjian",
tts_workflow: Optional[str] = None,
tts_speed: float = 1.2,
ref_audio: Optional[str] = None, # Reference audio for voice cloning
output_path: Optional[str] = None,
# === LLM Parameters ===
@@ -225,6 +226,7 @@ class VideoGeneratorService:
voice_id=voice_id,
tts_workflow=tts_workflow,
tts_speed=tts_speed,
ref_audio=ref_audio,
image_width=image_width,
image_height=image_height,
image_workflow=image_workflow,