Files
AI-Video/pixelle_video/tts_voices.py
2025-11-07 16:59:47 +08:00

160 lines
4.2 KiB
Python

# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TTS Voice Configuration
Defines available voices for local Edge TTS inference.
"""
from typing import List, Dict, Any
# Voice presets offered for local Edge TTS inference.
#
# Each entry is a dict with four string fields:
#   id        - the Edge TTS voice identifier
#   label_key - translation key for the voice's display label
#   locale    - language/region tag of the voice
#   gender    - "female" or "male"
EDGE_TTS_VOICES: List[Dict[str, Any]] = [
    # --- Chinese (zh-CN) ---
    {
        "id": "zh-CN-XiaoxiaoNeural",
        "label_key": "tts.voice.zh_CN_XiaoxiaoNeural",
        "locale": "zh-CN",
        "gender": "female",
    },
    {
        "id": "zh-CN-XiaoyiNeural",
        "label_key": "tts.voice.zh_CN_XiaoyiNeural",
        "locale": "zh-CN",
        "gender": "female",
    },
    {
        "id": "zh-CN-YunjianNeural",
        "label_key": "tts.voice.zh_CN_YunjianNeural",
        "locale": "zh-CN",
        "gender": "male",
    },
    {
        "id": "zh-CN-YunxiNeural",
        "label_key": "tts.voice.zh_CN_YunxiNeural",
        "locale": "zh-CN",
        "gender": "male",
    },
    {
        "id": "zh-CN-YunyangNeural",
        "label_key": "tts.voice.zh_CN_YunyangNeural",
        "locale": "zh-CN",
        "gender": "male",
    },
    {
        "id": "zh-CN-YunyeNeural",
        "label_key": "tts.voice.zh_CN_YunyeNeural",
        "locale": "zh-CN",
        "gender": "male",
    },
    {
        "id": "zh-CN-YunfengNeural",
        "label_key": "tts.voice.zh_CN_YunfengNeural",
        "locale": "zh-CN",
        "gender": "male",
    },
    {
        # Regional voice; still listed under the zh-CN locale.
        "id": "zh-CN-liaoning-XiaobeiNeural",
        "label_key": "tts.voice.zh_CN_liaoning_XiaobeiNeural",
        "locale": "zh-CN",
        "gender": "female",
    },
    # --- English (en-US) ---
    {
        "id": "en-US-AriaNeural",
        "label_key": "tts.voice.en_US_AriaNeural",
        "locale": "en-US",
        "gender": "female",
    },
    {
        "id": "en-US-JennyNeural",
        "label_key": "tts.voice.en_US_JennyNeural",
        "locale": "en-US",
        "gender": "female",
    },
    {
        "id": "en-US-GuyNeural",
        "label_key": "tts.voice.en_US_GuyNeural",
        "locale": "en-US",
        "gender": "male",
    },
    {
        "id": "en-US-DavisNeural",
        "label_key": "tts.voice.en_US_DavisNeural",
        "locale": "en-US",
        "gender": "male",
    },
    # --- English (en-GB) ---
    {
        "id": "en-GB-SoniaNeural",
        "label_key": "tts.voice.en_GB_SoniaNeural",
        "locale": "en-GB",
        "gender": "female",
    },
    {
        "id": "en-GB-RyanNeural",
        "label_key": "tts.voice.en_GB_RyanNeural",
        "locale": "en-GB",
        "gender": "male",
    },
]
def get_voice_display_name(voice_id: str, tr_func=None, locale: str = "zh_CN") -> str:
    """
    Resolve the text to show for a voice.

    Args:
        voice_id: Voice ID (e.g., "zh-CN-YunjianNeural")
        tr_func: Optional callable taking a label key and returning its translation
        locale: Current locale (default: "zh_CN")

    Returns:
        ``tr_func(label_key)`` when the voice is a known preset, the locale is
        "zh_CN" and ``tr_func`` is truthy; in every other case ``voice_id``
        is returned unchanged.
    """
    # Locate the first preset whose id matches
    preset = None
    for candidate in EDGE_TTS_VOICES:
        if candidate["id"] == voice_id:
            preset = candidate
            break

    # Unknown voices fall back to the raw ID
    if not preset:
        return voice_id

    # Only the Chinese locale uses translated labels, and only if a
    # translation function was supplied
    if locale != "zh_CN" or not tr_func:
        return voice_id

    return tr_func(preset["label_key"])
def speed_to_rate(speed: float) -> str:
    """
    Convert speed multiplier to Edge TTS rate parameter

    Args:
        speed: Speed multiplier (1.0 = normal, 1.2 = 120%)

    Returns:
        Rate string with an explicit sign (e.g., "+20%", "-10%")

    Examples:
        1.0 → "+0%"
        1.2 → "+20%"
        0.8 → "-20%"
    """
    # Round to the nearest whole percent instead of truncating: binary floats
    # make (1.2 - 1.0) * 100 evaluate to 19.999999999999996, which int() would
    # cut down to 19 and yield "+19%" rather than the intended "+20%".
    percentage = round((speed - 1.0) * 100)
    # "+d" always emits a sign, so zero and positive values get a leading "+"
    return f"{percentage:+d}%"