重构capability层
This commit is contained in:
255
reelforge/utils/tts_util.py
Normal file
255
reelforge/utils/tts_util.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
Edge TTS utility (currently unused).

This is the original edge-tts implementation, kept here for potential future use.
The TTS service currently uses ComfyUI workflows only.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import ssl
|
||||
import edge_tts as edge_tts_sdk
|
||||
from loguru import logger
|
||||
from aiohttp import WSServerHandshakeError, ClientResponseError
|
||||
|
||||
|
||||
# SSL certificate verification switch for Edge TTS requests.
# WARNING: False turns verification off; that is meant for development only.
_SSL_VERIFY_ENABLED: bool = False

# Retry policy for Edge TTS (used to ride out 401 errors).
_RETRY_COUNT: int = 3      # default number of retries
_RETRY_DELAY: float = 2.0  # seconds to wait before each retry
|
||||
|
||||
|
||||
def _edge_tts_disable_ssl_verification():
    """Install (or join) the unverified-SSL patch and return its handle.

    ``ssl.create_default_context`` is replaced by a wrapper that builds the
    regular context and then switches off hostname checking and certificate
    verification. The wrapper carries its own bookkeeping (the factory it
    replaced and a user count) as function attributes, so a caller that finds
    the wrapper already installed shares it instead of stacking another patch
    on top. Stacked patches restored out of order would leave verification
    disabled for the whole process.
    """
    current = ssl.create_default_context
    if getattr(current, "_unverified_users", 0) > 0:
        # An overlapping call already installed the wrapper: just join it.
        current._unverified_users += 1
        return current

    def create_unverified_context(*args, **kwargs):
        ctx = current(*args, **kwargs)
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        return ctx

    create_unverified_context._unverified_original = current
    create_unverified_context._unverified_users = 1
    ssl.create_default_context = create_unverified_context
    return create_unverified_context


def _edge_tts_restore_ssl_verification(patch):
    """Release one use of ``patch``; the last release restores the original factory."""
    patch._unverified_users -= 1
    if patch._unverified_users <= 0:
        ssl.create_default_context = patch._unverified_original


async def edge_tts(
    text: str,
    voice: str = "zh-CN-YunjianNeural",
    rate: str = "+0%",
    volume: str = "+0%",
    pitch: str = "+0Hz",
    output_path: str = None,
    retry_count: int = _RETRY_COUNT,
    retry_delay: float = _RETRY_DELAY,
) -> bytes:
    """
    Convert text to speech using Microsoft Edge TTS

    This service is free and requires no API key.
    Supports 400+ voices across 100+ languages.

    Returns audio data as bytes (MP3 format).

    Includes automatic retry mechanism to handle 401 authentication errors
    and temporary network issues (default: 3 retries with 2s delay).

    While ``_SSL_VERIFY_ENABLED`` is False, ``ssl.create_default_context`` is
    patched for the duration of each attempt so that the contexts it creates
    skip certificate verification. The patch is reference-counted, so
    overlapping calls restore the original function exactly once.

    Args:
        text: Text to convert to speech
        voice: Voice ID (e.g., zh-CN-YunjianNeural, en-US-JennyNeural)
        rate: Speech rate (e.g., +0%, +50%, -20%)
        volume: Speech volume (e.g., +0%, +50%, -20%)
        pitch: Speech pitch (e.g., +0Hz, +10Hz, -5Hz)
        output_path: Optional output file path to save audio (None: do not save)
        retry_count: Number of retries on failure (default: 3); negative
            values are treated as 0, so one attempt is always made
        retry_delay: Delay between retries in seconds (default: 2.0)

    Returns:
        Audio data as bytes (MP3 format)

    Raises:
        WSServerHandshakeError, ClientResponseError: when every attempt
            failed with one of these (retryable) errors; the last one is
            re-raised.
        Exception: any other error is logged and re-raised immediately,
            without retrying.

    Popular Chinese voices:
        - zh-CN-YunjianNeural (male, default)
        - zh-CN-XiaoxiaoNeural (female)
        - zh-CN-YunxiNeural (male)
        - zh-CN-XiaoyiNeural (female)

    Popular English voices:
        - en-US-JennyNeural (female)
        - en-US-GuyNeural (male)
        - en-GB-SoniaNeural (female, British)

    Example:
        audio_bytes = await edge_tts(
            text="你好,世界!",
            voice="zh-CN-YunjianNeural",
            rate="+20%"
        )
    """
    logger.debug(f"Calling Edge TTS with voice: {voice}, rate: {rate}, retry_count: {retry_count}")

    # A negative retry_count would leave the loop with nothing to do; always
    # make at least the initial attempt.
    max_retries = max(0, retry_count)
    total_attempts = max_retries + 1  # +1 because the first attempt is not a retry

    for attempt in range(total_attempts):
        try:
            if attempt > 0:
                logger.info(f"🔄 Retrying Edge TTS (attempt {attempt + 1}/{total_attempts}) after {retry_delay}s delay...")
                await asyncio.sleep(retry_delay)

            # Remember whether *this* attempt patched ssl, so the cleanup below
            # does not depend on the flag still having the same value.
            ssl_patch = None
            if not _SSL_VERIFY_ENABLED:
                if attempt == 0:  # Only log warning once
                    logger.warning("SSL verification is disabled for development. This is NOT recommended for production!")
                ssl_patch = _edge_tts_disable_ssl_verification()

            try:
                communicate = edge_tts_sdk.Communicate(
                    text=text,
                    voice=voice,
                    rate=rate,
                    volume=volume,
                    pitch=pitch,
                )

                # Keep only the audio payloads from the stream.
                audio_chunks = [
                    chunk["data"]
                    async for chunk in communicate.stream()
                    if chunk["type"] == "audio"
                ]
                audio_data = b"".join(audio_chunks)

                if attempt > 0:
                    logger.success(f"✅ Retry succeeded on attempt {attempt + 1}")

                logger.info(f"Generated {len(audio_data)} bytes of audio data")

                # Save to file if output_path is provided
                if output_path:
                    with open(output_path, "wb") as f:
                        f.write(audio_data)
                    logger.info(f"Audio saved to: {output_path}")

                return audio_data

            finally:
                if ssl_patch is not None:
                    _edge_tts_restore_ssl_verification(ssl_patch)

        except (WSServerHandshakeError, ClientResponseError) as e:
            # Network/authentication errors - retry
            error_code = getattr(e, "status", "unknown")
            logger.warning(f"⚠️  Edge TTS error (attempt {attempt + 1}/{total_attempts}): {error_code} - {e}")

            if attempt >= max_retries:
                # Last attempt failed
                logger.error(f"❌ All {total_attempts} attempts failed. Giving up.")
                raise
            # Otherwise, continue to next retry

        except Exception as e:
            # Other errors - don't retry, raise immediately
            logger.error(f"Edge TTS error (non-retryable): {e}")
            raise

    # Defensive guard: every iteration above either returns or raises.
    raise RuntimeError("Edge TTS failed without error (unexpected)")
|
||||
|
||||
|
||||
def _list_voices_disable_ssl_verification():
    """Install (or join) the unverified-SSL patch and return its handle.

    ``ssl.create_default_context`` is replaced by a wrapper that builds the
    regular context and then switches off hostname checking and certificate
    verification. The wrapper carries its own bookkeeping (the factory it
    replaced and a user count) as function attributes, so a caller that finds
    the wrapper already installed shares it instead of stacking another patch
    on top. Stacked patches restored out of order would leave verification
    disabled for the whole process.
    """
    current = ssl.create_default_context
    if getattr(current, "_unverified_users", 0) > 0:
        # An overlapping call already installed the wrapper: just join it.
        current._unverified_users += 1
        return current

    def create_unverified_context(*args, **kwargs):
        ctx = current(*args, **kwargs)
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        return ctx

    create_unverified_context._unverified_original = current
    create_unverified_context._unverified_users = 1
    ssl.create_default_context = create_unverified_context
    return create_unverified_context


def _list_voices_restore_ssl_verification(patch):
    """Release one use of ``patch``; the last release restores the original factory."""
    patch._unverified_users -= 1
    if patch._unverified_users <= 0:
        ssl.create_default_context = patch._unverified_original


async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry_delay: float = _RETRY_DELAY) -> list[str]:
    """
    List all available voices for Edge TTS

    Returns a list of voice IDs (ShortName).
    Optionally filter by locale.

    Includes automatic retry mechanism to handle network errors
    (default: 3 retries with 2s delay).

    While ``_SSL_VERIFY_ENABLED`` is False, ``ssl.create_default_context`` is
    patched for the duration of each attempt so that the contexts it creates
    skip certificate verification. The patch is reference-counted, so
    overlapping calls restore the original function exactly once.

    Args:
        locale: Filter by locale prefix (e.g., zh-CN, en-US, ja-JP); None or
            empty returns every voice
        retry_count: Number of retries on failure (default: 3); negative
            values are treated as 0, so one attempt is always made
        retry_delay: Delay between retries in seconds (default: 2.0)

    Returns:
        List of voice IDs

    Raises:
        WSServerHandshakeError, ClientResponseError: when every attempt
            failed with one of these (retryable) errors; the last one is
            re-raised.
        Exception: any other error is logged and re-raised immediately,
            without retrying.

    Example:
        # List all voices
        voices = await list_voices()
        # Returns: ['zh-CN-YunjianNeural', 'zh-CN-XiaoxiaoNeural', ...]

        # List Chinese voices only
        voices = await list_voices(locale="zh-CN")
        # Returns: ['zh-CN-YunjianNeural', 'zh-CN-XiaoxiaoNeural', ...]
    """
    logger.debug(f"Fetching Edge TTS voices, locale filter: {locale}, retry_count: {retry_count}")

    # A negative retry_count would leave the loop with nothing to do; always
    # make at least the initial attempt.
    max_retries = max(0, retry_count)
    total_attempts = max_retries + 1  # +1 because the first attempt is not a retry

    for attempt in range(total_attempts):
        try:
            if attempt > 0:
                logger.info(f"🔄 Retrying list voices (attempt {attempt + 1}/{total_attempts}) after {retry_delay}s delay...")
                await asyncio.sleep(retry_delay)

            # Remember whether *this* attempt patched ssl, so the cleanup below
            # does not depend on the flag still having the same value.
            ssl_patch = None
            if not _SSL_VERIFY_ENABLED:
                if attempt == 0:  # Only log warning once
                    logger.warning("SSL verification is disabled for development. This is NOT recommended for production!")
                ssl_patch = _list_voices_disable_ssl_verification()

            try:
                voices = await edge_tts_sdk.list_voices()

                # Keep the ShortName of every voice whose locale matches the
                # requested prefix (or of every voice when no filter is given).
                voice_ids = [
                    voice["ShortName"]
                    for voice in voices
                    if not locale or voice["Locale"].startswith(locale)
                ]

                if attempt > 0:
                    logger.success(f"✅ Retry succeeded on attempt {attempt + 1}")

                logger.info(f"Found {len(voice_ids)} voices" + (f" for locale '{locale}'" if locale else ""))
                return voice_ids

            finally:
                if ssl_patch is not None:
                    _list_voices_restore_ssl_verification(ssl_patch)

        except (WSServerHandshakeError, ClientResponseError) as e:
            # Network/authentication errors - retry
            error_code = getattr(e, "status", "unknown")
            logger.warning(f"⚠️  List voices error (attempt {attempt + 1}/{total_attempts}): {error_code} - {e}")

            if attempt >= max_retries:
                logger.error(f"❌ All {total_attempts} attempts failed. Giving up.")
                raise
            # Otherwise, continue to next retry

        except Exception as e:
            # Other errors - don't retry, raise immediately
            logger.error(f"List voices error (non-retryable): {e}")
            raise

    # Defensive guard: every iteration above either returns or raises.
    raise RuntimeError("List voices failed without error (unexpected)")
|
||||
|
||||
Reference in New Issue
Block a user