From 3d8cbe72e2359d7ec317dde0a6cfacd82d54264b Mon Sep 17 00:00:00 2001 From: puke Date: Tue, 28 Oct 2025 14:16:12 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96edgetts=E7=9A=84=E9=87=8D?= =?UTF-8?q?=E8=AF=95=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- reelforge/utils/tts_util.py | 179 +++++++++++++++++++++++------------- 1 file changed, 115 insertions(+), 64 deletions(-) diff --git a/reelforge/utils/tts_util.py b/reelforge/utils/tts_util.py index 4089841..1e5ae3b 100644 --- a/reelforge/utils/tts_util.py +++ b/reelforge/utils/tts_util.py @@ -7,6 +7,7 @@ Currently, TTS service uses ComfyUI workflows only. import asyncio import ssl +import random import edge_tts as edge_tts_sdk from loguru import logger from aiohttp import WSServerHandshakeError, ClientResponseError @@ -16,8 +17,16 @@ from aiohttp import WSServerHandshakeError, ClientResponseError _SSL_VERIFY_ENABLED = False # Retry configuration for Edge TTS (to handle 401 errors) -_RETRY_COUNT = 3 # Default retry count -_RETRY_DELAY = 2.0 # Retry delay in seconds +_RETRY_COUNT = 5 # Default retry count (increased from 3 to 5) +_RETRY_BASE_DELAY = 1.0 # Base retry delay in seconds (for exponential backoff) +_MAX_RETRY_DELAY = 10.0 # Maximum retry delay in seconds + +# Rate limiting configuration +_REQUEST_DELAY = 0.5 # Minimum delay before each request (seconds) +_MAX_CONCURRENT_REQUESTS = 3 # Maximum concurrent requests + +# Global semaphore for rate limiting +_request_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_REQUESTS) async def edge_tts( @@ -28,7 +37,7 @@ async def edge_tts( pitch: str = "+0Hz", output_path: str = None, retry_count: int = _RETRY_COUNT, - retry_delay: float = _RETRY_DELAY, + retry_base_delay: float = _RETRY_BASE_DELAY, ) -> bytes: """ Convert text to speech using Microsoft Edge TTS @@ -38,8 +47,9 @@ async def edge_tts( Returns audio data as bytes (MP3 format). - Includes automatic retry mechanism to handle 401 authentication errors - and temporary network issues (default: 3 retries with 2s delay). + Includes automatic retry mechanism with exponential backoff and jitter + to handle 401 authentication errors and temporary network issues. + Also includes concurrent request limiting and rate limiting. Args: text: Text to convert to speech @@ -48,8 +58,8 @@ async def edge_tts( volume: Speech volume (e.g., +0%, +50%, -20%) pitch: Speech pitch (e.g., +0Hz, +10Hz, -5Hz) output_path: Optional output file path to save audio - retry_count: Number of retries on failure (default: 3) - retry_delay: Delay between retries in seconds (default: 2.0) + retry_count: Number of retries on failure (default: 5) + retry_base_delay: Base delay for exponential backoff (default: 1.0s) Returns: Audio data as bytes (MP3 format) @@ -74,14 +84,27 @@ async def edge_tts( """ logger.debug(f"Calling Edge TTS with voice: {voice}, rate: {rate}, retry_count: {retry_count}") - last_error = None - - # Retry loop - for attempt in range(retry_count + 1): # +1 because first attempt is not a retry - try: - if attempt > 0: - logger.info(f"🔄 Retrying Edge TTS (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay}s delay...") - await asyncio.sleep(retry_delay) + # Use semaphore to limit concurrent requests + async with _request_semaphore: + # Add a small random delay before each request to avoid rate limiting + pre_delay = _REQUEST_DELAY + random.uniform(0, 0.3) + logger.debug(f"Waiting {pre_delay:.2f}s before request (rate limiting)") + await asyncio.sleep(pre_delay) + + last_error = None + + # Retry loop + for attempt in range(retry_count + 1): # +1 because first attempt is not a retry + try: + if attempt > 0: + # Exponential backoff with jitter + # delay = base * (2 ^ attempt) + random jitter + exponential_delay = retry_base_delay * (2 ** (attempt - 1)) + jitter = random.uniform(0, retry_base_delay) + retry_delay = min(exponential_delay + jitter, _MAX_RETRY_DELAY) + + logger.info(f"🔄 Retrying Edge TTS (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay:.2f}s delay...") + await asyncio.sleep(retry_delay) # Monkey patch ssl.create_default_context if SSL verification is disabled if not _SSL_VERIFY_ENABLED: @@ -134,28 +157,36 @@ async def edge_tts( if not _SSL_VERIFY_ENABLED: ssl.create_default_context = original_create_default_context - except (WSServerHandshakeError, ClientResponseError) as e: - # Network/authentication errors - retry - last_error = e - error_code = getattr(e, 'status', 'unknown') - logger.warning(f"⚠️ Edge TTS error (attempt {attempt + 1}/{retry_count + 1}): {error_code} - {e}") + except (WSServerHandshakeError, ClientResponseError) as e: + # Network/authentication errors - retry + last_error = e + error_code = getattr(e, 'status', 'unknown') + error_msg = str(e) + + # Log more detailed information for 401 errors + if error_code == 401 or '401' in error_msg: + logger.warning(f"⚠️ Edge TTS 401 Authentication Error (attempt {attempt + 1}/{retry_count + 1})") + logger.debug(f"Error details: {error_msg}") + logger.debug(f"This is usually caused by rate limiting. Will retry with exponential backoff...") + else: + logger.warning(f"⚠️ Edge TTS error (attempt {attempt + 1}/{retry_count + 1}): {error_code} - {e}") + + if attempt >= retry_count: + # Last attempt failed + logger.error(f"❌ All {retry_count + 1} attempts failed. Last error: {error_code}") + raise + # Otherwise, continue to next retry - if attempt >= retry_count: - # Last attempt failed - logger.error(f"❌ All {retry_count + 1} attempts failed. Giving up.") + except Exception as e: + # Other errors - don't retry, raise immediately + logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}") raise - # Otherwise, continue to next retry - except Exception as e: - # Other errors - don't retry, raise immediately - logger.error(f"Edge TTS error (non-retryable): {e}") - raise - - # Should not reach here, but just in case - if last_error: - raise last_error - else: - raise RuntimeError("Edge TTS failed without error (unexpected)") + # Should not reach here, but just in case + if last_error: + raise last_error + else: + raise RuntimeError("Edge TTS failed without error (unexpected)") def get_audio_duration(audio_path: str) -> float: @@ -184,20 +215,20 @@ def get_audio_duration(audio_path: str) -> float: return max(1.0, estimated_duration) # At least 1 second -async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry_delay: float = _RETRY_DELAY) -> list[str]: +async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry_base_delay: float = _RETRY_BASE_DELAY) -> list[str]: """ List all available voices for Edge TTS Returns a list of voice IDs (ShortName). Optionally filter by locale. - Includes automatic retry mechanism to handle network errors - (default: 3 retries with 2s delay). + Includes automatic retry mechanism with exponential backoff and jitter + to handle network errors and rate limiting. Args: locale: Filter by locale (e.g., zh-CN, en-US, ja-JP) - retry_count: Number of retries on failure (default: 3) - retry_delay: Delay between retries in seconds (default: 2.0) + retry_count: Number of retries on failure (default: 5) + retry_base_delay: Base delay for exponential backoff (default: 1.0s) Returns: List of voice IDs @@ -213,14 +244,26 @@ async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry """ logger.debug(f"Fetching Edge TTS voices, locale filter: {locale}, retry_count: {retry_count}") - last_error = None - - # Retry loop - for attempt in range(retry_count + 1): - try: - if attempt > 0: - logger.info(f"🔄 Retrying list voices (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay}s delay...") - await asyncio.sleep(retry_delay) + # Use semaphore to limit concurrent requests + async with _request_semaphore: + # Add a small random delay before each request to avoid rate limiting + pre_delay = _REQUEST_DELAY + random.uniform(0, 0.3) + logger.debug(f"Waiting {pre_delay:.2f}s before request (rate limiting)") + await asyncio.sleep(pre_delay) + + last_error = None + + # Retry loop + for attempt in range(retry_count + 1): + try: + if attempt > 0: + # Exponential backoff with jitter + exponential_delay = retry_base_delay * (2 ** (attempt - 1)) + jitter = random.uniform(0, retry_base_delay) + retry_delay = min(exponential_delay + jitter, _MAX_RETRY_DELAY) + + logger.info(f"🔄 Retrying list voices (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay:.2f}s delay...") + await asyncio.sleep(retry_delay) # Monkey patch SSL if verification is disabled if not _SSL_VERIFY_ENABLED: @@ -258,24 +301,32 @@ async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry if not _SSL_VERIFY_ENABLED: ssl.create_default_context = original_create_default_context - except (WSServerHandshakeError, ClientResponseError) as e: - # Network/authentication errors - retry - last_error = e - error_code = getattr(e, 'status', 'unknown') - logger.warning(f"⚠️ List voices error (attempt {attempt + 1}/{retry_count + 1}): {error_code} - {e}") + except (WSServerHandshakeError, ClientResponseError) as e: + # Network/authentication errors - retry + last_error = e + error_code = getattr(e, 'status', 'unknown') + error_msg = str(e) + + # Log more detailed information for 401 errors + if error_code == 401 or '401' in error_msg: + logger.warning(f"⚠️ Edge TTS 401 Authentication Error (list_voices attempt {attempt + 1}/{retry_count + 1})") + logger.debug(f"Error details: {error_msg}") + logger.debug(f"This is usually caused by rate limiting. Will retry with exponential backoff...") + else: + logger.warning(f"⚠️ List voices error (attempt {attempt + 1}/{retry_count + 1}): {error_code} - {e}") + + if attempt >= retry_count: + logger.error(f"❌ All {retry_count + 1} attempts failed. Last error: {error_code}") + raise - if attempt >= retry_count: - logger.error(f"❌ All {retry_count + 1} attempts failed. Giving up.") + except Exception as e: + # Other errors - don't retry, raise immediately + logger.error(f"List voices error (non-retryable): {type(e).__name__} - {e}") raise - except Exception as e: - # Other errors - don't retry, raise immediately - logger.error(f"List voices error (non-retryable): {e}") - raise - - # Should not reach here, but just in case - if last_error: - raise last_error - else: - raise RuntimeError("List voices failed without error (unexpected)") + # Should not reach here, but just in case + if last_error: + raise last_error + else: + raise RuntimeError("List voices failed without error (unexpected)")