支持runninghub并行执行,提高视频生成效率

2025-11-17 14:33:51 +08:00
parent 4a558170ad
commit c8df34a318
2 changed files with 239 additions and 278 deletions
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -19,9 +19,10 @@ This is the default pipeline for general-purpose video generation.

 from datetime import datetime
 from pathlib import Path
-from typing import Optional, Callable, Literal, Dict, Any
+from typing import Optional, Callable, Literal

 from loguru import logger
+import asyncio

 from pixelle_video.pipelines.base import BasePipeline
 from pixelle_video.models.progress import ProgressEvent
@@ -40,6 +41,12 @@ from pixelle_video.utils.content_generators import (
 )


+# Whether to enable parallel processing for RunningHub workflows
+RUNNING_HUB_PARALLEL_ENABLED = True
+# Parallel limit for RunningHub workflows
+RUNNING_HUB_PARALLEL_LIMIT = 5
+
+
 class StandardPipeline(BasePipeline):
    """
    Standard video generation pipeline
@@ -75,16 +82,13 @@ class StandardPipeline(BasePipeline):
        # === Basic Config ===
        n_scenes: int = 5,  # Only used in generate mode; ignored in fixed mode
        
-        # === TTS Parameters ===
-        tts_inference_mode: Optional[str] = None,  # "local" or "comfyui"
-        tts_voice: Optional[str] = None,  # For local mode: Edge TTS voice ID
-        tts_speed: Optional[float] = None,  # Speed multiplier (0.5-2.0)
-        tts_workflow: Optional[str] = None,  # For ComfyUI mode: workflow path
-        ref_audio: Optional[str] = None,  # For ComfyUI mode: reference audio
-        
-        # Deprecated (kept for backward compatibility)
-        voice_id: Optional[str] = None,
-        
+        # === TTS Parameters (supports both old and new parameter names) ===
+        tts_inference_mode: Optional[str] = None,  # "local" or "comfyui" (web UI)
+        voice_id: Optional[str] = None,  # For backward compatibility (deprecated)
+        tts_voice: Optional[str] = None,  # Voice ID for local mode (web UI)
+        tts_workflow: Optional[str] = None,
+        tts_speed: float = 1.2,
+        ref_audio: Optional[str] = None,  # Reference audio for voice cloning
        output_path: Optional[str] = None,
        
        # === LLM Parameters ===
@@ -94,7 +98,8 @@ class StandardPipeline(BasePipeline):
        max_image_prompt_words: int = 60,
        
        # === Image Parameters ===
-        # Note: image_width and image_height are now auto-determined from template meta tags
+        image_width: int = 1024,
+        image_height: int = 1024,
        image_workflow: Optional[str] = None,
        
        # === Video Parameters ===
@@ -102,7 +107,9 @@ class StandardPipeline(BasePipeline):
        
        # === Frame Template (determines video size) ===
        frame_template: Optional[str] = None,
-        template_params: Optional[Dict[str, Any]] = None,  # Custom template parameters
+        
+        # === Template Custom Parameters ===
+        template_params: Optional[dict] = None,  # Custom template parameters
        
        # === Image Style ===
        prompt_prefix: Optional[str] = None,
@@ -150,8 +157,9 @@ class StandardPipeline(BasePipeline):
            min_image_prompt_words: Min image prompt length
            max_image_prompt_words: Max image prompt length
            
+            image_width: Generated image width (default 1024)
+            image_height: Generated image height (default 1024)
            image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
-                           Note: Image/video size is now auto-determined from template meta tags
            
            video_fps: Video frame rate (default 30)
            
@@ -159,6 +167,9 @@ class StandardPipeline(BasePipeline):
                           Format: "SIZExSIZE/template.html" (e.g., "1080x1920/default.html", "1920x1080/modern.html")
                           Video size is automatically determined from template path
            
+            template_params: Custom template parameters (optional dict)
+                            e.g., {"accent_color": "#ff0000", "author": "John Doe"}
+            
            prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
                          e.g., "anime style, vibrant colors" or "" for no prefix
            
@@ -176,6 +187,29 @@ class StandardPipeline(BasePipeline):
        logger.info(f"🚀 Starting StandardPipeline in '{mode}' mode")
        logger.info(f"   Text length: {len(text)} chars")
        
+        # === Handle TTS parameter compatibility ===
+        # Support both old API (voice_id) and new API (tts_inference_mode + tts_voice)
+        final_voice_id = None
+        final_tts_workflow = tts_workflow
+        
+        if tts_inference_mode:
+            # New API from web UI
+            if tts_inference_mode == "local":
+                # Local Edge TTS mode - use tts_voice
+                final_voice_id = tts_voice or "zh-CN-YunjianNeural"
+                final_tts_workflow = None  # Don't use workflow in local mode
+                logger.debug(f"TTS Mode: local (voice={final_voice_id})")
+            elif tts_inference_mode == "comfyui":
+                # ComfyUI workflow mode
+                final_voice_id = None  # Don't use voice_id in ComfyUI mode
+                # tts_workflow already set from parameter
+                logger.debug(f"TTS Mode: comfyui (workflow={final_tts_workflow})")
+        else:
+            # Old API (backward compatibility)
+            final_voice_id = voice_id or tts_voice or "zh-CN-YunjianNeural"
+            # tts_workflow already set from parameter
+            logger.debug(f"TTS Mode: legacy (voice_id={final_voice_id}, workflow={final_tts_workflow})")
+        
        # Determine final title
        if title:
            final_title = title
@@ -208,45 +242,6 @@ class StandardPipeline(BasePipeline):
            output_path = get_task_final_video_path(task_id)
            logger.info(f"   Will copy final video to: {user_specified_output}")
        
-        # Determine TTS inference mode and parameters
-        # Priority: explicit params > backward compatibility > config defaults
-        if tts_inference_mode is None:
-            # Check if user provided ComfyUI-specific params
-            if tts_workflow is not None or ref_audio is not None:
-                tts_inference_mode = "comfyui"
-            # Check if user provided old voice_id param (backward compatibility)
-            elif voice_id is not None:
-                tts_inference_mode = "comfyui"
-                if tts_voice is None:
-                    tts_voice = voice_id
-            else:
-                # Use config default
-                tts_config = self.core.config.get("comfyui", {}).get("tts", {})
-                tts_inference_mode = tts_config.get("inference_mode", "local")
-        
-        # Set voice_id based on mode for StoryboardConfig
-        final_voice_id = None
-        if tts_inference_mode == "local":
-            final_voice_id = tts_voice or voice_id
-        else:  # comfyui
-            final_voice_id = voice_id  # For ComfyUI, might be None
-        
-        # Determine frame template
-        # Priority: explicit param > config default > hardcoded default
-        if frame_template is None:
-            template_config = self.core.config.get("template", {})
-            frame_template = template_config.get("default_template", "1080x1920/default.html")
-        
-        # Read media size from template meta tags
-        from pixelle_video.services.frame_html import HTMLFrameGenerator
-        from pixelle_video.utils.template_util import resolve_template_path
-        
-        template_path = resolve_template_path(frame_template)
-        temp_generator = HTMLFrameGenerator(template_path)
-        image_width, image_height = temp_generator.get_media_size()
-        
-        logger.info(f"📐 Media size from template: {image_width}x{image_height}")
-        
        # Create storyboard config
        config = StoryboardConfig(
            task_id=task_id,
@@ -256,16 +251,15 @@ class StandardPipeline(BasePipeline):
            min_image_prompt_words=min_image_prompt_words,
            max_image_prompt_words=max_image_prompt_words,
            video_fps=video_fps,
-            tts_inference_mode=tts_inference_mode,
-            voice_id=final_voice_id,
-            tts_workflow=tts_workflow,
+            voice_id=final_voice_id,  # Use processed voice_id
+            tts_workflow=final_tts_workflow,  # Use processed workflow
            tts_speed=tts_speed,
            ref_audio=ref_audio,
            image_width=image_width,
            image_height=image_height,
            image_workflow=image_workflow,
-            frame_template=frame_template,
-            template_params=template_params
+            frame_template=frame_template or "1080x1920/default.html",
+            template_params=template_params  # Custom template parameters
        )
        
        # Create storyboard
@@ -276,16 +270,6 @@ class StandardPipeline(BasePipeline):
            created_at=datetime.now()
        )
        
-        # ========== Step 0.8: Check template requirements ==========
-        template_media_type = self._check_template_media_type(config.frame_template)
-        if template_media_type == "video":
-            logger.info(f"🎬 Template requires video generation")
-        elif template_media_type == "image":
-            logger.info(f"📸 Template requires image generation")
-        else:  # static
-            logger.info(f"⚡ Static template - skipping media generation pipeline")
-            logger.info(f"   💡 Benefits: Faster generation + Lower cost + No ComfyUI dependency")
-        
        try:
            # ========== Step 1: Generate/Split narrations ==========
            if mode == "generate":
@@ -304,115 +288,54 @@ class StandardPipeline(BasePipeline):
                logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
                logger.info(f"   Note: n_scenes={n_scenes} is ignored in fixed mode")
            
-            # ========== Step 2: Generate media prompts (conditional) ==========
-            if template_media_type == "video":
-                # Video template: generate video prompts
-                self._report_progress(progress_callback, "generating_video_prompts", 0.15)
-                
-                from pixelle_video.utils.content_generators import generate_video_prompts
-                
-                # Override prompt_prefix if provided
-                original_prefix = None
-                if prompt_prefix is not None:
-                    image_config = self.core.config.get("comfyui", {}).get("image", {})
-                    original_prefix = image_config.get("prompt_prefix")
-                    image_config["prompt_prefix"] = prompt_prefix
-                    logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
-                
-                try:
-                    # Create progress callback wrapper for video prompt generation
-                    def video_prompt_progress(completed: int, total: int, message: str):
-                        batch_progress = completed / total if total > 0 else 0
-                        overall_progress = 0.15 + (batch_progress * 0.15)
-                        self._report_progress(
-                            progress_callback,
-                            "generating_video_prompts",
-                            overall_progress,
-                            extra_info=message
-                        )
-                    
-                    # Generate base video prompts
-                    base_image_prompts = await generate_video_prompts(
-                        self.llm,
-                        narrations=narrations,
-                        min_words=min_image_prompt_words,
-                        max_words=max_image_prompt_words,
-                        progress_callback=video_prompt_progress
-                    )
-                    
-                    # Apply prompt prefix
-                    from pixelle_video.utils.prompt_helper import build_image_prompt
-                    image_config = self.core.config.get("comfyui", {}).get("image", {})
-                    prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
-                    
-                    image_prompts = []
-                    for base_prompt in base_image_prompts:
-                        final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
-                        image_prompts.append(final_prompt)
-                    
-                finally:
-                    # Restore original prompt_prefix
-                    if original_prefix is not None:
-                        image_config["prompt_prefix"] = original_prefix
-                
-                logger.info(f"✅ Generated {len(image_prompts)} video prompts")
+            # ========== Step 2: Generate image prompts ==========
+            self._report_progress(progress_callback, "generating_image_prompts", 0.15)
            
-            elif template_media_type == "image":
-                # Image template: generate image prompts
-                self._report_progress(progress_callback, "generating_image_prompts", 0.15)
-                
-                # Override prompt_prefix if provided
-                original_prefix = None
-                if prompt_prefix is not None:
-                    image_config = self.core.config.get("comfyui", {}).get("image", {})
-                    original_prefix = image_config.get("prompt_prefix")
-                    image_config["prompt_prefix"] = prompt_prefix
-                    logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
-                
-                try:
-                    # Create progress callback wrapper for image prompt generation
-                    def image_prompt_progress(completed: int, total: int, message: str):
-                        batch_progress = completed / total if total > 0 else 0
-                        overall_progress = 0.15 + (batch_progress * 0.15)
-                        self._report_progress(
-                            progress_callback,
-                            "generating_image_prompts",
-                            overall_progress,
-                            extra_info=message
-                        )
-                    
-                    # Generate base image prompts
-                    base_image_prompts = await generate_image_prompts(
-                        self.llm,
-                        narrations=narrations,
-                        min_words=min_image_prompt_words,
-                        max_words=max_image_prompt_words,
-                        progress_callback=image_prompt_progress
-                    )
-                    
-                    # Apply prompt prefix
-                    from pixelle_video.utils.prompt_helper import build_image_prompt
-                    image_config = self.core.config.get("comfyui", {}).get("image", {})
-                    prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
-                    
-                    image_prompts = []
-                    for base_prompt in base_image_prompts:
-                        final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
-                        image_prompts.append(final_prompt)
-                    
-                finally:
-                    # Restore original prompt_prefix
-                    if original_prefix is not None:
-                        image_config["prompt_prefix"] = original_prefix
-                
-                logger.info(f"✅ Generated {len(image_prompts)} image prompts")
+            # Override prompt_prefix if provided
+            original_prefix = None
+            if prompt_prefix is not None:
+                image_config = self.core.config.get("comfyui", {}).get("image", {})
+                original_prefix = image_config.get("prompt_prefix")
+                image_config["prompt_prefix"] = prompt_prefix
+                logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
            
-            else:  # text
-                # Text-only template: skip media prompt generation
-                image_prompts = [None] * len(narrations)
-                self._report_progress(progress_callback, "preparing_frames", 0.15)
-                logger.info(f"⚡ Skipped media prompt generation (text-only template)")
-                logger.info(f"   💡 Savings: {len(narrations)} LLM calls + {len(narrations)} media generations")
+            try:
+                # Create progress callback wrapper for image prompt generation
+                def image_prompt_progress(completed: int, total: int, message: str):
+                    batch_progress = completed / total if total > 0 else 0
+                    overall_progress = 0.15 + (batch_progress * 0.15)
+                    self._report_progress(
+                        progress_callback,
+                        "generating_image_prompts",
+                        overall_progress,
+                        extra_info=message
+                    )
+                
+                # Generate base image prompts
+                base_image_prompts = await generate_image_prompts(
+                    self.llm,
+                    narrations=narrations,
+                    min_words=min_image_prompt_words,
+                    max_words=max_image_prompt_words,
+                    progress_callback=image_prompt_progress
+                )
+                
+                # Apply prompt prefix
+                from pixelle_video.utils.prompt_helper import build_image_prompt
+                image_config = self.core.config.get("comfyui", {}).get("image", {})
+                prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")
+                
+                image_prompts = []
+                for base_prompt in base_image_prompts:
+                    final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
+                    image_prompts.append(final_prompt)
+                
+            finally:
+                # Restore original prompt_prefix
+                if original_prefix is not None:
+                    image_config["prompt_prefix"] = original_prefix
+            
+            logger.info(f"✅ Generated {len(image_prompts)} image prompts")
            
            # ========== Step 3: Create frames ==========
            for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
@@ -425,43 +348,114 @@ class StandardPipeline(BasePipeline):
                storyboard.frames.append(frame)
            
            # ========== Step 4: Process each frame ==========
-            for i, frame in enumerate(storyboard.frames):
-                base_progress = 0.2
-                frame_range = 0.6
-                per_frame_progress = frame_range / len(storyboard.frames)
+            # Check if using RunningHub workflows for parallel processing
+            # Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts)
+            is_runninghub = (
+                (config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or
+                (config.image_workflow and config.image_workflow.startswith("runninghub/"))
+            )
+            
+            if is_runninghub and RUNNING_HUB_PARALLEL_ENABLED:
+                logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)")
+                logger.info(f"   TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}")
+                logger.info(f"   Image: {'runninghub' if config.image_workflow and config.image_workflow.startswith('runninghub/') else 'local'}")
                
-                # Create frame-specific progress callback
-                def frame_progress_callback(event: ProgressEvent):
-                    overall_progress = base_progress + (per_frame_progress * i) + (per_frame_progress * event.progress)
-                    if progress_callback:
-                        adjusted_event = ProgressEvent(
-                            event_type=event.event_type,
-                            progress=overall_progress,
-                            frame_current=event.frame_current,
-                            frame_total=event.frame_total,
-                            step=event.step,
-                            action=event.action
+                semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT)
+                completed_count = 0
+                
+                async def process_frame_with_semaphore(i: int, frame: StoryboardFrame):
+                    nonlocal completed_count
+                    async with semaphore:
+                        base_progress = 0.2
+                        frame_range = 0.6
+                        per_frame_progress = frame_range / len(storyboard.frames)
+                        
+                        # Create frame-specific progress callback
+                        def frame_progress_callback(event: ProgressEvent):
+                            overall_progress = base_progress + (per_frame_progress * completed_count) + (per_frame_progress * event.progress)
+                            if progress_callback:
+                                adjusted_event = ProgressEvent(
+                                    event_type=event.event_type,
+                                    progress=overall_progress,
+                                    frame_current=i+1,
+                                    frame_total=len(storyboard.frames),
+                                    step=event.step,
+                                    action=event.action
+                                )
+                                progress_callback(adjusted_event)
+                        
+                        # Report frame start
+                        self._report_progress(
+                            progress_callback,
+                            "processing_frame",
+                            base_progress + (per_frame_progress * completed_count),
+                            frame_current=i+1,
+                            frame_total=len(storyboard.frames)
                        )
-                        progress_callback(adjusted_event)
+                        
+                        processed_frame = await self.core.frame_processor(
+                            frame=frame,
+                            storyboard=storyboard,
+                            config=config,
+                            total_frames=len(storyboard.frames),
+                            progress_callback=frame_progress_callback
+                        )
+                        
+                        completed_count += 1
+                        logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s) [{completed_count}/{len(storyboard.frames)}]")
+                        return i, processed_frame
                
-                # Report frame start
-                self._report_progress(
-                    progress_callback,
-                    "processing_frame",
-                    base_progress + (per_frame_progress * i),
-                    frame_current=i+1,
-                    frame_total=len(storyboard.frames)
-                )
+                # Create all tasks and execute in parallel
+                tasks = [process_frame_with_semaphore(i, frame) for i, frame in enumerate(storyboard.frames)]
+                results = await asyncio.gather(*tasks)
                
-                processed_frame = await self.core.frame_processor(
-                    frame=frame,
-                    storyboard=storyboard,
-                    config=config,
-                    total_frames=len(storyboard.frames),
-                    progress_callback=frame_progress_callback
-                )
-                storyboard.total_duration += processed_frame.duration
-                logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)")
+                # Update frames in order and calculate total duration
+                for idx, processed_frame in sorted(results, key=lambda x: x[0]):
+                    storyboard.frames[idx] = processed_frame
+                    storyboard.total_duration += processed_frame.duration
+                
+                logger.info(f"✅ All frames processed in parallel (total duration: {storyboard.total_duration:.2f}s)")
+            else:
+                # Serial processing for non-RunningHub workflows
+                logger.info("⚙️ Using serial processing (non-RunningHub workflow)")
+                
+                for i, frame in enumerate(storyboard.frames):
+                    base_progress = 0.2
+                    frame_range = 0.6
+                    per_frame_progress = frame_range / len(storyboard.frames)
+                    
+                    # Create frame-specific progress callback
+                    def frame_progress_callback(event: ProgressEvent):
+                        overall_progress = base_progress + (per_frame_progress * i) + (per_frame_progress * event.progress)
+                        if progress_callback:
+                            adjusted_event = ProgressEvent(
+                                event_type=event.event_type,
+                                progress=overall_progress,
+                                frame_current=event.frame_current,
+                                frame_total=event.frame_total,
+                                step=event.step,
+                                action=event.action
+                            )
+                            progress_callback(adjusted_event)
+                    
+                    # Report frame start
+                    self._report_progress(
+                        progress_callback,
+                        "processing_frame",
+                        base_progress + (per_frame_progress * i),
+                        frame_current=i+1,
+                        frame_total=len(storyboard.frames)
+                    )
+                    
+                    processed_frame = await self.core.frame_processor(
+                        frame=frame,
+                        storyboard=storyboard,
+                        config=config,
+                        total_frames=len(storyboard.frames),
+                        progress_callback=frame_progress_callback
+                    )
+                    storyboard.total_duration += processed_frame.duration
+                    logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)")
            
            # ========== Step 5: Concatenate videos ==========
            self._report_progress(progress_callback, "concatenating", 0.85)
@@ -515,33 +509,4 @@ class StandardPipeline(BasePipeline):
        except Exception as e:
            logger.error(f"❌ Video generation failed: {e}")
            raise
-    
-    def _check_template_media_type(self, frame_template: str) -> str:
-        """
-        Check template media type requirement
-        
-        This is checked at pipeline level to avoid unnecessary:
-        - LLM calls (generating media prompts)
-        - Media generation API calls
-        - ComfyUI dependency
-        
-        Template naming convention:
-        - static_*.html: Static style template (returns "static")
-        - image_*.html: Image template (returns "image")
-        - video_*.html: Video template (returns "video")
-        
-        Args:
-            frame_template: Template path (e.g., "1080x1920/image_default.html" or "1080x1920/video_default.html")
-        
-        Returns:
-            "static", "image", or "video"
-        """
-        from pixelle_video.utils.template_util import get_template_type
-        
-        # Determine type by template filename prefix
-        template_name = Path(frame_template).name
-        template_type = get_template_type(template_name)
-        
-        logger.debug(f"Template '{frame_template}' is {template_type} template")
-        return template_type

--- a/pixelle_video/utils/tts_util.py
+++ b/pixelle_video/utils/tts_util.py
@@ -30,7 +30,7 @@ from aiohttp import WSServerHandshakeError, ClientResponseError
 _USE_CERTIFI_SSL = True

 # Retry configuration for Edge TTS (to handle 401 errors)
-_RETRY_COUNT = 5       # Default retry count (increased from 3 to 5)
+_RETRY_COUNT = 10       # Default retry count (increased from 3 to 5)
 _RETRY_BASE_DELAY = 1.0     # Base retry delay in seconds (for exponential backoff)
 _MAX_RETRY_DELAY = 10.0     # Maximum retry delay in seconds

@@ -38,8 +38,27 @@ _MAX_RETRY_DELAY = 10.0     # Maximum retry delay in seconds
 _REQUEST_DELAY = 0.5        # Minimum delay before each request (seconds)
 _MAX_CONCURRENT_REQUESTS = 3  # Maximum concurrent requests

-# Global semaphore for rate limiting
-_request_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_REQUESTS)
+# Global semaphore for rate limiting (created per event loop)
+_request_semaphore = None
+_semaphore_loop = None
+
+
+def _get_request_semaphore():
+    """Get or create request semaphore for current event loop"""
+    global _request_semaphore, _semaphore_loop
+    
+    try:
+        current_loop = asyncio.get_running_loop()
+    except RuntimeError:
+        # No running loop
+        return asyncio.Semaphore(_MAX_CONCURRENT_REQUESTS)
+    
+    # If semaphore doesn't exist or belongs to different loop, create new one
+    if _request_semaphore is None or _semaphore_loop != current_loop:
+        _request_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_REQUESTS)
+        _semaphore_loop = current_loop
+    
+    return _request_semaphore


 async def edge_tts(
@@ -98,7 +117,8 @@ async def edge_tts(
    logger.debug(f"Calling Edge TTS with voice: {voice}, rate: {rate}, retry_count: {retry_count}")
    
    # Use semaphore to limit concurrent requests
-    async with _request_semaphore:
+    request_semaphore = _get_request_semaphore()
+    async with request_semaphore:
        # Add a small random delay before each request to avoid rate limiting
        pre_delay = _REQUEST_DELAY + random.uniform(0, 0.3)
        logger.debug(f"Waiting {pre_delay:.2f}s before request (rate limiting)")
@@ -118,20 +138,17 @@ async def edge_tts(
                logger.info(f"🔄 Retrying Edge TTS (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay:.2f}s delay...")
                await asyncio.sleep(retry_delay)
            
-            # Use certifi SSL context for proper certificate verification
-            if _USE_CERTIFI_SSL:
-                if attempt == 0:  # Only log info once
-                    logger.debug("Using certifi SSL certificates for secure Edge TTS connection")
-                original_create_default_context = ssl.create_default_context
-                
-                def create_certifi_context(*args, **kwargs):
-                    # Build SSL context that uses certifi bundle (resolves Windows / missing CA issues)
-                    return original_create_default_context(cafile=certifi.where())
-                
-                # Temporarily replace the function
-                ssl.create_default_context = create_certifi_context
-            
            try:
+                # Create communicate instance with certifi SSL context
+                if _USE_CERTIFI_SSL:
+                    if attempt == 0:  # Only log info once
+                        logger.debug("Using certifi SSL certificates for secure Edge TTS connection")
+                    # Create SSL context with certifi bundle
+                    import certifi
+                    ssl_context = ssl.create_default_context(cafile=certifi.where())
+                else:
+                    ssl_context = None
+                
                # Create communicate instance
                communicate = edge_tts_sdk.Communicate(
                    text=text,
@@ -186,11 +203,6 @@ async def edge_tts(
                # Other errors - don't retry, raise immediately
                logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}")
                raise
-            
-            finally:
-                # Restore original function if we patched it
-                if _USE_CERTIFI_SSL:
-                    ssl.create_default_context = original_create_default_context
        
        # Should not reach here, but just in case
        if last_error:
@@ -255,7 +267,8 @@ async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry
    logger.debug(f"Fetching Edge TTS voices, locale filter: {locale}, retry_count: {retry_count}")
    
    # Use semaphore to limit concurrent requests
-    async with _request_semaphore:
+    request_semaphore = _get_request_semaphore()
+    async with request_semaphore:
        # Add a small random delay before each request to avoid rate limiting
        pre_delay = _REQUEST_DELAY + random.uniform(0, 0.3)
        logger.debug(f"Waiting {pre_delay:.2f}s before request (rate limiting)")
@@ -274,20 +287,8 @@ async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry
                logger.info(f"🔄 Retrying list voices (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay:.2f}s delay...")
                await asyncio.sleep(retry_delay)
            
-            # Use certifi SSL context for proper certificate verification
-            if _USE_CERTIFI_SSL:
-                if attempt == 0:  # Only log info once
-                    logger.debug("Using certifi SSL certificates for secure Edge TTS connection")
-                original_create_default_context = ssl.create_default_context
-                
-                def create_certifi_context(*args, **kwargs):
-                    # Build SSL context that uses certifi bundle (resolves Windows / missing CA issues)
-                    return original_create_default_context(cafile=certifi.where())
-                
-                ssl.create_default_context = create_certifi_context
-            
            try:
-                # Get all voices
+                # Get all voices (edge-tts handles SSL internally)
                voices = await edge_tts_sdk.list_voices()
                
                # Filter by locale if specified
@@ -325,11 +326,6 @@ async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry
                # Other errors - don't retry, raise immediately
                logger.error(f"List voices error (non-retryable): {type(e).__name__} - {e}")
                raise
-            
-            finally:
-                # Restore original function if we patched it
-                if _USE_CERTIFI_SSL:
-                    ssl.create_default_context = original_create_default_context
        
        # Should not reach here, but just in case
        if last_error: