优化tts逻辑

2025-10-29 21:40:37 +08:00
parent 8c03bd1bcd
commit fb18adf318
16 changed files with 505 additions and 318 deletions
--- a/reelforge/config/init.py
+++ b/reelforge/config/init.py
@@ -17,7 +17,7 @@ Usage:
    if config_manager.validate():
        print("Config is valid!")
 """
-from .schema import ReelForgeConfig, LLMConfig, TTSConfig, ImageConfig
+from .schema import ReelForgeConfig, LLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig
 from .manager import ConfigManager
 from .loader import load_config_dict, save_config_dict

@@ -27,8 +27,9 @@ config_manager = ConfigManager()
 __all__ = [
    "ReelForgeConfig",
    "LLMConfig", 
-    "TTSConfig",
-    "ImageConfig",
+    "ComfyUIConfig",
+    "TTSSubConfig",
+    "ImageSubConfig",
    "ConfigManager",
    "config_manager",
    "load_config_dict",
--- a/reelforge/config/manager.py
+++ b/reelforge/config/manager.py
@@ -93,21 +93,26 @@ class ConfigManager:
            }
        })
    
-    def get_image_config(self) -> dict:
-        """Get image configuration as dict"""
+    def get_comfyui_config(self) -> dict:
+        """Get ComfyUI configuration as dict"""
        return {
-            "default_workflow": self.config.image.default_workflow,
-            "comfyui_url": self.config.image.comfyui_url,
-            "runninghub_api_key": self.config.image.runninghub_api_key,
-            "prompt_prefix": self.config.image.prompt_prefix,
+            "comfyui_url": self.config.comfyui.comfyui_url,
+            "runninghub_api_key": self.config.comfyui.runninghub_api_key,
+            "tts": {
+                "default_workflow": self.config.comfyui.tts.default_workflow,
+            },
+            "image": {
+                "default_workflow": self.config.comfyui.image.default_workflow,
+                "prompt_prefix": self.config.comfyui.image.prompt_prefix,
+            }
        }
    
-    def set_image_config(
+    def set_comfyui_config(
        self, 
        comfyui_url: Optional[str] = None, 
        runninghub_api_key: Optional[str] = None
    ):
-        """Set image configuration"""
+        """Set ComfyUI global configuration"""
        updates = {}
        if comfyui_url is not None:
            updates["comfyui_url"] = comfyui_url
@@ -115,5 +120,5 @@ class ConfigManager:
            updates["runninghub_api_key"] = runninghub_api_key
        
        if updates:
-            self.update({"image": updates})
+            self.update({"comfyui": updates})

--- a/reelforge/config/schema.py
+++ b/reelforge/config/schema.py
@@ -13,32 +13,37 @@ class LLMConfig(BaseModel):
    model: str = Field(default="", description="LLM Model Name")


-class TTSConfig(BaseModel):
-    """TTS configuration"""
+class TTSSubConfig(BaseModel):
+    """TTS-specific configuration (under comfyui.tts)"""
    model_config = {"populate_by_name": True}  # Allow both field name and alias
    
-    default_workflow: str = Field(default="edge", description="Default TTS workflow", alias="default")
+    default_workflow: str = Field(default=None, description="Default TTS workflow (required, no fallback)", alias="default")


-class ImageConfig(BaseModel):
-    """Image generation configuration"""
+class ImageSubConfig(BaseModel):
+    """Image-specific configuration (under comfyui.image)"""
    model_config = {"populate_by_name": True}  # Allow both field name and alias
    
    default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)", alias="default")
-    comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL")
-    runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)")
    prompt_prefix: str = Field(
        default="Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines",
        description="Prompt prefix for all image generation"
    )


+class ComfyUIConfig(BaseModel):
+    """ComfyUI configuration (includes global settings and service-specific configs)"""
+    comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL")
+    runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)")
+    tts: TTSSubConfig = Field(default_factory=TTSSubConfig, description="TTS-specific configuration")
+    image: ImageSubConfig = Field(default_factory=ImageSubConfig, description="Image-specific configuration")
+
+
 class ReelForgeConfig(BaseModel):
    """ReelForge main configuration"""
    project_name: str = Field(default="ReelForge", description="Project name")
    llm: LLMConfig = Field(default_factory=LLMConfig)
-    tts: TTSConfig = Field(default_factory=TTSConfig)
-    image: ImageConfig = Field(default_factory=ImageConfig)
+    comfyui: ComfyUIConfig = Field(default_factory=ComfyUIConfig)
    
    def is_llm_configured(self) -> bool:
        """Check if LLM is properly configured"""
--- a/reelforge/services/comfy_base_service.py
+++ b/reelforge/services/comfy_base_service.py
@@ -19,7 +19,7 @@ class ComfyBaseService:
    
    Subclasses should define:
    - WORKFLOW_PREFIX: Prefix for workflow files (e.g., "image_", "tts_")
-    - DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_default.json")
+    - DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_flux.json")
    - WORKFLOWS_DIR: Directory containing workflows (default: "workflows")
    """
    
@@ -35,7 +35,13 @@ class ComfyBaseService:
            config: Full application config dict
            service_name: Service name in config (e.g., "tts", "image")
        """
-        self.config = config.get(service_name, {})
+        # Service-specific config (e.g., config["comfyui"]["tts"])
+        comfyui_config = config.get("comfyui", {})
+        self.config = comfyui_config.get(service_name, {})
+        
+        # Global ComfyUI config (for comfyui_url and runninghub_api_key)
+        self.global_config = comfyui_config
+        
        self.service_name = service_name
        self._workflows_cache: Optional[List[str]] = None
    
@@ -47,18 +53,18 @@ class ComfyBaseService:
            List of workflow info dicts
            Example: [
                {
-                    "name": "image_default.json",
-                    "display_name": "image_default.json - Selfhost",
+                    "name": "image_flux.json",
+                    "display_name": "image_flux.json - Selfhost",
                    "source": "selfhost",
-                    "path": "workflows/selfhost/image_default.json",
-                    "key": "selfhost/image_default.json"
+                    "path": "workflows/selfhost/image_flux.json",
+                    "key": "selfhost/image_flux.json"
                },
                {
-                    "name": "image_default.json",
-                    "display_name": "image_default.json - Runninghub", 
+                    "name": "image_flux.json",
+                    "display_name": "image_flux.json - Runninghub", 
                    "source": "runninghub",
-                    "path": "workflows/runninghub/image_default.json",
-                    "key": "runninghub/image_default.json",
+                    "path": "workflows/runninghub/image_flux.json",
+                    "key": "runninghub/image_flux.json",
                    "workflow_id": "123456"
                }
            ]
@@ -101,11 +107,11 @@ class ComfyBaseService:
        Returns:
            Workflow info dict with structure:
            {
-                "name": "image_default.json",
-                "display_name": "image_default.json - Runninghub",
+                "name": "image_flux.json",
+                "display_name": "image_flux.json - Runninghub",
                "source": "runninghub",
-                "path": "workflows/runninghub/image_default.json",
-                "key": "runninghub/image_default.json",
+                "path": "workflows/runninghub/image_flux.json",
+                "key": "runninghub/image_flux.json",
                "workflow_id": "123456"  # Only for RunningHub
            }
        """
@@ -134,7 +140,7 @@ class ComfyBaseService:
        Get default workflow from config (required, no fallback)
        
        Returns:
-            Default workflow key (e.g., "runninghub/image_default.json")
+            Default workflow key (e.g., "runninghub/image_flux.json")
        
        Raises:
            ValueError: If default_workflow not configured
@@ -155,17 +161,17 @@ class ComfyBaseService:
        Resolve workflow key to workflow info
        
        Args:
-            workflow: Workflow key (e.g., "runninghub/image_default.json")
+            workflow: Workflow key (e.g., "runninghub/image_flux.json")
                     If None, uses default from config
        
        Returns:
            Workflow info dict with structure:
            {
-                "name": "image_default.json",
-                "display_name": "image_default.json - Runninghub",
+                "name": "image_flux.json",
+                "display_name": "image_flux.json - Runninghub",
                "source": "runninghub",
-                "path": "workflows/runninghub/image_default.json",
-                "key": "runninghub/image_default.json",
+                "path": "workflows/runninghub/image_flux.json",
+                "key": "runninghub/image_flux.json",
                "workflow_id": "123456"  # Only for RunningHub
            }
        
@@ -210,19 +216,19 @@ class ComfyBaseService:
        """
        kit_config = {}
        
-        # ComfyUI URL (priority: param > config > env > default)
+        # ComfyUI URL (priority: param > global config > env > default)
        final_comfyui_url = (
            comfyui_url 
-            or self.config.get("comfyui_url")
+            or self.global_config.get("comfyui_url")
            or os.getenv("COMFYUI_BASE_URL")
            or "http://127.0.0.1:8188"
        )
        kit_config["comfyui_url"] = final_comfyui_url
        
-        # RunningHub API key (priority: param > config > env)
+        # RunningHub API key (priority: param > global config > env)
        final_rh_key = (
            runninghub_api_key
-            or self.config.get("runninghub_api_key")
+            or self.global_config.get("runninghub_api_key")
            or os.getenv("RUNNINGHUB_API_KEY")
        )
        if final_rh_key:
@@ -242,11 +248,11 @@ class ComfyBaseService:
            workflows = service.list_workflows()
            # [
            #     {
-            #         "name": "image_default.json",
-            #         "display_name": "image_default.json - Runninghub",
+            #         "name": "image_flux.json",
+            #         "display_name": "image_flux.json - Runninghub",
            #         "source": "runninghub",
-            #         "path": "workflows/runninghub/image_default.json",
-            #         "key": "runninghub/image_default.json",
+            #         "path": "workflows/runninghub/image_flux.json",
+            #         "key": "runninghub/image_flux.json",
            #         "workflow_id": "123456"
            #     },
            #     ...
@@ -260,7 +266,7 @@ class ComfyBaseService:
        List available workflow keys
        
        Returns:
-            List of available workflow keys (e.g., ["runninghub/image_default.json", ...])
+            List of available workflow keys (e.g., ["runninghub/image_flux.json", ...])
        
        Example:
            print(f"Available workflows: {service.available}")
--- a/reelforge/services/image.py
+++ b/reelforge/services/image.py
@@ -17,7 +17,7 @@ class ImageService(ComfyBaseService):
    Uses ComfyKit to execute image generation workflows.
    
    Usage:
-        # Use default workflow (workflows/image_default.json)
+        # Use default workflow (from config: comfyui.image.default_workflow)
        image_url = await reelforge.image(prompt="a cat")
        
        # Use specific workflow
@@ -65,7 +65,7 @@ class ImageService(ComfyBaseService):
        
        Args:
            prompt: Image generation prompt
-            workflow: Workflow filename (default: from config or "image_default.json")
+            workflow: Workflow filename (default: from config, no hardcoded fallback)
            comfyui_url: ComfyUI URL (optional, overrides config)
            runninghub_api_key: RunningHub API key (optional, overrides config)
            width: Image width
@@ -81,7 +81,7 @@ class ImageService(ComfyBaseService):
            Generated image URL/path
        
        Examples:
-            # Simplest: use default workflow (workflows/image_default.json)
+            # Simplest: use default workflow (from config: comfyui.image.default_workflow)
            image_url = await reelforge.image(prompt="a beautiful cat")
            
            # Use specific workflow
--- a/reelforge/services/tts_service.py
+++ b/reelforge/services/tts_service.py
@@ -1,39 +1,29 @@
 """
-TTS (Text-to-Speech) Service - Dual implementation (Edge TTS + ComfyUI)
+TTS (Text-to-Speech) Service - ComfyUI Workflow-based implementation
 """

-import uuid
 from typing import Optional

 from comfykit import ComfyKit
 from loguru import logger

 from reelforge.services.comfy_base_service import ComfyBaseService
-from reelforge.utils.os_util import get_temp_path


 class TTSService(ComfyBaseService):
    """
-    TTS (Text-to-Speech) service - Dual implementation
+    TTS (Text-to-Speech) service - Workflow-based
    
-    Supports two TTS methods:
-    1. Edge TTS (default) - Free, local SDK, no workflow needed
-    2. ComfyUI Workflow - Workflow-based, requires ComfyUI setup
+    Uses ComfyKit to execute TTS workflows.
    
    Usage:
-        # Use default (edge-tts)
+        # Use default workflow
        audio_path = await reelforge.tts(text="Hello, world!")
        
-        # Explicitly use edge-tts
+        # Use specific workflow
        audio_path = await reelforge.tts(
            text="你好，世界！",
-            workflow="edge"
-        )
-        
-        # Use ComfyUI workflow
-        audio_path = await reelforge.tts(
-            text="Hello",
-            workflow="tts_comfyui.json"
+            workflow="tts_edge.json"
        )
        
        # List available workflows
@@ -41,12 +31,9 @@ class TTSService(ComfyBaseService):
    """
    
    WORKFLOW_PREFIX = "tts_"
-    DEFAULT_WORKFLOW = "edge"  # Default to edge-tts
+    DEFAULT_WORKFLOW = None  # No hardcoded default, must be configured
    WORKFLOWS_DIR = "workflows"
    
-    # Built-in providers (not workflow files)
-    BUILTIN_PROVIDERS = ["edge", "edge-tts"]
-    
    def __init__(self, config: dict):
        """
        Initialize TTS service
@@ -56,81 +43,53 @@ class TTSService(ComfyBaseService):
        """
        super().__init__(config, service_name="tts")
    
-    def _resolve_workflow(self, workflow: Optional[str] = None) -> str:
-        """
-        Resolve workflow to actual workflow path or provider name
-        
-        Args:
-            workflow: Workflow filename or provider name (e.g., "edge", "tts_default.json")
-        
-        Returns:
-            Workflow file path or provider name
-        """
-        # 1. If not specified, use default
-        if workflow is None:
-            workflow = self._get_default_workflow()
-        
-        # 2. If it's a built-in provider, return as-is
-        if workflow in self.BUILTIN_PROVIDERS:
-            logger.debug(f"Using built-in TTS provider: {workflow}")
-            return workflow
-        
-        # 3. Otherwise, treat as workflow file (use parent logic)
-        return super()._resolve_workflow(workflow)
    
    async def __call__(
        self,
        text: str,
        workflow: Optional[str] = None,
-        # ComfyUI connection (optional overrides, only for workflow mode)
+        # ComfyUI connection (optional overrides)
        comfyui_url: Optional[str] = None,
        runninghub_api_key: Optional[str] = None,
-        # Common TTS parameters (work for both edge-tts and workflows)
+        # TTS parameters
        voice: Optional[str] = None,
-        rate: Optional[str] = None,
-        volume: Optional[str] = None,
-        pitch: Optional[str] = None,
+        speed: float = 1.0,
        # Output path
        output_path: Optional[str] = None,
        **params
    ) -> str:
        """
-        Generate speech using edge-tts or ComfyUI workflow
+        Generate speech using ComfyUI workflow
        
        Args:
            text: Text to convert to speech
-            workflow: Workflow filename or provider name (default: "edge")
-                     - "edge" or "edge-tts": Use local edge-tts SDK
-                     - "tts_xxx.json": Use ComfyUI workflow
-                     - Absolute path/URL/RunningHub ID: Also supported
-            comfyui_url: ComfyUI URL (only for workflow mode)
-            runninghub_api_key: RunningHub API key (only for workflow mode)
-            voice: Voice ID
-            rate: Speech rate (e.g., "+0%", "+50%", "-20%")
-            volume: Speech volume (e.g., "+0%")
-            pitch: Speech pitch (e.g., "+0Hz")
+            workflow: Workflow filename (default: from config)
+            comfyui_url: ComfyUI URL (optional, overrides config)
+            runninghub_api_key: RunningHub API key (optional, overrides config)
+            voice: Voice ID (workflow-specific)
+            speed: Speech speed multiplier (1.0 = normal, >1.0 = faster, <1.0 = slower)
            output_path: Custom output path (auto-generated if None)
-            **params: Additional parameters
+            **params: Additional workflow parameters
        
        Returns:
            Generated audio file path
        
        Examples:
-            # Simplest: use default (edge-tts)
+            # Simplest: use default workflow
            audio_path = await reelforge.tts(text="Hello, world!")
            
-            # Explicitly use edge-tts with parameters
+            # Use specific workflow
            audio_path = await reelforge.tts(
                text="你好，世界！",
-                workflow="edge",
-                voice="zh-CN-XiaoxiaoNeural",
-                rate="+20%"
+                workflow="tts_edge.json"
            )
            
-            # Use ComfyUI workflow
+            # With voice and speed
            audio_path = await reelforge.tts(
                text="Hello",
-                workflow="tts_default.json"
+                workflow="tts_edge.json",
+                voice="zh-CN-XiaoxiaoNeural",
+                speed=1.2
            )
            
            # With absolute path
@@ -138,92 +97,28 @@ class TTSService(ComfyBaseService):
                text="Hello",
                workflow="/path/to/custom_tts.json"
            )
-        """
-        # 1. Check if it's a builtin provider (edge-tts)
-        if workflow in self.BUILTIN_PROVIDERS or workflow is None and self._get_default_workflow() in self.BUILTIN_PROVIDERS:
-            # Use edge-tts
-            return await self._call_edge_tts(
-                text=text,
-                voice=voice,
-                rate=rate,
-                volume=volume,
-                pitch=pitch,
-                output_path=output_path,
-                **params
+            
+            # With custom ComfyUI server
+            audio_path = await reelforge.tts(
+                text="Hello",
+                comfyui_url="http://192.168.1.100:8188"
            )
-        
-        # 2. Use ComfyUI workflow - resolve to structured info
+        """
+        # 1. Resolve workflow (returns structured info)
        workflow_info = self._resolve_workflow(workflow=workflow)
        
+        # 2. Execute ComfyUI workflow
        return await self._call_comfyui_workflow(
            workflow_info=workflow_info,
            text=text,
            comfyui_url=comfyui_url,
            runninghub_api_key=runninghub_api_key,
            voice=voice,
-            rate=rate,
-            volume=volume,
-            pitch=pitch,
+            speed=speed,
            output_path=output_path,
            **params
        )
    
-    async def _call_edge_tts(
-        self,
-        text: str,
-        voice: Optional[str] = None,
-        rate: Optional[str] = None,
-        volume: Optional[str] = None,
-        pitch: Optional[str] = None,
-        output_path: Optional[str] = None,
-        **params
-    ) -> str:
-        """
-        Generate speech using edge-tts SDK
-        
-        Args:
-            text: Text to convert to speech
-            voice: Voice ID (default: zh-CN-YunjianNeural)
-            rate: Speech rate (default: +0%)
-            volume: Speech volume (default: +0%)
-            pitch: Speech pitch (default: +0Hz)
-            output_path: Custom output path (auto-generated if None)
-            **params: Additional parameters (e.g., retry_count, retry_delay)
-        
-        Returns:
-            Generated audio file path
-        """
-        from reelforge.utils.tts_util import edge_tts
-        
-        logger.info(f"🎙️  Using edge-tts (local SDK)")
-        
-        # Generate output path (use provided path or auto-generate)
-        if output_path is None:
-            output_path = get_temp_path(f"{uuid.uuid4().hex}.mp3")
-        else:
-            # Ensure parent directory exists
-            import os
-            os.makedirs(os.path.dirname(output_path), exist_ok=True)
-        
-        # Call edge-tts with output_path to save directly
-        try:
-            audio_bytes = await edge_tts(
-                text=text,
-                voice=voice or "zh-CN-YunjianNeural",
-                rate=rate or "+0%",
-                volume=volume or "+0%",
-                pitch=pitch or "+0Hz",
-                output_path=output_path,
-                **params
-            )
-            
-            logger.info(f"✅ Generated audio (edge-tts): {output_path}")
-            return output_path
-        
-        except Exception as e:
-            logger.error(f"Edge TTS generation error: {e}")
-            raise
-    
    async def _call_comfyui_workflow(
        self,
        workflow_info: dict,
@@ -231,9 +126,7 @@ class TTSService(ComfyBaseService):
        comfyui_url: Optional[str] = None,
        runninghub_api_key: Optional[str] = None,
        voice: Optional[str] = None,
-        rate: Optional[str] = None,
-        volume: Optional[str] = None,
-        pitch: Optional[str] = None,
+        speed: float = 1.0,
        output_path: Optional[str] = None,
        **params
    ) -> str:
@@ -246,9 +139,7 @@ class TTSService(ComfyBaseService):
            comfyui_url: ComfyUI URL
            runninghub_api_key: RunningHub API key
            voice: Voice ID (workflow-specific)
-            rate: Speech rate (workflow-specific)
-            volume: Speech volume (workflow-specific)
-            pitch: Speech pitch (workflow-specific)
+            speed: Speech speed multiplier (workflow-specific)
            output_path: Custom output path (downloads if URL returned)
            **params: Additional workflow parameters
        
@@ -269,12 +160,8 @@ class TTSService(ComfyBaseService):
        # Add optional TTS parameters
        if voice is not None:
            workflow_params["voice"] = voice
-        if rate is not None:
-            workflow_params["rate"] = rate
-        if volume is not None:
-            workflow_params["volume"] = volume
-        if pitch is not None:
-            workflow_params["pitch"] = pitch
+        if speed != 1.0:
+            workflow_params["speed"] = speed
        
        # Add any additional parameters
        workflow_params.update(params)