优化tts逻辑

2025-10-29 21:40:37 +08:00
parent 8c03bd1bcd
commit fb18adf318
16 changed files with 505 additions and 318 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -71,9 +71,7 @@ examples/
 repositories/
 # Workflows - ignore user customizations but keep defaults
-workflows/*
+
 !workflows/*_default.json
 !workflows/README.md
 # Templates - ignore user customizations but keep presets
 templates/*
--- a/README.md
+++ b/README.md
@@ -147,7 +147,7 @@ uv run streamlit run web/app.py
 **ComfyUI 工作流**  
 - 选择图像生成的工作流文件
- 默认使用 `image_default.json`
+- 默认使用 `image_flux.json`
 - 如果懂 ComfyUI，可以放自己的工作流到 `workflows/` 文件夹
 **提示词前缀（Prompt Prefix）**  
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -17,22 +17,22 @@ llm:
 # DeepSeek:        base_url: "https://api.deepseek.com"                           model: "deepseek-chat"
 # Ollama (Local):  base_url: "http://localhost:11434/v1"                          model: "llama3.2"
-# ==================== TTS Configuration ====================
+# ==================== ComfyUI Configuration ====================
-tts:
+comfyui:
-  default: edge  # "edge" (free) or "tts_xxx.json" (ComfyUI workflow)
+  # Global ComfyUI settings
  comfyui_url: http://127.0.0.1:8188  # ComfyUI server URL (required for selfhost workflows)
  runninghub_api_key: ""  # RunningHub API key (required for runninghub workflows)
-# ==================== Image Generation Configuration ====================
+  # TTS-specific configuration
-image:
+  tts:
    default: selfhost/tts_edge.json  # TTS workflow to use
  # Image-specific configuration
  image:
    # Required: Default workflow to use (no fallback)
-  # Options: runninghub/image_default.json (recommended, no local setup)
+    # Options: runninghub/image_flux.json (recommended, no local setup)
-  #          selfhost/image_default.json (requires local ComfyUI)
+    #          selfhost/image_flux.json (requires local ComfyUI)
-  default_workflow: runninghub/image_default.json
+    default_workflow: runninghub/image_flux.json
  # Local ComfyUI configuration (required if using selfhost workflows)
  comfyui_url: http://127.0.0.1:8188
  # RunningHub cloud configuration (required if using runninghub workflows)
  runninghub_api_key: ""
    # Image prompt prefix (optional)
    prompt_prefix: "Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines"
--- a/reelforge/config/init.py
+++ b/reelforge/config/init.py
@@ -17,7 +17,7 @@ Usage:
    if config_manager.validate():
        print("Config is valid!")
 """
-from .schema import ReelForgeConfig, LLMConfig, TTSConfig, ImageConfig
+from .schema import ReelForgeConfig, LLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig
 from .manager import ConfigManager
 from .loader import load_config_dict, save_config_dict
@@ -27,8 +27,9 @@ config_manager = ConfigManager()
 __all__ = [
    "ReelForgeConfig",
    "LLMConfig", 
-    "TTSConfig",
+    "ComfyUIConfig",
-    "ImageConfig",
+    "TTSSubConfig",
    "ImageSubConfig",
    "ConfigManager",
    "config_manager",
    "load_config_dict",
--- a/reelforge/config/manager.py
+++ b/reelforge/config/manager.py
@@ -93,21 +93,26 @@ class ConfigManager:
            }
        })
-    def get_image_config(self) -> dict:
+    def get_comfyui_config(self) -> dict:
-        """Get image configuration as dict"""
+        """Get ComfyUI configuration as dict"""
        return {
-            "default_workflow": self.config.image.default_workflow,
+            "comfyui_url": self.config.comfyui.comfyui_url,
-            "comfyui_url": self.config.image.comfyui_url,
+            "runninghub_api_key": self.config.comfyui.runninghub_api_key,
-            "runninghub_api_key": self.config.image.runninghub_api_key,
+            "tts": {
-            "prompt_prefix": self.config.image.prompt_prefix,
+                "default_workflow": self.config.comfyui.tts.default_workflow,
            },
            "image": {
                "default_workflow": self.config.comfyui.image.default_workflow,
                "prompt_prefix": self.config.comfyui.image.prompt_prefix,
            }
        }
-    def set_image_config(
+    def set_comfyui_config(
        self, 
        comfyui_url: Optional[str] = None, 
        runninghub_api_key: Optional[str] = None
    ):
-        """Set image configuration"""
+        """Set ComfyUI global configuration"""
        updates = {}
        if comfyui_url is not None:
            updates["comfyui_url"] = comfyui_url
@@ -115,5 +120,5 @@ class ConfigManager:
            updates["runninghub_api_key"] = runninghub_api_key
        if updates:
-            self.update({"image": updates})
+            self.update({"comfyui": updates})
--- a/reelforge/config/schema.py
+++ b/reelforge/config/schema.py
@@ -13,32 +13,37 @@ class LLMConfig(BaseModel):
    model: str = Field(default="", description="LLM Model Name")
-class TTSConfig(BaseModel):
+class TTSSubConfig(BaseModel):
-    """TTS configuration"""
+    """TTS-specific configuration (under comfyui.tts)"""
    model_config = {"populate_by_name": True}  # Allow both field name and alias
-    default_workflow: str = Field(default="edge", description="Default TTS workflow", alias="default")
+    default_workflow: str = Field(default=None, description="Default TTS workflow (required, no fallback)", alias="default")
-class ImageConfig(BaseModel):
+class ImageSubConfig(BaseModel):
-    """Image generation configuration"""
+    """Image-specific configuration (under comfyui.image)"""
    model_config = {"populate_by_name": True}  # Allow both field name and alias
    default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)", alias="default")
    comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL")
    runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)")
    prompt_prefix: str = Field(
        default="Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines",
        description="Prompt prefix for all image generation"
    )
 class ComfyUIConfig(BaseModel):
    """ComfyUI configuration (includes global settings and service-specific configs)

    Groups the connection settings (server URL and RunningHub API key) together
    with one nested sub-config per service (TTS and image).
    """
    # Global connection settings
    comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL")
    runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)")
    # Service-specific sections; when omitted, each is created from its sub-config class
    tts: TTSSubConfig = Field(default_factory=TTSSubConfig, description="TTS-specific configuration")
    image: ImageSubConfig = Field(default_factory=ImageSubConfig, description="Image-specific configuration")
 class ReelForgeConfig(BaseModel):
    """ReelForge main configuration"""
    project_name: str = Field(default="ReelForge", description="Project name")
    llm: LLMConfig = Field(default_factory=LLMConfig)
-    tts: TTSConfig = Field(default_factory=TTSConfig)
+    comfyui: ComfyUIConfig = Field(default_factory=ComfyUIConfig)
    image: ImageConfig = Field(default_factory=ImageConfig)
    def is_llm_configured(self) -> bool:
        """Check if LLM is properly configured"""
--- a/reelforge/services/comfy_base_service.py
+++ b/reelforge/services/comfy_base_service.py
@@ -19,7 +19,7 @@ class ComfyBaseService:
    Subclasses should define:
    - WORKFLOW_PREFIX: Prefix for workflow files (e.g., "image_", "tts_")
-    - DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_default.json")
+    - DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_flux.json")
    - WORKFLOWS_DIR: Directory containing workflows (default: "workflows")
    """
@@ -35,7 +35,13 @@ class ComfyBaseService:
            config: Full application config dict
            service_name: Service name in config (e.g., "tts", "image")
        """
-        self.config = config.get(service_name, {})
+        # Service-specific config (e.g., config["comfyui"]["tts"])
        comfyui_config = config.get("comfyui", {})
        self.config = comfyui_config.get(service_name, {})
        # Global ComfyUI config (for comfyui_url and runninghub_api_key)
        self.global_config = comfyui_config
        self.service_name = service_name
        self._workflows_cache: Optional[List[str]] = None
@@ -47,18 +53,18 @@ class ComfyBaseService:
            List of workflow info dicts
            Example: [
                {
-                    "name": "image_default.json",
+                    "name": "image_flux.json",
-                    "display_name": "image_default.json - Selfhost",
+                    "display_name": "image_flux.json - Selfhost",
                    "source": "selfhost",
-                    "path": "workflows/selfhost/image_default.json",
+                    "path": "workflows/selfhost/image_flux.json",
-                    "key": "selfhost/image_default.json"
+                    "key": "selfhost/image_flux.json"
                },
                {
-                    "name": "image_default.json",
+                    "name": "image_flux.json",
-                    "display_name": "image_default.json - Runninghub", 
+                    "display_name": "image_flux.json - Runninghub", 
                    "source": "runninghub",
-                    "path": "workflows/runninghub/image_default.json",
+                    "path": "workflows/runninghub/image_flux.json",
-                    "key": "runninghub/image_default.json",
+                    "key": "runninghub/image_flux.json",
                    "workflow_id": "123456"
                }
            ]
@@ -101,11 +107,11 @@ class ComfyBaseService:
        Returns:
            Workflow info dict with structure:
            {
-                "name": "image_default.json",
+                "name": "image_flux.json",
-                "display_name": "image_default.json - Runninghub",
+                "display_name": "image_flux.json - Runninghub",
                "source": "runninghub",
-                "path": "workflows/runninghub/image_default.json",
+                "path": "workflows/runninghub/image_flux.json",
-                "key": "runninghub/image_default.json",
+                "key": "runninghub/image_flux.json",
                "workflow_id": "123456"  # Only for RunningHub
            }
        """
@@ -134,7 +140,7 @@ class ComfyBaseService:
        Get default workflow from config (required, no fallback)
        Returns:
-            Default workflow key (e.g., "runninghub/image_default.json")
+            Default workflow key (e.g., "runninghub/image_flux.json")
        Raises:
            ValueError: If default_workflow not configured
@@ -155,17 +161,17 @@ class ComfyBaseService:
        Resolve workflow key to workflow info
        Args:
-            workflow: Workflow key (e.g., "runninghub/image_default.json")
+            workflow: Workflow key (e.g., "runninghub/image_flux.json")
                     If None, uses default from config
        Returns:
            Workflow info dict with structure:
            {
-                "name": "image_default.json",
+                "name": "image_flux.json",
-                "display_name": "image_default.json - Runninghub",
+                "display_name": "image_flux.json - Runninghub",
                "source": "runninghub",
-                "path": "workflows/runninghub/image_default.json",
+                "path": "workflows/runninghub/image_flux.json",
-                "key": "runninghub/image_default.json",
+                "key": "runninghub/image_flux.json",
                "workflow_id": "123456"  # Only for RunningHub
            }
@@ -210,19 +216,19 @@ class ComfyBaseService:
        """
        kit_config = {}
-        # ComfyUI URL (priority: param > config > env > default)
+        # ComfyUI URL (priority: param > global config > env > default)
        final_comfyui_url = (
            comfyui_url 
-            or self.config.get("comfyui_url")
+            or self.global_config.get("comfyui_url")
            or os.getenv("COMFYUI_BASE_URL")
            or "http://127.0.0.1:8188"
        )
        kit_config["comfyui_url"] = final_comfyui_url
-        # RunningHub API key (priority: param > config > env)
+        # RunningHub API key (priority: param > global config > env)
        final_rh_key = (
            runninghub_api_key
-            or self.config.get("runninghub_api_key")
+            or self.global_config.get("runninghub_api_key")
            or os.getenv("RUNNINGHUB_API_KEY")
        )
        if final_rh_key:
@@ -242,11 +248,11 @@ class ComfyBaseService:
            workflows = service.list_workflows()
            # [
            #     {
-            #         "name": "image_default.json",
+            #         "name": "image_flux.json",
-            #         "display_name": "image_default.json - Runninghub",
+            #         "display_name": "image_flux.json - Runninghub",
            #         "source": "runninghub",
-            #         "path": "workflows/runninghub/image_default.json",
+            #         "path": "workflows/runninghub/image_flux.json",
-            #         "key": "runninghub/image_default.json",
+            #         "key": "runninghub/image_flux.json",
            #         "workflow_id": "123456"
            #     },
            #     ...
@@ -260,7 +266,7 @@ class ComfyBaseService:
        List available workflow keys
        Returns:
-            List of available workflow keys (e.g., ["runninghub/image_default.json", ...])
+            List of available workflow keys (e.g., ["runninghub/image_flux.json", ...])
        Example:
            print(f"Available workflows: {service.available}")
--- a/reelforge/services/image.py
+++ b/reelforge/services/image.py
@@ -17,7 +17,7 @@ class ImageService(ComfyBaseService):
    Uses ComfyKit to execute image generation workflows.
    Usage:
-        # Use default workflow (workflows/image_default.json)
+        # Use default workflow (workflows/image_flux.json)
        image_url = await reelforge.image(prompt="a cat")
        # Use specific workflow
@@ -65,7 +65,7 @@ class ImageService(ComfyBaseService):
        Args:
            prompt: Image generation prompt
-            workflow: Workflow filename (default: from config or "image_default.json")
+            workflow: Workflow filename (default: from config or "image_flux.json")
            comfyui_url: ComfyUI URL (optional, overrides config)
            runninghub_api_key: RunningHub API key (optional, overrides config)
            width: Image width
@@ -81,7 +81,7 @@ class ImageService(ComfyBaseService):
            Generated image URL/path
        Examples:
-            # Simplest: use default workflow (workflows/image_default.json)
+            # Simplest: use default workflow (workflows/image_flux.json)
            image_url = await reelforge.image(prompt="a beautiful cat")
            # Use specific workflow
--- a/reelforge/services/tts_service.py
+++ b/reelforge/services/tts_service.py
@@ -1,39 +1,29 @@
 """
-TTS (Text-to-Speech) Service - Dual implementation (Edge TTS + ComfyUI)
+TTS (Text-to-Speech) Service - ComfyUI Workflow-based implementation
 """
 import uuid
 from typing import Optional
 from comfykit import ComfyKit
 from loguru import logger
 from reelforge.services.comfy_base_service import ComfyBaseService
 from reelforge.utils.os_util import get_temp_path
 class TTSService(ComfyBaseService):
    """
-    TTS (Text-to-Speech) service - Dual implementation
+    TTS (Text-to-Speech) service - Workflow-based
-    Supports two TTS methods:
+    Uses ComfyKit to execute TTS workflows.
    1. Edge TTS (default) - Free, local SDK, no workflow needed
    2. ComfyUI Workflow - Workflow-based, requires ComfyUI setup
    Usage:
-        # Use default (edge-tts)
+        # Use default workflow
        audio_path = await reelforge.tts(text="Hello, world!")
-        # Explicitly use edge-tts
+        # Use specific workflow
        audio_path = await reelforge.tts(
            text="你好，世界！",
-            workflow="edge"
+            workflow="tts_edge.json"
        )
        # Use ComfyUI workflow
        audio_path = await reelforge.tts(
            text="Hello",
            workflow="tts_comfyui.json"
        )
        # List available workflows
@@ -41,12 +31,9 @@ class TTSService(ComfyBaseService):
    """
    WORKFLOW_PREFIX = "tts_"
-    DEFAULT_WORKFLOW = "edge"  # Default to edge-tts
+    DEFAULT_WORKFLOW = None  # No hardcoded default, must be configured
    WORKFLOWS_DIR = "workflows"
    # Built-in providers (not workflow files)
    BUILTIN_PROVIDERS = ["edge", "edge-tts"]
    def __init__(self, config: dict):
        """
        Initialize TTS service
@@ -56,81 +43,53 @@ class TTSService(ComfyBaseService):
        """
        super().__init__(config, service_name="tts")
    def _resolve_workflow(self, workflow: Optional[str] = None) -> str:
        """
        Map a workflow argument onto a built-in provider name or a workflow entry

        Args:
            workflow: Built-in provider name (e.g., "edge") or workflow filename
                      (e.g., "tts_default.json"); None selects the configured default

        Returns:
            The provider name unchanged when it is listed in BUILTIN_PROVIDERS,
            otherwise whatever the parent class resolver returns for the workflow
        """
        # Fall back to the configured default only when nothing was requested
        requested = self._get_default_workflow() if workflow is None else workflow

        if requested not in self.BUILTIN_PROVIDERS:
            # Not a built-in provider: defer to the generic workflow lookup
            return super()._resolve_workflow(requested)

        logger.debug(f"Using built-in TTS provider: {requested}")
        return requested
    async def __call__(
        self,
        text: str,
        workflow: Optional[str] = None,
-        # ComfyUI connection (optional overrides, only for workflow mode)
+        # ComfyUI connection (optional overrides)
        comfyui_url: Optional[str] = None,
        runninghub_api_key: Optional[str] = None,
-        # Common TTS parameters (work for both edge-tts and workflows)
+        # TTS parameters
        voice: Optional[str] = None,
-        rate: Optional[str] = None,
+        speed: float = 1.0,
        volume: Optional[str] = None,
        pitch: Optional[str] = None,
        # Output path
        output_path: Optional[str] = None,
        **params
    ) -> str:
        """
-        Generate speech using edge-tts or ComfyUI workflow
+        Generate speech using ComfyUI workflow
        Args:
            text: Text to convert to speech
-            workflow: Workflow filename or provider name (default: "edge")
+            workflow: Workflow filename (default: from config)
-                     - "edge" or "edge-tts": Use local edge-tts SDK
+            comfyui_url: ComfyUI URL (optional, overrides config)
-                     - "tts_xxx.json": Use ComfyUI workflow
+            runninghub_api_key: RunningHub API key (optional, overrides config)
-                     - Absolute path/URL/RunningHub ID: Also supported
+            voice: Voice ID (workflow-specific)
-            comfyui_url: ComfyUI URL (only for workflow mode)
+            speed: Speech speed multiplier (1.0 = normal, >1.0 = faster, <1.0 = slower)
            runninghub_api_key: RunningHub API key (only for workflow mode)
            voice: Voice ID
            rate: Speech rate (e.g., "+0%", "+50%", "-20%")
            volume: Speech volume (e.g., "+0%")
            pitch: Speech pitch (e.g., "+0Hz")
            output_path: Custom output path (auto-generated if None)
-            **params: Additional parameters
+            **params: Additional workflow parameters
        Returns:
            Generated audio file path
        Examples:
-            # Simplest: use default (edge-tts)
+            # Simplest: use default workflow
            audio_path = await reelforge.tts(text="Hello, world!")
-            # Explicitly use edge-tts with parameters
+            # Use specific workflow
            audio_path = await reelforge.tts(
                text="你好，世界！",
-                workflow="edge",
+                workflow="tts_edge.json"
                voice="zh-CN-XiaoxiaoNeural",
                rate="+20%"
            )
-            # Use ComfyUI workflow
+            # With voice and speed
            audio_path = await reelforge.tts(
                text="Hello",
-                workflow="tts_default.json"
+                workflow="tts_edge.json",
                voice="zh-CN-XiaoxiaoNeural",
                speed=1.2
            )
            # With absolute path
@@ -138,92 +97,28 @@ class TTSService(ComfyBaseService):
                text="Hello",
                workflow="/path/to/custom_tts.json"
            )
        """
        # 1. Check if it's a builtin provider (edge-tts)
        if workflow in self.BUILTIN_PROVIDERS or workflow is None and self._get_default_workflow() in self.BUILTIN_PROVIDERS:
            # Use edge-tts
            return await self._call_edge_tts(
                text=text,
                voice=voice,
                rate=rate,
                volume=volume,
                pitch=pitch,
                output_path=output_path,
                **params
            )
-        # 2. Use ComfyUI workflow - resolve to structured info
+            # With custom ComfyUI server
            audio_path = await reelforge.tts(
                text="Hello",
                comfyui_url="http://192.168.1.100:8188"
            )
        """
        # 1. Resolve workflow (returns structured info)
        workflow_info = self._resolve_workflow(workflow=workflow)
        # 2. Execute ComfyUI workflow
        return await self._call_comfyui_workflow(
            workflow_info=workflow_info,
            text=text,
            comfyui_url=comfyui_url,
            runninghub_api_key=runninghub_api_key,
            voice=voice,
-            rate=rate,
+            speed=speed,
            volume=volume,
            pitch=pitch,
            output_path=output_path,
            **params
        )
    async def _call_edge_tts(
        self,
        text: str,
        voice: Optional[str] = None,
        rate: Optional[str] = None,
        volume: Optional[str] = None,
        pitch: Optional[str] = None,
        output_path: Optional[str] = None,
        **params
    ) -> str:
        """
        Generate speech using edge-tts SDK

        Args:
            text: Text to convert to speech
            voice: Voice ID (default: zh-CN-YunjianNeural)
            rate: Speech rate (default: +0%)
            volume: Speech volume (default: +0%)
            pitch: Speech pitch (default: +0Hz)
            output_path: Custom output path (auto-generated if None)
            **params: Additional parameters forwarded to edge_tts
                      (e.g., retry_count, retry_delay)

        Returns:
            Generated audio file path

        Raises:
            Exception: Any error raised by edge_tts is logged and re-raised
        """
        import os

        from reelforge.utils.tts_util import edge_tts

        logger.info("🎙️  Using edge-tts (local SDK)")

        # Generate output path (use provided path or auto-generate)
        if output_path is None:
            output_path = get_temp_path(f"{uuid.uuid4().hex}.mp3")
        else:
            # A bare filename has an empty dirname, and os.makedirs("") raises
            # FileNotFoundError, so only create the parent when there is one
            parent_dir = os.path.dirname(output_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

        # Call edge-tts with output_path to save directly; its return value is
        # not needed because the caller only receives the file path
        try:
            await edge_tts(
                text=text,
                voice=voice or "zh-CN-YunjianNeural",
                rate=rate or "+0%",
                volume=volume or "+0%",
                pitch=pitch or "+0Hz",
                output_path=output_path,
                **params
            )
        except Exception as e:
            logger.error(f"Edge TTS generation error: {e}")
            raise
        else:
            logger.info(f"✅ Generated audio (edge-tts): {output_path}")
            return output_path
    async def _call_comfyui_workflow(
        self,
        workflow_info: dict,
@@ -231,9 +126,7 @@ class TTSService(ComfyBaseService):
        comfyui_url: Optional[str] = None,
        runninghub_api_key: Optional[str] = None,
        voice: Optional[str] = None,
-        rate: Optional[str] = None,
+        speed: float = 1.0,
        volume: Optional[str] = None,
        pitch: Optional[str] = None,
        output_path: Optional[str] = None,
        **params
    ) -> str:
@@ -246,9 +139,7 @@ class TTSService(ComfyBaseService):
            comfyui_url: ComfyUI URL
            runninghub_api_key: RunningHub API key
            voice: Voice ID (workflow-specific)
-            rate: Speech rate (workflow-specific)
+            speed: Speech speed multiplier (workflow-specific)
            volume: Speech volume (workflow-specific)
            pitch: Speech pitch (workflow-specific)
            output_path: Custom output path (downloads if URL returned)
            **params: Additional workflow parameters
@@ -269,12 +160,8 @@ class TTSService(ComfyBaseService):
        # Add optional TTS parameters
        if voice is not None:
            workflow_params["voice"] = voice
-        if rate is not None:
+        if speed != 1.0:
-            workflow_params["rate"] = rate
+            workflow_params["speed"] = speed
        if volume is not None:
            workflow_params["volume"] = volume
        if pitch is not None:
            workflow_params["pitch"] = pitch
        # Add any additional parameters
        workflow_params.update(params)
--- a/web/app.py
+++ b/web/app.py
@@ -100,11 +100,11 @@ def render_advanced_settings():
    # Expand if not configured, collapse if configured
    with st.expander(tr("settings.title"), expanded=not is_configured):
-        # 2-column layout: LLM | Image
+        # 2-column layout: LLM | ComfyUI
-        llm_col, image_col = st.columns(2)
+        llm_col, comfyui_col = st.columns(2)
        # ====================================================================
-        # Column 1: LLM Settings (Simplified 3-field format)
+        # Column 1: LLM Settings
        # ====================================================================
        with llm_col:
            with st.container(border=True):
@@ -195,21 +195,21 @@ def render_advanced_settings():
                )
        # ====================================================================
-        # Column 2: Image Settings
+        # Column 2: ComfyUI Settings
        # ====================================================================
-        with image_col:
+        with comfyui_col:
            with st.container(border=True):
-                st.markdown(f"**{tr('settings.image.title')}**")
+                st.markdown(f"**{tr('settings.comfyui.title')}**")
                # Get current configuration
-                image_config = config_manager.get_image_config()
+                comfyui_config = config_manager.get_comfyui_config()
                # Local/Self-hosted ComfyUI configuration
-                st.markdown(f"**{tr('settings.image.local_title')}**")
+                st.markdown(f"**{tr('settings.comfyui.local_title')}**")
                comfyui_url = st.text_input(
-                    tr("settings.image.comfyui_url"),
+                    tr("settings.comfyui.comfyui_url"),
-                    value=image_config.get("comfyui_url", "http://127.0.0.1:8188"),
+                    value=comfyui_config.get("comfyui_url", "http://127.0.0.1:8188"),
-                    help=tr("settings.image.comfyui_url_help"),
+                    help=tr("settings.comfyui.comfyui_url_help"),
                    key="comfyui_url_input"
                )
@@ -228,12 +228,12 @@ def render_advanced_settings():
                st.markdown("---")
                # RunningHub cloud configuration
-                st.markdown(f"**{tr('settings.image.cloud_title')}**")
+                st.markdown(f"**{tr('settings.comfyui.cloud_title')}**")
                runninghub_api_key = st.text_input(
-                    tr("settings.image.runninghub_api_key"),
+                    tr("settings.comfyui.runninghub_api_key"),
-                    value=image_config.get("runninghub_api_key", ""),
+                    value=comfyui_config.get("runninghub_api_key", ""),
                    type="password",
-                    help=tr("settings.image.runninghub_api_key_help"),
+                    help=tr("settings.comfyui.runninghub_api_key_help"),
                    key="runninghub_api_key_input"
                )
@@ -250,8 +250,8 @@ def render_advanced_settings():
                    if llm_api_key and llm_base_url and llm_model:
                        config_manager.set_llm_config(llm_api_key, llm_base_url, llm_model)
-                    # Save Image configuration
+                    # Save ComfyUI configuration
-                    config_manager.set_image_config(
+                    config_manager.set_comfyui_config(
                        comfyui_url=comfyui_url if comfyui_url else None,
                        runninghub_api_key=runninghub_api_key if runninghub_api_key else None
                    )
@@ -380,53 +380,12 @@ def main():
                st.info(tr("video.frames_fixed_mode_hint"))
        # ====================================================================
-        # Audio Settings (Voice + BGM)
+        # Audio Settings (BGM + TTS)
        # ====================================================================
        with st.container(border=True):
            st.markdown(f"**{tr('section.audio_settings')}**")
-            # Voice selection
+            # Background music (moved to top)
            st.markdown(f"**{tr('voice.title')}**")
            voice_id = st.selectbox(
                "Voice",
                [
                    "zh-CN-YunjianNeural",  # 男声-专业
                    "zh-CN-YunxiNeural",    # 男声-年轻
                    "zh-CN-XiaoxiaoNeural", # 女声-温柔
                    "zh-CN-XiaoyiNeural",   # 女声-活力
                ],
                format_func=lambda x: {
                    "zh-CN-YunjianNeural": tr("voice.male_professional"),
                    "zh-CN-YunxiNeural": tr("voice.male_young"),
                    "zh-CN-XiaoxiaoNeural": tr("voice.female_gentle"),
                    "zh-CN-XiaoyiNeural": tr("voice.female_energetic"),
                }[x],
                label_visibility="collapsed"
            )
            # Voice preview button
            if st.button(tr("voice.preview"), key="preview_voice", use_container_width=True):
                with st.spinner(tr("voice.previewing")):
                    try:
                        # Generate preview audio
                        preview_text = "大家好，这是一段测试语音。"
                        # Use TTS service to generate audio (auto temp path)
                        audio_path = run_async(reelforge.tts(
                            text=preview_text,
                            voice=voice_id
                        ))
                        # Play the audio
                        if os.path.exists(audio_path):
                            st.audio(audio_path, format="audio/mp3")
                        else:
                            st.error("Failed to generate preview audio")
                    except Exception as e:
                        st.error(tr("voice.preview_failed", error=str(e)))
                        logger.exception(e)
            # Background music
            st.markdown(f"**{tr('bgm.title')}**")
            st.caption(tr("bgm.custom_help"))
@@ -466,6 +425,78 @@ def main():
            # Use full filename for bgm_path (including extension)
            bgm_path = None if bgm_choice == tr("bgm.none") else bgm_choice
            # TTS Workflow selection
            st.markdown(f"**{tr('tts.title')}**")
            st.caption(tr("tts.workflow_help"))
            # Get available TTS workflows
            tts_workflows = reelforge.tts.list_workflows()
            # Build options for selectbox
            tts_workflow_options = [wf["display_name"] for wf in tts_workflows]
            tts_workflow_keys = [wf["key"] for wf in tts_workflows]
            # Default to saved workflow if exists
            default_tts_index = 0
            comfyui_config = config_manager.get_comfyui_config()
            saved_tts_workflow = comfyui_config["tts"]["default_workflow"]
            if saved_tts_workflow and saved_tts_workflow in tts_workflow_keys:
                default_tts_index = tts_workflow_keys.index(saved_tts_workflow)
            tts_workflow_display = st.selectbox(
                "TTS Workflow",
                tts_workflow_options if tts_workflow_options else ["No TTS workflows found"],
                index=default_tts_index,
                label_visibility="collapsed",
                key="tts_workflow_select"
            )
            # Get the actual workflow key
            if tts_workflow_options:
                tts_selected_index = tts_workflow_options.index(tts_workflow_display)
                tts_workflow_key = tts_workflow_keys[tts_selected_index]
            else:
                tts_workflow_key = "selfhost/tts_edge.json"  # fallback
            # TTS preview expander (similar to image preview)
            with st.expander(tr("tts.preview_title"), expanded=False):
                # Preview text input
                preview_text = st.text_input(
                    tr("tts.preview_text"),
                    value="大家好，这是一段测试语音。",
                    placeholder=tr("tts.preview_text_placeholder"),
                    key="tts_preview_text"
                )
                # Preview button
                if st.button(tr("tts.preview_button"), key="preview_tts", use_container_width=True):
                    with st.spinner(tr("tts.previewing")):
                        try:
                            # Generate preview audio using selected workflow
                            audio_path = run_async(reelforge.tts(
                                text=preview_text,
                                workflow=tts_workflow_key
                            ))
                            # Play the audio
                            if audio_path:
                                st.success(tr("tts.preview_success"))
                                if os.path.exists(audio_path):
                                    st.audio(audio_path, format="audio/mp3")
                                elif audio_path.startswith('http'):
                                    st.audio(audio_path)
                                else:
                                    st.error("Failed to generate preview audio")
                                # Show file path
                                st.caption(f"📁 {audio_path}")
                            else:
                                st.error("Failed to generate preview audio")
                        except Exception as e:
                            st.error(tr("tts.preview_failed", error=str(e)))
                            logger.exception(e)
    # ========================================================================
    # Middle Column: Visual Settings (Style & Template)
    # ========================================================================
@@ -484,8 +515,8 @@ def main():
            workflows = reelforge.image.list_workflows()
            # Build options for selectbox
-            # Display: "image_default.json - Runninghub"
+            # Display: "image_flux.json - Runninghub"
-            # Value: "runninghub/image_default.json"
+            # Value: "runninghub/image_flux.json"
            workflow_options = [wf["display_name"] for wf in workflows]
            workflow_keys = [wf["key"] for wf in workflows]
@@ -493,8 +524,8 @@ def main():
            default_workflow_index = 0
            # If user has a saved preference in config, try to match it
-            image_config = config_manager.get_image_config()
+            comfyui_config = config_manager.get_comfyui_config()
-            saved_workflow = image_config.get("default_workflow")
+            saved_workflow = comfyui_config["image"]["default_workflow"]
            if saved_workflow and saved_workflow in workflow_keys:
                default_workflow_index = workflow_keys.index(saved_workflow)
@@ -506,20 +537,19 @@ def main():
                key="image_workflow_select"
            )
-            # Get the actual workflow key (e.g., "runninghub/image_default.json")
+            # Get the actual workflow key (e.g., "runninghub/image_flux.json")
            if workflow_options:
                workflow_selected_index = workflow_options.index(workflow_display)
                workflow_key = workflow_keys[workflow_selected_index]
            else:
-                workflow_key = "runninghub/image_default.json"  # fallback
+                workflow_key = "runninghub/image_flux.json"  # fallback
            # 2. Prompt prefix input
            st.caption(tr("style.prompt_prefix"))
            # Get current prompt_prefix from config
-            image_config = config_manager.get_image_config()
+            current_prefix = comfyui_config["image"]["prompt_prefix"]
-            current_prefix = image_config.get("prompt_prefix", "")
            # Prompt prefix input (temporary, not saved to config)
            prompt_prefix = st.text_area(
@@ -757,8 +787,8 @@ def main():
                        mode=mode,
                        title=title if title else None,
                        n_scenes=n_scenes,
-                        voice_id=voice_id,
+                        tts_workflow=tts_workflow_key,  # Pass TTS workflow key
-                        image_workflow=workflow_key,  # Pass workflow key (e.g., "runninghub/image_default.json")
+                        image_workflow=workflow_key,  # Pass workflow key (e.g., "runninghub/image_flux.json")
                        frame_template=frame_template,
                        prompt_prefix=prompt_prefix,  # Pass prompt_prefix
                        bgm_path=bgm_path,
--- a/web/i18n/locales/en_US.json
+++ b/web/i18n/locales/en_US.json
@@ -58,7 +58,7 @@
    "style.title": "🎨 Image Settings",
    "style.workflow": "ComfyUI Workflow",
-    "style.workflow_help": "💡 Custom: Place image_xxx.json in workflows/ folder",
+    "style.workflow_help": "💡 Custom: Place image_xxx.json in workflows/selfhost/ or workflows/runninghub/ folder",
    "style.prompt_prefix": "Style Prompt Prefix",
    "style.prompt_prefix_placeholder": "Enter style prefix (leave empty for config default)",
    "style.prompt_prefix_help": "This text will be automatically added before all image generation prompts. To permanently change, edit config.yaml",
@@ -163,18 +163,24 @@
    "settings.llm.model": "Model",
    "settings.llm.model_help": "Model name",
-    "settings.tts.title": "🎤 Text-to-Speech",
+    "settings.comfyui.title": "🔧 ComfyUI Configuration",
-    "settings.tts.provider": "Provider",
+    "settings.comfyui.local_title": "Local/Self-hosted ComfyUI",
-    "settings.tts.provider_help": "Select TTS service provider",
+    "settings.comfyui.cloud_title": "RunningHub Cloud",
-    "settings.tts.edge_info": "💡 Edge TTS is free and requires no configuration",
+    "settings.comfyui.comfyui_url": "ComfyUI Server URL",
    "settings.comfyui.comfyui_url_help": "Local or remote ComfyUI server address",
    "settings.comfyui.runninghub_api_key": "RunningHub API Key",
    "settings.comfyui.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key",
-    "settings.image.title": "🎨 Image Generation",
+    "tts.title": "🎤 TTS Workflow",
-    "settings.image.local_title": "Local/Self-hosted ComfyUI",
+    "tts.workflow": "TTS Workflow",
-    "settings.image.cloud_title": "RunningHub Cloud",
+    "tts.workflow_help": "💡 Custom: Place tts_xxx.json in workflows/selfhost/ or workflows/runninghub/ folder",
-    "settings.image.comfyui_url": "ComfyUI Service URL",
+    "tts.preview_title": "🔍 Preview TTS",
-    "settings.image.comfyui_url_help": "Local or remote ComfyUI service URL, default: http://127.0.0.1:8188",
+    "tts.preview_text": "Preview Text",
-    "settings.image.runninghub_api_key": "RunningHub API Key",
+    "tts.preview_text_placeholder": "Enter text to preview...",
-    "settings.image.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key",
+    "tts.preview_button": "🔊 Generate Preview",
    "tts.previewing": "Generating TTS preview...",
    "tts.preview_success": "✅ Preview generated successfully!",
    "tts.preview_failed": "❌ Preview failed: {error}",
    "settings.book.title": "📚 Book Information",
    "settings.book.provider": "Provider",
--- a/web/i18n/locales/zh_CN.json
+++ b/web/i18n/locales/zh_CN.json
@@ -58,7 +58,7 @@
    "style.title": "🎨 插图设置",
    "style.workflow": "生图工作流",
-    "style.workflow_help": "💡 自定义：将 image_xxx.json 放入 workflows/ 文件夹",
+    "style.workflow_help": "💡 自定义：将 image_xxx.json 放入 workflows/selfhost/ 或 workflows/runninghub/ 文件夹",
    "style.prompt_prefix": "风格提示词前缀",
    "style.prompt_prefix_placeholder": "输入风格前缀（留空则使用配置文件默认值）",
    "style.prompt_prefix_help": "此文本将自动添加到所有图像生成提示词之前。要永久修改，请编辑 config.yaml",
@@ -163,18 +163,24 @@
    "settings.llm.model": "Model",
    "settings.llm.model_help": "模型名称",
-    "settings.tts.title": "🎤 语音合成",
+    "settings.comfyui.title": "🔧 ComfyUI 配置",
-    "settings.tts.provider": "服务商",
+    "settings.comfyui.local_title": "本地/自建 ComfyUI",
-    "settings.tts.provider_help": "选择 TTS 服务提供商",
+    "settings.comfyui.cloud_title": "RunningHub 云端",
-    "settings.tts.edge_info": "💡 Edge TTS 是免费的，无需配置",
+    "settings.comfyui.comfyui_url": "ComfyUI 服务器地址",
    "settings.comfyui.comfyui_url_help": "本地或远程 ComfyUI 服务器地址",
    "settings.comfyui.runninghub_api_key": "RunningHub API 密钥",
    "settings.comfyui.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key",
-    "settings.image.title": "🎨 图像生成",
+    "tts.title": "🎤 TTS 工作流",
-    "settings.image.local_title": "本地/自建 ComfyUI",
+    "tts.workflow": "TTS 工作流",
-    "settings.image.cloud_title": "RunningHub 云端",
+    "tts.workflow_help": "💡 自定义：将 tts_xxx.json 放入 workflows/selfhost/ 或 workflows/runninghub/ 文件夹",
-    "settings.image.comfyui_url": "ComfyUI 服务地址",
+    "tts.preview_title": "🔍 预览 TTS",
-    "settings.image.comfyui_url_help": "本地或远程 ComfyUI 服务地址，默认: http://127.0.0.1:8188",
+    "tts.preview_text": "预览文本",
-    "settings.image.runninghub_api_key": "RunningHub API Key",
+    "tts.preview_text_placeholder": "输入要试听的文本...",
-    "settings.image.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key",
+    "tts.preview_button": "🔊 生成预览",
    "tts.previewing": "正在生成 TTS 预览...",
    "tts.preview_success": "✅ 预览生成成功！",
    "tts.preview_failed": "❌ 预览失败：{error}",
    "settings.book.title": "📚 书籍信息",
    "settings.book.provider": "服务商",
--- a/workflows/runninghub/image_flux.json
+++ b/workflows/runninghub/image_flux.json
@@ -0,0 +1,5 @@
 {
  "source": "runninghub",
  "workflow_id": "1983427617984585729"
 }
--- a/workflows/runninghub/tts_edge.json
+++ b/workflows/runninghub/tts_edge.json
@@ -0,0 +1,5 @@
 {
  "source": "runninghub",
  "workflow_id": "1983513964837543938"
 }
--- a/workflows/selfhost/image_flux.json
+++ b/workflows/selfhost/image_flux.json
@@ -0,0 +1,155 @@
 {
  "29": {
    "inputs": {
      "seed": 362283278588365,
      "steps": 20,
      "cfg": 1.5,
      "sampler_name": "euler",
      "scheduler": "simple",
      "denoise": 1,
      "model": [
        "30",
        0
      ],
      "positive": [
        "35",
        0
      ],
      "negative": [
        "33",
        0
      ],
      "latent_image": [
        "43",
        0
      ]
    },
    "class_type": "KSampler",
    "_meta": {
      "title": "KSampler"
    }
  },
  "30": {
    "inputs": {
      "ckpt_name": "flux1-dev-fp8.safetensors"
    },
    "class_type": "CheckpointLoaderSimple",
    "_meta": {
      "title": "Load Checkpoint"
    }
  },
  "31": {
    "inputs": {
      "text": [
        "46",
        0
      ],
      "clip": [
        "30",
        1
      ]
    },
    "class_type": "CLIPTextEncode",
    "_meta": {
      "title": "CLIP Text Encode (Prompt)"
    }
  },
  "33": {
    "inputs": {
      "conditioning": [
        "31",
        0
      ]
    },
    "class_type": "ConditioningZeroOut",
    "_meta": {
      "title": "ConditioningZeroOut"
    }
  },
  "35": {
    "inputs": {
      "guidance": 3.5,
      "conditioning": [
        "31",
        0
      ]
    },
    "class_type": "FluxGuidance",
    "_meta": {
      "title": "FluxGuidance"
    }
  },
  "36": {
    "inputs": {
      "filename_prefix": "ComfyUI",
      "images": [
        "37",
        0
      ]
    },
    "class_type": "SaveImage",
    "_meta": {
      "title": "Save Image"
    }
  },
  "37": {
    "inputs": {
      "samples": [
        "29",
        0
      ],
      "vae": [
        "30",
        2
      ]
    },
    "class_type": "VAEDecode",
    "_meta": {
      "title": "VAE Decode"
    }
  },
  "41": {
    "inputs": {
      "value": 512
    },
    "class_type": "easy int",
    "_meta": {
      "title": "$width.value"
    }
  },
  "42": {
    "inputs": {
      "value": 512
    },
    "class_type": "easy int",
    "_meta": {
      "title": "$height.value"
    }
  },
  "43": {
    "inputs": {
      "width": [
        "41",
        0
      ],
      "height": [
        "42",
        0
      ],
      "batch_size": 1
    },
    "class_type": "EmptyLatentImage",
    "_meta": {
      "title": "Empty Latent Image"
    }
  },
  "46": {
    "inputs": {
      "value": "a dog"
    },
    "class_type": "PrimitiveStringMultiline",
    "_meta": {
      "title": "$prompt.value!"
    }
  }
 }
--- a/workflows/selfhost/tts_edge.json
+++ b/workflows/selfhost/tts_edge.json
@@ -0,0 +1,78 @@
 {
  "1": {
    "inputs": {
      "text": [
        "3",
        0
      ],
      "voice": [
        "5",
        0
      ],
      "speed": [
        "8",
        0
      ],
      "pitch": 0
    },
    "class_type": "EdgeTTS",
    "_meta": {
      "title": "Edge TTS 🔊"
    }
  },
  "3": {
    "inputs": {
      "value": "床前明月光，疑是地上霜。"
    },
    "class_type": "PrimitiveStringMultiline",
    "_meta": {
      "title": "$text.value!"
    }
  },
  "4": {
    "inputs": {
      "filename_prefix": "audio/ComfyUI",
      "quality": "V0",
      "audioUI": "",
      "audio": [
        "1",
        0
      ]
    },
    "class_type": "SaveAudioMP3",
    "_meta": {
      "title": "Save Audio (MP3)"
    }
  },
  "5": {
    "inputs": {
      "text": "[Chinese] zh-CN Yunjian",
      "anything": [
        "7",
        0
      ]
    },
    "class_type": "easy showAnything",
    "_meta": {
      "title": "Show Any"
    }
  },
  "7": {
    "inputs": {
      "value": "[Chinese] zh-CN Yunjian"
    },
    "class_type": "PrimitiveStringMultiline",
    "_meta": {
      "title": "$voice.value"
    }
  },
  "8": {
    "inputs": {
      "value": 1
    },
    "class_type": "easy float",
    "_meta": {
      "title": "$speed.value"
    }
  }
 }