From fb18adf318883835423af64e2e39e5f40834e445 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Wed, 29 Oct 2025 21:40:37 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96tts=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 +- README.md | 2 +- config.example.yaml | 34 ++--- reelforge/config/__init__.py | 7 +- reelforge/config/manager.py | 23 +-- reelforge/config/schema.py | 23 +-- reelforge/services/comfy_base_service.py | 64 ++++---- reelforge/services/image.py | 6 +- reelforge/services/tts_service.py | 187 +++++------------------ web/app.py | 170 ++++++++++++--------- web/i18n/locales/en_US.json | 30 ++-- web/i18n/locales/zh_CN.json | 30 ++-- workflows/runninghub/image_flux.json | 5 + workflows/runninghub/tts_edge.json | 5 + workflows/selfhost/image_flux.json | 155 +++++++++++++++++++ workflows/selfhost/tts_edge.json | 78 ++++++++++ 16 files changed, 505 insertions(+), 318 deletions(-) create mode 100644 workflows/runninghub/image_flux.json create mode 100644 workflows/runninghub/tts_edge.json create mode 100644 workflows/selfhost/image_flux.json create mode 100644 workflows/selfhost/tts_edge.json diff --git a/.gitignore b/.gitignore index 479d71f..1b27369 100644 --- a/.gitignore +++ b/.gitignore @@ -71,9 +71,7 @@ examples/ repositories/ # Workflows - ignore user customizations but keep defaults -workflows/* -!workflows/*_default.json -!workflows/README.md + # Templates - ignore user customizations but keep presets templates/* diff --git a/README.md b/README.md index 5057d58..f5fa33f 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ uv run streamlit run web/app.py **ComfyUI 工作流** - 选择图像生成的工作流文件 -- 默认使用 `image_default.json` +- 默认使用 `image_flux.json` - 如果懂 ComfyUI,可以放自己的工作流到 `workflows/` 文件夹 **提示词前缀(Prompt Prefix)** diff --git a/config.example.yaml b/config.example.yaml index ad581db..0826440 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -17,22 +17,22 @@ llm: # DeepSeek: base_url: "https://api.deepseek.com" model: "deepseek-chat" # Ollama (Local): base_url: "http://localhost:11434/v1" model: "llama3.2" -# ==================== TTS Configuration ==================== -tts: - default: edge # "edge" (free) or "tts_xxx.json" (ComfyUI workflow) - -# ==================== Image Generation Configuration ==================== -image: - # Required: Default workflow to use (no fallback) - # Options: runninghub/image_default.json (recommended, no local setup) - # selfhost/image_default.json (requires local ComfyUI) - default_workflow: runninghub/image_default.json +# ==================== ComfyUI Configuration ==================== +comfyui: + # Global ComfyUI settings + comfyui_url: http://127.0.0.1:8188 # ComfyUI server URL (required for selfhost workflows) + runninghub_api_key: "" # RunningHub API key (required for runninghub workflows) - # Local ComfyUI configuration (required if using selfhost workflows) - comfyui_url: http://127.0.0.1:8188 + # TTS-specific configuration + tts: + default: selfhost/tts_edge.json # TTS workflow to use - # RunningHub cloud configuration (required if using runninghub workflows) - runninghub_api_key: "" - - # Image prompt prefix (optional) - prompt_prefix: "Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines" + # Image-specific configuration + image: + # Required: Default workflow to use (no fallback) + # Options: runninghub/image_flux.json (recommended, no local setup) + # selfhost/image_flux.json (requires local ComfyUI) + default_workflow: runninghub/image_flux.json + + # Image prompt prefix (optional) + prompt_prefix: "Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines" diff --git a/reelforge/config/__init__.py b/reelforge/config/__init__.py index c8ded09..6540af2 100644 --- a/reelforge/config/__init__.py +++ b/reelforge/config/__init__.py @@ -17,7 +17,7 @@ Usage: if config_manager.validate(): print("Config is valid!") """ -from .schema import ReelForgeConfig, LLMConfig, TTSConfig, ImageConfig +from .schema import ReelForgeConfig, LLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig from .manager import ConfigManager from .loader import load_config_dict, save_config_dict @@ -27,8 +27,9 @@ config_manager = ConfigManager() __all__ = [ "ReelForgeConfig", "LLMConfig", - "TTSConfig", - "ImageConfig", + "ComfyUIConfig", + "TTSSubConfig", + "ImageSubConfig", "ConfigManager", "config_manager", "load_config_dict", diff --git a/reelforge/config/manager.py b/reelforge/config/manager.py index ccb1928..1d31cd1 100644 --- a/reelforge/config/manager.py +++ b/reelforge/config/manager.py @@ -93,21 +93,26 @@ class ConfigManager: } }) - def get_image_config(self) -> dict: - """Get image configuration as dict""" + def get_comfyui_config(self) -> dict: + """Get ComfyUI configuration as dict""" return { - "default_workflow": self.config.image.default_workflow, - "comfyui_url": self.config.image.comfyui_url, - "runninghub_api_key": self.config.image.runninghub_api_key, - "prompt_prefix": self.config.image.prompt_prefix, + "comfyui_url": self.config.comfyui.comfyui_url, + "runninghub_api_key": self.config.comfyui.runninghub_api_key, + "tts": { + "default_workflow": self.config.comfyui.tts.default_workflow, + }, + "image": { + "default_workflow": self.config.comfyui.image.default_workflow, + "prompt_prefix": self.config.comfyui.image.prompt_prefix, + } } - def set_image_config( + def set_comfyui_config( self, comfyui_url: Optional[str] = None, runninghub_api_key: Optional[str] = None ): - """Set image configuration""" + """Set ComfyUI global configuration""" updates = {} if comfyui_url is not None: updates["comfyui_url"] = comfyui_url @@ -115,5 +120,5 @@ class ConfigManager: updates["runninghub_api_key"] = runninghub_api_key if updates: - self.update({"image": updates}) + self.update({"comfyui": updates}) diff --git a/reelforge/config/schema.py b/reelforge/config/schema.py index bedeceb..7a61900 100644 --- a/reelforge/config/schema.py +++ b/reelforge/config/schema.py @@ -13,32 +13,37 @@ class LLMConfig(BaseModel): model: str = Field(default="", description="LLM Model Name") -class TTSConfig(BaseModel): - """TTS configuration""" +class TTSSubConfig(BaseModel): + """TTS-specific configuration (under comfyui.tts)""" model_config = {"populate_by_name": True} # Allow both field name and alias - default_workflow: str = Field(default="edge", description="Default TTS workflow", alias="default") + default_workflow: str = Field(default=None, description="Default TTS workflow (required, no fallback)", alias="default") -class ImageConfig(BaseModel): - """Image generation configuration""" +class ImageSubConfig(BaseModel): + """Image-specific configuration (under comfyui.image)""" model_config = {"populate_by_name": True} # Allow both field name and alias default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)", alias="default") - comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL") - runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)") prompt_prefix: str = Field( default="Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines", description="Prompt prefix for all image generation" ) +class ComfyUIConfig(BaseModel): + """ComfyUI configuration (includes global settings and service-specific configs)""" + comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL") + runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)") + tts: TTSSubConfig = Field(default_factory=TTSSubConfig, description="TTS-specific configuration") + image: ImageSubConfig = Field(default_factory=ImageSubConfig, description="Image-specific configuration") + + class ReelForgeConfig(BaseModel): """ReelForge main configuration""" project_name: str = Field(default="ReelForge", description="Project name") llm: LLMConfig = Field(default_factory=LLMConfig) - tts: TTSConfig = Field(default_factory=TTSConfig) - image: ImageConfig = Field(default_factory=ImageConfig) + comfyui: ComfyUIConfig = Field(default_factory=ComfyUIConfig) def is_llm_configured(self) -> bool: """Check if LLM is properly configured""" diff --git a/reelforge/services/comfy_base_service.py b/reelforge/services/comfy_base_service.py index 9456558..8d659c8 100644 --- a/reelforge/services/comfy_base_service.py +++ b/reelforge/services/comfy_base_service.py @@ -19,7 +19,7 @@ class ComfyBaseService: Subclasses should define: - WORKFLOW_PREFIX: Prefix for workflow files (e.g., "image_", "tts_") - - DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_default.json") + - DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_flux.json") - WORKFLOWS_DIR: Directory containing workflows (default: "workflows") """ @@ -35,7 +35,13 @@ class ComfyBaseService: config: Full application config dict service_name: Service name in config (e.g., "tts", "image") """ - self.config = config.get(service_name, {}) + # Service-specific config (e.g., config["comfyui"]["tts"]) + comfyui_config = config.get("comfyui", {}) + self.config = comfyui_config.get(service_name, {}) + + # Global ComfyUI config (for comfyui_url and runninghub_api_key) + self.global_config = comfyui_config + self.service_name = service_name self._workflows_cache: Optional[List[str]] = None @@ -47,18 +53,18 @@ class ComfyBaseService: List of workflow info dicts Example: [ { - "name": "image_default.json", - "display_name": "image_default.json - Selfhost", + "name": "image_flux.json", + "display_name": "image_flux.json - Selfhost", "source": "selfhost", - "path": "workflows/selfhost/image_default.json", - "key": "selfhost/image_default.json" + "path": "workflows/selfhost/image_flux.json", + "key": "selfhost/image_flux.json" }, { - "name": "image_default.json", - "display_name": "image_default.json - Runninghub", + "name": "image_flux.json", + "display_name": "image_flux.json - Runninghub", "source": "runninghub", - "path": "workflows/runninghub/image_default.json", - "key": "runninghub/image_default.json", + "path": "workflows/runninghub/image_flux.json", + "key": "runninghub/image_flux.json", "workflow_id": "123456" } ] @@ -101,11 +107,11 @@ class ComfyBaseService: Returns: Workflow info dict with structure: { - "name": "image_default.json", - "display_name": "image_default.json - Runninghub", + "name": "image_flux.json", + "display_name": "image_flux.json - Runninghub", "source": "runninghub", - "path": "workflows/runninghub/image_default.json", - "key": "runninghub/image_default.json", + "path": "workflows/runninghub/image_flux.json", + "key": "runninghub/image_flux.json", "workflow_id": "123456" # Only for RunningHub } """ @@ -134,7 +140,7 @@ class ComfyBaseService: Get default workflow from config (required, no fallback) Returns: - Default workflow key (e.g., "runninghub/image_default.json") + Default workflow key (e.g., "runninghub/image_flux.json") Raises: ValueError: If default_workflow not configured @@ -155,17 +161,17 @@ class ComfyBaseService: Resolve workflow key to workflow info Args: - workflow: Workflow key (e.g., "runninghub/image_default.json") + workflow: Workflow key (e.g., "runninghub/image_flux.json") If None, uses default from config Returns: Workflow info dict with structure: { - "name": "image_default.json", - "display_name": "image_default.json - Runninghub", + "name": "image_flux.json", + "display_name": "image_flux.json - Runninghub", "source": "runninghub", - "path": "workflows/runninghub/image_default.json", - "key": "runninghub/image_default.json", + "path": "workflows/runninghub/image_flux.json", + "key": "runninghub/image_flux.json", "workflow_id": "123456" # Only for RunningHub } @@ -210,19 +216,19 @@ class ComfyBaseService: """ kit_config = {} - # ComfyUI URL (priority: param > config > env > default) + # ComfyUI URL (priority: param > global config > env > default) final_comfyui_url = ( comfyui_url - or self.config.get("comfyui_url") + or self.global_config.get("comfyui_url") or os.getenv("COMFYUI_BASE_URL") or "http://127.0.0.1:8188" ) kit_config["comfyui_url"] = final_comfyui_url - # RunningHub API key (priority: param > config > env) + # RunningHub API key (priority: param > global config > env) final_rh_key = ( runninghub_api_key - or self.config.get("runninghub_api_key") + or self.global_config.get("runninghub_api_key") or os.getenv("RUNNINGHUB_API_KEY") ) if final_rh_key: @@ -242,11 +248,11 @@ class ComfyBaseService: workflows = service.list_workflows() # [ # { - # "name": "image_default.json", - # "display_name": "image_default.json - Runninghub", + # "name": "image_flux.json", + # "display_name": "image_flux.json - Runninghub", # "source": "runninghub", - # "path": "workflows/runninghub/image_default.json", - # "key": "runninghub/image_default.json", + # "path": "workflows/runninghub/image_flux.json", + # "key": "runninghub/image_flux.json", # "workflow_id": "123456" # }, # ... @@ -260,7 +266,7 @@ class ComfyBaseService: List available workflow keys Returns: - List of available workflow keys (e.g., ["runninghub/image_default.json", ...]) + List of available workflow keys (e.g., ["runninghub/image_flux.json", ...]) Example: print(f"Available workflows: {service.available}") diff --git a/reelforge/services/image.py b/reelforge/services/image.py index 086f6c4..50e64b8 100644 --- a/reelforge/services/image.py +++ b/reelforge/services/image.py @@ -17,7 +17,7 @@ class ImageService(ComfyBaseService): Uses ComfyKit to execute image generation workflows. Usage: - # Use default workflow (workflows/image_default.json) + # Use default workflow (workflows/image_flux.json) image_url = await reelforge.image(prompt="a cat") # Use specific workflow @@ -65,7 +65,7 @@ class ImageService(ComfyBaseService): Args: prompt: Image generation prompt - workflow: Workflow filename (default: from config or "image_default.json") + workflow: Workflow filename (default: from config or "image_flux.json") comfyui_url: ComfyUI URL (optional, overrides config) runninghub_api_key: RunningHub API key (optional, overrides config) width: Image width @@ -81,7 +81,7 @@ class ImageService(ComfyBaseService): Generated image URL/path Examples: - # Simplest: use default workflow (workflows/image_default.json) + # Simplest: use default workflow (workflows/image_flux.json) image_url = await reelforge.image(prompt="a beautiful cat") # Use specific workflow diff --git a/reelforge/services/tts_service.py b/reelforge/services/tts_service.py index 6cf42a0..6f4d11f 100644 --- a/reelforge/services/tts_service.py +++ b/reelforge/services/tts_service.py @@ -1,39 +1,29 @@ """ -TTS (Text-to-Speech) Service - Dual implementation (Edge TTS + ComfyUI) +TTS (Text-to-Speech) Service - ComfyUI Workflow-based implementation """ -import uuid from typing import Optional from comfykit import ComfyKit from loguru import logger from reelforge.services.comfy_base_service import ComfyBaseService -from reelforge.utils.os_util import get_temp_path class TTSService(ComfyBaseService): """ - TTS (Text-to-Speech) service - Dual implementation + TTS (Text-to-Speech) service - Workflow-based - Supports two TTS methods: - 1. Edge TTS (default) - Free, local SDK, no workflow needed - 2. ComfyUI Workflow - Workflow-based, requires ComfyUI setup + Uses ComfyKit to execute TTS workflows. Usage: - # Use default (edge-tts) + # Use default workflow audio_path = await reelforge.tts(text="Hello, world!") - # Explicitly use edge-tts + # Use specific workflow audio_path = await reelforge.tts( text="你好,世界!", - workflow="edge" - ) - - # Use ComfyUI workflow - audio_path = await reelforge.tts( - text="Hello", - workflow="tts_comfyui.json" + workflow="tts_edge.json" ) # List available workflows @@ -41,12 +31,9 @@ class TTSService(ComfyBaseService): """ WORKFLOW_PREFIX = "tts_" - DEFAULT_WORKFLOW = "edge" # Default to edge-tts + DEFAULT_WORKFLOW = None # No hardcoded default, must be configured WORKFLOWS_DIR = "workflows" - # Built-in providers (not workflow files) - BUILTIN_PROVIDERS = ["edge", "edge-tts"] - def __init__(self, config: dict): """ Initialize TTS service @@ -56,81 +43,53 @@ class TTSService(ComfyBaseService): """ super().__init__(config, service_name="tts") - def _resolve_workflow(self, workflow: Optional[str] = None) -> str: - """ - Resolve workflow to actual workflow path or provider name - - Args: - workflow: Workflow filename or provider name (e.g., "edge", "tts_default.json") - - Returns: - Workflow file path or provider name - """ - # 1. If not specified, use default - if workflow is None: - workflow = self._get_default_workflow() - - # 2. If it's a built-in provider, return as-is - if workflow in self.BUILTIN_PROVIDERS: - logger.debug(f"Using built-in TTS provider: {workflow}") - return workflow - - # 3. Otherwise, treat as workflow file (use parent logic) - return super()._resolve_workflow(workflow) async def __call__( self, text: str, workflow: Optional[str] = None, - # ComfyUI connection (optional overrides, only for workflow mode) + # ComfyUI connection (optional overrides) comfyui_url: Optional[str] = None, runninghub_api_key: Optional[str] = None, - # Common TTS parameters (work for both edge-tts and workflows) + # TTS parameters voice: Optional[str] = None, - rate: Optional[str] = None, - volume: Optional[str] = None, - pitch: Optional[str] = None, + speed: float = 1.0, # Output path output_path: Optional[str] = None, **params ) -> str: """ - Generate speech using edge-tts or ComfyUI workflow + Generate speech using ComfyUI workflow Args: text: Text to convert to speech - workflow: Workflow filename or provider name (default: "edge") - - "edge" or "edge-tts": Use local edge-tts SDK - - "tts_xxx.json": Use ComfyUI workflow - - Absolute path/URL/RunningHub ID: Also supported - comfyui_url: ComfyUI URL (only for workflow mode) - runninghub_api_key: RunningHub API key (only for workflow mode) - voice: Voice ID - rate: Speech rate (e.g., "+0%", "+50%", "-20%") - volume: Speech volume (e.g., "+0%") - pitch: Speech pitch (e.g., "+0Hz") + workflow: Workflow filename (default: from config) + comfyui_url: ComfyUI URL (optional, overrides config) + runninghub_api_key: RunningHub API key (optional, overrides config) + voice: Voice ID (workflow-specific) + speed: Speech speed multiplier (1.0 = normal, >1.0 = faster, <1.0 = slower) output_path: Custom output path (auto-generated if None) - **params: Additional parameters + **params: Additional workflow parameters Returns: Generated audio file path Examples: - # Simplest: use default (edge-tts) + # Simplest: use default workflow audio_path = await reelforge.tts(text="Hello, world!") - # Explicitly use edge-tts with parameters + # Use specific workflow audio_path = await reelforge.tts( text="你好,世界!", - workflow="edge", - voice="zh-CN-XiaoxiaoNeural", - rate="+20%" + workflow="tts_edge.json" ) - # Use ComfyUI workflow + # With voice and speed audio_path = await reelforge.tts( text="Hello", - workflow="tts_default.json" + workflow="tts_edge.json", + voice="zh-CN-XiaoxiaoNeural", + speed=1.2 ) # With absolute path @@ -138,92 +97,28 @@ class TTSService(ComfyBaseService): text="Hello", workflow="/path/to/custom_tts.json" ) - """ - # 1. Check if it's a builtin provider (edge-tts) - if workflow in self.BUILTIN_PROVIDERS or workflow is None and self._get_default_workflow() in self.BUILTIN_PROVIDERS: - # Use edge-tts - return await self._call_edge_tts( - text=text, - voice=voice, - rate=rate, - volume=volume, - pitch=pitch, - output_path=output_path, - **params + + # With custom ComfyUI server + audio_path = await reelforge.tts( + text="Hello", + comfyui_url="http://192.168.1.100:8188" ) - - # 2. Use ComfyUI workflow - resolve to structured info + """ + # 1. Resolve workflow (returns structured info) workflow_info = self._resolve_workflow(workflow=workflow) + # 2. Execute ComfyUI workflow return await self._call_comfyui_workflow( workflow_info=workflow_info, text=text, comfyui_url=comfyui_url, runninghub_api_key=runninghub_api_key, voice=voice, - rate=rate, - volume=volume, - pitch=pitch, + speed=speed, output_path=output_path, **params ) - async def _call_edge_tts( - self, - text: str, - voice: Optional[str] = None, - rate: Optional[str] = None, - volume: Optional[str] = None, - pitch: Optional[str] = None, - output_path: Optional[str] = None, - **params - ) -> str: - """ - Generate speech using edge-tts SDK - - Args: - text: Text to convert to speech - voice: Voice ID (default: zh-CN-YunjianNeural) - rate: Speech rate (default: +0%) - volume: Speech volume (default: +0%) - pitch: Speech pitch (default: +0Hz) - output_path: Custom output path (auto-generated if None) - **params: Additional parameters (e.g., retry_count, retry_delay) - - Returns: - Generated audio file path - """ - from reelforge.utils.tts_util import edge_tts - - logger.info(f"🎙️ Using edge-tts (local SDK)") - - # Generate output path (use provided path or auto-generate) - if output_path is None: - output_path = get_temp_path(f"{uuid.uuid4().hex}.mp3") - else: - # Ensure parent directory exists - import os - os.makedirs(os.path.dirname(output_path), exist_ok=True) - - # Call edge-tts with output_path to save directly - try: - audio_bytes = await edge_tts( - text=text, - voice=voice or "zh-CN-YunjianNeural", - rate=rate or "+0%", - volume=volume or "+0%", - pitch=pitch or "+0Hz", - output_path=output_path, - **params - ) - - logger.info(f"✅ Generated audio (edge-tts): {output_path}") - return output_path - - except Exception as e: - logger.error(f"Edge TTS generation error: {e}") - raise - async def _call_comfyui_workflow( self, workflow_info: dict, @@ -231,9 +126,7 @@ class TTSService(ComfyBaseService): comfyui_url: Optional[str] = None, runninghub_api_key: Optional[str] = None, voice: Optional[str] = None, - rate: Optional[str] = None, - volume: Optional[str] = None, - pitch: Optional[str] = None, + speed: float = 1.0, output_path: Optional[str] = None, **params ) -> str: @@ -246,9 +139,7 @@ class TTSService(ComfyBaseService): comfyui_url: ComfyUI URL runninghub_api_key: RunningHub API key voice: Voice ID (workflow-specific) - rate: Speech rate (workflow-specific) - volume: Speech volume (workflow-specific) - pitch: Speech pitch (workflow-specific) + speed: Speech speed multiplier (workflow-specific) output_path: Custom output path (downloads if URL returned) **params: Additional workflow parameters @@ -269,12 +160,8 @@ class TTSService(ComfyBaseService): # Add optional TTS parameters if voice is not None: workflow_params["voice"] = voice - if rate is not None: - workflow_params["rate"] = rate - if volume is not None: - workflow_params["volume"] = volume - if pitch is not None: - workflow_params["pitch"] = pitch + if speed != 1.0: + workflow_params["speed"] = speed # Add any additional parameters workflow_params.update(params) diff --git a/web/app.py b/web/app.py index 24db05b..23bfc3e 100644 --- a/web/app.py +++ b/web/app.py @@ -100,11 +100,11 @@ def render_advanced_settings(): # Expand if not configured, collapse if configured with st.expander(tr("settings.title"), expanded=not is_configured): - # 2-column layout: LLM | Image - llm_col, image_col = st.columns(2) + # 2-column layout: LLM | ComfyUI + llm_col, comfyui_col = st.columns(2) # ==================================================================== - # Column 1: LLM Settings (Simplified 3-field format) + # Column 1: LLM Settings # ==================================================================== with llm_col: with st.container(border=True): @@ -195,21 +195,21 @@ def render_advanced_settings(): ) # ==================================================================== - # Column 2: Image Settings + # Column 2: ComfyUI Settings # ==================================================================== - with image_col: + with comfyui_col: with st.container(border=True): - st.markdown(f"**{tr('settings.image.title')}**") + st.markdown(f"**{tr('settings.comfyui.title')}**") # Get current configuration - image_config = config_manager.get_image_config() + comfyui_config = config_manager.get_comfyui_config() # Local/Self-hosted ComfyUI configuration - st.markdown(f"**{tr('settings.image.local_title')}**") + st.markdown(f"**{tr('settings.comfyui.local_title')}**") comfyui_url = st.text_input( - tr("settings.image.comfyui_url"), - value=image_config.get("comfyui_url", "http://127.0.0.1:8188"), - help=tr("settings.image.comfyui_url_help"), + tr("settings.comfyui.comfyui_url"), + value=comfyui_config.get("comfyui_url", "http://127.0.0.1:8188"), + help=tr("settings.comfyui.comfyui_url_help"), key="comfyui_url_input" ) @@ -228,12 +228,12 @@ def render_advanced_settings(): st.markdown("---") # RunningHub cloud configuration - st.markdown(f"**{tr('settings.image.cloud_title')}**") + st.markdown(f"**{tr('settings.comfyui.cloud_title')}**") runninghub_api_key = st.text_input( - tr("settings.image.runninghub_api_key"), - value=image_config.get("runninghub_api_key", ""), + tr("settings.comfyui.runninghub_api_key"), + value=comfyui_config.get("runninghub_api_key", ""), type="password", - help=tr("settings.image.runninghub_api_key_help"), + help=tr("settings.comfyui.runninghub_api_key_help"), key="runninghub_api_key_input" ) @@ -250,8 +250,8 @@ def render_advanced_settings(): if llm_api_key and llm_base_url and llm_model: config_manager.set_llm_config(llm_api_key, llm_base_url, llm_model) - # Save Image configuration - config_manager.set_image_config( + # Save ComfyUI configuration + config_manager.set_comfyui_config( comfyui_url=comfyui_url if comfyui_url else None, runninghub_api_key=runninghub_api_key if runninghub_api_key else None ) @@ -380,53 +380,12 @@ def main(): st.info(tr("video.frames_fixed_mode_hint")) # ==================================================================== - # Audio Settings (Voice + BGM) + # Audio Settings (BGM + TTS) # ==================================================================== with st.container(border=True): st.markdown(f"**{tr('section.audio_settings')}**") - # Voice selection - st.markdown(f"**{tr('voice.title')}**") - voice_id = st.selectbox( - "Voice", - [ - "zh-CN-YunjianNeural", # 男声-专业 - "zh-CN-YunxiNeural", # 男声-年轻 - "zh-CN-XiaoxiaoNeural", # 女声-温柔 - "zh-CN-XiaoyiNeural", # 女声-活力 - ], - format_func=lambda x: { - "zh-CN-YunjianNeural": tr("voice.male_professional"), - "zh-CN-YunxiNeural": tr("voice.male_young"), - "zh-CN-XiaoxiaoNeural": tr("voice.female_gentle"), - "zh-CN-XiaoyiNeural": tr("voice.female_energetic"), - }[x], - label_visibility="collapsed" - ) - - # Voice preview button - if st.button(tr("voice.preview"), key="preview_voice", use_container_width=True): - with st.spinner(tr("voice.previewing")): - try: - # Generate preview audio - preview_text = "大家好,这是一段测试语音。" - - # Use TTS service to generate audio (auto temp path) - audio_path = run_async(reelforge.tts( - text=preview_text, - voice=voice_id - )) - - # Play the audio - if os.path.exists(audio_path): - st.audio(audio_path, format="audio/mp3") - else: - st.error("Failed to generate preview audio") - except Exception as e: - st.error(tr("voice.preview_failed", error=str(e))) - logger.exception(e) - - # Background music + # Background music (moved to top) st.markdown(f"**{tr('bgm.title')}**") st.caption(tr("bgm.custom_help")) @@ -465,6 +424,78 @@ def main(): # Use full filename for bgm_path (including extension) bgm_path = None if bgm_choice == tr("bgm.none") else bgm_choice + + + # TTS Workflow selection + st.markdown(f"**{tr('tts.title')}**") + st.caption(tr("tts.workflow_help")) + + # Get available TTS workflows + tts_workflows = reelforge.tts.list_workflows() + + # Build options for selectbox + tts_workflow_options = [wf["display_name"] for wf in tts_workflows] + tts_workflow_keys = [wf["key"] for wf in tts_workflows] + + # Default to saved workflow if exists + default_tts_index = 0 + comfyui_config = config_manager.get_comfyui_config() + saved_tts_workflow = comfyui_config["tts"]["default_workflow"] + if saved_tts_workflow and saved_tts_workflow in tts_workflow_keys: + default_tts_index = tts_workflow_keys.index(saved_tts_workflow) + + tts_workflow_display = st.selectbox( + "TTS Workflow", + tts_workflow_options if tts_workflow_options else ["No TTS workflows found"], + index=default_tts_index, + label_visibility="collapsed", + key="tts_workflow_select" + ) + + # Get the actual workflow key + if tts_workflow_options: + tts_selected_index = tts_workflow_options.index(tts_workflow_display) + tts_workflow_key = tts_workflow_keys[tts_selected_index] + else: + tts_workflow_key = "selfhost/tts_edge.json" # fallback + + # TTS preview expander (similar to image preview) + with st.expander(tr("tts.preview_title"), expanded=False): + # Preview text input + preview_text = st.text_input( + tr("tts.preview_text"), + value="大家好,这是一段测试语音。", + placeholder=tr("tts.preview_text_placeholder"), + key="tts_preview_text" + ) + + # Preview button + if st.button(tr("tts.preview_button"), key="preview_tts", use_container_width=True): + with st.spinner(tr("tts.previewing")): + try: + # Generate preview audio using selected workflow + audio_path = run_async(reelforge.tts( + text=preview_text, + workflow=tts_workflow_key + )) + + # Play the audio + if audio_path: + st.success(tr("tts.preview_success")) + if os.path.exists(audio_path): + st.audio(audio_path, format="audio/mp3") + elif audio_path.startswith('http'): + st.audio(audio_path) + else: + st.error("Failed to generate preview audio") + + # Show file path + st.caption(f"📁 {audio_path}") + else: + st.error("Failed to generate preview audio") + except Exception as e: + st.error(tr("tts.preview_failed", error=str(e))) + logger.exception(e) # ======================================================================== # Middle Column: Visual Settings (Style & Template) @@ -484,8 +515,8 @@ def main(): workflows = reelforge.image.list_workflows() # Build options for selectbox - # Display: "image_default.json - Runninghub" - # Value: "runninghub/image_default.json" + # Display: "image_flux.json - Runninghub" + # Value: "runninghub/image_flux.json" workflow_options = [wf["display_name"] for wf in workflows] workflow_keys = [wf["key"] for wf in workflows] @@ -493,8 +524,8 @@ def main(): default_workflow_index = 0 # If user has a saved preference in config, try to match it - image_config = config_manager.get_image_config() - saved_workflow = image_config.get("default_workflow") + comfyui_config = config_manager.get_comfyui_config() + saved_workflow = comfyui_config["image"]["default_workflow"] if saved_workflow and saved_workflow in workflow_keys: default_workflow_index = workflow_keys.index(saved_workflow) @@ -506,20 +537,19 @@ def main(): key="image_workflow_select" ) - # Get the actual workflow key (e.g., "runninghub/image_default.json") + # Get the actual workflow key (e.g., "runninghub/image_flux.json") if workflow_options: workflow_selected_index = workflow_options.index(workflow_display) workflow_key = workflow_keys[workflow_selected_index] else: - workflow_key = "runninghub/image_default.json" # fallback + workflow_key = "runninghub/image_flux.json" # fallback # 2. Prompt prefix input st.caption(tr("style.prompt_prefix")) # Get current prompt_prefix from config - image_config = config_manager.get_image_config() - current_prefix = image_config.get("prompt_prefix", "") + current_prefix = comfyui_config["image"]["prompt_prefix"] # Prompt prefix input (temporary, not saved to config) prompt_prefix = st.text_area( @@ -757,8 +787,8 @@ def main(): mode=mode, title=title if title else None, n_scenes=n_scenes, - voice_id=voice_id, - image_workflow=workflow_key, # Pass workflow key (e.g., "runninghub/image_default.json") + tts_workflow=tts_workflow_key, # Pass TTS workflow key + image_workflow=workflow_key, # Pass workflow key (e.g., "runninghub/image_flux.json") frame_template=frame_template, prompt_prefix=prompt_prefix, # Pass prompt_prefix bgm_path=bgm_path, diff --git a/web/i18n/locales/en_US.json b/web/i18n/locales/en_US.json index 8855122..c5f6567 100644 --- a/web/i18n/locales/en_US.json +++ b/web/i18n/locales/en_US.json @@ -58,7 +58,7 @@ "style.title": "🎨 Image Settings", "style.workflow": "ComfyUI Workflow", - "style.workflow_help": "💡 Custom: Place image_xxx.json in workflows/ folder", + "style.workflow_help": "💡 Custom: Place image_xxx.json in workflows/selfhost/ or workflows/runninghub/ folder", "style.prompt_prefix": "Style Prompt Prefix", "style.prompt_prefix_placeholder": "Enter style prefix (leave empty for config default)", "style.prompt_prefix_help": "This text will be automatically added before all image generation prompts. To permanently change, edit config.yaml", @@ -163,18 +163,24 @@ "settings.llm.model": "Model", "settings.llm.model_help": "Model name", - "settings.tts.title": "🎤 Text-to-Speech", - "settings.tts.provider": "Provider", - "settings.tts.provider_help": "Select TTS service provider", - "settings.tts.edge_info": "💡 Edge TTS is free and requires no configuration", + "settings.comfyui.title": "🔧 ComfyUI Configuration", + "settings.comfyui.local_title": "Local/Self-hosted ComfyUI", + "settings.comfyui.cloud_title": "RunningHub Cloud", + "settings.comfyui.comfyui_url": "ComfyUI Server URL", + "settings.comfyui.comfyui_url_help": "Local or remote ComfyUI server address", + "settings.comfyui.runninghub_api_key": "RunningHub API Key", + "settings.comfyui.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key", - "settings.image.title": "🎨 Image Generation", - "settings.image.local_title": "Local/Self-hosted ComfyUI", - "settings.image.cloud_title": "RunningHub Cloud", - "settings.image.comfyui_url": "ComfyUI Service URL", - "settings.image.comfyui_url_help": "Local or remote ComfyUI service URL, default: http://127.0.0.1:8188", - "settings.image.runninghub_api_key": "RunningHub API Key", - "settings.image.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key", + "tts.title": "🎤 TTS Workflow", + "tts.workflow": "TTS Workflow", + "tts.workflow_help": "💡 Custom: Place tts_xxx.json in workflows/selfhost/ or workflows/runninghub/ folder", + "tts.preview_title": "🔍 Preview TTS", + "tts.preview_text": "Preview Text", + "tts.preview_text_placeholder": "Enter text to preview...", + "tts.preview_button": "🔊 Generate Preview", + "tts.previewing": "Generating TTS preview...", + "tts.preview_success": "✅ Preview generated successfully!", + "tts.preview_failed": "❌ Preview failed: {error}", "settings.book.title": "📚 Book Information", "settings.book.provider": "Provider", diff --git a/web/i18n/locales/zh_CN.json b/web/i18n/locales/zh_CN.json index 792ff51..360cc52 100644 --- a/web/i18n/locales/zh_CN.json +++ b/web/i18n/locales/zh_CN.json @@ -58,7 +58,7 @@ "style.title": "🎨 插图设置", "style.workflow": "生图工作流", - "style.workflow_help": "💡 自定义:将 image_xxx.json 放入 workflows/ 文件夹", + "style.workflow_help": "💡 自定义:将 image_xxx.json 放入 workflows/selfhost/ 或 workflows/runninghub/ 文件夹", "style.prompt_prefix": "风格提示词前缀", "style.prompt_prefix_placeholder": "输入风格前缀(留空则使用配置文件默认值)", "style.prompt_prefix_help": "此文本将自动添加到所有图像生成提示词之前。要永久修改,请编辑 config.yaml", @@ -163,18 +163,24 @@ "settings.llm.model": "Model", "settings.llm.model_help": "模型名称", - "settings.tts.title": "🎤 语音合成", - "settings.tts.provider": "服务商", - "settings.tts.provider_help": "选择 TTS 服务提供商", - "settings.tts.edge_info": "💡 Edge TTS 是免费的,无需配置", + "settings.comfyui.title": "🔧 ComfyUI 配置", + "settings.comfyui.local_title": "本地/自建 ComfyUI", + "settings.comfyui.cloud_title": "RunningHub 云端", + "settings.comfyui.comfyui_url": "ComfyUI 服务器地址", + "settings.comfyui.comfyui_url_help": "本地或远程 ComfyUI 服务器地址", + "settings.comfyui.runninghub_api_key": "RunningHub API 密钥", + "settings.comfyui.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key", - "settings.image.title": "🎨 图像生成", - "settings.image.local_title": "本地/自建 ComfyUI", - "settings.image.cloud_title": "RunningHub 云端", - "settings.image.comfyui_url": "ComfyUI 服务地址", - "settings.image.comfyui_url_help": "本地或远程 ComfyUI 服务地址,默认: http://127.0.0.1:8188", - "settings.image.runninghub_api_key": "RunningHub API Key", - "settings.image.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key", + "tts.title": "🎤 TTS 工作流", + "tts.workflow": "TTS 工作流", + "tts.workflow_help": "💡 自定义:将 tts_xxx.json 放入 workflows/selfhost/ 或 workflows/runninghub/ 文件夹", + "tts.preview_title": "🔍 预览 TTS", + "tts.preview_text": "预览文本", + "tts.preview_text_placeholder": "输入要试听的文本...", + "tts.preview_button": "🔊 生成预览", + "tts.previewing": "正在生成 TTS 预览...", + "tts.preview_success": "✅ 预览生成成功!", + "tts.preview_failed": "❌ 预览失败:{error}", "settings.book.title": "📚 书籍信息", "settings.book.provider": "服务商", diff --git a/workflows/runninghub/image_flux.json b/workflows/runninghub/image_flux.json new file mode 100644 index 0000000..2e93086 --- /dev/null +++ b/workflows/runninghub/image_flux.json @@ -0,0 +1,5 @@ +{ + "source": "runninghub", + "workflow_id": "1983427617984585729" +} + diff --git a/workflows/runninghub/tts_edge.json b/workflows/runninghub/tts_edge.json new file mode 100644 index 0000000..b2479b2 --- /dev/null +++ b/workflows/runninghub/tts_edge.json @@ -0,0 +1,5 @@ +{ + "source": "runninghub", + "workflow_id": "1983513964837543938" +} + diff --git a/workflows/selfhost/image_flux.json b/workflows/selfhost/image_flux.json new file mode 100644 index 0000000..4aec0b5 --- /dev/null +++ b/workflows/selfhost/image_flux.json @@ -0,0 +1,155 @@ +{ + "29": { + "inputs": { + "seed": 362283278588365, + "steps": 20, + "cfg": 1.5, + "sampler_name": "euler", + "scheduler": "simple", + "denoise": 1, + "model": [ + "30", + 0 + ], + "positive": [ + "35", + 0 + ], + "negative": [ + "33", + 0 + ], + "latent_image": [ + "43", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "30": { + "inputs": { + "ckpt_name": "flux1-dev-fp8.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "31": { + "inputs": { + "text": [ + "46", + 0 + ], + "clip": [ + "30", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "33": { + "inputs": { + "conditioning": [ + "31", + 0 + ] + }, + "class_type": "ConditioningZeroOut", + "_meta": { + "title": "ConditioningZeroOut" + } + }, + "35": { + "inputs": { + "guidance": 3.5, + "conditioning": [ + "31", + 0 + ] + }, + "class_type": "FluxGuidance", + "_meta": { + "title": "FluxGuidance" + } + }, + "36": { + "inputs": { + "filename_prefix": "ComfyUI", + "images": [ + "37", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "37": { + "inputs": { + "samples": [ + "29", + 0 + ], + "vae": [ + "30", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "41": { + "inputs": { + "value": 512 + }, + "class_type": "easy int", + "_meta": { + "title": "$width.value" + } + }, + "42": { + "inputs": { + "value": 512 + }, + "class_type": "easy int", + "_meta": { + "title": "$height.value" + } + }, + "43": { + "inputs": { + "width": [ + "41", + 0 + ], + "height": [ + "42", + 0 + ], + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "46": { + "inputs": { + "value": "a dog" + }, + "class_type": "PrimitiveStringMultiline", + "_meta": { + "title": "$prompt.value!" + } + } +} \ No newline at end of file diff --git a/workflows/selfhost/tts_edge.json b/workflows/selfhost/tts_edge.json new file mode 100644 index 0000000..092b07b --- /dev/null +++ b/workflows/selfhost/tts_edge.json @@ -0,0 +1,78 @@ +{ + "1": { + "inputs": { + "text": [ + "3", + 0 + ], + "voice": [ + "5", + 0 + ], + "speed": [ + "8", + 0 + ], + "pitch": 0 + }, + "class_type": "EdgeTTS", + "_meta": { + "title": "Edge TTS 🔊" + } + }, + "3": { + "inputs": { + "value": "床前明月光,疑是地上霜。" + }, + "class_type": "PrimitiveStringMultiline", + "_meta": { + "title": "$text.value!" + } + }, + "4": { + "inputs": { + "filename_prefix": "audio/ComfyUI", + "quality": "V0", + "audioUI": "", + "audio": [ + "1", + 0 + ] + }, + "class_type": "SaveAudioMP3", + "_meta": { + "title": "Save Audio (MP3)" + } + }, + "5": { + "inputs": { + "text": "[Chinese] zh-CN Yunjian", + "anything": [ + "7", + 0 + ] + }, + "class_type": "easy showAnything", + "_meta": { + "title": "Show Any" + } + }, + "7": { + "inputs": { + "value": "[Chinese] zh-CN Yunjian" + }, + "class_type": "PrimitiveStringMultiline", + "_meta": { + "title": "$voice.value" + } + }, + "8": { + "inputs": { + "value": 1 + }, + "class_type": "easy float", + "_meta": { + "title": "$speed.value" + } + } +} \ No newline at end of file