优化tts逻辑

This commit is contained in:
puke
2025-10-29 21:40:37 +08:00
parent 8c03bd1bcd
commit fb18adf318
16 changed files with 505 additions and 318 deletions

4
.gitignore vendored
View File

@@ -71,9 +71,7 @@ examples/
repositories/ repositories/
# Workflows - ignore user customizations but keep defaults # Workflows - ignore user customizations but keep defaults
workflows/*
!workflows/*_default.json
!workflows/README.md
# Templates - ignore user customizations but keep presets # Templates - ignore user customizations but keep presets
templates/* templates/*

View File

@@ -147,7 +147,7 @@ uv run streamlit run web/app.py
**ComfyUI 工作流** **ComfyUI 工作流**
- 选择图像生成的工作流文件 - 选择图像生成的工作流文件
- 默认使用 `image_default.json` - 默认使用 `image_flux.json`
- 如果懂 ComfyUI，可以放自己的工作流到 `workflows/` 文件夹 - 如果懂 ComfyUI，可以放自己的工作流到 `workflows/` 文件夹
**提示词前缀（Prompt Prefix）** **提示词前缀（Prompt Prefix）**

View File

@@ -17,22 +17,22 @@ llm:
# DeepSeek: base_url: "https://api.deepseek.com" model: "deepseek-chat" # DeepSeek: base_url: "https://api.deepseek.com" model: "deepseek-chat"
# Ollama (Local): base_url: "http://localhost:11434/v1" model: "llama3.2" # Ollama (Local): base_url: "http://localhost:11434/v1" model: "llama3.2"
# ==================== TTS Configuration ==================== # ==================== ComfyUI Configuration ====================
tts: comfyui:
default: edge # "edge" (free) or "tts_xxx.json" (ComfyUI workflow) # Global ComfyUI settings
comfyui_url: http://127.0.0.1:8188 # ComfyUI server URL (required for selfhost workflows)
runninghub_api_key: "" # RunningHub API key (required for runninghub workflows)
# ==================== Image Generation Configuration ==================== # TTS-specific configuration
image: tts:
default: selfhost/tts_edge.json # TTS workflow to use
# Image-specific configuration
image:
# Required: Default workflow to use (no fallback) # Required: Default workflow to use (no fallback)
# Options: runninghub/image_default.json (recommended, no local setup) # Options: runninghub/image_flux.json (recommended, no local setup)
# selfhost/image_default.json (requires local ComfyUI) # selfhost/image_flux.json (requires local ComfyUI)
default_workflow: runninghub/image_default.json default_workflow: runninghub/image_flux.json
# Local ComfyUI configuration (required if using selfhost workflows)
comfyui_url: http://127.0.0.1:8188
# RunningHub cloud configuration (required if using runninghub workflows)
runninghub_api_key: ""
# Image prompt prefix (optional) # Image prompt prefix (optional)
prompt_prefix: "Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines" prompt_prefix: "Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines"

View File

@@ -17,7 +17,7 @@ Usage:
if config_manager.validate(): if config_manager.validate():
print("Config is valid!") print("Config is valid!")
""" """
from .schema import ReelForgeConfig, LLMConfig, TTSConfig, ImageConfig from .schema import ReelForgeConfig, LLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig
from .manager import ConfigManager from .manager import ConfigManager
from .loader import load_config_dict, save_config_dict from .loader import load_config_dict, save_config_dict
@@ -27,8 +27,9 @@ config_manager = ConfigManager()
__all__ = [ __all__ = [
"ReelForgeConfig", "ReelForgeConfig",
"LLMConfig", "LLMConfig",
"TTSConfig", "ComfyUIConfig",
"ImageConfig", "TTSSubConfig",
"ImageSubConfig",
"ConfigManager", "ConfigManager",
"config_manager", "config_manager",
"load_config_dict", "load_config_dict",

View File

@@ -93,21 +93,26 @@ class ConfigManager:
} }
}) })
def get_image_config(self) -> dict: def get_comfyui_config(self) -> dict:
"""Get image configuration as dict""" """Get ComfyUI configuration as dict"""
return { return {
"default_workflow": self.config.image.default_workflow, "comfyui_url": self.config.comfyui.comfyui_url,
"comfyui_url": self.config.image.comfyui_url, "runninghub_api_key": self.config.comfyui.runninghub_api_key,
"runninghub_api_key": self.config.image.runninghub_api_key, "tts": {
"prompt_prefix": self.config.image.prompt_prefix, "default_workflow": self.config.comfyui.tts.default_workflow,
},
"image": {
"default_workflow": self.config.comfyui.image.default_workflow,
"prompt_prefix": self.config.comfyui.image.prompt_prefix,
}
} }
def set_image_config( def set_comfyui_config(
self, self,
comfyui_url: Optional[str] = None, comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None runninghub_api_key: Optional[str] = None
): ):
"""Set image configuration""" """Set ComfyUI global configuration"""
updates = {} updates = {}
if comfyui_url is not None: if comfyui_url is not None:
updates["comfyui_url"] = comfyui_url updates["comfyui_url"] = comfyui_url
@@ -115,5 +120,5 @@ class ConfigManager:
updates["runninghub_api_key"] = runninghub_api_key updates["runninghub_api_key"] = runninghub_api_key
if updates: if updates:
self.update({"image": updates}) self.update({"comfyui": updates})

View File

@@ -13,32 +13,37 @@ class LLMConfig(BaseModel):
model: str = Field(default="", description="LLM Model Name") model: str = Field(default="", description="LLM Model Name")
class TTSConfig(BaseModel): class TTSSubConfig(BaseModel):
"""TTS configuration""" """TTS-specific configuration (under comfyui.tts)"""
model_config = {"populate_by_name": True} # Allow both field name and alias model_config = {"populate_by_name": True} # Allow both field name and alias
default_workflow: str = Field(default="edge", description="Default TTS workflow", alias="default") default_workflow: str = Field(default=None, description="Default TTS workflow (required, no fallback)", alias="default")
class ImageConfig(BaseModel): class ImageSubConfig(BaseModel):
"""Image generation configuration""" """Image-specific configuration (under comfyui.image)"""
model_config = {"populate_by_name": True} # Allow both field name and alias model_config = {"populate_by_name": True} # Allow both field name and alias
default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)", alias="default") default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)", alias="default")
comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL")
runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)")
prompt_prefix: str = Field( prompt_prefix: str = Field(
default="Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines", default="Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines",
description="Prompt prefix for all image generation" description="Prompt prefix for all image generation"
) )
class ComfyUIConfig(BaseModel):
"""ComfyUI configuration (includes global settings and service-specific configs)"""
comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL")
runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)")
tts: TTSSubConfig = Field(default_factory=TTSSubConfig, description="TTS-specific configuration")
image: ImageSubConfig = Field(default_factory=ImageSubConfig, description="Image-specific configuration")
class ReelForgeConfig(BaseModel): class ReelForgeConfig(BaseModel):
"""ReelForge main configuration""" """ReelForge main configuration"""
project_name: str = Field(default="ReelForge", description="Project name") project_name: str = Field(default="ReelForge", description="Project name")
llm: LLMConfig = Field(default_factory=LLMConfig) llm: LLMConfig = Field(default_factory=LLMConfig)
tts: TTSConfig = Field(default_factory=TTSConfig) comfyui: ComfyUIConfig = Field(default_factory=ComfyUIConfig)
image: ImageConfig = Field(default_factory=ImageConfig)
def is_llm_configured(self) -> bool: def is_llm_configured(self) -> bool:
"""Check if LLM is properly configured""" """Check if LLM is properly configured"""

View File

@@ -19,7 +19,7 @@ class ComfyBaseService:
Subclasses should define: Subclasses should define:
- WORKFLOW_PREFIX: Prefix for workflow files (e.g., "image_", "tts_") - WORKFLOW_PREFIX: Prefix for workflow files (e.g., "image_", "tts_")
- DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_default.json") - DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_flux.json")
- WORKFLOWS_DIR: Directory containing workflows (default: "workflows") - WORKFLOWS_DIR: Directory containing workflows (default: "workflows")
""" """
@@ -35,7 +35,13 @@ class ComfyBaseService:
config: Full application config dict config: Full application config dict
service_name: Service name in config (e.g., "tts", "image") service_name: Service name in config (e.g., "tts", "image")
""" """
self.config = config.get(service_name, {}) # Service-specific config (e.g., config["comfyui"]["tts"])
comfyui_config = config.get("comfyui", {})
self.config = comfyui_config.get(service_name, {})
# Global ComfyUI config (for comfyui_url and runninghub_api_key)
self.global_config = comfyui_config
self.service_name = service_name self.service_name = service_name
self._workflows_cache: Optional[List[str]] = None self._workflows_cache: Optional[List[str]] = None
@@ -47,18 +53,18 @@ class ComfyBaseService:
List of workflow info dicts List of workflow info dicts
Example: [ Example: [
{ {
"name": "image_default.json", "name": "image_flux.json",
"display_name": "image_default.json - Selfhost", "display_name": "image_flux.json - Selfhost",
"source": "selfhost", "source": "selfhost",
"path": "workflows/selfhost/image_default.json", "path": "workflows/selfhost/image_flux.json",
"key": "selfhost/image_default.json" "key": "selfhost/image_flux.json"
}, },
{ {
"name": "image_default.json", "name": "image_flux.json",
"display_name": "image_default.json - Runninghub", "display_name": "image_flux.json - Runninghub",
"source": "runninghub", "source": "runninghub",
"path": "workflows/runninghub/image_default.json", "path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_default.json", "key": "runninghub/image_flux.json",
"workflow_id": "123456" "workflow_id": "123456"
} }
] ]
@@ -101,11 +107,11 @@ class ComfyBaseService:
Returns: Returns:
Workflow info dict with structure: Workflow info dict with structure:
{ {
"name": "image_default.json", "name": "image_flux.json",
"display_name": "image_default.json - Runninghub", "display_name": "image_flux.json - Runninghub",
"source": "runninghub", "source": "runninghub",
"path": "workflows/runninghub/image_default.json", "path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_default.json", "key": "runninghub/image_flux.json",
"workflow_id": "123456" # Only for RunningHub "workflow_id": "123456" # Only for RunningHub
} }
""" """
@@ -134,7 +140,7 @@ class ComfyBaseService:
Get default workflow from config (required, no fallback) Get default workflow from config (required, no fallback)
Returns: Returns:
Default workflow key (e.g., "runninghub/image_default.json") Default workflow key (e.g., "runninghub/image_flux.json")
Raises: Raises:
ValueError: If default_workflow not configured ValueError: If default_workflow not configured
@@ -155,17 +161,17 @@ class ComfyBaseService:
Resolve workflow key to workflow info Resolve workflow key to workflow info
Args: Args:
workflow: Workflow key (e.g., "runninghub/image_default.json") workflow: Workflow key (e.g., "runninghub/image_flux.json")
If None, uses default from config If None, uses default from config
Returns: Returns:
Workflow info dict with structure: Workflow info dict with structure:
{ {
"name": "image_default.json", "name": "image_flux.json",
"display_name": "image_default.json - Runninghub", "display_name": "image_flux.json - Runninghub",
"source": "runninghub", "source": "runninghub",
"path": "workflows/runninghub/image_default.json", "path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_default.json", "key": "runninghub/image_flux.json",
"workflow_id": "123456" # Only for RunningHub "workflow_id": "123456" # Only for RunningHub
} }
@@ -210,19 +216,19 @@ class ComfyBaseService:
""" """
kit_config = {} kit_config = {}
# ComfyUI URL (priority: param > config > env > default) # ComfyUI URL (priority: param > global config > env > default)
final_comfyui_url = ( final_comfyui_url = (
comfyui_url comfyui_url
or self.config.get("comfyui_url") or self.global_config.get("comfyui_url")
or os.getenv("COMFYUI_BASE_URL") or os.getenv("COMFYUI_BASE_URL")
or "http://127.0.0.1:8188" or "http://127.0.0.1:8188"
) )
kit_config["comfyui_url"] = final_comfyui_url kit_config["comfyui_url"] = final_comfyui_url
# RunningHub API key (priority: param > config > env) # RunningHub API key (priority: param > global config > env)
final_rh_key = ( final_rh_key = (
runninghub_api_key runninghub_api_key
or self.config.get("runninghub_api_key") or self.global_config.get("runninghub_api_key")
or os.getenv("RUNNINGHUB_API_KEY") or os.getenv("RUNNINGHUB_API_KEY")
) )
if final_rh_key: if final_rh_key:
@@ -242,11 +248,11 @@ class ComfyBaseService:
workflows = service.list_workflows() workflows = service.list_workflows()
# [ # [
# { # {
# "name": "image_default.json", # "name": "image_flux.json",
# "display_name": "image_default.json - Runninghub", # "display_name": "image_flux.json - Runninghub",
# "source": "runninghub", # "source": "runninghub",
# "path": "workflows/runninghub/image_default.json", # "path": "workflows/runninghub/image_flux.json",
# "key": "runninghub/image_default.json", # "key": "runninghub/image_flux.json",
# "workflow_id": "123456" # "workflow_id": "123456"
# }, # },
# ... # ...
@@ -260,7 +266,7 @@ class ComfyBaseService:
List available workflow keys List available workflow keys
Returns: Returns:
List of available workflow keys (e.g., ["runninghub/image_default.json", ...]) List of available workflow keys (e.g., ["runninghub/image_flux.json", ...])
Example: Example:
print(f"Available workflows: {service.available}") print(f"Available workflows: {service.available}")

View File

@@ -17,7 +17,7 @@ class ImageService(ComfyBaseService):
Uses ComfyKit to execute image generation workflows. Uses ComfyKit to execute image generation workflows.
Usage: Usage:
# Use default workflow (workflows/image_default.json) # Use default workflow (workflows/image_flux.json)
image_url = await reelforge.image(prompt="a cat") image_url = await reelforge.image(prompt="a cat")
# Use specific workflow # Use specific workflow
@@ -65,7 +65,7 @@ class ImageService(ComfyBaseService):
Args: Args:
prompt: Image generation prompt prompt: Image generation prompt
workflow: Workflow filename (default: from config or "image_default.json") workflow: Workflow filename (default: from config or "image_flux.json")
comfyui_url: ComfyUI URL (optional, overrides config) comfyui_url: ComfyUI URL (optional, overrides config)
runninghub_api_key: RunningHub API key (optional, overrides config) runninghub_api_key: RunningHub API key (optional, overrides config)
width: Image width width: Image width
@@ -81,7 +81,7 @@ class ImageService(ComfyBaseService):
Generated image URL/path Generated image URL/path
Examples: Examples:
# Simplest: use default workflow (workflows/image_default.json) # Simplest: use default workflow (workflows/image_flux.json)
image_url = await reelforge.image(prompt="a beautiful cat") image_url = await reelforge.image(prompt="a beautiful cat")
# Use specific workflow # Use specific workflow

View File

@@ -1,39 +1,29 @@
""" """
TTS (Text-to-Speech) Service - Dual implementation (Edge TTS + ComfyUI) TTS (Text-to-Speech) Service - ComfyUI Workflow-based implementation
""" """
import uuid
from typing import Optional from typing import Optional
from comfykit import ComfyKit from comfykit import ComfyKit
from loguru import logger from loguru import logger
from reelforge.services.comfy_base_service import ComfyBaseService from reelforge.services.comfy_base_service import ComfyBaseService
from reelforge.utils.os_util import get_temp_path
class TTSService(ComfyBaseService): class TTSService(ComfyBaseService):
""" """
TTS (Text-to-Speech) service - Dual implementation TTS (Text-to-Speech) service - Workflow-based
Supports two TTS methods: Uses ComfyKit to execute TTS workflows.
1. Edge TTS (default) - Free, local SDK, no workflow needed
2. ComfyUI Workflow - Workflow-based, requires ComfyUI setup
Usage: Usage:
# Use default (edge-tts) # Use default workflow
audio_path = await reelforge.tts(text="Hello, world!") audio_path = await reelforge.tts(text="Hello, world!")
# Explicitly use edge-tts # Use specific workflow
audio_path = await reelforge.tts( audio_path = await reelforge.tts(
text="你好,世界!", text="你好,世界!",
workflow="edge" workflow="tts_edge.json"
)
# Use ComfyUI workflow
audio_path = await reelforge.tts(
text="Hello",
workflow="tts_comfyui.json"
) )
# List available workflows # List available workflows
@@ -41,12 +31,9 @@ class TTSService(ComfyBaseService):
""" """
WORKFLOW_PREFIX = "tts_" WORKFLOW_PREFIX = "tts_"
DEFAULT_WORKFLOW = "edge" # Default to edge-tts DEFAULT_WORKFLOW = None # No hardcoded default, must be configured
WORKFLOWS_DIR = "workflows" WORKFLOWS_DIR = "workflows"
# Built-in providers (not workflow files)
BUILTIN_PROVIDERS = ["edge", "edge-tts"]
def __init__(self, config: dict): def __init__(self, config: dict):
""" """
Initialize TTS service Initialize TTS service
@@ -56,81 +43,53 @@ class TTSService(ComfyBaseService):
""" """
super().__init__(config, service_name="tts") super().__init__(config, service_name="tts")
def _resolve_workflow(self, workflow: Optional[str] = None) -> str:
"""
Resolve workflow to actual workflow path or provider name
Args:
workflow: Workflow filename or provider name (e.g., "edge", "tts_default.json")
Returns:
Workflow file path or provider name
"""
# 1. If not specified, use default
if workflow is None:
workflow = self._get_default_workflow()
# 2. If it's a built-in provider, return as-is
if workflow in self.BUILTIN_PROVIDERS:
logger.debug(f"Using built-in TTS provider: {workflow}")
return workflow
# 3. Otherwise, treat as workflow file (use parent logic)
return super()._resolve_workflow(workflow)
async def __call__( async def __call__(
self, self,
text: str, text: str,
workflow: Optional[str] = None, workflow: Optional[str] = None,
# ComfyUI connection (optional overrides, only for workflow mode) # ComfyUI connection (optional overrides)
comfyui_url: Optional[str] = None, comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None, runninghub_api_key: Optional[str] = None,
# Common TTS parameters (work for both edge-tts and workflows) # TTS parameters
voice: Optional[str] = None, voice: Optional[str] = None,
rate: Optional[str] = None, speed: float = 1.0,
volume: Optional[str] = None,
pitch: Optional[str] = None,
# Output path # Output path
output_path: Optional[str] = None, output_path: Optional[str] = None,
**params **params
) -> str: ) -> str:
""" """
Generate speech using edge-tts or ComfyUI workflow Generate speech using ComfyUI workflow
Args: Args:
text: Text to convert to speech text: Text to convert to speech
workflow: Workflow filename or provider name (default: "edge") workflow: Workflow filename (default: from config)
- "edge" or "edge-tts": Use local edge-tts SDK comfyui_url: ComfyUI URL (optional, overrides config)
- "tts_xxx.json": Use ComfyUI workflow runninghub_api_key: RunningHub API key (optional, overrides config)
- Absolute path/URL/RunningHub ID: Also supported voice: Voice ID (workflow-specific)
comfyui_url: ComfyUI URL (only for workflow mode) speed: Speech speed multiplier (1.0 = normal, >1.0 = faster, <1.0 = slower)
runninghub_api_key: RunningHub API key (only for workflow mode)
voice: Voice ID
rate: Speech rate (e.g., "+0%", "+50%", "-20%")
volume: Speech volume (e.g., "+0%")
pitch: Speech pitch (e.g., "+0Hz")
output_path: Custom output path (auto-generated if None) output_path: Custom output path (auto-generated if None)
**params: Additional parameters **params: Additional workflow parameters
Returns: Returns:
Generated audio file path Generated audio file path
Examples: Examples:
# Simplest: use default (edge-tts) # Simplest: use default workflow
audio_path = await reelforge.tts(text="Hello, world!") audio_path = await reelforge.tts(text="Hello, world!")
# Explicitly use edge-tts with parameters # Use specific workflow
audio_path = await reelforge.tts( audio_path = await reelforge.tts(
text="你好,世界!", text="你好,世界!",
workflow="edge", workflow="tts_edge.json"
voice="zh-CN-XiaoxiaoNeural",
rate="+20%"
) )
# Use ComfyUI workflow # With voice and speed
audio_path = await reelforge.tts( audio_path = await reelforge.tts(
text="Hello", text="Hello",
workflow="tts_default.json" workflow="tts_edge.json",
voice="zh-CN-XiaoxiaoNeural",
speed=1.2
) )
# With absolute path # With absolute path
@@ -138,92 +97,28 @@ class TTSService(ComfyBaseService):
text="Hello", text="Hello",
workflow="/path/to/custom_tts.json" workflow="/path/to/custom_tts.json"
) )
"""
# 1. Check if it's a builtin provider (edge-tts)
if workflow in self.BUILTIN_PROVIDERS or workflow is None and self._get_default_workflow() in self.BUILTIN_PROVIDERS:
# Use edge-tts
return await self._call_edge_tts(
text=text,
voice=voice,
rate=rate,
volume=volume,
pitch=pitch,
output_path=output_path,
**params
)
# 2. Use ComfyUI workflow - resolve to structured info # With custom ComfyUI server
audio_path = await reelforge.tts(
text="Hello",
comfyui_url="http://192.168.1.100:8188"
)
"""
# 1. Resolve workflow (returns structured info)
workflow_info = self._resolve_workflow(workflow=workflow) workflow_info = self._resolve_workflow(workflow=workflow)
# 2. Execute ComfyUI workflow
return await self._call_comfyui_workflow( return await self._call_comfyui_workflow(
workflow_info=workflow_info, workflow_info=workflow_info,
text=text, text=text,
comfyui_url=comfyui_url, comfyui_url=comfyui_url,
runninghub_api_key=runninghub_api_key, runninghub_api_key=runninghub_api_key,
voice=voice, voice=voice,
rate=rate, speed=speed,
volume=volume,
pitch=pitch,
output_path=output_path, output_path=output_path,
**params **params
) )
async def _call_edge_tts(
self,
text: str,
voice: Optional[str] = None,
rate: Optional[str] = None,
volume: Optional[str] = None,
pitch: Optional[str] = None,
output_path: Optional[str] = None,
**params
) -> str:
"""
Generate speech using edge-tts SDK
Args:
text: Text to convert to speech
voice: Voice ID (default: zh-CN-YunjianNeural)
rate: Speech rate (default: +0%)
volume: Speech volume (default: +0%)
pitch: Speech pitch (default: +0Hz)
output_path: Custom output path (auto-generated if None)
**params: Additional parameters (e.g., retry_count, retry_delay)
Returns:
Generated audio file path
"""
from reelforge.utils.tts_util import edge_tts
logger.info(f"🎙️ Using edge-tts (local SDK)")
# Generate output path (use provided path or auto-generate)
if output_path is None:
output_path = get_temp_path(f"{uuid.uuid4().hex}.mp3")
else:
# Ensure parent directory exists
import os
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Call edge-tts with output_path to save directly
try:
audio_bytes = await edge_tts(
text=text,
voice=voice or "zh-CN-YunjianNeural",
rate=rate or "+0%",
volume=volume or "+0%",
pitch=pitch or "+0Hz",
output_path=output_path,
**params
)
logger.info(f"✅ Generated audio (edge-tts): {output_path}")
return output_path
except Exception as e:
logger.error(f"Edge TTS generation error: {e}")
raise
async def _call_comfyui_workflow( async def _call_comfyui_workflow(
self, self,
workflow_info: dict, workflow_info: dict,
@@ -231,9 +126,7 @@ class TTSService(ComfyBaseService):
comfyui_url: Optional[str] = None, comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None, runninghub_api_key: Optional[str] = None,
voice: Optional[str] = None, voice: Optional[str] = None,
rate: Optional[str] = None, speed: float = 1.0,
volume: Optional[str] = None,
pitch: Optional[str] = None,
output_path: Optional[str] = None, output_path: Optional[str] = None,
**params **params
) -> str: ) -> str:
@@ -246,9 +139,7 @@ class TTSService(ComfyBaseService):
comfyui_url: ComfyUI URL comfyui_url: ComfyUI URL
runninghub_api_key: RunningHub API key runninghub_api_key: RunningHub API key
voice: Voice ID (workflow-specific) voice: Voice ID (workflow-specific)
rate: Speech rate (workflow-specific) speed: Speech speed multiplier (workflow-specific)
volume: Speech volume (workflow-specific)
pitch: Speech pitch (workflow-specific)
output_path: Custom output path (downloads if URL returned) output_path: Custom output path (downloads if URL returned)
**params: Additional workflow parameters **params: Additional workflow parameters
@@ -269,12 +160,8 @@ class TTSService(ComfyBaseService):
# Add optional TTS parameters # Add optional TTS parameters
if voice is not None: if voice is not None:
workflow_params["voice"] = voice workflow_params["voice"] = voice
if rate is not None: if speed != 1.0:
workflow_params["rate"] = rate workflow_params["speed"] = speed
if volume is not None:
workflow_params["volume"] = volume
if pitch is not None:
workflow_params["pitch"] = pitch
# Add any additional parameters # Add any additional parameters
workflow_params.update(params) workflow_params.update(params)

View File

@@ -100,11 +100,11 @@ def render_advanced_settings():
# Expand if not configured, collapse if configured # Expand if not configured, collapse if configured
with st.expander(tr("settings.title"), expanded=not is_configured): with st.expander(tr("settings.title"), expanded=not is_configured):
# 2-column layout: LLM | Image # 2-column layout: LLM | ComfyUI
llm_col, image_col = st.columns(2) llm_col, comfyui_col = st.columns(2)
# ==================================================================== # ====================================================================
# Column 1: LLM Settings (Simplified 3-field format) # Column 1: LLM Settings
# ==================================================================== # ====================================================================
with llm_col: with llm_col:
with st.container(border=True): with st.container(border=True):
@@ -195,21 +195,21 @@ def render_advanced_settings():
) )
# ==================================================================== # ====================================================================
# Column 2: Image Settings # Column 2: ComfyUI Settings
# ==================================================================== # ====================================================================
with image_col: with comfyui_col:
with st.container(border=True): with st.container(border=True):
st.markdown(f"**{tr('settings.image.title')}**") st.markdown(f"**{tr('settings.comfyui.title')}**")
# Get current configuration # Get current configuration
image_config = config_manager.get_image_config() comfyui_config = config_manager.get_comfyui_config()
# Local/Self-hosted ComfyUI configuration # Local/Self-hosted ComfyUI configuration
st.markdown(f"**{tr('settings.image.local_title')}**") st.markdown(f"**{tr('settings.comfyui.local_title')}**")
comfyui_url = st.text_input( comfyui_url = st.text_input(
tr("settings.image.comfyui_url"), tr("settings.comfyui.comfyui_url"),
value=image_config.get("comfyui_url", "http://127.0.0.1:8188"), value=comfyui_config.get("comfyui_url", "http://127.0.0.1:8188"),
help=tr("settings.image.comfyui_url_help"), help=tr("settings.comfyui.comfyui_url_help"),
key="comfyui_url_input" key="comfyui_url_input"
) )
@@ -228,12 +228,12 @@ def render_advanced_settings():
st.markdown("---") st.markdown("---")
# RunningHub cloud configuration # RunningHub cloud configuration
st.markdown(f"**{tr('settings.image.cloud_title')}**") st.markdown(f"**{tr('settings.comfyui.cloud_title')}**")
runninghub_api_key = st.text_input( runninghub_api_key = st.text_input(
tr("settings.image.runninghub_api_key"), tr("settings.comfyui.runninghub_api_key"),
value=image_config.get("runninghub_api_key", ""), value=comfyui_config.get("runninghub_api_key", ""),
type="password", type="password",
help=tr("settings.image.runninghub_api_key_help"), help=tr("settings.comfyui.runninghub_api_key_help"),
key="runninghub_api_key_input" key="runninghub_api_key_input"
) )
@@ -250,8 +250,8 @@ def render_advanced_settings():
if llm_api_key and llm_base_url and llm_model: if llm_api_key and llm_base_url and llm_model:
config_manager.set_llm_config(llm_api_key, llm_base_url, llm_model) config_manager.set_llm_config(llm_api_key, llm_base_url, llm_model)
# Save Image configuration # Save ComfyUI configuration
config_manager.set_image_config( config_manager.set_comfyui_config(
comfyui_url=comfyui_url if comfyui_url else None, comfyui_url=comfyui_url if comfyui_url else None,
runninghub_api_key=runninghub_api_key if runninghub_api_key else None runninghub_api_key=runninghub_api_key if runninghub_api_key else None
) )
@@ -380,53 +380,12 @@ def main():
st.info(tr("video.frames_fixed_mode_hint")) st.info(tr("video.frames_fixed_mode_hint"))
# ==================================================================== # ====================================================================
# Audio Settings (Voice + BGM) # Audio Settings (BGM + TTS)
# ==================================================================== # ====================================================================
with st.container(border=True): with st.container(border=True):
st.markdown(f"**{tr('section.audio_settings')}**") st.markdown(f"**{tr('section.audio_settings')}**")
# Voice selection # Background music (moved to top)
st.markdown(f"**{tr('voice.title')}**")
voice_id = st.selectbox(
"Voice",
[
"zh-CN-YunjianNeural", # 男声-专业
"zh-CN-YunxiNeural", # 男声-年轻
"zh-CN-XiaoxiaoNeural", # 女声-温柔
"zh-CN-XiaoyiNeural", # 女声-活力
],
format_func=lambda x: {
"zh-CN-YunjianNeural": tr("voice.male_professional"),
"zh-CN-YunxiNeural": tr("voice.male_young"),
"zh-CN-XiaoxiaoNeural": tr("voice.female_gentle"),
"zh-CN-XiaoyiNeural": tr("voice.female_energetic"),
}[x],
label_visibility="collapsed"
)
# Voice preview button
if st.button(tr("voice.preview"), key="preview_voice", use_container_width=True):
with st.spinner(tr("voice.previewing")):
try:
# Generate preview audio
preview_text = "大家好,这是一段测试语音。"
# Use TTS service to generate audio (auto temp path)
audio_path = run_async(reelforge.tts(
text=preview_text,
voice=voice_id
))
# Play the audio
if os.path.exists(audio_path):
st.audio(audio_path, format="audio/mp3")
else:
st.error("Failed to generate preview audio")
except Exception as e:
st.error(tr("voice.preview_failed", error=str(e)))
logger.exception(e)
# Background music
st.markdown(f"**{tr('bgm.title')}**") st.markdown(f"**{tr('bgm.title')}**")
st.caption(tr("bgm.custom_help")) st.caption(tr("bgm.custom_help"))
@@ -466,6 +425,78 @@ def main():
# Use full filename for bgm_path (including extension) # Use full filename for bgm_path (including extension)
bgm_path = None if bgm_choice == tr("bgm.none") else bgm_choice bgm_path = None if bgm_choice == tr("bgm.none") else bgm_choice
# TTS Workflow selection
st.markdown(f"**{tr('tts.title')}**")
st.caption(tr("tts.workflow_help"))
# Get available TTS workflows
tts_workflows = reelforge.tts.list_workflows()
# Build options for selectbox
tts_workflow_options = [wf["display_name"] for wf in tts_workflows]
tts_workflow_keys = [wf["key"] for wf in tts_workflows]
# Default to saved workflow if exists
default_tts_index = 0
comfyui_config = config_manager.get_comfyui_config()
saved_tts_workflow = comfyui_config["tts"]["default_workflow"]
if saved_tts_workflow and saved_tts_workflow in tts_workflow_keys:
default_tts_index = tts_workflow_keys.index(saved_tts_workflow)
tts_workflow_display = st.selectbox(
"TTS Workflow",
tts_workflow_options if tts_workflow_options else ["No TTS workflows found"],
index=default_tts_index,
label_visibility="collapsed",
key="tts_workflow_select"
)
# Get the actual workflow key
if tts_workflow_options:
tts_selected_index = tts_workflow_options.index(tts_workflow_display)
tts_workflow_key = tts_workflow_keys[tts_selected_index]
else:
tts_workflow_key = "selfhost/tts_edge.json" # fallback
# TTS preview expander (similar to image preview)
with st.expander(tr("tts.preview_title"), expanded=False):
# Preview text input
preview_text = st.text_input(
tr("tts.preview_text"),
value="大家好,这是一段测试语音。",
placeholder=tr("tts.preview_text_placeholder"),
key="tts_preview_text"
)
# Preview button
if st.button(tr("tts.preview_button"), key="preview_tts", use_container_width=True):
with st.spinner(tr("tts.previewing")):
try:
# Generate preview audio using selected workflow
audio_path = run_async(reelforge.tts(
text=preview_text,
workflow=tts_workflow_key
))
# Play the audio
if audio_path:
st.success(tr("tts.preview_success"))
if os.path.exists(audio_path):
st.audio(audio_path, format="audio/mp3")
elif audio_path.startswith('http'):
st.audio(audio_path)
else:
st.error("Failed to generate preview audio")
# Show file path
st.caption(f"📁 {audio_path}")
else:
st.error("Failed to generate preview audio")
except Exception as e:
st.error(tr("tts.preview_failed", error=str(e)))
logger.exception(e)
# ======================================================================== # ========================================================================
# Middle Column: Visual Settings (Style & Template) # Middle Column: Visual Settings (Style & Template)
# ======================================================================== # ========================================================================
@@ -484,8 +515,8 @@ def main():
workflows = reelforge.image.list_workflows() workflows = reelforge.image.list_workflows()
# Build options for selectbox # Build options for selectbox
# Display: "image_default.json - Runninghub" # Display: "image_flux.json - Runninghub"
# Value: "runninghub/image_default.json" # Value: "runninghub/image_flux.json"
workflow_options = [wf["display_name"] for wf in workflows] workflow_options = [wf["display_name"] for wf in workflows]
workflow_keys = [wf["key"] for wf in workflows] workflow_keys = [wf["key"] for wf in workflows]
@@ -493,8 +524,8 @@ def main():
default_workflow_index = 0 default_workflow_index = 0
# If user has a saved preference in config, try to match it # If user has a saved preference in config, try to match it
image_config = config_manager.get_image_config() comfyui_config = config_manager.get_comfyui_config()
saved_workflow = image_config.get("default_workflow") saved_workflow = comfyui_config["image"]["default_workflow"]
if saved_workflow and saved_workflow in workflow_keys: if saved_workflow and saved_workflow in workflow_keys:
default_workflow_index = workflow_keys.index(saved_workflow) default_workflow_index = workflow_keys.index(saved_workflow)
@@ -506,20 +537,19 @@ def main():
key="image_workflow_select" key="image_workflow_select"
) )
# Get the actual workflow key (e.g., "runninghub/image_default.json") # Get the actual workflow key (e.g., "runninghub/image_flux.json")
if workflow_options: if workflow_options:
workflow_selected_index = workflow_options.index(workflow_display) workflow_selected_index = workflow_options.index(workflow_display)
workflow_key = workflow_keys[workflow_selected_index] workflow_key = workflow_keys[workflow_selected_index]
else: else:
workflow_key = "runninghub/image_default.json" # fallback workflow_key = "runninghub/image_flux.json" # fallback
# 2. Prompt prefix input # 2. Prompt prefix input
st.caption(tr("style.prompt_prefix")) st.caption(tr("style.prompt_prefix"))
# Get current prompt_prefix from config # Get current prompt_prefix from config
image_config = config_manager.get_image_config() current_prefix = comfyui_config["image"]["prompt_prefix"]
current_prefix = image_config.get("prompt_prefix", "")
# Prompt prefix input (temporary, not saved to config) # Prompt prefix input (temporary, not saved to config)
prompt_prefix = st.text_area( prompt_prefix = st.text_area(
@@ -757,8 +787,8 @@ def main():
mode=mode, mode=mode,
title=title if title else None, title=title if title else None,
n_scenes=n_scenes, n_scenes=n_scenes,
voice_id=voice_id, tts_workflow=tts_workflow_key, # Pass TTS workflow key
image_workflow=workflow_key, # Pass workflow key (e.g., "runninghub/image_default.json") image_workflow=workflow_key, # Pass workflow key (e.g., "runninghub/image_flux.json")
frame_template=frame_template, frame_template=frame_template,
prompt_prefix=prompt_prefix, # Pass prompt_prefix prompt_prefix=prompt_prefix, # Pass prompt_prefix
bgm_path=bgm_path, bgm_path=bgm_path,

View File

@@ -58,7 +58,7 @@
"style.title": "🎨 Image Settings", "style.title": "🎨 Image Settings",
"style.workflow": "ComfyUI Workflow", "style.workflow": "ComfyUI Workflow",
"style.workflow_help": "💡 Custom: Place image_xxx.json in workflows/ folder", "style.workflow_help": "💡 Custom: Place image_xxx.json in workflows/selfhost/ or workflows/runninghub/ folder",
"style.prompt_prefix": "Style Prompt Prefix", "style.prompt_prefix": "Style Prompt Prefix",
"style.prompt_prefix_placeholder": "Enter style prefix (leave empty for config default)", "style.prompt_prefix_placeholder": "Enter style prefix (leave empty for config default)",
"style.prompt_prefix_help": "This text will be automatically added before all image generation prompts. To permanently change, edit config.yaml", "style.prompt_prefix_help": "This text will be automatically added before all image generation prompts. To permanently change, edit config.yaml",
@@ -163,18 +163,24 @@
"settings.llm.model": "Model", "settings.llm.model": "Model",
"settings.llm.model_help": "Model name", "settings.llm.model_help": "Model name",
"settings.tts.title": "🎤 Text-to-Speech", "settings.comfyui.title": "🔧 ComfyUI Configuration",
"settings.tts.provider": "Provider", "settings.comfyui.local_title": "Local/Self-hosted ComfyUI",
"settings.tts.provider_help": "Select TTS service provider", "settings.comfyui.cloud_title": "RunningHub Cloud",
"settings.tts.edge_info": "💡 Edge TTS is free and requires no configuration", "settings.comfyui.comfyui_url": "ComfyUI Server URL",
"settings.comfyui.comfyui_url_help": "Local or remote ComfyUI server address",
"settings.comfyui.runninghub_api_key": "RunningHub API Key",
"settings.comfyui.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key",
"settings.image.title": "🎨 Image Generation", "tts.title": "🎤 TTS Workflow",
"settings.image.local_title": "Local/Self-hosted ComfyUI", "tts.workflow": "TTS Workflow",
"settings.image.cloud_title": "RunningHub Cloud", "tts.workflow_help": "💡 Custom: Place tts_xxx.json in workflows/selfhost/ or workflows/runninghub/ folder",
"settings.image.comfyui_url": "ComfyUI Service URL", "tts.preview_title": "🔍 Preview TTS",
"settings.image.comfyui_url_help": "Local or remote ComfyUI service URL, default: http://127.0.0.1:8188", "tts.preview_text": "Preview Text",
"settings.image.runninghub_api_key": "RunningHub API Key", "tts.preview_text_placeholder": "Enter text to preview...",
"settings.image.runninghub_api_key_help": "Visit https://runninghub.ai to register and get API Key", "tts.preview_button": "🔊 Generate Preview",
"tts.previewing": "Generating TTS preview...",
"tts.preview_success": "✅ Preview generated successfully!",
"tts.preview_failed": "❌ Preview failed: {error}",
"settings.book.title": "📚 Book Information", "settings.book.title": "📚 Book Information",
"settings.book.provider": "Provider", "settings.book.provider": "Provider",

View File

@@ -58,7 +58,7 @@
"style.title": "🎨 插图设置", "style.title": "🎨 插图设置",
"style.workflow": "生图工作流", "style.workflow": "生图工作流",
"style.workflow_help": "💡 自定义:将 image_xxx.json 放入 workflows/ 文件夹", "style.workflow_help": "💡 自定义:将 image_xxx.json 放入 workflows/selfhost/ 或 workflows/runninghub/ 文件夹",
"style.prompt_prefix": "风格提示词前缀", "style.prompt_prefix": "风格提示词前缀",
"style.prompt_prefix_placeholder": "输入风格前缀(留空则使用配置文件默认值)", "style.prompt_prefix_placeholder": "输入风格前缀(留空则使用配置文件默认值)",
"style.prompt_prefix_help": "此文本将自动添加到所有图像生成提示词之前。要永久修改,请编辑 config.yaml", "style.prompt_prefix_help": "此文本将自动添加到所有图像生成提示词之前。要永久修改,请编辑 config.yaml",
@@ -163,18 +163,24 @@
"settings.llm.model": "Model", "settings.llm.model": "Model",
"settings.llm.model_help": "模型名称", "settings.llm.model_help": "模型名称",
"settings.tts.title": "🎤 语音合成", "settings.comfyui.title": "🔧 ComfyUI 配置",
"settings.tts.provider": "服务商", "settings.comfyui.local_title": "本地/自建 ComfyUI",
"settings.tts.provider_help": "选择 TTS 服务提供商", "settings.comfyui.cloud_title": "RunningHub 云端",
"settings.tts.edge_info": "💡 Edge TTS 是免费的,无需配置", "settings.comfyui.comfyui_url": "ComfyUI 服务器地址",
"settings.comfyui.comfyui_url_help": "本地或远程 ComfyUI 服务器地址",
"settings.comfyui.runninghub_api_key": "RunningHub API 密钥",
"settings.comfyui.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key",
"settings.image.title": "🎨 图像生成", "tts.title": "🎤 TTS 工作流",
"settings.image.local_title": "本地/自建 ComfyUI", "tts.workflow": "TTS 工作流",
"settings.image.cloud_title": "RunningHub 云端", "tts.workflow_help": "💡 自定义:将 tts_xxx.json 放入 workflows/selfhost/ 或 workflows/runninghub/ 文件夹",
"settings.image.comfyui_url": "ComfyUI 服务地址", "tts.preview_title": "🔍 预览 TTS",
"settings.image.comfyui_url_help": "本地或远程 ComfyUI 服务地址,默认: http://127.0.0.1:8188", "tts.preview_text": "预览文本",
"settings.image.runninghub_api_key": "RunningHub API Key", "tts.preview_text_placeholder": "输入要试听的文本...",
"settings.image.runninghub_api_key_help": "访问 https://runninghub.ai 注册并获取 API Key", "tts.preview_button": "🔊 生成预览",
"tts.previewing": "正在生成 TTS 预览...",
"tts.preview_success": "✅ 预览生成成功!",
"tts.preview_failed": "❌ 预览失败:{error}",
"settings.book.title": "📚 书籍信息", "settings.book.title": "📚 书籍信息",
"settings.book.provider": "服务商", "settings.book.provider": "服务商",

View File

@@ -0,0 +1,5 @@
{
"source": "runninghub",
"workflow_id": "1983427617984585729"
}

View File

@@ -0,0 +1,5 @@
{
"source": "runninghub",
"workflow_id": "1983513964837543938"
}

View File

@@ -0,0 +1,155 @@
{
"29": {
"inputs": {
"seed": 362283278588365,
"steps": 20,
"cfg": 1.5,
"sampler_name": "euler",
"scheduler": "simple",
"denoise": 1,
"model": [
"30",
0
],
"positive": [
"35",
0
],
"negative": [
"33",
0
],
"latent_image": [
"43",
0
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"30": {
"inputs": {
"ckpt_name": "flux1-dev-fp8.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"31": {
"inputs": {
"text": [
"46",
0
],
"clip": [
"30",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Prompt)"
}
},
"33": {
"inputs": {
"conditioning": [
"31",
0
]
},
"class_type": "ConditioningZeroOut",
"_meta": {
"title": "ConditioningZeroOut"
}
},
"35": {
"inputs": {
"guidance": 3.5,
"conditioning": [
"31",
0
]
},
"class_type": "FluxGuidance",
"_meta": {
"title": "FluxGuidance"
}
},
"36": {
"inputs": {
"filename_prefix": "ComfyUI",
"images": [
"37",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"37": {
"inputs": {
"samples": [
"29",
0
],
"vae": [
"30",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"41": {
"inputs": {
"value": 512
},
"class_type": "easy int",
"_meta": {
"title": "$width.value"
}
},
"42": {
"inputs": {
"value": 512
},
"class_type": "easy int",
"_meta": {
"title": "$height.value"
}
},
"43": {
"inputs": {
"width": [
"41",
0
],
"height": [
"42",
0
],
"batch_size": 1
},
"class_type": "EmptyLatentImage",
"_meta": {
"title": "Empty Latent Image"
}
},
"46": {
"inputs": {
"value": "a dog"
},
"class_type": "PrimitiveStringMultiline",
"_meta": {
"title": "$prompt.value!"
}
}
}

View File

@@ -0,0 +1,78 @@
{
"1": {
"inputs": {
"text": [
"3",
0
],
"voice": [
"5",
0
],
"speed": [
"8",
0
],
"pitch": 0
},
"class_type": "EdgeTTS",
"_meta": {
"title": "Edge TTS 🔊"
}
},
"3": {
"inputs": {
"value": "床前明月光,疑是地上霜。"
},
"class_type": "PrimitiveStringMultiline",
"_meta": {
"title": "$text.value!"
}
},
"4": {
"inputs": {
"filename_prefix": "audio/ComfyUI",
"quality": "V0",
"audioUI": "",
"audio": [
"1",
0
]
},
"class_type": "SaveAudioMP3",
"_meta": {
"title": "Save Audio (MP3)"
}
},
"5": {
"inputs": {
"text": "[Chinese] zh-CN Yunjian",
"anything": [
"7",
0
]
},
"class_type": "easy showAnything",
"_meta": {
"title": "Show Any"
}
},
"7": {
"inputs": {
"value": "[Chinese] zh-CN Yunjian"
},
"class_type": "PrimitiveStringMultiline",
"_meta": {
"title": "$voice.value"
}
},
"8": {
"inputs": {
"value": 1
},
"class_type": "easy float",
"_meta": {
"title": "$speed.value"
}
}
}