优化tts逻辑

This commit is contained in:
puke
2025-10-29 21:40:37 +08:00
parent 8c03bd1bcd
commit fb18adf318
16 changed files with 505 additions and 318 deletions

View File

@@ -17,7 +17,7 @@ Usage:
if config_manager.validate():
print("Config is valid!")
"""
from .schema import ReelForgeConfig, LLMConfig, TTSConfig, ImageConfig
from .schema import ReelForgeConfig, LLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig
from .manager import ConfigManager
from .loader import load_config_dict, save_config_dict
@@ -27,8 +27,9 @@ config_manager = ConfigManager()
__all__ = [
"ReelForgeConfig",
"LLMConfig",
"TTSConfig",
"ImageConfig",
"ComfyUIConfig",
"TTSSubConfig",
"ImageSubConfig",
"ConfigManager",
"config_manager",
"load_config_dict",

View File

@@ -93,21 +93,26 @@ class ConfigManager:
}
})
def get_image_config(self) -> dict:
"""Get image configuration as dict"""
def get_comfyui_config(self) -> dict:
"""Get ComfyUI configuration as dict"""
return {
"default_workflow": self.config.image.default_workflow,
"comfyui_url": self.config.image.comfyui_url,
"runninghub_api_key": self.config.image.runninghub_api_key,
"prompt_prefix": self.config.image.prompt_prefix,
"comfyui_url": self.config.comfyui.comfyui_url,
"runninghub_api_key": self.config.comfyui.runninghub_api_key,
"tts": {
"default_workflow": self.config.comfyui.tts.default_workflow,
},
"image": {
"default_workflow": self.config.comfyui.image.default_workflow,
"prompt_prefix": self.config.comfyui.image.prompt_prefix,
}
}
def set_image_config(
def set_comfyui_config(
self,
comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None
):
"""Set image configuration"""
"""Set ComfyUI global configuration"""
updates = {}
if comfyui_url is not None:
updates["comfyui_url"] = comfyui_url
@@ -115,5 +120,5 @@ class ConfigManager:
updates["runninghub_api_key"] = runninghub_api_key
if updates:
self.update({"image": updates})
self.update({"comfyui": updates})

View File

@@ -13,32 +13,37 @@ class LLMConfig(BaseModel):
model: str = Field(default="", description="LLM Model Name")
class TTSConfig(BaseModel):
"""TTS configuration"""
class TTSSubConfig(BaseModel):
"""TTS-specific configuration (under comfyui.tts)"""
model_config = {"populate_by_name": True} # Allow both field name and alias
default_workflow: str = Field(default="edge", description="Default TTS workflow", alias="default")
default_workflow: str = Field(default=None, description="Default TTS workflow (required, no fallback)", alias="default")
class ImageConfig(BaseModel):
"""Image generation configuration"""
class ImageSubConfig(BaseModel):
"""Image-specific configuration (under comfyui.image)"""
model_config = {"populate_by_name": True} # Allow both field name and alias
default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)", alias="default")
comfyui_url: str = Field(default="http://127.0.0.1:8188", description="ComfyUI Server URL")
runninghub_api_key: str = Field(default="", description="RunningHub API Key (optional)")
prompt_prefix: str = Field(
default="Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines",
description="Prompt prefix for all image generation"
)
class ComfyUIConfig(BaseModel):
    """Settings for the ComfyUI backend.

    Holds the connection settings declared at this level (server URL and
    RunningHub API key) together with the nested TTS and image sections.
    """

    # Connection settings declared once here rather than in each sub-section.
    comfyui_url: str = Field(
        default="http://127.0.0.1:8188",
        description="ComfyUI Server URL",
    )
    runninghub_api_key: str = Field(
        default="",
        description="RunningHub API Key (optional)",
    )

    # Per-service sections; each one is built from its own model's defaults.
    tts: TTSSubConfig = Field(
        default_factory=TTSSubConfig,
        description="TTS-specific configuration",
    )
    image: ImageSubConfig = Field(
        default_factory=ImageSubConfig,
        description="Image-specific configuration",
    )
class ReelForgeConfig(BaseModel):
"""ReelForge main configuration"""
project_name: str = Field(default="ReelForge", description="Project name")
llm: LLMConfig = Field(default_factory=LLMConfig)
tts: TTSConfig = Field(default_factory=TTSConfig)
image: ImageConfig = Field(default_factory=ImageConfig)
comfyui: ComfyUIConfig = Field(default_factory=ComfyUIConfig)
def is_llm_configured(self) -> bool:
"""Check if LLM is properly configured"""

View File

@@ -19,7 +19,7 @@ class ComfyBaseService:
Subclasses should define:
- WORKFLOW_PREFIX: Prefix for workflow files (e.g., "image_", "tts_")
- DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_default.json")
- DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_flux.json")
- WORKFLOWS_DIR: Directory containing workflows (default: "workflows")
"""
@@ -35,7 +35,13 @@ class ComfyBaseService:
config: Full application config dict
service_name: Service name in config (e.g., "tts", "image")
"""
self.config = config.get(service_name, {})
# Service-specific config (e.g., config["comfyui"]["tts"])
comfyui_config = config.get("comfyui", {})
self.config = comfyui_config.get(service_name, {})
# Global ComfyUI config (for comfyui_url and runninghub_api_key)
self.global_config = comfyui_config
self.service_name = service_name
self._workflows_cache: Optional[List[str]] = None
@@ -47,18 +53,18 @@ class ComfyBaseService:
List of workflow info dicts
Example: [
{
"name": "image_default.json",
"display_name": "image_default.json - Selfhost",
"name": "image_flux.json",
"display_name": "image_flux.json - Selfhost",
"source": "selfhost",
"path": "workflows/selfhost/image_default.json",
"key": "selfhost/image_default.json"
"path": "workflows/selfhost/image_flux.json",
"key": "selfhost/image_flux.json"
},
{
"name": "image_default.json",
"display_name": "image_default.json - Runninghub",
"name": "image_flux.json",
"display_name": "image_flux.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/image_default.json",
"key": "runninghub/image_default.json",
"path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json",
"workflow_id": "123456"
}
]
@@ -101,11 +107,11 @@ class ComfyBaseService:
Returns:
Workflow info dict with structure:
{
"name": "image_default.json",
"display_name": "image_default.json - Runninghub",
"name": "image_flux.json",
"display_name": "image_flux.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/image_default.json",
"key": "runninghub/image_default.json",
"path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json",
"workflow_id": "123456" # Only for RunningHub
}
"""
@@ -134,7 +140,7 @@ class ComfyBaseService:
Get default workflow from config (required, no fallback)
Returns:
Default workflow key (e.g., "runninghub/image_default.json")
Default workflow key (e.g., "runninghub/image_flux.json")
Raises:
ValueError: If default_workflow not configured
@@ -155,17 +161,17 @@ class ComfyBaseService:
Resolve workflow key to workflow info
Args:
workflow: Workflow key (e.g., "runninghub/image_default.json")
workflow: Workflow key (e.g., "runninghub/image_flux.json")
If None, uses default from config
Returns:
Workflow info dict with structure:
{
"name": "image_default.json",
"display_name": "image_default.json - Runninghub",
"name": "image_flux.json",
"display_name": "image_flux.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/image_default.json",
"key": "runninghub/image_default.json",
"path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json",
"workflow_id": "123456" # Only for RunningHub
}
@@ -210,19 +216,19 @@ class ComfyBaseService:
"""
kit_config = {}
# ComfyUI URL (priority: param > config > env > default)
# ComfyUI URL (priority: param > global config > env > default)
final_comfyui_url = (
comfyui_url
or self.config.get("comfyui_url")
or self.global_config.get("comfyui_url")
or os.getenv("COMFYUI_BASE_URL")
or "http://127.0.0.1:8188"
)
kit_config["comfyui_url"] = final_comfyui_url
# RunningHub API key (priority: param > config > env)
# RunningHub API key (priority: param > global config > env)
final_rh_key = (
runninghub_api_key
or self.config.get("runninghub_api_key")
or self.global_config.get("runninghub_api_key")
or os.getenv("RUNNINGHUB_API_KEY")
)
if final_rh_key:
@@ -242,11 +248,11 @@ class ComfyBaseService:
workflows = service.list_workflows()
# [
# {
# "name": "image_default.json",
# "display_name": "image_default.json - Runninghub",
# "name": "image_flux.json",
# "display_name": "image_flux.json - Runninghub",
# "source": "runninghub",
# "path": "workflows/runninghub/image_default.json",
# "key": "runninghub/image_default.json",
# "path": "workflows/runninghub/image_flux.json",
# "key": "runninghub/image_flux.json",
# "workflow_id": "123456"
# },
# ...
@@ -260,7 +266,7 @@ class ComfyBaseService:
List available workflow keys
Returns:
List of available workflow keys (e.g., ["runninghub/image_default.json", ...])
List of available workflow keys (e.g., ["runninghub/image_flux.json", ...])
Example:
print(f"Available workflows: {service.available}")

View File

@@ -17,7 +17,7 @@ class ImageService(ComfyBaseService):
Uses ComfyKit to execute image generation workflows.
Usage:
# Use default workflow (workflows/image_default.json)
# Use the default workflow configured under comfyui.image.default_workflow
image_url = await reelforge.image(prompt="a cat")
# Use specific workflow
@@ -65,7 +65,7 @@ class ImageService(ComfyBaseService):
Args:
prompt: Image generation prompt
workflow: Workflow filename (default: from config or "image_default.json")
workflow: Workflow filename (default: from config; required, no fallback)
comfyui_url: ComfyUI URL (optional, overrides config)
runninghub_api_key: RunningHub API key (optional, overrides config)
width: Image width
@@ -81,7 +81,7 @@ class ImageService(ComfyBaseService):
Generated image URL/path
Examples:
# Simplest: use default workflow (workflows/image_default.json)
# Simplest: use the default workflow from config
image_url = await reelforge.image(prompt="a beautiful cat")
# Use specific workflow

View File

@@ -1,39 +1,29 @@
"""
TTS (Text-to-Speech) Service - Dual implementation (Edge TTS + ComfyUI)
TTS (Text-to-Speech) Service - ComfyUI Workflow-based implementation
"""
import uuid
from typing import Optional
from comfykit import ComfyKit
from loguru import logger
from reelforge.services.comfy_base_service import ComfyBaseService
from reelforge.utils.os_util import get_temp_path
class TTSService(ComfyBaseService):
"""
TTS (Text-to-Speech) service - Dual implementation
TTS (Text-to-Speech) service - Workflow-based
Supports two TTS methods:
1. Edge TTS (default) - Free, local SDK, no workflow needed
2. ComfyUI Workflow - Workflow-based, requires ComfyUI setup
Uses ComfyKit to execute TTS workflows.
Usage:
# Use default (edge-tts)
# Use default workflow
audio_path = await reelforge.tts(text="Hello, world!")
# Explicitly use edge-tts
# Use specific workflow
audio_path = await reelforge.tts(
text="你好,世界!",
workflow="edge"
)
# Use ComfyUI workflow
audio_path = await reelforge.tts(
text="Hello",
workflow="tts_comfyui.json"
workflow="tts_edge.json"
)
# List available workflows
@@ -41,12 +31,9 @@ class TTSService(ComfyBaseService):
"""
WORKFLOW_PREFIX = "tts_"
DEFAULT_WORKFLOW = "edge" # Default to edge-tts
DEFAULT_WORKFLOW = None # No hardcoded default, must be configured
WORKFLOWS_DIR = "workflows"
# Built-in providers (not workflow files)
BUILTIN_PROVIDERS = ["edge", "edge-tts"]
def __init__(self, config: dict):
"""
Initialize TTS service
@@ -56,81 +43,53 @@ class TTSService(ComfyBaseService):
"""
super().__init__(config, service_name="tts")
def _resolve_workflow(self, workflow: Optional[str] = None) -> str:
"""
Resolve workflow to actual workflow path or provider name
Args:
workflow: Workflow filename or provider name (e.g., "edge", "tts_default.json")
Returns:
Workflow file path or provider name
"""
# 1. If not specified, use default
if workflow is None:
workflow = self._get_default_workflow()
# 2. If it's a built-in provider, return as-is
if workflow in self.BUILTIN_PROVIDERS:
logger.debug(f"Using built-in TTS provider: {workflow}")
return workflow
# 3. Otherwise, treat as workflow file (use parent logic)
return super()._resolve_workflow(workflow)
async def __call__(
self,
text: str,
workflow: Optional[str] = None,
# ComfyUI connection (optional overrides, only for workflow mode)
# ComfyUI connection (optional overrides)
comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None,
# Common TTS parameters (work for both edge-tts and workflows)
# TTS parameters
voice: Optional[str] = None,
rate: Optional[str] = None,
volume: Optional[str] = None,
pitch: Optional[str] = None,
speed: float = 1.0,
# Output path
output_path: Optional[str] = None,
**params
) -> str:
"""
Generate speech using edge-tts or ComfyUI workflow
Generate speech using ComfyUI workflow
Args:
text: Text to convert to speech
workflow: Workflow filename or provider name (default: "edge")
- "edge" or "edge-tts": Use local edge-tts SDK
- "tts_xxx.json": Use ComfyUI workflow
- Absolute path/URL/RunningHub ID: Also supported
comfyui_url: ComfyUI URL (only for workflow mode)
runninghub_api_key: RunningHub API key (only for workflow mode)
voice: Voice ID
rate: Speech rate (e.g., "+0%", "+50%", "-20%")
volume: Speech volume (e.g., "+0%")
pitch: Speech pitch (e.g., "+0Hz")
workflow: Workflow filename (default: from config)
comfyui_url: ComfyUI URL (optional, overrides config)
runninghub_api_key: RunningHub API key (optional, overrides config)
voice: Voice ID (workflow-specific)
speed: Speech speed multiplier (1.0 = normal, >1.0 = faster, <1.0 = slower)
output_path: Custom output path (auto-generated if None)
**params: Additional parameters
**params: Additional workflow parameters
Returns:
Generated audio file path
Examples:
# Simplest: use default (edge-tts)
# Simplest: use default workflow
audio_path = await reelforge.tts(text="Hello, world!")
# Explicitly use edge-tts with parameters
# Use specific workflow
audio_path = await reelforge.tts(
text="你好,世界!",
workflow="edge",
voice="zh-CN-XiaoxiaoNeural",
rate="+20%"
workflow="tts_edge.json"
)
# Use ComfyUI workflow
# With voice and speed
audio_path = await reelforge.tts(
text="Hello",
workflow="tts_default.json"
workflow="tts_edge.json",
voice="zh-CN-XiaoxiaoNeural",
speed=1.2
)
# With absolute path
@@ -138,92 +97,28 @@ class TTSService(ComfyBaseService):
text="Hello",
workflow="/path/to/custom_tts.json"
)
"""
# 1. Check if it's a builtin provider (edge-tts)
if workflow in self.BUILTIN_PROVIDERS or workflow is None and self._get_default_workflow() in self.BUILTIN_PROVIDERS:
# Use edge-tts
return await self._call_edge_tts(
text=text,
voice=voice,
rate=rate,
volume=volume,
pitch=pitch,
output_path=output_path,
**params
# With custom ComfyUI server
audio_path = await reelforge.tts(
text="Hello",
comfyui_url="http://192.168.1.100:8188"
)
# 2. Use ComfyUI workflow - resolve to structured info
"""
# 1. Resolve workflow (returns structured info)
workflow_info = self._resolve_workflow(workflow=workflow)
# 2. Execute ComfyUI workflow
return await self._call_comfyui_workflow(
workflow_info=workflow_info,
text=text,
comfyui_url=comfyui_url,
runninghub_api_key=runninghub_api_key,
voice=voice,
rate=rate,
volume=volume,
pitch=pitch,
speed=speed,
output_path=output_path,
**params
)
async def _call_edge_tts(
self,
text: str,
voice: Optional[str] = None,
rate: Optional[str] = None,
volume: Optional[str] = None,
pitch: Optional[str] = None,
output_path: Optional[str] = None,
**params
) -> str:
"""
Generate speech using edge-tts SDK
Args:
text: Text to convert to speech
voice: Voice ID (default: zh-CN-YunjianNeural)
rate: Speech rate (default: +0%)
volume: Speech volume (default: +0%)
pitch: Speech pitch (default: +0Hz)
output_path: Custom output path (auto-generated if None)
**params: Additional parameters (e.g., retry_count, retry_delay)
Returns:
Generated audio file path
"""
from reelforge.utils.tts_util import edge_tts
logger.info(f"🎙️ Using edge-tts (local SDK)")
# Generate output path (use provided path or auto-generate)
if output_path is None:
output_path = get_temp_path(f"{uuid.uuid4().hex}.mp3")
else:
# Ensure parent directory exists
import os
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Call edge-tts with output_path to save directly
try:
audio_bytes = await edge_tts(
text=text,
voice=voice or "zh-CN-YunjianNeural",
rate=rate or "+0%",
volume=volume or "+0%",
pitch=pitch or "+0Hz",
output_path=output_path,
**params
)
logger.info(f"✅ Generated audio (edge-tts): {output_path}")
return output_path
except Exception as e:
logger.error(f"Edge TTS generation error: {e}")
raise
async def _call_comfyui_workflow(
self,
workflow_info: dict,
@@ -231,9 +126,7 @@ class TTSService(ComfyBaseService):
comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None,
voice: Optional[str] = None,
rate: Optional[str] = None,
volume: Optional[str] = None,
pitch: Optional[str] = None,
speed: float = 1.0,
output_path: Optional[str] = None,
**params
) -> str:
@@ -246,9 +139,7 @@ class TTSService(ComfyBaseService):
comfyui_url: ComfyUI URL
runninghub_api_key: RunningHub API key
voice: Voice ID (workflow-specific)
rate: Speech rate (workflow-specific)
volume: Speech volume (workflow-specific)
pitch: Speech pitch (workflow-specific)
speed: Speech speed multiplier (workflow-specific)
output_path: Custom output path (downloads if URL returned)
**params: Additional workflow parameters
@@ -269,12 +160,8 @@ class TTSService(ComfyBaseService):
# Add optional TTS parameters
if voice is not None:
workflow_params["voice"] = voice
if rate is not None:
workflow_params["rate"] = rate
if volume is not None:
workflow_params["volume"] = volume
if pitch is not None:
workflow_params["pitch"] = pitch
if speed != 1.0:
workflow_params["speed"] = speed
# Add any additional parameters
workflow_params.update(params)