Refactor the capability layer

puke
2025-10-27 20:06:27 +08:00
committed by puke
parent c19710d5bd
commit 9937c0fffd
19 changed files with 818 additions and 1160 deletions

View File

@@ -4,9 +4,9 @@ ReelForge Services
Unified service layer providing simplified access to capabilities.
"""
from reelforge.services.base import BaseService
from reelforge.services.llm import LLMService
from reelforge.services.tts import TTSService
from reelforge.services.comfy_base_service import ComfyBaseService
from reelforge.services.llm_service import LLMService
from reelforge.services.tts_service import TTSService
from reelforge.services.image import ImageService
from reelforge.services.video import VideoService
from reelforge.services.narration_generator import NarrationGeneratorService
@@ -16,7 +16,7 @@ from reelforge.services.storyboard_processor import StoryboardProcessorService
from reelforge.services.video_generator import VideoGeneratorService
__all__ = [
"BaseService",
"ComfyBaseService",
"LLMService",
"TTSService",
"ImageService",

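After this refactor the implementations live in renamed modules (llm_service, tts_service, comfy_base_service), but consumers keep importing from the package root. A minimal sketch, assuming the re-exports above:

from reelforge.services import ComfyBaseService, ImageService, LLMService, TTSService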
View File

@@ -1,85 +0,0 @@
"""
Base service class for all capability services
"""
from abc import ABC, abstractmethod
from typing import Any, Optional
from reelforge.core.config_manager import ConfigManager
class BaseService(ABC):
"""
Base service class for all capability services
Provides callable interface and basic properties:
- Direct call: result = await service(...)
- Active capability: service.active
- Available IDs: service.available
Usage:
result = await reelforge.llm("Hello world")
print(f"Using: {reelforge.llm.active}")
print(f"Available: {reelforge.llm.available}")
"""
def __init__(self, config_manager: ConfigManager, capability_type: str):
"""
Initialize service
Args:
config_manager: ConfigManager instance
capability_type: Type of capability (llm, tts, etc.)
"""
self._config_manager = config_manager
self._capability_type = capability_type
@abstractmethod
async def __call__(self, **kwargs) -> Any:
"""
Make service callable directly
This is the main entry point for using the service.
Subclasses MUST implement this with specific signatures.
Example:
answer = await reelforge.llm(prompt="Hello")
"""
pass
@property
def active(self) -> Optional[str]:
"""
Get active capability ID
Returns:
Active capability ID, or None if not set
Example:
print(f"Using LLM: {reelforge.llm.active}")
"""
return self._config_manager.get_active(self._capability_type)
@property
def available(self) -> list[str]:
"""
List available capability IDs
Returns:
List of capability IDs
Example:
print(f"Available LLMs: {reelforge.llm.available}")
"""
return self._config_manager.get_available_ids(self._capability_type)
def __repr__(self) -> str:
"""String representation"""
active = self.active or "none"
available = ", ".join(self.available) if self.available else "none"
return (
f"<{self.__class__.__name__} "
f"active={active!r} "
f"available=[{available}]>"
)

View File

@@ -0,0 +1,191 @@
"""
ComfyUI Base Service - Common logic for ComfyUI-based services
"""
import os
from pathlib import Path
from typing import Optional, List, Dict, Any
from comfykit import ComfyKit
from loguru import logger
class ComfyBaseService:
"""
Base service for ComfyUI workflow-based capabilities
Provides common functionality for TTS, Image, and other ComfyUI-based services.
Subclasses should define:
- WORKFLOW_PREFIX: Prefix for workflow files (e.g., "image_", "tts_")
- DEFAULT_WORKFLOW: Default workflow filename (e.g., "image_default.json")
- WORKFLOWS_DIR: Directory containing workflows (default: "workflows")
"""
WORKFLOW_PREFIX: str = "" # Must be overridden by subclass
DEFAULT_WORKFLOW: str = "" # Must be overridden by subclass
WORKFLOWS_DIR: str = "workflows"
def __init__(self, config: dict, service_name: str):
"""
Initialize ComfyUI base service
Args:
config: Full application config dict
service_name: Service name in config (e.g., "tts", "image")
"""
self.config = config.get(service_name, {})
self.service_name = service_name
self._workflows_cache: Optional[List[str]] = None
def _scan_workflows(self) -> List[str]:
"""
Scan workflows/{prefix}*.json files
Returns:
List of workflow filenames
Example: ["image_default.json", "image_flux.json"]
"""
workflows = []
workflows_dir = Path(self.WORKFLOWS_DIR)
if not workflows_dir.exists():
logger.warning(f"Workflows directory not found: {workflows_dir}")
return workflows
# Scan for {WORKFLOW_PREFIX}*.json files (the prefix already ends with "_")
for file in workflows_dir.glob(f"{self.WORKFLOW_PREFIX}*.json"):
workflows.append(file.name)
logger.debug(f"Found {self.service_name} workflow: {file.name}")
return sorted(workflows)
def _get_default_workflow(self) -> str:
"""
Get default workflow name from config or use DEFAULT_WORKFLOW
Returns:
Default workflow filename
"""
return self.config.get("default_workflow", self.DEFAULT_WORKFLOW)
def _resolve_workflow(self, workflow: Optional[str] = None) -> str:
"""
Resolve workflow to actual workflow path
Args:
workflow: Workflow filename (e.g., "image_default.json")
Can also be:
- Absolute path: "/path/to/workflow.json"
- Relative path: "custom/workflow.json"
- URL: "http://..."
- RunningHub ID: "12345"
Returns:
Workflow file path or identifier
Raises:
ValueError: If workflow not found
"""
# 1. If not specified, use default
if workflow is None:
workflow = self._get_default_workflow()
# 2. If it's an absolute path, URL, or looks like a RunningHub ID, use as-is
if (workflow.startswith("/") or
workflow.startswith("http://") or
workflow.startswith("https://") or
workflow.isdigit()):
logger.debug(f"Using workflow identifier: {workflow}")
return workflow
# 3. If it's just a filename, look in workflows/ directory
workflow_path = Path(self.WORKFLOWS_DIR) / workflow
if not workflow_path.exists():
# List available workflows for error message
available = self._scan_workflows()
available_str = ", ".join(available) if available else "none"
raise ValueError(
f"Workflow '{workflow}' not found at {workflow_path}. "
f"Available workflows: {available_str}\n"
f"Please create: {workflow_path}"
)
logger.info(f"🎬 Using {self.service_name} workflow: {workflow}")
return str(workflow_path)
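# Resolution sketch (hypothetical inputs, assuming a subclass with
# WORKFLOW_PREFIX = "image_" and DEFAULT_WORKFLOW = "image_default.json"):
#   _resolve_workflow(None)               -> "workflows/image_default.json"
#   _resolve_workflow("image_flux.json")  -> "workflows/image_flux.json"
#   _resolve_workflow("/abs/wf.json")     -> "/abs/wf.json"  (used as-is)
#   _resolve_workflow("12345")            -> "12345"  (RunningHub workflow ID)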
def _prepare_comfykit_config(
self,
comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None,
) -> Dict[str, Any]:
"""
Prepare ComfyKit configuration
Args:
comfyui_url: ComfyUI URL (optional, overrides config)
runninghub_api_key: RunningHub API key (optional, overrides config)
Returns:
ComfyKit configuration dict
"""
kit_config = {}
# ComfyUI URL (priority: param > config > env > default)
final_comfyui_url = (
comfyui_url
or self.config.get("comfyui_url")
or os.getenv("COMFYUI_BASE_URL")
or "http://127.0.0.1:8188"
)
kit_config["comfyui_url"] = final_comfyui_url
# RunningHub API key (priority: param > config > env)
final_rh_key = (
runninghub_api_key
or self.config.get("runninghub_api_key")
or os.getenv("RUNNINGHUB_API_KEY")
)
if final_rh_key:
kit_config["runninghub_api_key"] = final_rh_key
logger.debug(f"ComfyKit config: {kit_config}")
return kit_config
def list_workflows(self) -> List[str]:
"""
List all available workflows
Returns:
List of workflow filenames (sorted alphabetically)
Example:
workflows = service.list_workflows()
# ['image_default.json', 'image_flux.json']
"""
return self._scan_workflows()
@property
def available(self) -> List[str]:
"""
List available workflows
Returns:
List of available workflow filenames
Example:
print(f"Available workflows: {service.available}")
"""
return self.list_workflows()
def __repr__(self) -> str:
"""String representation"""
default = self._get_default_workflow()
available = ", ".join(self.available) if self.available else "none"
return (
f"<{self.__class__.__name__} "
f"default={default!r} "
f"available=[{available}]>"
)
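A minimal sketch of how a new service would plug into this base class. The class name and config key below are hypothetical; only the hooks (WORKFLOW_PREFIX, DEFAULT_WORKFLOW, service_name) come from ComfyBaseService:

class AudioFxService(ComfyBaseService):
    WORKFLOW_PREFIX = "audiofx_"
    DEFAULT_WORKFLOW = "audiofx_default.json"

    def __init__(self, config: dict):
        super().__init__(config, service_name="audiofx")

service = AudioFxService({"audiofx": {"default_workflow": "audiofx_reverb.json"}})
print(service.available)  # e.g. ["audiofx_default.json", "audiofx_reverb.json"]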

View File

@@ -1,42 +1,37 @@
"""
Image Generation Service - Workflow-based, no capability layer
This service directly uses ComfyKit to execute workflows without going through
the capability abstraction layer. This is because workflow files themselves
already provide sufficient abstraction and flexibility.
Image Generation Service - ComfyUI Workflow-based implementation
"""
import os
from pathlib import Path
from typing import Optional, List, Dict
from typing import Optional
from comfykit import ComfyKit
from loguru import logger
from reelforge.services.comfy_base_service import ComfyBaseService
class ImageService:
class ImageService(ComfyBaseService):
"""
Image generation service - Workflow-based
Directly uses ComfyKit to execute workflows. No capability abstraction needed
since workflow itself is already the abstraction.
Uses ComfyKit to execute image generation workflows.
Usage:
# Use default preset (workflows/image_default.json)
# Use default workflow (workflows/image_default.json)
image_url = await reelforge.image(prompt="a cat")
# Use specific preset
image_url = await reelforge.image(preset="flux", prompt="a cat")
# Use specific workflow
image_url = await reelforge.image(
prompt="a cat",
workflow="image_flux.json"
)
# List available presets
presets = reelforge.image.list_presets()
# Get preset path
path = reelforge.image.get_preset_path("flux")
# List available workflows
workflows = reelforge.image.list_workflows()
"""
PRESET_PREFIX = "image_"
DEFAULT_PRESET = "default"
WORKFLOW_PREFIX = "image_"
DEFAULT_WORKFLOW = "image_default.json"
WORKFLOWS_DIR = "workflows"
def __init__(self, config: dict):
@@ -46,105 +41,11 @@ class ImageService:
Args:
config: Full application config dict
"""
self.config = config.get("image", {})
self._presets_cache: Optional[Dict[str, str]] = None
def _scan_presets(self) -> Dict[str, str]:
"""
Scan workflows/image_*.json files
Returns:
Dict mapping preset name to workflow path
Example: {"default": "workflows/image_default.json", "flux": "workflows/image_flux.json"}
"""
if self._presets_cache is not None:
return self._presets_cache
presets = {}
workflows_dir = Path(self.WORKFLOWS_DIR)
if not workflows_dir.exists():
logger.warning(f"Workflows directory not found: {workflows_dir}")
return presets
# Scan for image_*.json files
for file in workflows_dir.glob(f"{self.PRESET_PREFIX}*.json"):
# Extract preset name: "image_flux.json" -> "flux"
preset_name = file.stem.replace(self.PRESET_PREFIX, "")
presets[preset_name] = str(file)
logger.debug(f"Found image preset: {preset_name} -> {file}")
self._presets_cache = presets
return presets
def _get_default_preset(self) -> str:
"""
Get default preset name from config or use "default"
Priority:
1. config.yaml: image.default
2. "default"
"""
return self.config.get("default", self.DEFAULT_PRESET)
def _resolve_workflow(
self,
preset: Optional[str] = None,
workflow: Optional[str] = None
) -> str:
"""
Resolve preset/workflow to actual workflow path
Args:
preset: Preset name (e.g., "flux", "default")
workflow: Full workflow path (for backward compatibility)
Returns:
Workflow file path
Raises:
ValueError: If preset not found or no workflows available
"""
# 1. If explicit workflow path provided, use it
if workflow:
logger.debug(f"Using explicit workflow: {workflow}")
return workflow
# 2. Scan available presets
presets = self._scan_presets()
if not presets:
raise ValueError(
f"No workflow presets found in {self.WORKFLOWS_DIR}/ directory. "
f"Please create at least one workflow file: {self.WORKFLOWS_DIR}/{self.PRESET_PREFIX}default.json"
)
# 3. Determine which preset to use
if preset:
# Use specified preset
target_preset = preset
else:
# Use default preset
target_preset = self._get_default_preset()
# 4. Lookup preset
if target_preset not in presets:
available = ", ".join(sorted(presets.keys()))
raise ValueError(
f"Preset '{target_preset}' not found. "
f"Available presets: {available}\n"
f"Please create: {self.WORKFLOWS_DIR}/{self.PRESET_PREFIX}{target_preset}.json"
)
workflow_path = presets[target_preset]
logger.info(f"🎨 Using image preset: {target_preset} ({workflow_path})")
return workflow_path
super().__init__(config, service_name="image")
async def __call__(
self,
prompt: str,
preset: Optional[str] = None,
workflow: Optional[str] = None,
# ComfyUI connection (optional overrides)
comfyui_url: Optional[str] = None,
@@ -164,8 +65,7 @@ class ImageService:
Args:
prompt: Image generation prompt
preset: Preset name (default: from config or "default")
workflow: Full workflow path (backward compatible)
workflow: Workflow filename (default: from config or "image_default.json")
comfyui_url: ComfyUI URL (optional, overrides config)
runninghub_api_key: RunningHub API key (optional, overrides config)
width: Image width
@@ -181,26 +81,29 @@ class ImageService:
Generated image URL/path
Examples:
# Simplest: use default preset (workflows/image_default.json)
# Simplest: use default workflow (workflows/image_default.json)
image_url = await reelforge.image(prompt="a beautiful cat")
# Use specific preset
image_url = await reelforge.image(preset="flux", prompt="a cat")
# Use specific workflow
image_url = await reelforge.image(
prompt="a cat",
workflow="image_flux.json"
)
# With additional parameters
image_url = await reelforge.image(
preset="flux",
prompt="a cat",
workflow="image_flux.json",
width=1024,
height=1024,
steps=20,
seed=42
)
# Backward compatible: direct workflow path
# With absolute path
image_url = await reelforge.image(
workflow="workflows/custom.json",
prompt="a cat"
prompt="a cat",
workflow="/path/to/custom.json"
)
# With custom ComfyUI server
@@ -210,30 +113,13 @@ class ImageService:
)
"""
# 1. Resolve workflow path
workflow_path = self._resolve_workflow(preset=preset, workflow=workflow)
workflow_path = self._resolve_workflow(workflow=workflow)
# 2. Prepare ComfyKit config
kit_config = {}
# ComfyUI URL (priority: param > config > env > default)
final_comfyui_url = (
comfyui_url
or self.config.get("comfyui_url")
or os.getenv("COMFYUI_BASE_URL")
or "http://127.0.0.1:8188"
kit_config = self._prepare_comfykit_config(
comfyui_url=comfyui_url,
runninghub_api_key=runninghub_api_key
)
kit_config["comfyui_url"] = final_comfyui_url
# RunningHub API key (priority: param > config > env)
final_rh_key = (
runninghub_api_key
or self.config.get("runninghub_api_key")
or os.getenv("RUNNINGHUB_API_KEY")
)
if final_rh_key:
kit_config["runninghub_api_key"] = final_rh_key
logger.debug(f"ComfyKit config: {kit_config}")
# 3. Build workflow parameters
workflow_params = {"prompt": prompt}
@@ -283,74 +169,3 @@ class ImageService:
except Exception as e:
logger.error(f"Image generation error: {e}")
raise
def list_presets(self) -> List[str]:
"""
List all available image presets
Returns:
List of preset names (sorted alphabetically)
Example:
presets = reelforge.image.list_presets()
# ['anime', 'default', 'flux', 'sd15']
"""
return sorted(self._scan_presets().keys())
def get_preset_path(self, preset: str) -> Optional[str]:
"""
Get workflow path for a preset
Args:
preset: Preset name
Returns:
Workflow file path, or None if not found
Example:
path = reelforge.image.get_preset_path("flux")
# 'workflows/image_flux.json'
"""
return self._scan_presets().get(preset)
@property
def active(self) -> str:
"""
Get active preset name
This property is provided for compatibility with other services
that use the capability layer.
Returns:
Active preset name
Example:
print(f"Using preset: {reelforge.image.active}")
"""
return self._get_default_preset()
@property
def available(self) -> List[str]:
"""
List available presets
This property is provided for compatibility with other services
that use the capability layer.
Returns:
List of available preset names
Example:
print(f"Available presets: {reelforge.image.available}")
"""
return self.list_presets()
def __repr__(self) -> str:
"""String representation"""
active = self.active
available = ", ".join(self.available) if self.available else "none"
return (
f"<ImageService "
f"active={active!r} "
f"available=[{available}]>"
)
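A usage sketch of the consolidated workflow parameter; all three forms below are routed through _resolve_workflow (the paths and the RunningHub ID are hypothetical):

url = await reelforge.image(prompt="a cat", workflow="image_flux.json")      # bundled file
url = await reelforge.image(prompt="a cat", workflow="/opt/wf/custom.json")  # absolute path
url = await reelforge.image(prompt="a cat", workflow="12345")                # RunningHub ID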

View File

@@ -1,97 +0,0 @@
"""
LLM (Large Language Model) Service
"""
from typing import Optional
from reelforge.services.base import BaseService
class LLMService(BaseService):
"""
LLM (Large Language Model) service
Provides unified access to various LLM providers (Qwen, OpenAI, DeepSeek, Ollama, etc.)
Usage:
# Direct call (recommended)
answer = await reelforge.llm("Explain atomic habits")
# With parameters
answer = await reelforge.llm(
prompt="Explain atomic habits in 3 sentences",
temperature=0.7,
max_tokens=2000
)
# Explicit call syntax
answer = await reelforge.llm.call(prompt="Hello")
# Check active LLM
print(f"Using: {reelforge.llm.active}")
# List available LLMs
print(f"Available: {reelforge.llm.available}")
"""
def __init__(self, router):
super().__init__(router, "llm")
async def __call__(
self,
prompt: str,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
model: Optional[str] = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
**kwargs
) -> str:
"""
Generate text using LLM
Args:
prompt: The prompt to generate from
api_key: API key (optional, uses config if not provided)
base_url: Base URL (optional, uses config if not provided)
model: Model name (optional, uses config if not provided)
temperature: Sampling temperature (0.0-2.0). Lower is more deterministic.
max_tokens: Maximum tokens to generate
**kwargs: Additional provider-specific parameters
Returns:
Generated text
Examples:
# Use config from config.yaml
answer = await reelforge.llm("Explain atomic habits")
# Override with custom parameters
answer = await reelforge.llm(
"Explain the concept of atomic habits in 3 sentences",
api_key="sk-custom-key",
base_url="https://api.custom.com/v1",
model="custom-model",
temperature=0.7,
max_tokens=500
)
"""
params = {"prompt": prompt}
# Add optional LLM parameters (will override config if provided)
if api_key is not None:
params["api_key"] = api_key
if base_url is not None:
params["base_url"] = base_url
if model is not None:
params["model"] = model
if temperature is not None:
params["temperature"] = temperature
if max_tokens is not None:
params["max_tokens"] = max_tokens
params.update(kwargs)
return await self._config_manager.call(self._capability_type, **params)

View File

@@ -0,0 +1,184 @@
"""
LLM (Large Language Model) Service - Direct OpenAI SDK implementation
"""
import os
from typing import Optional
from openai import AsyncOpenAI
from loguru import logger
class LLMService:
"""
LLM (Large Language Model) service
Direct implementation using OpenAI SDK. No capability layer needed.
Supports all OpenAI SDK compatible providers:
- OpenAI (gpt-4o, gpt-4o-mini, gpt-3.5-turbo)
- Alibaba Qwen (qwen-max, qwen-plus, qwen-turbo)
- Anthropic Claude (claude-sonnet-4-5, claude-opus-4, claude-haiku-4)
- DeepSeek (deepseek-chat)
- Moonshot Kimi (moonshot-v1-8k, moonshot-v1-32k, moonshot-v1-128k)
- Ollama (llama3.2, qwen2.5, mistral, codellama) - FREE & LOCAL!
- Any custom provider with OpenAI-compatible API
Usage:
# Direct call
answer = await reelforge.llm("Explain atomic habits")
# With parameters
answer = await reelforge.llm(
prompt="Explain atomic habits in 3 sentences",
temperature=0.7,
max_tokens=2000
)
"""
def __init__(self, config: dict):
"""
Initialize LLM service
Args:
config: Full application config dict
"""
self.config = config.get("llm", {})
self._client: Optional[AsyncOpenAI] = None
def _get_config_value(self, key: str, default=None):
"""
Get config value from config file
Args:
key: Config key name
default: Default value if not found
Returns:
Config value
"""
return self.config.get(key, default)
def _create_client(
self,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
) -> AsyncOpenAI:
"""
Create OpenAI client
Args:
api_key: API key (optional, uses config if not provided)
base_url: Base URL (optional, uses config if not provided)
Returns:
AsyncOpenAI client instance
"""
# Get API key (priority: parameter > config)
final_api_key = (
api_key
or self._get_config_value("api_key")
or "dummy-key" # Ollama doesn't need real key
)
# Get base URL (priority: parameter > config)
final_base_url = (
base_url
or self._get_config_value("base_url")
)
# Create client
client_kwargs = {"api_key": final_api_key}
if final_base_url:
client_kwargs["base_url"] = final_base_url
return AsyncOpenAI(**client_kwargs)
async def __call__(
self,
prompt: str,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
model: Optional[str] = None,
temperature: float = 0.7,
max_tokens: int = 2000,
**kwargs
) -> str:
"""
Generate text using LLM
Args:
prompt: The prompt to generate from
api_key: API key (optional, uses config if not provided)
base_url: Base URL (optional, uses config if not provided)
model: Model name (optional, uses config if not provided)
temperature: Sampling temperature (0.0-2.0). Lower is more deterministic.
max_tokens: Maximum tokens to generate
**kwargs: Additional provider-specific parameters
Returns:
Generated text
Examples:
# Use config from config.yaml
answer = await reelforge.llm("Explain atomic habits")
# Override with custom parameters
answer = await reelforge.llm(
prompt="Explain atomic habits in 3 sentences",
api_key="sk-custom-key",
base_url="https://api.custom.com/v1",
model="custom-model",
temperature=0.7,
max_tokens=500
)
"""
# Create client (new instance each time to support parameter overrides)
client = self._create_client(api_key=api_key, base_url=base_url)
# Get model (priority: parameter > config)
final_model = (
model
or self._get_config_value("model")
or "gpt-3.5-turbo" # Default fallback
)
logger.debug(f"LLM call: model={final_model}, base_url={client.base_url}")
try:
response = await client.chat.completions.create(
model=final_model,
messages=[{"role": "user", "content": prompt}],
temperature=temperature,
max_tokens=max_tokens,
**kwargs
)
result = response.choices[0].message.content or ""  # content can be None; normalize
logger.debug(f"LLM response length: {len(result)} chars")
return result
except Exception as e:
logger.error(f"LLM call error (model={final_model}, base_url={client.base_url}): {e}")
raise
@property
def active(self) -> str:
"""
Get active model name
Returns:
Active model name
Example:
print(f"Using model: {reelforge.llm.active}")
"""
return self._get_config_value("model", "gpt-3.5-turbo")
def __repr__(self) -> str:
"""String representation"""
model = self.active
base_url = self._get_config_value("base_url", "default")
return f"<LLMService model={model!r} base_url={base_url!r}>"

View File

@@ -123,7 +123,8 @@ class StoryboardProcessorService:
# Call TTS
audio_path = await self.core.tts(
text=frame.narration,
voice=config.voice_id
voice=config.voice_id,
rate="+20%",
)
frame.audio_path = audio_path

View File

@@ -1,103 +0,0 @@
"""
TTS (Text-to-Speech) Service
"""
import base64
import uuid
from typing import Optional
from reelforge.services.base import BaseService
from reelforge.utils.os_util import get_temp_path, save_bytes_to_file
class TTSService(BaseService):
"""
TTS (Text-to-Speech) service
Provides unified access to various TTS providers (Edge TTS, Azure TTS, etc.)
Returns path to saved audio file.
Usage:
# Direct call (auto-generate temp path)
audio_path = await reelforge.tts("Hello world")
# Returns: "temp/abc123def456.mp3"
# With voice parameter
audio_path = await reelforge.tts(
text="你好,世界",
voice="zh-CN-YunjianNeural"
)
# Specify custom output path
audio_path = await reelforge.tts(
text="Hello",
output_path="output/greeting.mp3"
)
# Check active TTS
print(f"Using: {reelforge.tts.active}")
"""
def __init__(self, router):
super().__init__(router, "tts")
async def __call__(
self,
text: str,
voice: Optional[str] = None,
rate: Optional[str] = None,
output_path: Optional[str] = None,
**kwargs
) -> str:
"""
Convert text to speech and save to file
Args:
text: Text to convert to speech
voice: Voice ID (uses default if not specified)
rate: Speech rate (e.g., "+0%", "+50%", "-20%")
output_path: Output file path (default: temp/<uuid>.mp3)
**kwargs: Additional provider-specific parameters
Returns:
Path to saved audio file (str)
Example:
# Auto-generate path
audio_path = await reelforge.tts("Hello world")
# Returns: "temp/abc123def456.mp3"
# Specify custom path
audio_path = await reelforge.tts(
"你好,世界",
voice="zh-CN-YunjianNeural",
output_path="output/greeting.mp3"
)
"""
params = {"text": text}
if voice is not None:
params["voice"] = voice
if rate is not None:
params["rate"] = rate
params.update(kwargs)
# Call capability and get base64-encoded audio
audio_base64 = await self._config_manager.call(self._capability_type, **params)
# Decode base64 to bytes
if isinstance(audio_base64, str):
audio_data = base64.b64decode(audio_base64)
else:
audio_data = audio_base64
# Generate output path if not specified
if output_path is None:
# Generate UUID without hyphens for filename
file_uuid = uuid.uuid4().hex
output_path = get_temp_path(f"{file_uuid}.mp3")
# Save to file
saved_path = save_bytes_to_file(audio_data, output_path)
return saved_path

View File

@@ -0,0 +1,311 @@
"""
TTS (Text-to-Speech) Service - Dual implementation (Edge TTS + ComfyUI)
"""
import uuid
from typing import Optional
from comfykit import ComfyKit
from loguru import logger
from reelforge.services.comfy_base_service import ComfyBaseService
from reelforge.utils.os_util import get_temp_path
class TTSService(ComfyBaseService):
"""
TTS (Text-to-Speech) service - Dual implementation
Supports two TTS methods:
1. Edge TTS (default) - Free, local SDK, no workflow needed
2. ComfyUI Workflow - Workflow-based, requires ComfyUI setup
Usage:
# Use default (edge-tts)
audio_path = await reelforge.tts(text="Hello, world!")
# Explicitly use edge-tts
audio_path = await reelforge.tts(
text="你好,世界!",
workflow="edge"
)
# Use ComfyUI workflow
audio_path = await reelforge.tts(
text="Hello",
workflow="tts_comfyui.json"
)
# List available workflows
workflows = reelforge.tts.list_workflows()
"""
WORKFLOW_PREFIX = "tts_"
DEFAULT_WORKFLOW = "edge" # Default to edge-tts
WORKFLOWS_DIR = "workflows"
# Built-in providers (not workflow files)
BUILTIN_PROVIDERS = ["edge", "edge-tts"]
def __init__(self, config: dict):
"""
Initialize TTS service
Args:
config: Full application config dict
"""
super().__init__(config, service_name="tts")
def _resolve_workflow(self, workflow: Optional[str] = None) -> str:
"""
Resolve workflow to actual workflow path or provider name
Args:
workflow: Workflow filename or provider name (e.g., "edge", "tts_default.json")
Returns:
Workflow file path or provider name
"""
# 1. If not specified, use default
if workflow is None:
workflow = self._get_default_workflow()
# 2. If it's a built-in provider, return as-is
if workflow in self.BUILTIN_PROVIDERS:
logger.debug(f"Using built-in TTS provider: {workflow}")
return workflow
# 3. Otherwise, treat as workflow file (use parent logic)
return super()._resolve_workflow(workflow)
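# Resolution sketch: "edge" / "edge-tts" short-circuit to the built-in provider;
# anything else (e.g. "tts_default.json") falls through to the base class,
# which looks in workflows/ and raises ValueError if the file is missing.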
async def __call__(
self,
text: str,
workflow: Optional[str] = None,
# ComfyUI connection (optional overrides, only for workflow mode)
comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None,
# Common TTS parameters (work for both edge-tts and workflows)
voice: Optional[str] = None,
rate: Optional[str] = None,
volume: Optional[str] = None,
pitch: Optional[str] = None,
**params
) -> str:
"""
Generate speech using edge-tts or ComfyUI workflow
Args:
text: Text to convert to speech
workflow: Workflow filename or provider name (default: "edge")
- "edge" or "edge-tts": Use local edge-tts SDK
- "tts_xxx.json": Use ComfyUI workflow
- Absolute path/URL/RunningHub ID: Also supported
comfyui_url: ComfyUI URL (only for workflow mode)
runninghub_api_key: RunningHub API key (only for workflow mode)
voice: Voice ID
rate: Speech rate (e.g., "+0%", "+50%", "-20%")
volume: Speech volume (e.g., "+0%")
pitch: Speech pitch (e.g., "+0Hz")
**params: Additional parameters
Returns:
Generated audio file path
Examples:
# Simplest: use default (edge-tts)
audio_path = await reelforge.tts(text="Hello, world!")
# Explicitly use edge-tts with parameters
audio_path = await reelforge.tts(
text="你好,世界!",
workflow="edge",
voice="zh-CN-XiaoxiaoNeural",
rate="+20%"
)
# Use ComfyUI workflow
audio_path = await reelforge.tts(
text="Hello",
workflow="tts_default.json"
)
# With absolute path
audio_path = await reelforge.tts(
text="Hello",
workflow="/path/to/custom_tts.json"
)
"""
# 1. Resolve workflow path or provider
workflow_or_provider = self._resolve_workflow(workflow=workflow)
# 2. Determine execution path
if workflow_or_provider in self.BUILTIN_PROVIDERS:
# Use edge-tts
return await self._call_edge_tts(
text=text,
voice=voice,
rate=rate,
volume=volume,
pitch=pitch,
**params
)
else:
# Use ComfyUI workflow
return await self._call_comfyui_workflow(
workflow_path=workflow_or_provider,
text=text,
comfyui_url=comfyui_url,
runninghub_api_key=runninghub_api_key,
voice=voice,
rate=rate,
volume=volume,
pitch=pitch,
**params
)
async def _call_edge_tts(
self,
text: str,
voice: Optional[str] = None,
rate: Optional[str] = None,
volume: Optional[str] = None,
pitch: Optional[str] = None,
**params
) -> str:
"""
Generate speech using edge-tts SDK
Args:
text: Text to convert to speech
voice: Voice ID (default: zh-CN-YunjianNeural)
rate: Speech rate (default: +0%)
volume: Speech volume (default: +0%)
pitch: Speech pitch (default: +0Hz)
**params: Additional parameters (e.g., retry_count, retry_delay)
Returns:
Generated audio file path
"""
from reelforge.utils.tts_util import edge_tts
logger.info(f"🎙️ Using edge-tts (local SDK)")
# Generate temp file path
output_path = get_temp_path(f"{uuid.uuid4().hex}.mp3")
# Call edge-tts with output_path to save directly
try:
await edge_tts(  # writes directly to output_path; return value not needed
text=text,
voice=voice or "zh-CN-YunjianNeural",
rate=rate or "+0%",
volume=volume or "+0%",
pitch=pitch or "+0Hz",
output_path=output_path,
**params
)
logger.info(f"✅ Generated audio (edge-tts): {output_path}")
return output_path
except Exception as e:
logger.error(f"Edge TTS generation error: {e}")
raise
async def _call_comfyui_workflow(
self,
workflow_path: str,
text: str,
comfyui_url: Optional[str] = None,
runninghub_api_key: Optional[str] = None,
voice: Optional[str] = None,
rate: Optional[str] = None,
volume: Optional[str] = None,
pitch: Optional[str] = None,
**params
) -> str:
"""
Generate speech using ComfyUI workflow
Args:
workflow_path: Path to workflow file
text: Text to convert to speech
comfyui_url: ComfyUI URL
runninghub_api_key: RunningHub API key
voice: Voice ID (workflow-specific)
rate: Speech rate (workflow-specific)
volume: Speech volume (workflow-specific)
pitch: Speech pitch (workflow-specific)
**params: Additional workflow parameters
Returns:
Generated audio file path/URL
"""
logger.info(f"🎙️ Using ComfyUI workflow: {workflow_path}")
# 1. Prepare ComfyKit config
kit_config = self._prepare_comfykit_config(
comfyui_url=comfyui_url,
runninghub_api_key=runninghub_api_key
)
# 2. Build workflow parameters
workflow_params = {"text": text}
# Add optional TTS parameters
if voice is not None:
workflow_params["voice"] = voice
if rate is not None:
workflow_params["rate"] = rate
if volume is not None:
workflow_params["volume"] = volume
if pitch is not None:
workflow_params["pitch"] = pitch
# Add any additional parameters
workflow_params.update(params)
logger.debug(f"Workflow parameters: {workflow_params}")
# 3. Execute workflow
try:
kit = ComfyKit(**kit_config)
logger.info(f"Executing TTS workflow: {workflow_path}")
result = await kit.execute(workflow_path, workflow_params)
# 4. Handle result
if result.status != "completed":
error_msg = result.msg or "Unknown error"
logger.error(f"TTS generation failed: {error_msg}")
raise Exception(f"TTS generation failed: {error_msg}")
# ComfyKit result can have audio files in different output types
# Try to get audio file path from result
audio_path = None
# Check for audio files in result.audios (if available)
if hasattr(result, 'audios') and result.audios:
audio_path = result.audios[0]
# Check for files in result.files
elif hasattr(result, 'files') and result.files:
audio_path = result.files[0]
# Check in outputs dictionary
elif hasattr(result, 'outputs') and result.outputs:
# Try to find audio file in outputs
for key, value in result.outputs.items():
if isinstance(value, str) and any(value.endswith(ext) for ext in ['.mp3', '.wav', '.flac']):
audio_path = value
break
if not audio_path:
logger.error("No audio file generated")
raise Exception("No audio file generated by workflow")
logger.info(f"✅ Generated audio (ComfyUI): {audio_path}")
return audio_path
except Exception as e:
logger.error(f"TTS generation error: {e}")
raise
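A sketch of the combined config the refactored services consume. The keys are the ones read by ComfyBaseService and the services above; the values are placeholders:

config = {
    "tts": {
        "default_workflow": "edge",  # built-in provider: no ComfyUI needed
    },
    "image": {
        "default_workflow": "image_flux.json",
        "comfyui_url": "http://127.0.0.1:8188",
        "runninghub_api_key": "rh-...",  # optional; also read from RUNNINGHUB_API_KEY
    },
}
tts = TTSService(config)
audio_path = await tts(text="Hello, world!")  # resolves to edge-tts by default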