完善 FastAPI 接口

2025-11-05 19:46:47 +08:00
parent eee604d8e9
commit 15899afb6f
11 changed files with 595 additions and 56 deletions
--- a/api/routers/tts.py
+++ b/api/routers/tts.py
@@ -20,21 +20,53 @@ async def tts_synthesize(
    """
    Text-to-Speech synthesis endpoint
    
-    Convert text to speech audio.
+    Convert text to speech audio using ComfyUI workflows.
    
    - **text**: Text to synthesize
-    - **voice_id**: Voice ID (e.g., '[Chinese] zh-CN Yunjian', '[English] en-US Aria')
+    - **workflow**: TTS workflow key (optional, uses default if not specified)
+    - **ref_audio**: Reference audio for voice cloning (optional)
+    - **voice_id**: (Deprecated) Voice ID for legacy compatibility
    
    Returns path to generated audio file and duration.
+    
+    Examples:
+    ```json
+    {
+        "text": "Hello, welcome to Pixelle-Video!",
+        "workflow": "runninghub/tts_edge.json"
+    }
+    ```
+    
+    With voice cloning:
+    ```json
+    {
+        "text": "Hello, this is a cloned voice",
+        "workflow": "runninghub/tts_index2.json",
+        "ref_audio": "path/to/reference.wav"
+    }
+    ```
    """
    try:
        logger.info(f"TTS synthesis request: {request.text[:50]}...")
        
+        # Build TTS parameters
+        tts_params = {"text": request.text}
+        
+        # Add workflow if specified
+        if request.workflow:
+            tts_params["workflow"] = request.workflow
+        
+        # Add ref_audio if specified
+        if request.ref_audio:
+            tts_params["ref_audio"] = request.ref_audio
+        
+        # Legacy voice_id support (deprecated)
+        if request.voice_id and not request.workflow:
+            logger.warning("voice_id parameter is deprecated, please use workflow instead")
+            tts_params["voice"] = request.voice_id
+        
        # Call TTS service
-        audio_path = await pixelle_video.tts(
-            text=request.text,
-            voice=request.voice_id
-        )
+        audio_path = await pixelle_video.tts(**tts_params)
        
        # Get audio duration
        duration = get_audio_duration(audio_path)