Refine FastAPI endpoints

Author: puke
Date: 2025-11-05 19:46:47 +08:00
parent eee604d8e9
commit 15899afb6f
11 changed files with 595 additions and 56 deletions

View File

@@ -30,6 +30,8 @@ from api.routers import (
video_router,
tasks_router,
files_router,
resources_router,
frame_router,
)
@@ -107,6 +109,8 @@ app.include_router(content_router, prefix=api_config.api_prefix)
app.include_router(video_router, prefix=api_config.api_prefix)
app.include_router(tasks_router, prefix=api_config.api_prefix)
app.include_router(files_router, prefix=api_config.api_prefix)
app.include_router(resources_router, prefix=api_config.api_prefix)
app.include_router(frame_router, prefix=api_config.api_prefix)
@app.get("/")
@@ -124,6 +128,9 @@ async def root():
"content": f"{api_config.api_prefix}/content",
"video": f"{api_config.api_prefix}/video",
"tasks": f"{api_config.api_prefix}/tasks",
"files": f"{api_config.api_prefix}/files",
"resources": f"{api_config.api_prefix}/resources",
"frame": f"{api_config.api_prefix}/frame",
}
}
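
The root endpoint now advertises the new "files", "resources", and "frame" prefixes alongside the existing ones. A minimal client sketch (not part of this commit; the base URL and port are assumptions, and httpx is just one possible HTTP client):

```python
import httpx

# Base URL is an assumption for illustration.
resp = httpx.get("http://localhost:8000/")
resp.raise_for_status()
# The hunk only shows the inner prefix map; the enclosing key name is not
# visible in the diff, so the whole payload is printed here.
print(resp.json())
```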

View File

@@ -10,6 +10,8 @@ from api.routers.content import router as content_router
from api.routers.video import router as video_router
from api.routers.tasks import router as tasks_router
from api.routers.files import router as files_router
from api.routers.resources import router as resources_router
from api.routers.frame import router as frame_router
__all__ = [
"health_router",
@@ -20,5 +22,7 @@ __all__ = [
"video_router",
"tasks_router",
"files_router",
"resources_router",
"frame_router",
]

View File

@@ -1,7 +1,7 @@
"""
File service endpoints
Provides access to generated files (videos, images, audio).
Provides access to generated files (videos, images, audio) and resource files.
"""
from pathlib import Path
@@ -17,15 +17,49 @@ async def get_file(file_path: str):
"""
Get file by path
Serves files from the output directory only.
Serves files from allowed directories:
- output/ - Generated files (videos, images, audio)
- workflows/ - ComfyUI workflow files
- templates/ - HTML templates
- bgm/ - Background music
- data/bgm/ - Custom background music
- data/templates/ - Custom templates
- resources/ - Other resources (images, fonts, etc.)
- **file_path**: File name or path (e.g., "abc123.mp4" or "subfolder/abc123.mp4")
- **file_path**: File path relative to allowed directories
Examples:
- "abc123.mp4" → output/abc123.mp4
- "workflows/runninghub/image_flux.json" → workflows/runninghub/image_flux.json
- "templates/1080x1920/default.html" → templates/1080x1920/default.html
- "bgm/default.mp3" → bgm/default.mp3
- "resources/example.png" → resources/example.png
Returns file for download or preview.
"""
try:
# Automatically prepend "output/" to the path
full_path = f"output/{file_path}"
# Define allowed directories (in priority order)
allowed_prefixes = [
"output/",
"workflows/",
"templates/",
"bgm/",
"data/bgm/",
"data/templates/",
"resources/",
]
# Check if path starts with allowed prefix, otherwise try output/
full_path = None
for prefix in allowed_prefixes:
if file_path.startswith(prefix):
full_path = file_path
break
# If no prefix matched, assume it's in output/ (backward compatibility)
if full_path is None:
full_path = f"output/{file_path}"
abs_path = Path.cwd() / full_path
if not abs_path.exists():
@@ -34,11 +68,19 @@ async def get_file(file_path: str):
if not abs_path.is_file():
raise HTTPException(status_code=400, detail=f"Path is not a file: {file_path}")
# Security: only allow access to output directory
# Security: only allow access to specified directories
try:
rel_path = abs_path.relative_to(Path.cwd())
if not str(rel_path).startswith("output"):
raise HTTPException(status_code=403, detail="Access denied: only output directory is accessible")
rel_path_str = str(rel_path)
# Check if path starts with any allowed prefix
is_allowed = any(rel_path_str.startswith(prefix.rstrip('/')) for prefix in allowed_prefixes)
if not is_allowed:
raise HTTPException(
status_code=403,
detail=f"Access denied: only {', '.join(p.rstrip('/') for p in allowed_prefixes)} directories are accessible"
)
except ValueError:
raise HTTPException(status_code=403, detail="Access denied")
@@ -52,6 +94,8 @@ async def get_file(file_path: str):
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.gif': 'image/gif',
'.html': 'text/html',
'.json': 'application/json',
}
media_type = media_types.get(suffix, 'application/octet-stream')
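
A standalone sketch of the path-resolution rule introduced above, mirroring the diff rather than importing from the project: paths that already start with an allowed prefix are served as-is, and anything else falls back to `output/` for backward compatibility.

```python
ALLOWED_PREFIXES = [
    "output/", "workflows/", "templates/",
    "bgm/", "data/bgm/", "data/templates/", "resources/",
]

def resolve_request_path(file_path: str) -> str:
    """Mirror of the prefix check added in this commit (illustrative only)."""
    for prefix in ALLOWED_PREFIXES:
        if file_path.startswith(prefix):
            return file_path
    # No prefix matched: assume the legacy output/ location.
    return f"output/{file_path}"

assert resolve_request_path("abc123.mp4") == "output/abc123.mp4"
assert resolve_request_path("bgm/default.mp3") == "bgm/default.mp3"
assert resolve_request_path("workflows/runninghub/image_flux.json") == "workflows/runninghub/image_flux.json"
```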

api/routers/frame.py (new file)

@@ -0,0 +1,73 @@
"""
Frame/Template rendering endpoints
"""
from fastapi import APIRouter, HTTPException
from loguru import logger
from api.dependencies import PixelleVideoDep
from api.schemas.frame import FrameRenderRequest, FrameRenderResponse
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import parse_template_size, resolve_template_path
router = APIRouter(prefix="/frame", tags=["Frame Rendering"])
@router.post("/render", response_model=FrameRenderResponse)
async def render_frame(
request: FrameRenderRequest,
pixelle_video: PixelleVideoDep
):
"""
Render a single frame using HTML template
Generates a frame image by combining template, title, text, and image.
This is useful for previewing templates or generating custom frames.
- **template**: Template key (e.g., '1080x1920/default.html')
- **title**: Optional title text
- **text**: Frame text content
- **image**: Image path (can be local path or URL)
Returns path to generated frame image.
Example:
```json
{
"template": "1080x1920/modern.html",
"title": "Welcome",
"text": "This is a beautiful frame with custom styling",
"image": "resources/example.png"
}
```
"""
try:
logger.info(f"Frame render request: template={request.template}")
# Resolve template path (handles both "default.html" and "1080x1920/default.html")
template_path = resolve_template_path(request.template)
full_template_path = f"templates/{template_path}"
# Parse template size
width, height = parse_template_size(full_template_path)
# Create HTML frame generator
generator = HTMLFrameGenerator(full_template_path)
# Generate frame
frame_path = await generator.generate_frame(
title=request.title,
text=request.text,
image=request.image
)
return FrameRenderResponse(
frame_path=frame_path,
width=width,
height=height
)
except Exception as e:
logger.error(f"Frame render error: {e}")
raise HTTPException(status_code=500, detail=str(e))
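
A minimal client sketch for the new render endpoint (not part of the commit). The base URL and the `/api/v1` prefix are assumptions; the real prefix comes from `api_config.api_prefix`, and the payload mirrors the example in the docstring above.

```python
import httpx

API = "http://localhost:8000/api/v1"  # assumed prefix

payload = {
    "template": "1080x1920/default.html",
    "title": "Welcome",
    "text": "This is a beautiful frame with custom styling",
    "image": "resources/example.png",
}
resp = httpx.post(f"{API}/frame/render", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()
print(data["frame_path"], data["width"], data["height"])
```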

api/routers/resources.py (new file)

@@ -0,0 +1,229 @@
"""
Resource discovery endpoints
Provides endpoints to discover available workflows, templates, and BGM.
"""
from pathlib import Path
from fastapi import APIRouter, HTTPException
from loguru import logger
from api.dependencies import PixelleVideoDep
from api.schemas.resources import (
WorkflowInfo,
WorkflowListResponse,
TemplateInfo,
TemplateListResponse,
BGMInfo,
BGMListResponse,
)
from pixelle_video.utils.os_util import list_resource_files, get_root_path, get_data_path
from pixelle_video.utils.template_util import get_all_templates_with_info
router = APIRouter(prefix="/resources", tags=["Resources"])
@router.get("/workflows/tts", response_model=WorkflowListResponse)
async def list_tts_workflows(pixelle_video: PixelleVideoDep):
"""
List available TTS workflows
Returns list of TTS workflows from both RunningHub and self-hosted sources.
Example response:
```json
{
"workflows": [
{
"name": "tts_edge.json",
"display_name": "tts_edge.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/tts_edge.json",
"key": "runninghub/tts_edge.json",
"workflow_id": "123456"
}
]
}
```
"""
try:
# Get all workflows from TTS service
all_workflows = pixelle_video.tts.list_workflows()
# Filter to TTS workflows only (filename starts with "tts_")
tts_workflows = [
WorkflowInfo(**wf)
for wf in all_workflows
if wf["name"].startswith("tts_")
]
return WorkflowListResponse(workflows=tts_workflows)
except Exception as e:
logger.error(f"List TTS workflows error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/workflows/image", response_model=WorkflowListResponse)
async def list_image_workflows(pixelle_video: PixelleVideoDep):
"""
List available image generation workflows
Returns list of image workflows from both RunningHub and self-hosted sources.
Example response:
```json
{
"workflows": [
{
"name": "image_flux.json",
"display_name": "image_flux.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json",
"workflow_id": "123456"
}
]
}
```
"""
try:
# Get all workflows from image service
all_workflows = pixelle_video.image.list_workflows()
# Filter to image workflows only (filename starts with "image_")
image_workflows = [
WorkflowInfo(**wf)
for wf in all_workflows
if wf["name"].startswith("image_")
]
return WorkflowListResponse(workflows=image_workflows)
except Exception as e:
logger.error(f"List image workflows error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/templates", response_model=TemplateListResponse)
async def list_templates():
"""
List available video templates
Returns list of HTML templates grouped by size (portrait, landscape, square).
Templates are merged from both default (templates/) and custom (data/templates/) directories.
Example response:
```json
{
"templates": [
{
"name": "default.html",
"display_name": "default.html",
"size": "1080x1920",
"width": 1080,
"height": 1920,
"orientation": "portrait",
"path": "templates/1080x1920/default.html",
"key": "1080x1920/default.html"
}
]
}
```
"""
try:
# Get all templates with info
all_templates = get_all_templates_with_info()
# Convert to API response format
templates = []
for t in all_templates:
templates.append(TemplateInfo(
name=t.display_info.name,
display_name=t.display_info.name,
size=t.display_info.size,
width=t.display_info.width,
height=t.display_info.height,
orientation=t.display_info.orientation,
path=t.template_path,
key=t.template_path
))
return TemplateListResponse(templates=templates)
except Exception as e:
logger.error(f"List templates error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/bgm", response_model=BGMListResponse)
async def list_bgm():
"""
List available background music files
Returns list of BGM files merged from both default (bgm/) and custom (data/bgm/) directories.
Custom files take precedence over default files with the same name.
Supported formats: mp3, wav, flac, m4a, aac, ogg
Example response:
```json
{
"bgm_files": [
{
"name": "default.mp3",
"path": "bgm/default.mp3",
"source": "default"
},
{
"name": "happy.mp3",
"path": "data/bgm/happy.mp3",
"source": "custom"
}
]
}
```
"""
try:
# Supported audio extensions
audio_extensions = ('.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg')
# Collect BGM files from both locations
bgm_files_dict = {} # {filename: {"path": str, "source": str}}
# Scan default bgm/ directory
default_bgm_dir = Path(get_root_path("bgm"))
if default_bgm_dir.exists() and default_bgm_dir.is_dir():
for item in default_bgm_dir.iterdir():
if item.is_file() and item.suffix.lower() in audio_extensions:
bgm_files_dict[item.name] = {
"path": f"bgm/{item.name}",
"source": "default"
}
# Scan custom data/bgm/ directory (overrides default)
custom_bgm_dir = Path(get_data_path("bgm"))
if custom_bgm_dir.exists() and custom_bgm_dir.is_dir():
for item in custom_bgm_dir.iterdir():
if item.is_file() and item.suffix.lower() in audio_extensions:
bgm_files_dict[item.name] = {
"path": f"data/bgm/{item.name}",
"source": "custom"
}
# Convert to response format
bgm_files = [
BGMInfo(
name=name,
path=info["path"],
source=info["source"]
)
for name, info in sorted(bgm_files_dict.items())
]
return BGMListResponse(bgm_files=bgm_files)
except Exception as e:
logger.error(f"List BGM error: {e}")
raise HTTPException(status_code=500, detail=str(e))
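
A minimal client sketch covering the discovery endpoints added in this file (not part of the commit; base URL and API prefix are assumptions):

```python
import httpx

API = "http://localhost:8000/api/v1"  # assumed prefix

for path in (
    "/resources/workflows/tts",
    "/resources/workflows/image",
    "/resources/templates",
    "/resources/bgm",
):
    resp = httpx.get(f"{API}{path}")
    resp.raise_for_status()
    print(path, "->", resp.json())
```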

View File

@@ -20,21 +20,53 @@ async def tts_synthesize(
"""
Text-to-Speech synthesis endpoint
Convert text to speech audio.
Convert text to speech audio using ComfyUI workflows.
- **text**: Text to synthesize
- **voice_id**: Voice ID (e.g., '[Chinese] zh-CN Yunjian', '[English] en-US Aria')
- **workflow**: TTS workflow key (optional, uses default if not specified)
- **ref_audio**: Reference audio for voice cloning (optional)
- **voice_id**: (Deprecated) Voice ID for legacy compatibility
Returns path to generated audio file and duration.
Examples:
```json
{
"text": "Hello, welcome to Pixelle-Video!",
"workflow": "runninghub/tts_edge.json"
}
```
With voice cloning:
```json
{
"text": "Hello, this is a cloned voice",
"workflow": "runninghub/tts_index2.json",
"ref_audio": "path/to/reference.wav"
}
```
"""
try:
logger.info(f"TTS synthesis request: {request.text[:50]}...")
# Build TTS parameters
tts_params = {"text": request.text}
# Add workflow if specified
if request.workflow:
tts_params["workflow"] = request.workflow
# Add ref_audio if specified
if request.ref_audio:
tts_params["ref_audio"] = request.ref_audio
# Legacy voice_id support (deprecated)
if request.voice_id and not request.workflow:
logger.warning("voice_id parameter is deprecated, please use workflow instead")
tts_params["voice"] = request.voice_id
# Call TTS service
audio_path = await pixelle_video.tts(
text=request.text,
voice=request.voice_id
)
audio_path = await pixelle_video.tts(**tts_params)
# Get audio duration
duration = get_audio_duration(audio_path)
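
A client sketch for the updated synthesis endpoint (not part of the commit). The base URL, API prefix, and the `/tts/synthesize` route are assumptions, since the router prefix is not visible in this hunk; the payloads mirror the docstring examples.

```python
import httpx

API = "http://localhost:8000/api/v1"   # assumed prefix
TTS_URL = f"{API}/tts/synthesize"      # assumed route (not shown in this hunk)

# Workflow-based synthesis
resp = httpx.post(TTS_URL, json={
    "text": "Hello, welcome to Pixelle-Video!",
    "workflow": "runninghub/tts_edge.json",
}, timeout=300)
print(resp.json())

# Voice cloning with a reference audio (path is illustrative)
resp = httpx.post(TTS_URL, json={
    "text": "Hello, this is a cloned voice",
    "workflow": "runninghub/tts_index2.json",
    "ref_audio": "path/to/reference.wav",
}, timeout=300)
print(resp.json())
```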

View File

@@ -51,26 +51,41 @@ async def generate_video_sync(
try:
logger.info(f"Sync video generation: {request_body.text[:50]}...")
# Build video generation parameters
video_params = {
"text": request_body.text,
"mode": request_body.mode,
"title": request_body.title,
"n_scenes": request_body.n_scenes,
"min_narration_words": request_body.min_narration_words,
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
"image_width": request_body.image_width,
"image_height": request_body.image_height,
"image_workflow": request_body.image_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix,
"bgm_path": request_body.bgm_path,
"bgm_volume": request_body.bgm_volume,
}
# Add TTS workflow if specified
if request_body.tts_workflow:
video_params["tts_workflow"] = request_body.tts_workflow
# Add ref_audio if specified
if request_body.ref_audio:
video_params["ref_audio"] = request_body.ref_audio
# Legacy voice_id support (deprecated)
if request_body.voice_id:
logger.warning("voice_id parameter is deprecated, please use tts_workflow instead")
video_params["voice_id"] = request_body.voice_id
# Call video generator service
result = await pixelle_video.generate_video(
text=request_body.text,
mode=request_body.mode,
title=request_body.title,
n_scenes=request_body.n_scenes,
voice_id=request_body.voice_id,
min_narration_words=request_body.min_narration_words,
max_narration_words=request_body.max_narration_words,
min_image_prompt_words=request_body.min_image_prompt_words,
max_image_prompt_words=request_body.max_image_prompt_words,
image_width=request_body.image_width,
image_height=request_body.image_height,
image_workflow=request_body.image_workflow,
video_fps=request_body.video_fps,
frame_template=request_body.frame_template,
prompt_prefix=request_body.prompt_prefix,
bgm_path=request_body.bgm_path,
bgm_volume=request_body.bgm_volume,
)
result = await pixelle_video.generate_video(**video_params)
# Get file size
file_size = os.path.getsize(result.video_path) if os.path.exists(result.video_path) else 0
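
An illustrative request body for the synchronous endpoint with the new TTS fields (not part of the commit). The route path and the `mode` value are assumptions; field names are taken from the VideoGenerateRequest changes later in this commit.

```python
import httpx

API = "http://localhost:8000/api/v1"  # assumed prefix

payload = {
    "text": "A short story about a lighthouse keeper.",
    "title": "The Lighthouse",
    "mode": "generate",                          # illustrative value
    "n_scenes": 3,
    "tts_workflow": "runninghub/tts_edge.json",  # replaces the deprecated voice_id
    "image_workflow": "runninghub/image_flux.json",
    "frame_template": "1080x1920/default.html",
}
resp = httpx.post(f"{API}/video/generate", json=payload, timeout=None)  # route assumed
print(resp.json())
```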
@@ -124,27 +139,42 @@ async def generate_video_async(
# Define async execution function
async def execute_video_generation():
"""Execute video generation in background"""
result = await pixelle_video.generate_video(
text=request_body.text,
mode=request_body.mode,
title=request_body.title,
n_scenes=request_body.n_scenes,
voice_id=request_body.voice_id,
min_narration_words=request_body.min_narration_words,
max_narration_words=request_body.max_narration_words,
min_image_prompt_words=request_body.min_image_prompt_words,
max_image_prompt_words=request_body.max_image_prompt_words,
image_width=request_body.image_width,
image_height=request_body.image_height,
image_workflow=request_body.image_workflow,
video_fps=request_body.video_fps,
frame_template=request_body.frame_template,
prompt_prefix=request_body.prompt_prefix,
bgm_path=request_body.bgm_path,
bgm_volume=request_body.bgm_volume,
# Build video generation parameters
video_params = {
"text": request_body.text,
"mode": request_body.mode,
"title": request_body.title,
"n_scenes": request_body.n_scenes,
"min_narration_words": request_body.min_narration_words,
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
"image_width": request_body.image_width,
"image_height": request_body.image_height,
"image_workflow": request_body.image_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix,
"bgm_path": request_body.bgm_path,
"bgm_volume": request_body.bgm_volume,
# Progress callback can be added here if needed
# progress_callback=lambda event: task_manager.update_progress(...)
)
# "progress_callback": lambda event: task_manager.update_progress(...)
}
# Add TTS workflow if specified
if request_body.tts_workflow:
video_params["tts_workflow"] = request_body.tts_workflow
# Add ref_audio if specified
if request_body.ref_audio:
video_params["ref_audio"] = request_body.ref_audio
# Legacy voice_id support (deprecated)
if request_body.voice_id:
logger.warning("voice_id parameter is deprecated, please use tts_workflow instead")
video_params["voice_id"] = request_body.voice_id
result = await pixelle_video.generate_video(**video_params)
# Get file size
file_size = os.path.getsize(result.video_path) if os.path.exists(result.video_path) else 0
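
A hedged sketch of how the commented-out progress hook above could be wired. Every name here is hypothetical: neither the callback signature expected by generate_video nor task_manager's API is visible in this diff.

```python
from typing import Any, Callable

def make_progress_callback(task_manager: Any, task_id: str) -> Callable[[dict], None]:
    """Build a callback that forwards pipeline events to a task store (hypothetical API)."""
    def on_progress(event: dict) -> None:
        # Hypothetical call: forward whatever the pipeline reports for this task.
        task_manager.update_progress(task_id, event)
    return on_progress

# Hypothetical wiring inside execute_video_generation():
# video_params["progress_callback"] = make_progress_callback(task_manager, task_id)
```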

api/schemas/frame.py (new file)

@@ -0,0 +1,37 @@
"""
Frame/Template rendering API schemas
"""
from typing import Optional
from pydantic import BaseModel, Field
class FrameRenderRequest(BaseModel):
"""Frame rendering request"""
template: str = Field(
...,
description="Template key (e.g., '1080x1920/default.html'). Can also be just filename (e.g., 'default.html') to use default size."
)
title: Optional[str] = Field(None, description="Frame title (optional)")
text: str = Field(..., description="Frame text content")
image: str = Field(..., description="Image path or URL")
class Config:
json_schema_extra = {
"example": {
"template": "1080x1920/default.html",
"title": "Sample Title",
"text": "This is a sample text for the frame.",
"image": "resources/example.png"
}
}
class FrameRenderResponse(BaseModel):
"""Frame rendering response"""
success: bool = True
message: str = "Success"
frame_path: str = Field(..., description="Path to generated frame image")
width: int = Field(..., description="Frame width in pixels")
height: int = Field(..., description="Frame height in pixels")
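
A quick validation sketch for the new schemas (assuming the module path `api.schemas.frame` used by the router above); `title` is optional, the other request fields are required.

```python
from api.schemas.frame import FrameRenderRequest, FrameRenderResponse

req = FrameRenderRequest(
    template="1080x1920/default.html",
    text="This is a sample text for the frame.",
    image="resources/example.png",
)  # title omitted: it is Optional
resp = FrameRenderResponse(frame_path="output/frame_001.png", width=1080, height=1920)
print(req, resp)
```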

api/schemas/resources.py (new file)

@@ -0,0 +1,57 @@
"""
Resource discovery API schemas
"""
from typing import List, Optional
from pydantic import BaseModel, Field
class WorkflowInfo(BaseModel):
"""Workflow information"""
name: str = Field(..., description="Workflow filename")
display_name: str = Field(..., description="Display name with source info")
source: str = Field(..., description="Source (runninghub or selfhost)")
path: str = Field(..., description="Full path to workflow file")
key: str = Field(..., description="Workflow key (source/name)")
workflow_id: Optional[str] = Field(None, description="RunningHub workflow ID (if applicable)")
class WorkflowListResponse(BaseModel):
"""Workflow list response"""
success: bool = True
message: str = "Success"
workflows: List[WorkflowInfo] = Field(..., description="List of available workflows")
class TemplateInfo(BaseModel):
"""Template information"""
name: str = Field(..., description="Template filename")
display_name: str = Field(..., description="Display name")
size: str = Field(..., description="Size (e.g., 1080x1920)")
width: int = Field(..., description="Width in pixels")
height: int = Field(..., description="Height in pixels")
orientation: str = Field(..., description="Orientation (portrait/landscape/square)")
path: str = Field(..., description="Full path to template file")
key: str = Field(..., description="Template key (size/name)")
class TemplateListResponse(BaseModel):
"""Template list response"""
success: bool = True
message: str = "Success"
templates: List[TemplateInfo] = Field(..., description="List of available templates")
class BGMInfo(BaseModel):
"""BGM information"""
name: str = Field(..., description="BGM filename")
path: str = Field(..., description="Full path to BGM file")
source: str = Field(..., description="Source (default or custom)")
class BGMListResponse(BaseModel):
"""BGM list response"""
success: bool = True
message: str = "Success"
bgm_files: List[BGMInfo] = Field(..., description="List of available BGM files")

View File

@@ -2,19 +2,32 @@
TTS API schemas
"""
from typing import Optional
from pydantic import BaseModel, Field
class TTSSynthesizeRequest(BaseModel):
"""TTS synthesis request"""
text: str = Field(..., description="Text to synthesize")
voice_id: str = Field("[Chinese] zh-CN Yunjian", description="Voice ID")
workflow: Optional[str] = Field(
None,
description="TTS workflow key (e.g., 'runninghub/tts_edge.json' or 'selfhost/tts_edge.json'). If not specified, uses default workflow from config."
)
ref_audio: Optional[str] = Field(
None,
description="Reference audio path for voice cloning (optional). Can be a local file path or URL."
)
voice_id: Optional[str] = Field(
None,
description="Voice ID (deprecated, use workflow instead)"
)
class Config:
json_schema_extra = {
"example": {
"text": "Hello, welcome to Pixelle-Video!",
"voice_id": "[Chinese] zh-CN Yunjian"
"workflow": "runninghub/tts_edge.json",
"ref_audio": None
}
}
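
A quick sketch (assuming the module path `api.schemas.tts`) showing that workflow-based requests validate and that the deprecated `voice_id` is now optional rather than defaulted:

```python
from api.schemas.tts import TTSSynthesizeRequest

req = TTSSynthesizeRequest(text="Hello", workflow="runninghub/tts_edge.json")
legacy = TTSSynthesizeRequest(text="Hello", voice_id="[Chinese] zh-CN Yunjian")
print(req.workflow, req.voice_id)   # voice_id is None unless explicitly set
print(legacy.voice_id)
```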

View File

@@ -23,7 +23,20 @@ class VideoGenerateRequest(BaseModel):
# === Basic Config ===
n_scenes: int = Field(5, ge=1, le=20, description="Number of scenes (generate mode only)")
voice_id: str = Field("[Chinese] zh-CN Yunjian", description="TTS voice ID")
# === TTS Parameters ===
tts_workflow: Optional[str] = Field(
None,
description="TTS workflow key (e.g., 'runninghub/tts_edge.json'). If not specified, uses default workflow from config."
)
ref_audio: Optional[str] = Field(
None,
description="Reference audio path for voice cloning (optional)"
)
voice_id: Optional[str] = Field(
None,
description="(Deprecated) TTS voice ID for legacy compatibility"
)
# === LLM Parameters ===
min_narration_words: int = Field(5, ge=1, le=100, description="Min narration words")