AI-Video/api/routers/tts.py

"""
TTS (Text-to-Speech) endpoints
"""

from fastapi import APIRouter, HTTPException
from loguru import logger

from api.dependencies import PixelleVideoDep
from api.schemas.tts import TTSSynthesizeRequest, TTSSynthesizeResponse
from pixelle_video.utils.tts_util import get_audio_duration

router = APIRouter(prefix="/tts", tags=["TTS"])


@router.post("/synthesize", response_model=TTSSynthesizeResponse)
async def tts_synthesize(
    request: TTSSynthesizeRequest,
    pixelle_video: PixelleVideoDep
):
    """
    Text-to-Speech synthesis endpoint

    Convert text to speech audio.

    - **text**: Text to synthesize
    - **voice_id**: Voice ID (e.g., '[Chinese] zh-CN Yunjian', '[English] en-US Aria')

    Returns path to generated audio file and duration.
    """
    try:
        logger.info(f"TTS synthesis request: {request.text[:50]}...")

        # Call TTS service
        audio_path = await pixelle_video.tts(
            text=request.text,
            voice_id=request.voice_id
        )

        # Get audio duration
        duration = get_audio_duration(audio_path)

        return TTSSynthesizeResponse(
            audio_path=audio_path,
            duration=duration
        )

    except Exception as e:
        logger.error(f"TTS synthesis error: {e}")
        raise HTTPException(status_code=500, detail=str(e))