Refine FastAPI endpoints

Author: puke
Date: 2025-11-05 19:46:47 +08:00
parent eee604d8e9
commit 15899afb6f
11 changed files with 595 additions and 56 deletions

View File

@@ -30,6 +30,8 @@ from api.routers import (
video_router,
tasks_router,
files_router,
resources_router,
frame_router,
)
@@ -107,6 +109,8 @@ app.include_router(content_router, prefix=api_config.api_prefix)
app.include_router(video_router, prefix=api_config.api_prefix)
app.include_router(tasks_router, prefix=api_config.api_prefix)
app.include_router(files_router, prefix=api_config.api_prefix)
app.include_router(resources_router, prefix=api_config.api_prefix)
app.include_router(frame_router, prefix=api_config.api_prefix)
@app.get("/")
@@ -124,6 +128,9 @@ async def root():
"content": f"{api_config.api_prefix}/content",
"video": f"{api_config.api_prefix}/video",
"tasks": f"{api_config.api_prefix}/tasks",
"files": f"{api_config.api_prefix}/files",
"resources": f"{api_config.api_prefix}/resources",
"frame": f"{api_config.api_prefix}/frame",
}
}
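
The root endpoint now advertises the new "files", "resources", and "frame" prefixes alongside the existing ones. A minimal client sketch (not part of this commit; the base URL and port are assumptions, and httpx is just one possible HTTP client):

```python
import httpx

# Base URL is an assumption for illustration.
resp = httpx.get("http://localhost:8000/")
resp.raise_for_status()
# The hunk only shows the inner prefix map; the enclosing key name is not
# visible in the diff, so the whole payload is printed here.
print(resp.json())
```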

View File

@@ -10,6 +10,8 @@ from api.routers.content import router as content_router
from api.routers.video import router as video_router
from api.routers.tasks import router as tasks_router
from api.routers.files import router as files_router
from api.routers.resources import router as resources_router
from api.routers.frame import router as frame_router
__all__ = [
"health_router",
@@ -20,5 +22,7 @@ __all__ = [
"video_router",
"tasks_router",
"files_router",
"resources_router",
"frame_router",
]

View File

@@ -1,7 +1,7 @@
"""
File service endpoints
Provides access to generated files (videos, images, audio).
Provides access to generated files (videos, images, audio) and resource files.
"""
from pathlib import Path
@@ -17,15 +17,49 @@ async def get_file(file_path: str):
"""
Get file by path
Serves files from the output directory only.
Serves files from allowed directories:
- output/ - Generated files (videos, images, audio)
- workflows/ - ComfyUI workflow files
- templates/ - HTML templates
- bgm/ - Background music
- data/bgm/ - Custom background music
- data/templates/ - Custom templates
- resources/ - Other resources (images, fonts, etc.)
- **file_path**: File name or path (e.g., "abc123.mp4" or "subfolder/abc123.mp4")
- **file_path**: File path relative to allowed directories
Examples:
- "abc123.mp4" → output/abc123.mp4
- "workflows/runninghub/image_flux.json" → workflows/runninghub/image_flux.json
- "templates/1080x1920/default.html" → templates/1080x1920/default.html
- "bgm/default.mp3" → bgm/default.mp3
- "resources/example.png" → resources/example.png
Returns file for download or preview.
"""
try:
# Automatically prepend "output/" to the path
full_path = f"output/{file_path}"
# Define allowed directories (in priority order)
allowed_prefixes = [
"output/",
"workflows/",
"templates/",
"bgm/",
"data/bgm/",
"data/templates/",
"resources/",
]
# Check if path starts with allowed prefix, otherwise try output/
full_path = None
for prefix in allowed_prefixes:
if file_path.startswith(prefix):
full_path = file_path
break
# If no prefix matched, assume it's in output/ (backward compatibility)
if full_path is None:
full_path = f"output/{file_path}"
abs_path = Path.cwd() / full_path
if not abs_path.exists():
@@ -34,11 +68,19 @@ async def get_file(file_path: str):
if not abs_path.is_file():
raise HTTPException(status_code=400, detail=f"Path is not a file: {file_path}")
# Security: only allow access to output directory
# Security: only allow access to specified directories
try:
rel_path = abs_path.relative_to(Path.cwd())
if not str(rel_path).startswith("output"):
raise HTTPException(status_code=403, detail="Access denied: only output directory is accessible")
rel_path_str = str(rel_path)
# Check if path starts with any allowed prefix
is_allowed = any(rel_path_str.startswith(prefix.rstrip('/')) for prefix in allowed_prefixes)
if not is_allowed:
raise HTTPException(
status_code=403,
detail=f"Access denied: only {', '.join(p.rstrip('/') for p in allowed_prefixes)} directories are accessible"
)
except ValueError:
raise HTTPException(status_code=403, detail="Access denied")
@@ -52,6 +94,8 @@ async def get_file(file_path: str):
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.gif': 'image/gif',
'.html': 'text/html',
'.json': 'application/json',
}
media_type = media_types.get(suffix, 'application/octet-stream')
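
A standalone sketch of the path-resolution rule introduced above, mirroring the diff rather than importing from the project: paths that already start with an allowed prefix are served as-is, and anything else falls back to `output/` for backward compatibility.

```python
ALLOWED_PREFIXES = [
    "output/", "workflows/", "templates/",
    "bgm/", "data/bgm/", "data/templates/", "resources/",
]

def resolve_request_path(file_path: str) -> str:
    """Mirror of the prefix check added in this commit (illustrative only)."""
    for prefix in ALLOWED_PREFIXES:
        if file_path.startswith(prefix):
            return file_path
    # No prefix matched: assume the legacy output/ location.
    return f"output/{file_path}"

assert resolve_request_path("abc123.mp4") == "output/abc123.mp4"
assert resolve_request_path("bgm/default.mp3") == "bgm/default.mp3"
assert resolve_request_path("workflows/runninghub/image_flux.json") == "workflows/runninghub/image_flux.json"
```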

api/routers/frame.py (new file)

@@ -0,0 +1,73 @@
"""
Frame/Template rendering endpoints
"""
from fastapi import APIRouter, HTTPException
from loguru import logger
from api.dependencies import PixelleVideoDep
from api.schemas.frame import FrameRenderRequest, FrameRenderResponse
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import parse_template_size, resolve_template_path
router = APIRouter(prefix="/frame", tags=["Frame Rendering"])
@router.post("/render", response_model=FrameRenderResponse)
async def render_frame(
request: FrameRenderRequest,
pixelle_video: PixelleVideoDep
):
"""
Render a single frame using HTML template
Generates a frame image by combining template, title, text, and image.
This is useful for previewing templates or generating custom frames.
- **template**: Template key (e.g., '1080x1920/default.html')
- **title**: Optional title text
- **text**: Frame text content
- **image**: Image path (can be local path or URL)
Returns path to generated frame image.
Example:
```json
{
"template": "1080x1920/modern.html",
"title": "Welcome",
"text": "This is a beautiful frame with custom styling",
"image": "resources/example.png"
}
```
"""
try:
logger.info(f"Frame render request: template={request.template}")
# Resolve template path (handles both "default.html" and "1080x1920/default.html")
template_path = resolve_template_path(request.template)
full_template_path = f"templates/{template_path}"
# Parse template size
width, height = parse_template_size(full_template_path)
# Create HTML frame generator
generator = HTMLFrameGenerator(full_template_path)
# Generate frame
frame_path = await generator.generate_frame(
title=request.title,
text=request.text,
image=request.image
)
return FrameRenderResponse(
frame_path=frame_path,
width=width,
height=height
)
except Exception as e:
logger.error(f"Frame render error: {e}")
raise HTTPException(status_code=500, detail=str(e))
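
A minimal client sketch for the new render endpoint (not part of the commit). The base URL and the `/api/v1` prefix are assumptions; the real prefix comes from `api_config.api_prefix`, and the payload mirrors the example in the docstring above.

```python
import httpx

API = "http://localhost:8000/api/v1"  # assumed prefix

payload = {
    "template": "1080x1920/default.html",
    "title": "Welcome",
    "text": "This is a beautiful frame with custom styling",
    "image": "resources/example.png",
}
resp = httpx.post(f"{API}/frame/render", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()
print(data["frame_path"], data["width"], data["height"])
```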

api/routers/resources.py (new file)

@@ -0,0 +1,229 @@
"""
Resource discovery endpoints
Provides endpoints to discover available workflows, templates, and BGM.
"""
from pathlib import Path
from fastapi import APIRouter, HTTPException
from loguru import logger
from api.dependencies import PixelleVideoDep
from api.schemas.resources import (
WorkflowInfo,
WorkflowListResponse,
TemplateInfo,
TemplateListResponse,
BGMInfo,
BGMListResponse,
)
from pixelle_video.utils.os_util import list_resource_files, get_root_path, get_data_path
from pixelle_video.utils.template_util import get_all_templates_with_info
router = APIRouter(prefix="/resources", tags=["Resources"])
@router.get("/workflows/tts", response_model=WorkflowListResponse)
async def list_tts_workflows(pixelle_video: PixelleVideoDep):
"""
List available TTS workflows
Returns list of TTS workflows from both RunningHub and self-hosted sources.
Example response:
```json
{
"workflows": [
{
"name": "tts_edge.json",
"display_name": "tts_edge.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/tts_edge.json",
"key": "runninghub/tts_edge.json",
"workflow_id": "123456"
}
]
}
```
"""
try:
# Get all workflows from TTS service
all_workflows = pixelle_video.tts.list_workflows()
# Filter to TTS workflows only (filename starts with "tts_")
tts_workflows = [
WorkflowInfo(**wf)
for wf in all_workflows
if wf["name"].startswith("tts_")
]
return WorkflowListResponse(workflows=tts_workflows)
except Exception as e:
logger.error(f"List TTS workflows error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/workflows/image", response_model=WorkflowListResponse)
async def list_image_workflows(pixelle_video: PixelleVideoDep):
"""
List available image generation workflows
Returns list of image workflows from both RunningHub and self-hosted sources.
Example response:
```json
{
"workflows": [
{
"name": "image_flux.json",
"display_name": "image_flux.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json",
"workflow_id": "123456"
}
]
}
```
"""
try:
# Get all workflows from image service
all_workflows = pixelle_video.image.list_workflows()
# Filter to image workflows only (filename starts with "image_")
image_workflows = [
WorkflowInfo(**wf)
for wf in all_workflows
if wf["name"].startswith("image_")
]
return WorkflowListResponse(workflows=image_workflows)
except Exception as e:
logger.error(f"List image workflows error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/templates", response_model=TemplateListResponse)
async def list_templates():
"""
List available video templates
Returns list of HTML templates grouped by size (portrait, landscape, square).
Templates are merged from both default (templates/) and custom (data/templates/) directories.
Example response:
```json
{
"templates": [
{
"name": "default.html",
"display_name": "default.html",
"size": "1080x1920",
"width": 1080,
"height": 1920,
"orientation": "portrait",
"path": "templates/1080x1920/default.html",
"key": "1080x1920/default.html"
}
]
}
```
"""
try:
# Get all templates with info
all_templates = get_all_templates_with_info()
# Convert to API response format
templates = []
for t in all_templates:
templates.append(TemplateInfo(
name=t.display_info.name,
display_name=t.display_info.name,
size=t.display_info.size,
width=t.display_info.width,
height=t.display_info.height,
orientation=t.display_info.orientation,
path=t.template_path,
key=t.template_path
))
return TemplateListResponse(templates=templates)
except Exception as e:
logger.error(f"List templates error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/bgm", response_model=BGMListResponse)
async def list_bgm():
"""
List available background music files
Returns list of BGM files merged from both default (bgm/) and custom (data/bgm/) directories.
Custom files take precedence over default files with the same name.
Supported formats: mp3, wav, flac, m4a, aac, ogg
Example response:
```json
{
"bgm_files": [
{
"name": "default.mp3",
"path": "bgm/default.mp3",
"source": "default"
},
{
"name": "happy.mp3",
"path": "data/bgm/happy.mp3",
"source": "custom"
}
]
}
```
"""
try:
# Supported audio extensions
audio_extensions = ('.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg')
# Collect BGM files from both locations
bgm_files_dict = {} # {filename: {"path": str, "source": str}}
# Scan default bgm/ directory
default_bgm_dir = Path(get_root_path("bgm"))
if default_bgm_dir.exists() and default_bgm_dir.is_dir():
for item in default_bgm_dir.iterdir():
if item.is_file() and item.suffix.lower() in audio_extensions:
bgm_files_dict[item.name] = {
"path": f"bgm/{item.name}",
"source": "default"
}
# Scan custom data/bgm/ directory (overrides default)
custom_bgm_dir = Path(get_data_path("bgm"))
if custom_bgm_dir.exists() and custom_bgm_dir.is_dir():
for item in custom_bgm_dir.iterdir():
if item.is_file() and item.suffix.lower() in audio_extensions:
bgm_files_dict[item.name] = {
"path": f"data/bgm/{item.name}",
"source": "custom"
}
# Convert to response format
bgm_files = [
BGMInfo(
name=name,
path=info["path"],
source=info["source"]
)
for name, info in sorted(bgm_files_dict.items())
]
return BGMListResponse(bgm_files=bgm_files)
except Exception as e:
logger.error(f"List BGM error: {e}")
raise HTTPException(status_code=500, detail=str(e))
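
A minimal client sketch covering the discovery endpoints added in this file (not part of the commit; base URL and API prefix are assumptions):

```python
import httpx

API = "http://localhost:8000/api/v1"  # assumed prefix

for path in (
    "/resources/workflows/tts",
    "/resources/workflows/image",
    "/resources/templates",
    "/resources/bgm",
):
    resp = httpx.get(f"{API}{path}")
    resp.raise_for_status()
    print(path, "->", resp.json())
```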

View File

@@ -20,21 +20,53 @@ async def tts_synthesize(
"""
Text-to-Speech synthesis endpoint
Convert text to speech audio.
Convert text to speech audio using ComfyUI workflows.
- **text**: Text to synthesize
- **voice_id**: Voice ID (e.g., '[Chinese] zh-CN Yunjian', '[English] en-US Aria')
- **workflow**: TTS workflow key (optional, uses default if not specified)
- **ref_audio**: Reference audio for voice cloning (optional)
- **voice_id**: (Deprecated) Voice ID for legacy compatibility
Returns path to generated audio file and duration.
Examples:
```json
{
"text": "Hello, welcome to Pixelle-Video!",
"workflow": "runninghub/tts_edge.json"
}
```
With voice cloning:
```json
{
"text": "Hello, this is a cloned voice",
"workflow": "runninghub/tts_index2.json",
"ref_audio": "path/to/reference.wav"
}
```
"""
try:
logger.info(f"TTS synthesis request: {request.text[:50]}...")
# Build TTS parameters
tts_params = {"text": request.text}
# Add workflow if specified
if request.workflow:
tts_params["workflow"] = request.workflow
# Add ref_audio if specified
if request.ref_audio:
tts_params["ref_audio"] = request.ref_audio
# Legacy voice_id support (deprecated)
if request.voice_id and not request.workflow:
logger.warning("voice_id parameter is deprecated, please use workflow instead")
tts_params["voice"] = request.voice_id
# Call TTS service
audio_path = await pixelle_video.tts(
text=request.text,
voice=request.voice_id
)
audio_path = await pixelle_video.tts(**tts_params)
# Get audio duration
duration = get_audio_duration(audio_path)
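
A client sketch for the updated synthesis endpoint (not part of the commit). The base URL, API prefix, and the `/tts/synthesize` route are assumptions, since the router prefix is not visible in this hunk; the payloads mirror the docstring examples.

```python
import httpx

API = "http://localhost:8000/api/v1"   # assumed prefix
TTS_URL = f"{API}/tts/synthesize"      # assumed route (not shown in this hunk)

# Workflow-based synthesis
resp = httpx.post(TTS_URL, json={
    "text": "Hello, welcome to Pixelle-Video!",
    "workflow": "runninghub/tts_edge.json",
}, timeout=300)
print(resp.json())

# Voice cloning with a reference audio (path is illustrative)
resp = httpx.post(TTS_URL, json={
    "text": "Hello, this is a cloned voice",
    "workflow": "runninghub/tts_index2.json",
    "ref_audio": "path/to/reference.wav",
}, timeout=300)
print(resp.json())
```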

View File

@@ -51,26 +51,41 @@ async def generate_video_sync(
try:
logger.info(f"Sync video generation: {request_body.text[:50]}...")
# Build video generation parameters
video_params = {
"text": request_body.text,
"mode": request_body.mode,
"title": request_body.title,
"n_scenes": request_body.n_scenes,
"min_narration_words": request_body.min_narration_words,
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
"image_width": request_body.image_width,
"image_height": request_body.image_height,
"image_workflow": request_body.image_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix,
"bgm_path": request_body.bgm_path,
"bgm_volume": request_body.bgm_volume,
}
# Add TTS workflow if specified
if request_body.tts_workflow:
video_params["tts_workflow"] = request_body.tts_workflow
# Add ref_audio if specified
if request_body.ref_audio:
video_params["ref_audio"] = request_body.ref_audio
# Legacy voice_id support (deprecated)
if request_body.voice_id:
logger.warning("voice_id parameter is deprecated, please use tts_workflow instead")
video_params["voice_id"] = request_body.voice_id
# Call video generator service
result = await pixelle_video.generate_video(
text=request_body.text,
mode=request_body.mode,
title=request_body.title,
n_scenes=request_body.n_scenes,
voice_id=request_body.voice_id,
min_narration_words=request_body.min_narration_words,
max_narration_words=request_body.max_narration_words,
min_image_prompt_words=request_body.min_image_prompt_words,
max_image_prompt_words=request_body.max_image_prompt_words,
image_width=request_body.image_width,
image_height=request_body.image_height,
image_workflow=request_body.image_workflow,
video_fps=request_body.video_fps,
frame_template=request_body.frame_template,
prompt_prefix=request_body.prompt_prefix,
bgm_path=request_body.bgm_path,
bgm_volume=request_body.bgm_volume,
)
result = await pixelle_video.generate_video(**video_params)
# Get file size
file_size = os.path.getsize(result.video_path) if os.path.exists(result.video_path) else 0
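
An illustrative request body for the synchronous endpoint with the new TTS fields (not part of the commit). The route path and the `mode` value are assumptions; field names are taken from the VideoGenerateRequest changes later in this commit.

```python
import httpx

API = "http://localhost:8000/api/v1"  # assumed prefix

payload = {
    "text": "A short story about a lighthouse keeper.",
    "title": "The Lighthouse",
    "mode": "generate",                          # illustrative value
    "n_scenes": 3,
    "tts_workflow": "runninghub/tts_edge.json",  # replaces the deprecated voice_id
    "image_workflow": "runninghub/image_flux.json",
    "frame_template": "1080x1920/default.html",
}
resp = httpx.post(f"{API}/video/generate", json=payload, timeout=None)  # route assumed
print(resp.json())
```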
@@ -124,27 +139,42 @@ async def generate_video_async(
# Define async execution function
async def execute_video_generation():
"""Execute video generation in background"""
result = await pixelle_video.generate_video(
text=request_body.text,
mode=request_body.mode,
title=request_body.title,
n_scenes=request_body.n_scenes,
voice_id=request_body.voice_id,
min_narration_words=request_body.min_narration_words,
max_narration_words=request_body.max_narration_words,
min_image_prompt_words=request_body.min_image_prompt_words,
max_image_prompt_words=request_body.max_image_prompt_words,
image_width=request_body.image_width,
image_height=request_body.image_height,
image_workflow=request_body.image_workflow,
video_fps=request_body.video_fps,
frame_template=request_body.frame_template,
prompt_prefix=request_body.prompt_prefix,
bgm_path=request_body.bgm_path,
bgm_volume=request_body.bgm_volume,
# Build video generation parameters
video_params = {
"text": request_body.text,
"mode": request_body.mode,
"title": request_body.title,
"n_scenes": request_body.n_scenes,
"min_narration_words": request_body.min_narration_words,
"max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words,
"image_width": request_body.image_width,
"image_height": request_body.image_height,
"image_workflow": request_body.image_workflow,
"video_fps": request_body.video_fps,
"frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix,
"bgm_path": request_body.bgm_path,
"bgm_volume": request_body.bgm_volume,
# Progress callback can be added here if needed
# progress_callback=lambda event: task_manager.update_progress(...)
)
# "progress_callback": lambda event: task_manager.update_progress(...)
}
# Add TTS workflow if specified
if request_body.tts_workflow:
video_params["tts_workflow"] = request_body.tts_workflow
# Add ref_audio if specified
if request_body.ref_audio:
video_params["ref_audio"] = request_body.ref_audio
# Legacy voice_id support (deprecated)
if request_body.voice_id:
logger.warning("voice_id parameter is deprecated, please use tts_workflow instead")
video_params["voice_id"] = request_body.voice_id
result = await pixelle_video.generate_video(**video_params)
# Get file size
file_size = os.path.getsize(result.video_path) if os.path.exists(result.video_path) else 0
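
A hedged sketch of how the commented-out progress hook above could be wired. Every name here is hypothetical: neither the callback signature expected by generate_video nor task_manager's API is visible in this diff.

```python
from typing import Any, Callable

def make_progress_callback(task_manager: Any, task_id: str) -> Callable[[dict], None]:
    """Build a callback that forwards pipeline events to a task store (hypothetical API)."""
    def on_progress(event: dict) -> None:
        # Hypothetical call: forward whatever the pipeline reports for this task.
        task_manager.update_progress(task_id, event)
    return on_progress

# Hypothetical wiring inside execute_video_generation():
# video_params["progress_callback"] = make_progress_callback(task_manager, task_id)
```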

api/schemas/frame.py (new file)

@@ -0,0 +1,37 @@
"""
Frame/Template rendering API schemas
"""
from typing import Optional
from pydantic import BaseModel, Field
class FrameRenderRequest(BaseModel):
"""Frame rendering request"""
template: str = Field(
...,
description="Template key (e.g., '1080x1920/default.html'). Can also be just filename (e.g., 'default.html') to use default size."
)
title: Optional[str] = Field(None, description="Frame title (optional)")
text: str = Field(..., description="Frame text content")
image: str = Field(..., description="Image path or URL")
class Config:
json_schema_extra = {
"example": {
"template": "1080x1920/default.html",
"title": "Sample Title",
"text": "This is a sample text for the frame.",
"image": "resources/example.png"
}
}
class FrameRenderResponse(BaseModel):
"""Frame rendering response"""
success: bool = True
message: str = "Success"
frame_path: str = Field(..., description="Path to generated frame image")
width: int = Field(..., description="Frame width in pixels")
height: int = Field(..., description="Frame height in pixels")
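
A quick validation sketch for the new schemas (assuming the module path `api.schemas.frame` used by the router above); `title` is optional, the other request fields are required.

```python
from api.schemas.frame import FrameRenderRequest, FrameRenderResponse

req = FrameRenderRequest(
    template="1080x1920/default.html",
    text="This is a sample text for the frame.",
    image="resources/example.png",
)  # title omitted: it is Optional
resp = FrameRenderResponse(frame_path="output/frame_001.png", width=1080, height=1920)
print(req, resp)
```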

api/schemas/resources.py (new file)

@@ -0,0 +1,57 @@
"""
Resource discovery API schemas
"""
from typing import List, Optional
from pydantic import BaseModel, Field
class WorkflowInfo(BaseModel):
"""Workflow information"""
name: str = Field(..., description="Workflow filename")
display_name: str = Field(..., description="Display name with source info")
source: str = Field(..., description="Source (runninghub or selfhost)")
path: str = Field(..., description="Full path to workflow file")
key: str = Field(..., description="Workflow key (source/name)")
workflow_id: Optional[str] = Field(None, description="RunningHub workflow ID (if applicable)")
class WorkflowListResponse(BaseModel):
"""Workflow list response"""
success: bool = True
message: str = "Success"
workflows: List[WorkflowInfo] = Field(..., description="List of available workflows")
class TemplateInfo(BaseModel):
"""Template information"""
name: str = Field(..., description="Template filename")
display_name: str = Field(..., description="Display name")
size: str = Field(..., description="Size (e.g., 1080x1920)")
width: int = Field(..., description="Width in pixels")
height: int = Field(..., description="Height in pixels")
orientation: str = Field(..., description="Orientation (portrait/landscape/square)")
path: str = Field(..., description="Full path to template file")
key: str = Field(..., description="Template key (size/name)")
class TemplateListResponse(BaseModel):
"""Template list response"""
success: bool = True
message: str = "Success"
templates: List[TemplateInfo] = Field(..., description="List of available templates")
class BGMInfo(BaseModel):
"""BGM information"""
name: str = Field(..., description="BGM filename")
path: str = Field(..., description="Full path to BGM file")
source: str = Field(..., description="Source (default or custom)")
class BGMListResponse(BaseModel):
"""BGM list response"""
success: bool = True
message: str = "Success"
bgm_files: List[BGMInfo] = Field(..., description="List of available BGM files")

View File

@@ -2,19 +2,32 @@
TTS API schemas
"""
from typing import Optional
from pydantic import BaseModel, Field
class TTSSynthesizeRequest(BaseModel):
"""TTS synthesis request"""
text: str = Field(..., description="Text to synthesize")
voice_id: str = Field("[Chinese] zh-CN Yunjian", description="Voice ID")
workflow: Optional[str] = Field(
None,
description="TTS workflow key (e.g., 'runninghub/tts_edge.json' or 'selfhost/tts_edge.json'). If not specified, uses default workflow from config."
)
ref_audio: Optional[str] = Field(
None,
description="Reference audio path for voice cloning (optional). Can be a local file path or URL."
)
voice_id: Optional[str] = Field(
None,
description="Voice ID (deprecated, use workflow instead)"
)
class Config:
json_schema_extra = {
"example": {
"text": "Hello, welcome to Pixelle-Video!",
"voice_id": "[Chinese] zh-CN Yunjian"
"workflow": "runninghub/tts_edge.json",
"ref_audio": None
}
}
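
A quick sketch (assuming the module path `api.schemas.tts`) showing that workflow-based requests validate and that the deprecated `voice_id` is now optional rather than defaulted:

```python
from api.schemas.tts import TTSSynthesizeRequest

req = TTSSynthesizeRequest(text="Hello", workflow="runninghub/tts_edge.json")
legacy = TTSSynthesizeRequest(text="Hello", voice_id="[Chinese] zh-CN Yunjian")
print(req.workflow, req.voice_id)   # voice_id is None unless explicitly set
print(legacy.voice_id)
```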

View File

@@ -23,7 +23,20 @@ class VideoGenerateRequest(BaseModel):
# === Basic Config ===
n_scenes: int = Field(5, ge=1, le=20, description="Number of scenes (generate mode only)")
voice_id: str = Field("[Chinese] zh-CN Yunjian", description="TTS voice ID")
# === TTS Parameters ===
tts_workflow: Optional[str] = Field(
None,
description="TTS workflow key (e.g., 'runninghub/tts_edge.json'). If not specified, uses default workflow from config."
)
ref_audio: Optional[str] = Field(
None,
description="Reference audio path for voice cloning (optional)"
)
voice_id: Optional[str] = Field(
None,
description="(Deprecated) TTS voice ID for legacy compatibility"
)
# === LLM Parameters ===
min_narration_words: int = Field(5, ge=1, le=100, description="Min narration words")