diff --git a/api/app.py b/api/app.py index d4965e2..5919917 100644 --- a/api/app.py +++ b/api/app.py @@ -30,6 +30,8 @@ from api.routers import ( video_router, tasks_router, files_router, + resources_router, + frame_router, ) @@ -107,6 +109,8 @@ app.include_router(content_router, prefix=api_config.api_prefix) app.include_router(video_router, prefix=api_config.api_prefix) app.include_router(tasks_router, prefix=api_config.api_prefix) app.include_router(files_router, prefix=api_config.api_prefix) +app.include_router(resources_router, prefix=api_config.api_prefix) +app.include_router(frame_router, prefix=api_config.api_prefix) @app.get("/") @@ -124,6 +128,9 @@ async def root(): "content": f"{api_config.api_prefix}/content", "video": f"{api_config.api_prefix}/video", "tasks": f"{api_config.api_prefix}/tasks", + "files": f"{api_config.api_prefix}/files", + "resources": f"{api_config.api_prefix}/resources", + "frame": f"{api_config.api_prefix}/frame", } } diff --git a/api/routers/__init__.py b/api/routers/__init__.py index 4f75f04..632949b 100644 --- a/api/routers/__init__.py +++ b/api/routers/__init__.py @@ -10,6 +10,8 @@ from api.routers.content import router as content_router from api.routers.video import router as video_router from api.routers.tasks import router as tasks_router from api.routers.files import router as files_router +from api.routers.resources import router as resources_router +from api.routers.frame import router as frame_router __all__ = [ "health_router", @@ -20,5 +22,7 @@ __all__ = [ "video_router", "tasks_router", "files_router", + "resources_router", + "frame_router", ] diff --git a/api/routers/files.py b/api/routers/files.py index ffeb660..474e71f 100644 --- a/api/routers/files.py +++ b/api/routers/files.py @@ -1,7 +1,7 @@ """ File service endpoints -Provides access to generated files (videos, images, audio). +Provides access to generated files (videos, images, audio) and resource files. """ from pathlib import Path @@ -17,15 +17,49 @@ async def get_file(file_path: str): """ Get file by path - Serves files from the output directory only. + Serves files from allowed directories: + - output/ - Generated files (videos, images, audio) + - workflows/ - ComfyUI workflow files + - templates/ - HTML templates + - bgm/ - Background music + - data/bgm/ - Custom background music + - data/templates/ - Custom templates + - resources/ - Other resources (images, fonts, etc.) - - **file_path**: File name or path (e.g., "abc123.mp4" or "subfolder/abc123.mp4") + - **file_path**: File path relative to allowed directories + + Examples: + - "abc123.mp4" → output/abc123.mp4 + - "workflows/runninghub/image_flux.json" → workflows/runninghub/image_flux.json + - "templates/1080x1920/default.html" → templates/1080x1920/default.html + - "bgm/default.mp3" → bgm/default.mp3 + - "resources/example.png" → resources/example.png Returns file for download or preview. """ try: - # Automatically prepend "output/" to the path - full_path = f"output/{file_path}" + # Define allowed directories (in priority order) + allowed_prefixes = [ + "output/", + "workflows/", + "templates/", + "bgm/", + "data/bgm/", + "data/templates/", + "resources/", + ] + + # Check if path starts with allowed prefix, otherwise try output/ + full_path = None + for prefix in allowed_prefixes: + if file_path.startswith(prefix): + full_path = file_path + break + + # If no prefix matched, assume it's in output/ (backward compatibility) + if full_path is None: + full_path = f"output/{file_path}" + abs_path = Path.cwd() / full_path if not abs_path.exists(): @@ -34,11 +68,19 @@ async def get_file(file_path: str): if not abs_path.is_file(): raise HTTPException(status_code=400, detail=f"Path is not a file: {file_path}") - # Security: only allow access to output directory + # Security: only allow access to specified directories try: rel_path = abs_path.relative_to(Path.cwd()) - if not str(rel_path).startswith("output"): - raise HTTPException(status_code=403, detail="Access denied: only output directory is accessible") + rel_path_str = str(rel_path) + + # Check if path starts with any allowed prefix + is_allowed = any(rel_path_str.startswith(prefix.rstrip('/')) for prefix in allowed_prefixes) + + if not is_allowed: + raise HTTPException( + status_code=403, + detail=f"Access denied: only {', '.join(p.rstrip('/') for p in allowed_prefixes)} directories are accessible" + ) except ValueError: raise HTTPException(status_code=403, detail="Access denied") @@ -52,6 +94,8 @@ async def get_file(file_path: str): '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.gif': 'image/gif', + '.html': 'text/html', + '.json': 'application/json', } media_type = media_types.get(suffix, 'application/octet-stream') diff --git a/api/routers/frame.py b/api/routers/frame.py new file mode 100644 index 0000000..ba54fca --- /dev/null +++ b/api/routers/frame.py @@ -0,0 +1,73 @@ +""" +Frame/Template rendering endpoints +""" + +from fastapi import APIRouter, HTTPException +from loguru import logger + +from api.dependencies import PixelleVideoDep +from api.schemas.frame import FrameRenderRequest, FrameRenderResponse +from pixelle_video.services.frame_html import HTMLFrameGenerator +from pixelle_video.utils.template_util import parse_template_size, resolve_template_path + +router = APIRouter(prefix="/frame", tags=["Frame Rendering"]) + + +@router.post("/render", response_model=FrameRenderResponse) +async def render_frame( + request: FrameRenderRequest, + pixelle_video: PixelleVideoDep +): + """ + Render a single frame using HTML template + + Generates a frame image by combining template, title, text, and image. + This is useful for previewing templates or generating custom frames. + + - **template**: Template key (e.g., '1080x1920/default.html') + - **title**: Optional title text + - **text**: Frame text content + - **image**: Image path (can be local path or URL) + + Returns path to generated frame image. + + Example: + ```json + { + "template": "1080x1920/modern.html", + "title": "Welcome", + "text": "This is a beautiful frame with custom styling", + "image": "resources/example.png" + } + ``` + """ + try: + logger.info(f"Frame render request: template={request.template}") + + # Resolve template path (handles both "default.html" and "1080x1920/default.html") + template_path = resolve_template_path(request.template) + full_template_path = f"templates/{template_path}" + + # Parse template size + width, height = parse_template_size(full_template_path) + + # Create HTML frame generator + generator = HTMLFrameGenerator(full_template_path) + + # Generate frame + frame_path = await generator.generate_frame( + title=request.title, + text=request.text, + image=request.image + ) + + return FrameRenderResponse( + frame_path=frame_path, + width=width, + height=height + ) + + except Exception as e: + logger.error(f"Frame render error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + diff --git a/api/routers/resources.py b/api/routers/resources.py new file mode 100644 index 0000000..ffc14ae --- /dev/null +++ b/api/routers/resources.py @@ -0,0 +1,229 @@ +""" +Resource discovery endpoints + +Provides endpoints to discover available workflows, templates, and BGM. +""" + +from pathlib import Path +from fastapi import APIRouter, HTTPException +from loguru import logger + +from api.dependencies import PixelleVideoDep +from api.schemas.resources import ( + WorkflowInfo, + WorkflowListResponse, + TemplateInfo, + TemplateListResponse, + BGMInfo, + BGMListResponse, +) +from pixelle_video.utils.os_util import list_resource_files, get_root_path, get_data_path +from pixelle_video.utils.template_util import get_all_templates_with_info + +router = APIRouter(prefix="/resources", tags=["Resources"]) + + +@router.get("/workflows/tts", response_model=WorkflowListResponse) +async def list_tts_workflows(pixelle_video: PixelleVideoDep): + """ + List available TTS workflows + + Returns list of TTS workflows from both RunningHub and self-hosted sources. + + Example response: + ```json + { + "workflows": [ + { + "name": "tts_edge.json", + "display_name": "tts_edge.json - Runninghub", + "source": "runninghub", + "path": "workflows/runninghub/tts_edge.json", + "key": "runninghub/tts_edge.json", + "workflow_id": "123456" + } + ] + } + ``` + """ + try: + # Get all workflows from TTS service + all_workflows = pixelle_video.tts.list_workflows() + + # Filter to TTS workflows only (filename starts with "tts_") + tts_workflows = [ + WorkflowInfo(**wf) + for wf in all_workflows + if wf["name"].startswith("tts_") + ] + + return WorkflowListResponse(workflows=tts_workflows) + + except Exception as e: + logger.error(f"List TTS workflows error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/workflows/image", response_model=WorkflowListResponse) +async def list_image_workflows(pixelle_video: PixelleVideoDep): + """ + List available image generation workflows + + Returns list of image workflows from both RunningHub and self-hosted sources. + + Example response: + ```json + { + "workflows": [ + { + "name": "image_flux.json", + "display_name": "image_flux.json - Runninghub", + "source": "runninghub", + "path": "workflows/runninghub/image_flux.json", + "key": "runninghub/image_flux.json", + "workflow_id": "123456" + } + ] + } + ``` + """ + try: + # Get all workflows from image service + all_workflows = pixelle_video.image.list_workflows() + + # Filter to image workflows only (filename starts with "image_") + image_workflows = [ + WorkflowInfo(**wf) + for wf in all_workflows + if wf["name"].startswith("image_") + ] + + return WorkflowListResponse(workflows=image_workflows) + + except Exception as e: + logger.error(f"List image workflows error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/templates", response_model=TemplateListResponse) +async def list_templates(): + """ + List available video templates + + Returns list of HTML templates grouped by size (portrait, landscape, square). + Templates are merged from both default (templates/) and custom (data/templates/) directories. + + Example response: + ```json + { + "templates": [ + { + "name": "default.html", + "display_name": "default.html", + "size": "1080x1920", + "width": 1080, + "height": 1920, + "orientation": "portrait", + "path": "templates/1080x1920/default.html", + "key": "1080x1920/default.html" + } + ] + } + ``` + """ + try: + # Get all templates with info + all_templates = get_all_templates_with_info() + + # Convert to API response format + templates = [] + for t in all_templates: + templates.append(TemplateInfo( + name=t.display_info.name, + display_name=t.display_info.name, + size=t.display_info.size, + width=t.display_info.width, + height=t.display_info.height, + orientation=t.display_info.orientation, + path=t.template_path, + key=t.template_path + )) + + return TemplateListResponse(templates=templates) + + except Exception as e: + logger.error(f"List templates error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/bgm", response_model=BGMListResponse) +async def list_bgm(): + """ + List available background music files + + Returns list of BGM files merged from both default (bgm/) and custom (data/bgm/) directories. + Custom files take precedence over default files with the same name. + + Supported formats: mp3, wav, flac, m4a, aac, ogg + + Example response: + ```json + { + "bgm_files": [ + { + "name": "default.mp3", + "path": "bgm/default.mp3", + "source": "default" + }, + { + "name": "happy.mp3", + "path": "data/bgm/happy.mp3", + "source": "custom" + } + ] + } + ``` + """ + try: + # Supported audio extensions + audio_extensions = ('.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg') + + # Collect BGM files from both locations + bgm_files_dict = {} # {filename: {"path": str, "source": str}} + + # Scan default bgm/ directory + default_bgm_dir = Path(get_root_path("bgm")) + if default_bgm_dir.exists() and default_bgm_dir.is_dir(): + for item in default_bgm_dir.iterdir(): + if item.is_file() and item.suffix.lower() in audio_extensions: + bgm_files_dict[item.name] = { + "path": f"bgm/{item.name}", + "source": "default" + } + + # Scan custom data/bgm/ directory (overrides default) + custom_bgm_dir = Path(get_data_path("bgm")) + if custom_bgm_dir.exists() and custom_bgm_dir.is_dir(): + for item in custom_bgm_dir.iterdir(): + if item.is_file() and item.suffix.lower() in audio_extensions: + bgm_files_dict[item.name] = { + "path": f"data/bgm/{item.name}", + "source": "custom" + } + + # Convert to response format + bgm_files = [ + BGMInfo( + name=name, + path=info["path"], + source=info["source"] + ) + for name, info in sorted(bgm_files_dict.items()) + ] + + return BGMListResponse(bgm_files=bgm_files) + + except Exception as e: + logger.error(f"List BGM error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + diff --git a/api/routers/tts.py b/api/routers/tts.py index 3c37788..2201fa8 100644 --- a/api/routers/tts.py +++ b/api/routers/tts.py @@ -20,21 +20,53 @@ async def tts_synthesize( """ Text-to-Speech synthesis endpoint - Convert text to speech audio. + Convert text to speech audio using ComfyUI workflows. - **text**: Text to synthesize - - **voice_id**: Voice ID (e.g., '[Chinese] zh-CN Yunjian', '[English] en-US Aria') + - **workflow**: TTS workflow key (optional, uses default if not specified) + - **ref_audio**: Reference audio for voice cloning (optional) + - **voice_id**: (Deprecated) Voice ID for legacy compatibility Returns path to generated audio file and duration. + + Examples: + ```json + { + "text": "Hello, welcome to Pixelle-Video!", + "workflow": "runninghub/tts_edge.json" + } + ``` + + With voice cloning: + ```json + { + "text": "Hello, this is a cloned voice", + "workflow": "runninghub/tts_index2.json", + "ref_audio": "path/to/reference.wav" + } + ``` """ try: logger.info(f"TTS synthesis request: {request.text[:50]}...") + # Build TTS parameters + tts_params = {"text": request.text} + + # Add workflow if specified + if request.workflow: + tts_params["workflow"] = request.workflow + + # Add ref_audio if specified + if request.ref_audio: + tts_params["ref_audio"] = request.ref_audio + + # Legacy voice_id support (deprecated) + if request.voice_id and not request.workflow: + logger.warning("voice_id parameter is deprecated, please use workflow instead") + tts_params["voice"] = request.voice_id + # Call TTS service - audio_path = await pixelle_video.tts( - text=request.text, - voice=request.voice_id - ) + audio_path = await pixelle_video.tts(**tts_params) # Get audio duration duration = get_audio_duration(audio_path) diff --git a/api/routers/video.py b/api/routers/video.py index 9746897..ea81f03 100644 --- a/api/routers/video.py +++ b/api/routers/video.py @@ -51,26 +51,41 @@ async def generate_video_sync( try: logger.info(f"Sync video generation: {request_body.text[:50]}...") + # Build video generation parameters + video_params = { + "text": request_body.text, + "mode": request_body.mode, + "title": request_body.title, + "n_scenes": request_body.n_scenes, + "min_narration_words": request_body.min_narration_words, + "max_narration_words": request_body.max_narration_words, + "min_image_prompt_words": request_body.min_image_prompt_words, + "max_image_prompt_words": request_body.max_image_prompt_words, + "image_width": request_body.image_width, + "image_height": request_body.image_height, + "image_workflow": request_body.image_workflow, + "video_fps": request_body.video_fps, + "frame_template": request_body.frame_template, + "prompt_prefix": request_body.prompt_prefix, + "bgm_path": request_body.bgm_path, + "bgm_volume": request_body.bgm_volume, + } + + # Add TTS workflow if specified + if request_body.tts_workflow: + video_params["tts_workflow"] = request_body.tts_workflow + + # Add ref_audio if specified + if request_body.ref_audio: + video_params["ref_audio"] = request_body.ref_audio + + # Legacy voice_id support (deprecated) + if request_body.voice_id: + logger.warning("voice_id parameter is deprecated, please use tts_workflow instead") + video_params["voice_id"] = request_body.voice_id + # Call video generator service - result = await pixelle_video.generate_video( - text=request_body.text, - mode=request_body.mode, - title=request_body.title, - n_scenes=request_body.n_scenes, - voice_id=request_body.voice_id, - min_narration_words=request_body.min_narration_words, - max_narration_words=request_body.max_narration_words, - min_image_prompt_words=request_body.min_image_prompt_words, - max_image_prompt_words=request_body.max_image_prompt_words, - image_width=request_body.image_width, - image_height=request_body.image_height, - image_workflow=request_body.image_workflow, - video_fps=request_body.video_fps, - frame_template=request_body.frame_template, - prompt_prefix=request_body.prompt_prefix, - bgm_path=request_body.bgm_path, - bgm_volume=request_body.bgm_volume, - ) + result = await pixelle_video.generate_video(**video_params) # Get file size file_size = os.path.getsize(result.video_path) if os.path.exists(result.video_path) else 0 @@ -124,27 +139,42 @@ async def generate_video_async( # Define async execution function async def execute_video_generation(): """Execute video generation in background""" - result = await pixelle_video.generate_video( - text=request_body.text, - mode=request_body.mode, - title=request_body.title, - n_scenes=request_body.n_scenes, - voice_id=request_body.voice_id, - min_narration_words=request_body.min_narration_words, - max_narration_words=request_body.max_narration_words, - min_image_prompt_words=request_body.min_image_prompt_words, - max_image_prompt_words=request_body.max_image_prompt_words, - image_width=request_body.image_width, - image_height=request_body.image_height, - image_workflow=request_body.image_workflow, - video_fps=request_body.video_fps, - frame_template=request_body.frame_template, - prompt_prefix=request_body.prompt_prefix, - bgm_path=request_body.bgm_path, - bgm_volume=request_body.bgm_volume, + # Build video generation parameters + video_params = { + "text": request_body.text, + "mode": request_body.mode, + "title": request_body.title, + "n_scenes": request_body.n_scenes, + "min_narration_words": request_body.min_narration_words, + "max_narration_words": request_body.max_narration_words, + "min_image_prompt_words": request_body.min_image_prompt_words, + "max_image_prompt_words": request_body.max_image_prompt_words, + "image_width": request_body.image_width, + "image_height": request_body.image_height, + "image_workflow": request_body.image_workflow, + "video_fps": request_body.video_fps, + "frame_template": request_body.frame_template, + "prompt_prefix": request_body.prompt_prefix, + "bgm_path": request_body.bgm_path, + "bgm_volume": request_body.bgm_volume, # Progress callback can be added here if needed - # progress_callback=lambda event: task_manager.update_progress(...) - ) + # "progress_callback": lambda event: task_manager.update_progress(...) + } + + # Add TTS workflow if specified + if request_body.tts_workflow: + video_params["tts_workflow"] = request_body.tts_workflow + + # Add ref_audio if specified + if request_body.ref_audio: + video_params["ref_audio"] = request_body.ref_audio + + # Legacy voice_id support (deprecated) + if request_body.voice_id: + logger.warning("voice_id parameter is deprecated, please use tts_workflow instead") + video_params["voice_id"] = request_body.voice_id + + result = await pixelle_video.generate_video(**video_params) # Get file size file_size = os.path.getsize(result.video_path) if os.path.exists(result.video_path) else 0 diff --git a/api/schemas/frame.py b/api/schemas/frame.py new file mode 100644 index 0000000..671633e --- /dev/null +++ b/api/schemas/frame.py @@ -0,0 +1,37 @@ +""" +Frame/Template rendering API schemas +""" + +from typing import Optional +from pydantic import BaseModel, Field + + +class FrameRenderRequest(BaseModel): + """Frame rendering request""" + template: str = Field( + ..., + description="Template key (e.g., '1080x1920/default.html'). Can also be just filename (e.g., 'default.html') to use default size." + ) + title: Optional[str] = Field(None, description="Frame title (optional)") + text: str = Field(..., description="Frame text content") + image: str = Field(..., description="Image path or URL") + + class Config: + json_schema_extra = { + "example": { + "template": "1080x1920/default.html", + "title": "Sample Title", + "text": "This is a sample text for the frame.", + "image": "resources/example.png" + } + } + + +class FrameRenderResponse(BaseModel): + """Frame rendering response""" + success: bool = True + message: str = "Success" + frame_path: str = Field(..., description="Path to generated frame image") + width: int = Field(..., description="Frame width in pixels") + height: int = Field(..., description="Frame height in pixels") + diff --git a/api/schemas/resources.py b/api/schemas/resources.py new file mode 100644 index 0000000..3c63714 --- /dev/null +++ b/api/schemas/resources.py @@ -0,0 +1,57 @@ +""" +Resource discovery API schemas +""" + +from typing import List, Optional +from pydantic import BaseModel, Field + + +class WorkflowInfo(BaseModel): + """Workflow information""" + name: str = Field(..., description="Workflow filename") + display_name: str = Field(..., description="Display name with source info") + source: str = Field(..., description="Source (runninghub or selfhost)") + path: str = Field(..., description="Full path to workflow file") + key: str = Field(..., description="Workflow key (source/name)") + workflow_id: Optional[str] = Field(None, description="RunningHub workflow ID (if applicable)") + + +class WorkflowListResponse(BaseModel): + """Workflow list response""" + success: bool = True + message: str = "Success" + workflows: List[WorkflowInfo] = Field(..., description="List of available workflows") + + +class TemplateInfo(BaseModel): + """Template information""" + name: str = Field(..., description="Template filename") + display_name: str = Field(..., description="Display name") + size: str = Field(..., description="Size (e.g., 1080x1920)") + width: int = Field(..., description="Width in pixels") + height: int = Field(..., description="Height in pixels") + orientation: str = Field(..., description="Orientation (portrait/landscape/square)") + path: str = Field(..., description="Full path to template file") + key: str = Field(..., description="Template key (size/name)") + + +class TemplateListResponse(BaseModel): + """Template list response""" + success: bool = True + message: str = "Success" + templates: List[TemplateInfo] = Field(..., description="List of available templates") + + +class BGMInfo(BaseModel): + """BGM information""" + name: str = Field(..., description="BGM filename") + path: str = Field(..., description="Full path to BGM file") + source: str = Field(..., description="Source (default or custom)") + + +class BGMListResponse(BaseModel): + """BGM list response""" + success: bool = True + message: str = "Success" + bgm_files: List[BGMInfo] = Field(..., description="List of available BGM files") + diff --git a/api/schemas/tts.py b/api/schemas/tts.py index 632abd8..41dce9a 100644 --- a/api/schemas/tts.py +++ b/api/schemas/tts.py @@ -2,19 +2,32 @@ TTS API schemas """ +from typing import Optional from pydantic import BaseModel, Field class TTSSynthesizeRequest(BaseModel): """TTS synthesis request""" text: str = Field(..., description="Text to synthesize") - voice_id: str = Field("[Chinese] zh-CN Yunjian", description="Voice ID") + workflow: Optional[str] = Field( + None, + description="TTS workflow key (e.g., 'runninghub/tts_edge.json' or 'selfhost/tts_edge.json'). If not specified, uses default workflow from config." + ) + ref_audio: Optional[str] = Field( + None, + description="Reference audio path for voice cloning (optional). Can be a local file path or URL." + ) + voice_id: Optional[str] = Field( + None, + description="Voice ID (deprecated, use workflow instead)" + ) class Config: json_schema_extra = { "example": { "text": "Hello, welcome to Pixelle-Video!", - "voice_id": "[Chinese] zh-CN Yunjian" + "workflow": "runninghub/tts_edge.json", + "ref_audio": None } } diff --git a/api/schemas/video.py b/api/schemas/video.py index 715c3fd..d74240c 100644 --- a/api/schemas/video.py +++ b/api/schemas/video.py @@ -23,7 +23,20 @@ class VideoGenerateRequest(BaseModel): # === Basic Config === n_scenes: int = Field(5, ge=1, le=20, description="Number of scenes (generate mode only)") - voice_id: str = Field("[Chinese] zh-CN Yunjian", description="TTS voice ID") + + # === TTS Parameters === + tts_workflow: Optional[str] = Field( + None, + description="TTS workflow key (e.g., 'runninghub/tts_edge.json'). If not specified, uses default workflow from config." + ) + ref_audio: Optional[str] = Field( + None, + description="Reference audio path for voice cloning (optional)" + ) + voice_id: Optional[str] = Field( + None, + description="(Deprecated) TTS voice ID for legacy compatibility" + ) # === LLM Parameters === min_narration_words: int = Field(5, ge=1, le=100, description="Min narration words")