Merge branch 'main' of https://github.com/AIDC-AI/Pixelle-Video
This commit is contained in:
23
Dockerfile
23
Dockerfile
@@ -46,16 +46,15 @@ RUN uv --version
|
||||
COPY pyproject.toml uv.lock README.md ./
|
||||
COPY pixelle_video ./pixelle_video
|
||||
|
||||
# Install Python dependencies using uv with configurable index URL
|
||||
# Create uv.toml config file to force using the mirror (most reliable method)
|
||||
# Only create config when USE_CN_MIRROR=true, otherwise use default PyPI
|
||||
RUN if [ "$USE_CN_MIRROR" = "true" ]; then \
|
||||
echo '[[index]]' > uv.toml && \
|
||||
echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> uv.toml && \
|
||||
echo 'default = true' >> uv.toml; \
|
||||
fi && \
|
||||
export UV_HTTP_TIMEOUT=300 && \
|
||||
uv sync --frozen --no-dev
|
||||
# Create virtual environment and install dependencies
|
||||
# Use -i flag to specify mirror when USE_CN_MIRROR=true
|
||||
RUN export UV_HTTP_TIMEOUT=300 && \
|
||||
uv venv && \
|
||||
if [ "$USE_CN_MIRROR" = "true" ]; then \
|
||||
uv pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \
|
||||
else \
|
||||
uv pip install -e .; \
|
||||
fi
|
||||
|
||||
# Copy rest of application code
|
||||
COPY api ./api
|
||||
@@ -65,8 +64,8 @@ COPY templates ./templates
|
||||
COPY workflows ./workflows
|
||||
COPY resources ./resources
|
||||
|
||||
# Create output and data directories
|
||||
RUN mkdir -p /app/output /app/data
|
||||
# Create output, data and temp directories
|
||||
RUN mkdir -p /app/output /app/data /app/temp
|
||||
|
||||
# Set environment variables for html2image to use chromium
|
||||
ENV BROWSER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
|
||||
@@ -76,12 +76,12 @@ async def list_tts_workflows(pixelle_video: PixelleVideoDep):
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/workflows/image", response_model=WorkflowListResponse)
|
||||
async def list_image_workflows(pixelle_video: PixelleVideoDep):
|
||||
@router.get("/workflows/media", response_model=WorkflowListResponse)
|
||||
async def list_media_workflows(pixelle_video: PixelleVideoDep):
|
||||
"""
|
||||
List available image generation workflows
|
||||
List available media workflows (both image and video)
|
||||
|
||||
Returns list of image workflows from both RunningHub and self-hosted sources.
|
||||
Returns list of all media workflows from both RunningHub and self-hosted sources.
|
||||
|
||||
Example response:
|
||||
```json
|
||||
@@ -94,13 +94,41 @@ async def list_image_workflows(pixelle_video: PixelleVideoDep):
|
||||
"path": "workflows/runninghub/image_flux.json",
|
||||
"key": "runninghub/image_flux.json",
|
||||
"workflow_id": "123456"
|
||||
},
|
||||
{
|
||||
"name": "video_wan2.1.json",
|
||||
"display_name": "video_wan2.1.json - Runninghub",
|
||||
"source": "runninghub",
|
||||
"path": "workflows/runninghub/video_wan2.1.json",
|
||||
"key": "runninghub/video_wan2.1.json",
|
||||
"workflow_id": "123457"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
"""
|
||||
try:
|
||||
# Get all workflows from media service (image generation is handled by media service)
|
||||
# Get all workflows from media service (includes both image and video)
|
||||
all_workflows = pixelle_video.media.list_workflows()
|
||||
|
||||
media_workflows = [WorkflowInfo(**wf) for wf in all_workflows]
|
||||
|
||||
return WorkflowListResponse(workflows=media_workflows)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"List media workflows error: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# Keep old endpoint for backward compatibility
|
||||
@router.get("/workflows/image", response_model=WorkflowListResponse)
|
||||
async def list_image_workflows(pixelle_video: PixelleVideoDep):
|
||||
"""
|
||||
List available image workflows (deprecated, use /workflows/media instead)
|
||||
|
||||
This endpoint is kept for backward compatibility but will filter to image_ workflows only.
|
||||
"""
|
||||
try:
|
||||
all_workflows = pixelle_video.media.list_workflows()
|
||||
|
||||
# Filter to image workflows only (filename starts with "image_")
|
||||
|
||||
@@ -63,6 +63,17 @@ async def generate_video_sync(
|
||||
try:
|
||||
logger.info(f"Sync video generation: {request_body.text[:50]}...")
|
||||
|
||||
# Auto-determine media_width and media_height from template meta tags (required)
|
||||
if not request_body.frame_template:
|
||||
raise ValueError("frame_template is required to determine media size")
|
||||
|
||||
from pixelle_video.services.frame_html import HTMLFrameGenerator
|
||||
from pixelle_video.utils.template_util import resolve_template_path
|
||||
template_path = resolve_template_path(request_body.frame_template)
|
||||
generator = HTMLFrameGenerator(template_path)
|
||||
media_width, media_height = generator.get_media_size()
|
||||
logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
|
||||
|
||||
# Build video generation parameters
|
||||
video_params = {
|
||||
"text": request_body.text,
|
||||
@@ -73,8 +84,9 @@ async def generate_video_sync(
|
||||
"max_narration_words": request_body.max_narration_words,
|
||||
"min_image_prompt_words": request_body.min_image_prompt_words,
|
||||
"max_image_prompt_words": request_body.max_image_prompt_words,
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
"image_workflow": request_body.image_workflow,
|
||||
"media_width": media_width,
|
||||
"media_height": media_height,
|
||||
"media_workflow": request_body.media_workflow,
|
||||
"video_fps": request_body.video_fps,
|
||||
"frame_template": request_body.frame_template,
|
||||
"prompt_prefix": request_body.prompt_prefix,
|
||||
@@ -150,6 +162,17 @@ async def generate_video_async(
|
||||
# Define async execution function
|
||||
async def execute_video_generation():
|
||||
"""Execute video generation in background"""
|
||||
# Auto-determine media_width and media_height from template meta tags (required)
|
||||
if not request_body.frame_template:
|
||||
raise ValueError("frame_template is required to determine media size")
|
||||
|
||||
from pixelle_video.services.frame_html import HTMLFrameGenerator
|
||||
from pixelle_video.utils.template_util import resolve_template_path
|
||||
template_path = resolve_template_path(request_body.frame_template)
|
||||
generator = HTMLFrameGenerator(template_path)
|
||||
media_width, media_height = generator.get_media_size()
|
||||
logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
|
||||
|
||||
# Build video generation parameters
|
||||
video_params = {
|
||||
"text": request_body.text,
|
||||
@@ -160,8 +183,9 @@ async def generate_video_async(
|
||||
"max_narration_words": request_body.max_narration_words,
|
||||
"min_image_prompt_words": request_body.min_image_prompt_words,
|
||||
"max_image_prompt_words": request_body.max_image_prompt_words,
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
"image_workflow": request_body.image_workflow,
|
||||
"media_width": media_width,
|
||||
"media_height": media_height,
|
||||
"media_workflow": request_body.media_workflow,
|
||||
"video_fps": request_body.video_fps,
|
||||
"frame_template": request_body.frame_template,
|
||||
"prompt_prefix": request_body.prompt_prefix,
|
||||
|
||||
@@ -56,9 +56,9 @@ class VideoGenerateRequest(BaseModel):
|
||||
min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words")
|
||||
max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
|
||||
|
||||
# === Image Parameters ===
|
||||
# Note: image_width and image_height are now auto-determined from template meta tags
|
||||
image_workflow: Optional[str] = Field(None, description="Custom image workflow")
|
||||
# === Media Parameters ===
|
||||
# Note: media_width and media_height are auto-determined from template meta tags
|
||||
media_workflow: Optional[str] = Field(None, description="Custom media workflow (image or video)")
|
||||
|
||||
# === Video Parameters ===
|
||||
video_fps: int = Field(30, ge=15, le=60, description="Video FPS")
|
||||
|
||||
@@ -23,6 +23,10 @@ from typing import List, Optional, Dict, Any
|
||||
class StoryboardConfig:
|
||||
"""Storyboard configuration parameters"""
|
||||
|
||||
# Required parameters (must come first in dataclass)
|
||||
media_width: int # Media width (image or video, required)
|
||||
media_height: int # Media height (image or video, required)
|
||||
|
||||
# Task isolation
|
||||
task_id: Optional[str] = None # Task ID for file isolation (auto-generated if None)
|
||||
|
||||
@@ -42,10 +46,8 @@ class StoryboardConfig:
|
||||
tts_speed: Optional[float] = None # TTS speed multiplier (0.5-2.0, 1.0 = normal)
|
||||
ref_audio: Optional[str] = None # Reference audio for voice cloning (ComfyUI mode only)
|
||||
|
||||
# Image parameters
|
||||
image_width: int = 1024
|
||||
image_height: int = 1024
|
||||
image_workflow: Optional[str] = None # Image workflow filename (None = use default)
|
||||
# Media workflow
|
||||
media_workflow: Optional[str] = None # Media workflow filename (image or video, None = use default)
|
||||
|
||||
# Frame template (includes size information in path)
|
||||
frame_template: str = "1080x1920/default.html" # Template path with size (e.g., "1080x1920/default.html")
|
||||
|
||||
@@ -93,8 +93,8 @@ class CustomPipeline(BasePipeline):
|
||||
tts_speed: float = 1.2,
|
||||
ref_audio: Optional[str] = None,
|
||||
|
||||
image_workflow: Optional[str] = None,
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
media_workflow: Optional[str] = None,
|
||||
# Note: media_width and media_height are auto-determined from template
|
||||
|
||||
frame_template: Optional[str] = None,
|
||||
video_fps: int = 30,
|
||||
@@ -189,8 +189,8 @@ class CustomPipeline(BasePipeline):
|
||||
# Read media size from template meta tags
|
||||
template_path = resolve_template_path(frame_template)
|
||||
generator = HTMLFrameGenerator(template_path)
|
||||
image_width, image_height = generator.get_media_size()
|
||||
logger.info(f"📐 Media size from template: {image_width}x{image_height}")
|
||||
media_width, media_height = generator.get_media_size()
|
||||
logger.info(f"📐 Media size from template: {media_width}x{media_height}")
|
||||
|
||||
if template_type == "image":
|
||||
logger.info(f"📸 Template requires image generation")
|
||||
@@ -270,9 +270,9 @@ class CustomPipeline(BasePipeline):
|
||||
tts_workflow=final_tts_workflow, # Use processed workflow
|
||||
tts_speed=tts_speed,
|
||||
ref_audio=ref_audio,
|
||||
image_width=image_width,
|
||||
image_height=image_height,
|
||||
image_workflow=image_workflow,
|
||||
media_width=media_width,
|
||||
media_height=media_height,
|
||||
media_workflow=media_workflow,
|
||||
frame_template=frame_template
|
||||
)
|
||||
|
||||
@@ -387,7 +387,7 @@ class CustomPipeline(BasePipeline):
|
||||
"tts_workflow": tts_workflow,
|
||||
"tts_speed": tts_speed,
|
||||
"ref_audio": ref_audio,
|
||||
"image_workflow": image_workflow,
|
||||
"media_workflow": media_workflow,
|
||||
"frame_template": frame_template,
|
||||
"bgm_path": bgm_path,
|
||||
"bgm_volume": bgm_volume,
|
||||
|
||||
@@ -68,8 +68,10 @@ class StandardPipeline(BasePipeline):
|
||||
|
||||
async def __call__(
|
||||
self,
|
||||
# === Input ===
|
||||
# === Input (Required) ===
|
||||
text: str,
|
||||
media_width: int, # Required: Media width (from template)
|
||||
media_height: int, # Required: Media height (from template)
|
||||
|
||||
# === Processing Mode ===
|
||||
mode: Literal["generate", "fixed"] = "generate",
|
||||
@@ -95,10 +97,8 @@ class StandardPipeline(BasePipeline):
|
||||
min_image_prompt_words: int = 30,
|
||||
max_image_prompt_words: int = 60,
|
||||
|
||||
# === Image Parameters ===
|
||||
image_width: int = 1024,
|
||||
image_height: int = 1024,
|
||||
image_workflow: Optional[str] = None,
|
||||
# === Media Workflow ===
|
||||
media_workflow: Optional[str] = None,
|
||||
|
||||
# === Video Parameters ===
|
||||
video_fps: int = 30,
|
||||
@@ -155,9 +155,9 @@ class StandardPipeline(BasePipeline):
|
||||
min_image_prompt_words: Min image prompt length
|
||||
max_image_prompt_words: Max image prompt length
|
||||
|
||||
image_width: Generated image width (default 1024)
|
||||
image_height: Generated image height (default 1024)
|
||||
image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
|
||||
media_width: Media width (image or video, required)
|
||||
media_height: Media height (image or video, required)
|
||||
media_workflow: Media workflow filename (image or video, e.g., "image_flux.json", "video_wan.json", None = use default)
|
||||
|
||||
video_fps: Video frame rate (default 30)
|
||||
|
||||
@@ -254,9 +254,9 @@ class StandardPipeline(BasePipeline):
|
||||
tts_workflow=final_tts_workflow, # Use processed workflow
|
||||
tts_speed=tts_speed,
|
||||
ref_audio=ref_audio,
|
||||
image_width=image_width,
|
||||
image_height=image_height,
|
||||
image_workflow=image_workflow,
|
||||
media_width=media_width,
|
||||
media_height=media_height,
|
||||
media_workflow=media_workflow,
|
||||
frame_template=frame_template or "1080x1920/default.html",
|
||||
template_params=template_params # Custom template parameters
|
||||
)
|
||||
@@ -374,13 +374,13 @@ class StandardPipeline(BasePipeline):
|
||||
# Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts)
|
||||
is_runninghub = (
|
||||
(config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or
|
||||
(config.image_workflow and config.image_workflow.startswith("runninghub/"))
|
||||
(config.media_workflow and config.media_workflow.startswith("runninghub/"))
|
||||
)
|
||||
|
||||
if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1:
|
||||
logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)")
|
||||
logger.info(f" TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}")
|
||||
logger.info(f" Image: {'runninghub' if config.image_workflow and config.image_workflow.startswith('runninghub/') else 'local'}")
|
||||
logger.info(f" Media: {'runninghub' if config.media_workflow and config.media_workflow.startswith('runninghub/') else 'local'}")
|
||||
|
||||
semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT)
|
||||
completed_count = 0
|
||||
@@ -541,7 +541,7 @@ class StandardPipeline(BasePipeline):
|
||||
"tts_workflow": tts_workflow,
|
||||
"tts_speed": tts_speed,
|
||||
"ref_audio": ref_audio,
|
||||
"image_workflow": image_workflow,
|
||||
"media_workflow": media_workflow,
|
||||
"prompt_prefix": prompt_prefix,
|
||||
"frame_template": frame_template,
|
||||
"template_params": template_params,
|
||||
|
||||
@@ -14,6 +14,10 @@
|
||||
Frame processor - Process single frame through complete pipeline
|
||||
|
||||
Orchestrates: TTS → Image Generation → Frame Composition → Video Segment
|
||||
|
||||
Key Feature:
|
||||
- TTS-driven video duration: Audio duration from TTS is passed to video generation workflows
|
||||
to ensure perfect sync between audio and video (no padding, no trimming needed)
|
||||
"""
|
||||
|
||||
from typing import Callable, Optional
|
||||
@@ -187,20 +191,29 @@ class FrameProcessor:
|
||||
|
||||
# Determine media type based on workflow
|
||||
# video_ prefix in workflow name indicates video generation
|
||||
workflow_name = config.image_workflow or ""
|
||||
workflow_name = config.media_workflow or ""
|
||||
is_video_workflow = "video_" in workflow_name.lower()
|
||||
media_type = "video" if is_video_workflow else "image"
|
||||
|
||||
logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})")
|
||||
|
||||
# Call Media generation (with optional preset)
|
||||
media_result = await self.core.media(
|
||||
prompt=frame.image_prompt,
|
||||
workflow=config.image_workflow, # Pass workflow from config (None = use default)
|
||||
media_type=media_type,
|
||||
width=config.image_width,
|
||||
height=config.image_height
|
||||
)
|
||||
# Build media generation parameters
|
||||
media_params = {
|
||||
"prompt": frame.image_prompt,
|
||||
"workflow": config.media_workflow, # Pass workflow from config (None = use default)
|
||||
"media_type": media_type,
|
||||
"width": config.media_width,
|
||||
"height": config.media_height
|
||||
}
|
||||
|
||||
# For video workflows: pass audio duration as target video duration
|
||||
# This ensures video length matches audio length from the source
|
||||
if is_video_workflow and frame.duration:
|
||||
media_params["duration"] = frame.duration
|
||||
logger.info(f" → Generating video with target duration: {frame.duration:.2f}s (from TTS audio)")
|
||||
|
||||
# Call Media generation
|
||||
media_result = await self.core.media(**media_params)
|
||||
|
||||
# Store media type
|
||||
frame.media_type = media_result.media_type
|
||||
|
||||
@@ -119,6 +119,7 @@ class MediaService(ComfyBaseService):
|
||||
# Common workflow parameters
|
||||
width: Optional[int] = None,
|
||||
height: Optional[int] = None,
|
||||
duration: Optional[float] = None, # Video duration in seconds (for video workflows)
|
||||
negative_prompt: Optional[str] = None,
|
||||
steps: Optional[int] = None,
|
||||
seed: Optional[int] = None,
|
||||
@@ -140,6 +141,7 @@ class MediaService(ComfyBaseService):
|
||||
runninghub_api_key: RunningHub API key (optional, overrides config)
|
||||
width: Media width
|
||||
height: Media height
|
||||
duration: Target video duration in seconds (only for video workflows, typically from TTS audio duration)
|
||||
negative_prompt: Negative prompt
|
||||
steps: Sampling steps
|
||||
seed: Random seed
|
||||
@@ -203,6 +205,10 @@ class MediaService(ComfyBaseService):
|
||||
workflow_params["width"] = width
|
||||
if height is not None:
|
||||
workflow_params["height"] = height
|
||||
if duration is not None:
|
||||
workflow_params["duration"] = duration
|
||||
if media_type == "video":
|
||||
logger.info(f"📏 Target video duration: {duration:.2f}s (from TTS audio)")
|
||||
if negative_prompt is not None:
|
||||
workflow_params["negative_prompt"] = negative_prompt
|
||||
if steps is not None:
|
||||
|
||||
@@ -380,9 +380,9 @@ class PersistenceService:
|
||||
"tts_workflow": config.tts_workflow,
|
||||
"tts_speed": config.tts_speed,
|
||||
"ref_audio": config.ref_audio,
|
||||
"image_width": config.image_width,
|
||||
"image_height": config.image_height,
|
||||
"image_workflow": config.image_workflow,
|
||||
"media_width": config.media_width,
|
||||
"media_height": config.media_height,
|
||||
"media_workflow": config.media_workflow,
|
||||
"frame_template": config.frame_template,
|
||||
"template_params": config.template_params,
|
||||
}
|
||||
@@ -402,9 +402,9 @@ class PersistenceService:
|
||||
tts_workflow=data.get("tts_workflow"),
|
||||
tts_speed=data.get("tts_speed"),
|
||||
ref_audio=data.get("ref_audio"),
|
||||
image_width=data.get("image_width", 1024),
|
||||
image_height=data.get("image_height", 1024),
|
||||
image_workflow=data.get("image_workflow"),
|
||||
media_width=data.get("media_width", data.get("image_width", 1024)), # Backward compatibility
|
||||
media_height=data.get("media_height", data.get("image_height", 1024)), # Backward compatibility
|
||||
media_workflow=data.get("media_workflow", data.get("image_workflow")), # Backward compatibility
|
||||
frame_template=data.get("frame_template", "1080x1920/default.html"),
|
||||
template_params=data.get("template_params"),
|
||||
)
|
||||
|
||||
@@ -27,6 +27,7 @@ Note: Requires FFmpeg to be installed on the system.
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import List, Literal, Optional
|
||||
|
||||
@@ -316,12 +317,16 @@ class VideoService:
|
||||
audio_volume: float = 1.0,
|
||||
video_volume: float = 0.0,
|
||||
pad_strategy: str = "freeze", # "freeze" (freeze last frame) or "black" (black screen)
|
||||
auto_adjust_duration: bool = True, # Automatically adjust video duration to match audio
|
||||
duration_tolerance: float = 0.3, # Tolerance for video being longer than audio (seconds)
|
||||
) -> str:
|
||||
"""
|
||||
Merge audio with video, using the longer duration
|
||||
Merge audio with video with intelligent duration adjustment
|
||||
|
||||
The output video duration will be the maximum of video and audio duration.
|
||||
If audio is longer than video, the video will be padded using the specified strategy.
|
||||
Automatically handles duration mismatches between video and audio:
|
||||
- If video < audio: Pad video to match audio (avoid black screen)
|
||||
- If video > audio (within tolerance): Keep as-is (acceptable)
|
||||
- If video > audio (exceeds tolerance): Trim video to match audio
|
||||
|
||||
Automatically handles videos with or without audio streams.
|
||||
- If video has no audio: adds the audio track
|
||||
@@ -339,6 +344,9 @@ class VideoService:
|
||||
pad_strategy: Strategy to pad video if audio is longer
|
||||
- "freeze": Freeze last frame (default)
|
||||
- "black": Fill with black screen
|
||||
auto_adjust_duration: Enable intelligent duration adjustment (default: True)
|
||||
duration_tolerance: Tolerance for video being longer than audio in seconds (default: 0.3)
|
||||
Videos within this tolerance won't be trimmed
|
||||
|
||||
Returns:
|
||||
Path to the output video file
|
||||
@@ -361,6 +369,28 @@ class VideoService:
|
||||
|
||||
logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s")
|
||||
|
||||
# Intelligent duration adjustment (if enabled)
|
||||
if auto_adjust_duration:
|
||||
diff = video_duration - audio_duration
|
||||
|
||||
if diff < 0:
|
||||
# Video shorter than audio → Must pad to avoid black screen
|
||||
logger.warning(f"⚠️ Video shorter than audio by {abs(diff):.2f}s, padding required")
|
||||
video = self._pad_video_to_duration(video, audio_duration, pad_strategy)
|
||||
video_duration = audio_duration # Update duration after padding
|
||||
logger.info(f"📌 Padded video to {audio_duration:.2f}s")
|
||||
|
||||
elif diff > duration_tolerance:
|
||||
# Video significantly longer than audio → Trim
|
||||
logger.info(f"⚠️ Video longer than audio by {diff:.2f}s (tolerance: {duration_tolerance}s)")
|
||||
video = self._trim_video_to_duration(video, audio_duration)
|
||||
video_duration = audio_duration # Update duration after trimming
|
||||
logger.info(f"✂️ Trimmed video to {audio_duration:.2f}s")
|
||||
|
||||
else: # 0 <= diff <= duration_tolerance
|
||||
# Video slightly longer but within tolerance → Keep as-is
|
||||
logger.info(f"✅ Duration acceptable: video={video_duration:.2f}s, audio={audio_duration:.2f}s (diff={diff:.2f}s)")
|
||||
|
||||
# Determine target duration (max of both)
|
||||
target_duration = max(video_duration, audio_duration)
|
||||
logger.info(f"Target output duration: {target_duration:.2f}s")
|
||||
@@ -382,9 +412,6 @@ class VideoService:
|
||||
video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
|
||||
else: # black
|
||||
# Generate black frames for padding duration
|
||||
from pixelle_video.utils.os_util import get_temp_path
|
||||
import os
|
||||
|
||||
# Get video properties
|
||||
probe = ffmpeg.probe(video)
|
||||
video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
|
||||
@@ -395,7 +422,7 @@ class VideoService:
|
||||
fps = fps_num / fps_den if fps_den != 0 else 30
|
||||
|
||||
# Create black video for padding
|
||||
black_video_path = get_temp_path(f"black_pad_{os.path.basename(output)}")
|
||||
black_video_path = self._get_unique_temp_path("black_pad", os.path.basename(output))
|
||||
black_input = ffmpeg.input(
|
||||
f'color=c=black:s={width}x{height}:r={fps}',
|
||||
f='lavfi',
|
||||
@@ -778,6 +805,26 @@ class VideoService:
|
||||
fade_in=0.0
|
||||
)
|
||||
|
||||
def _get_unique_temp_path(self, prefix: str, original_filename: str) -> str:
    """
    Build a temp-folder path that is unlikely to collide with other temp files

    An 8-character hex token taken from a freshly generated UUID4 is placed
    between the prefix and the original filename, so repeated or concurrent
    calls with the same arguments yield different paths.

    Args:
        prefix: Leading tag for the temp file (e.g., "trimmed", "padded", "black_pad")
        original_filename: Filename kept as the trailing part of the temp name

    Returns:
        Path produced by get_temp_path for the name
        "{prefix}_{token}_{original_filename}"

    Example:
        self._get_unique_temp_path("trimmed", "video.mp4")
        # -> ".../trimmed_a3f2d8c1_video.mp4" (the token differs on every call)
    """
    from pixelle_video.utils.os_util import get_temp_path

    # Only the first 8 hex digits of the UUID are kept to keep names short
    token = uuid.uuid4().hex[:8]
    temp_name = f"{prefix}_{token}_{original_filename}"
    return get_temp_path(temp_name)
|
||||
|
||||
def _resolve_bgm_path(self, bgm_path: str) -> str:
|
||||
"""
|
||||
Resolve BGM path (filename or custom path) with custom override support
|
||||
@@ -841,4 +888,120 @@ class VideoService:
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to list BGM files: {e}")
|
||||
return []
|
||||
|
||||
def _trim_video_to_duration(self, video: str, target_duration: float) -> str:
    """
    Trim video to specified duration

    Reads at most ``target_duration`` seconds from the input and writes them
    to a new uniquely named temp file using stream copy (no re-encoding).
    The input file itself is not modified.

    Args:
        video: Input video file path
        target_duration: Target duration in seconds

    Returns:
        Path to trimmed video (temp file)

    Raises:
        RuntimeError: If FFmpeg execution fails (the original ffmpeg.Error
            is attached as the exception cause)
    """
    output = self._get_unique_temp_path("trimmed", os.path.basename(video))

    try:
        # Both codecs are always 'copy'. The previous conditional on
        # has_audio_stream() selected 'copy' in either branch, so the extra
        # probe of the input file was pure overhead and has been dropped.
        (
            ffmpeg
            .input(video, t=target_duration)
            .output(output, vcodec='copy', acodec='copy')
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True, quiet=True)
        )
    except ffmpeg.Error as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"FFmpeg error trimming video: {error_msg}")
        # Chain the original error so the FFmpeg traceback is preserved
        raise RuntimeError(f"Failed to trim video: {error_msg}") from e

    return output
|
||||
|
||||
def _pad_video_to_duration(self, video: str, target_duration: float, pad_strategy: str = "freeze") -> str:
    """
    Pad video to specified duration by extending the last frame or adding black frames

    If the video is already at least ``target_duration`` long, the original
    path is returned unchanged and no file is written. Otherwise a new temp
    file is produced, re-encoded with libx264. Only the video stream is
    passed to the output, so the padded file carries no audio track.

    Args:
        video: Input video file path
        target_duration: Target duration in seconds
        pad_strategy: Padding strategy - "freeze" (freeze last frame) or "black"
            (black screen). Any value other than "freeze" is handled as "black".

    Returns:
        Path to padded video (temp file), or the input path when no padding is needed

    Raises:
        RuntimeError: If FFmpeg execution fails (the original ffmpeg.Error
            is attached as the exception cause)
    """
    video_duration = self._get_video_duration(video)
    pad_duration = target_duration - video_duration

    if pad_duration <= 0:
        # No padding needed, return original
        return video

    # Allocate the temp path only once we know a file will actually be written
    output = self._get_unique_temp_path("padded", os.path.basename(video))

    try:
        video_stream = ffmpeg.input(video).video

        if pad_strategy == "freeze":
            # Freeze last frame using tpad filter
            video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
        else:  # black
            # Read size and frame rate from the source so the black clip matches it
            probe = ffmpeg.probe(video)
            video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
            width = int(video_info['width'])
            height = int(video_info['height'])
            fps_str = video_info['r_frame_rate']
            fps_num, fps_den = map(int, fps_str.split('/'))
            # Fall back to 30 fps when the reported rate has a zero denominator
            fps = fps_num / fps_den if fps_den != 0 else 30

            # Create black video for padding
            black_input = ffmpeg.input(
                f'color=c=black:s={width}x{height}:r={fps}',
                f='lavfi',
                t=pad_duration
            )

            # Concatenate original video with black padding
            video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)

        # Single shared encode step: both strategies filter the stream, so
        # the output must be re-encoded rather than stream-copied
        (
            ffmpeg
            .output(
                video_stream,
                output,
                vcodec='libx264',
                preset='fast',
                crf=23
            )
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True, quiet=True)
        )

        return output
    except ffmpeg.Error as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"FFmpeg error padding video: {error_msg}")
        # Chain the original error so the FFmpeg traceback is preserved
        raise RuntimeError(f"Failed to pad video: {error_msg}") from e
|
||||
|
||||
|
||||
@@ -83,6 +83,8 @@ def get_temp_path(*paths: str) -> str:
|
||||
"""
|
||||
Get path relative to Pixelle-Video temp folder
|
||||
|
||||
Ensures temp directory exists before returning path.
|
||||
|
||||
Args:
|
||||
*paths: Path components to join
|
||||
|
||||
@@ -94,6 +96,10 @@ def get_temp_path(*paths: str) -> str:
|
||||
# Returns: "/path/to/project/temp/audio.mp3"
|
||||
"""
|
||||
temp_path = get_root_path("temp")
|
||||
|
||||
# Ensure temp directory exists
|
||||
os.makedirs(temp_path, exist_ok=True)
|
||||
|
||||
if paths:
|
||||
return os.path.join(temp_path, *paths)
|
||||
return temp_path
|
||||
@@ -102,6 +108,8 @@ def get_temp_path(*paths: str) -> str:
|
||||
def get_data_path(*paths: str) -> str:
|
||||
"""
|
||||
Get path relative to Pixelle-Video data folder
|
||||
|
||||
Ensures data directory exists before returning path.
|
||||
|
||||
Args:
|
||||
*paths: Path components to join
|
||||
@@ -114,6 +122,10 @@ def get_data_path(*paths: str) -> str:
|
||||
# Returns: "/path/to/project/data/videos/output.mp4"
|
||||
"""
|
||||
data_path = get_root_path("data")
|
||||
|
||||
# Ensure data directory exists
|
||||
os.makedirs(data_path, exist_ok=True)
|
||||
|
||||
if paths:
|
||||
return os.path.join(data_path, *paths)
|
||||
return data_path
|
||||
@@ -122,6 +134,8 @@ def get_data_path(*paths: str) -> str:
|
||||
def get_output_path(*paths: str) -> str:
|
||||
"""
|
||||
Get path relative to Pixelle-Video output folder
|
||||
|
||||
Ensures output directory exists before returning path.
|
||||
|
||||
Args:
|
||||
*paths: Path components to join
|
||||
@@ -134,6 +148,10 @@ def get_output_path(*paths: str) -> str:
|
||||
# Returns: "/path/to/project/output/video.mp4"
|
||||
"""
|
||||
output_path = get_root_path("output")
|
||||
|
||||
# Ensure output directory exists
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
||||
if paths:
|
||||
return os.path.join(output_path, *paths)
|
||||
return output_path
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "pixelle-video"
|
||||
version = "0.1.6"
|
||||
version = "0.1.7"
|
||||
description = "AI-powered video creation platform - Part of Pixelle ecosystem"
|
||||
authors = [
|
||||
{name = "Pixelle.AI"}
|
||||
|
||||
2
uv.lock
generated
2
uv.lock
generated
@@ -1664,7 +1664,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pixelle-video"
|
||||
version = "0.1.6"
|
||||
version = "0.1.7"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
|
||||
@@ -58,7 +58,7 @@ def render_single_output(pixelle_video, video_params):
|
||||
|
||||
frame_template = video_params.get("frame_template")
|
||||
custom_values_for_video = video_params.get("template_params", {})
|
||||
workflow_key = video_params.get("image_workflow")
|
||||
workflow_key = video_params.get("media_workflow")
|
||||
prompt_prefix = video_params.get("prompt_prefix", "")
|
||||
|
||||
with st.container(border=True):
|
||||
@@ -123,18 +123,20 @@ def render_single_output(pixelle_video, video_params):
|
||||
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
|
||||
|
||||
# Generate video (directly pass parameters)
|
||||
# Note: image_width and image_height are now auto-determined from template
|
||||
# Note: media_width and media_height are auto-determined from template
|
||||
video_params = {
|
||||
"text": text,
|
||||
"mode": mode,
|
||||
"title": title if title else None,
|
||||
"n_scenes": n_scenes,
|
||||
"image_workflow": workflow_key,
|
||||
"media_workflow": workflow_key,
|
||||
"frame_template": frame_template,
|
||||
"prompt_prefix": prompt_prefix,
|
||||
"bgm_path": bgm_path,
|
||||
"bgm_volume": bgm_volume if bgm_path else 0.2,
|
||||
"progress_callback": update_progress,
|
||||
"media_width": st.session_state.get('template_media_width'),
|
||||
"media_height": st.session_state.get('template_media_height'),
|
||||
}
|
||||
|
||||
# Add TTS parameters based on mode
|
||||
@@ -245,12 +247,14 @@ def render_batch_output(pixelle_video, video_params):
|
||||
shared_config = {
|
||||
"title_prefix": video_params.get("title_prefix"),
|
||||
"n_scenes": video_params.get("n_scenes") or 5,
|
||||
"image_workflow": video_params.get("image_workflow"),
|
||||
"media_workflow": video_params.get("media_workflow"),
|
||||
"frame_template": video_params.get("frame_template"),
|
||||
"prompt_prefix": video_params.get("prompt_prefix") or "",
|
||||
"bgm_path": video_params.get("bgm_path"),
|
||||
"bgm_volume": video_params.get("bgm_volume") or 0.2,
|
||||
"tts_inference_mode": video_params.get("tts_inference_mode") or "local",
|
||||
"media_width": video_params.get("media_width"),
|
||||
"media_height": video_params.get("media_height"),
|
||||
}
|
||||
|
||||
# Add TTS parameters based on mode (only add non-None values)
|
||||
@@ -368,13 +372,28 @@ def render_batch_output(pixelle_video, video_params):
|
||||
st.success(tr("batch.success_message"))
|
||||
st.info(tr("batch.view_in_history"))
|
||||
|
||||
# Button to go to History page
|
||||
if st.button(
|
||||
f"📚 {tr('batch.goto_history')}",
|
||||
type="secondary",
|
||||
use_container_width=True
|
||||
):
|
||||
st.switch_page("pages/2_📚_History.py")
|
||||
# Link styled as a button that opens the History page in a new tab (plain HTML anchor, no JavaScript)
|
||||
st.markdown(
|
||||
f"""
|
||||
<a href="/History" target="_blank">
|
||||
<button style="
|
||||
width: 100%;
|
||||
padding: 0.5rem 1rem;
|
||||
background-color: white;
|
||||
color: rgb(49, 51, 63);
|
||||
border: 1px solid rgba(49, 51, 63, 0.2);
|
||||
border-radius: 0.5rem;
|
||||
cursor: pointer;
|
||||
font-size: 1rem;
|
||||
font-weight: 400;
|
||||
text-align: center;
|
||||
">
|
||||
📚 {tr('batch.goto_history')}
|
||||
</button>
|
||||
</a>
|
||||
""",
|
||||
unsafe_allow_html=True
|
||||
)
|
||||
|
||||
# Show failed tasks if any
|
||||
if batch_result["errors"]:
|
||||
|
||||
@@ -610,7 +610,7 @@ def render_style_config(pixelle_video):
|
||||
workflow_options if workflow_options else ["No workflows found"],
|
||||
index=default_workflow_index,
|
||||
label_visibility="collapsed",
|
||||
key="image_workflow_select"
|
||||
key="media_workflow_select"
|
||||
)
|
||||
|
||||
# Get the actual workflow key (e.g., "runninghub/image_flux.json")
|
||||
@@ -621,14 +621,14 @@ def render_style_config(pixelle_video):
|
||||
workflow_key = "runninghub/image_flux.json" # fallback
|
||||
|
||||
# Get media size from template
|
||||
image_width = st.session_state.get('template_media_width', 1024)
|
||||
image_height = st.session_state.get('template_media_height', 1024)
|
||||
media_width = st.session_state.get('template_media_width')
|
||||
media_height = st.session_state.get('template_media_height')
|
||||
|
||||
# Display media size info (read-only)
|
||||
if template_media_type == "video":
|
||||
size_info_text = tr('style.video_size_info', width=image_width, height=image_height)
|
||||
size_info_text = tr('style.video_size_info', width=media_width, height=media_height)
|
||||
else:
|
||||
size_info_text = tr('style.image_size_info', width=image_width, height=image_height)
|
||||
size_info_text = tr('style.image_size_info', width=media_width, height=media_height)
|
||||
st.info(f"📐 {size_info_text}")
|
||||
|
||||
# Prompt prefix input
|
||||
@@ -679,8 +679,8 @@ def render_style_config(pixelle_video):
|
||||
prompt=final_prompt,
|
||||
workflow=workflow_key,
|
||||
media_type=template_media_type,
|
||||
width=int(image_width),
|
||||
height=int(image_height)
|
||||
width=int(media_width),
|
||||
height=int(media_height)
|
||||
))
|
||||
preview_media_path = media_result.url
|
||||
|
||||
@@ -725,8 +725,8 @@ def render_style_config(pixelle_video):
|
||||
st.caption(tr("image.not_required_hint"))
|
||||
|
||||
# Get media size from template (no media is generated in this branch, but the size is still passed along as media_width/media_height)
|
||||
image_width = st.session_state.get('template_media_width', 1024)
|
||||
image_height = st.session_state.get('template_media_height', 1024)
|
||||
media_width = st.session_state.get('template_media_width')
|
||||
media_height = st.session_state.get('template_media_height')
|
||||
|
||||
# Set default values for later use
|
||||
workflow_key = None
|
||||
@@ -741,6 +741,8 @@ def render_style_config(pixelle_video):
|
||||
"ref_audio": str(ref_audio_path) if ref_audio_path else None,
|
||||
"frame_template": frame_template,
|
||||
"template_params": custom_values_for_video if custom_values_for_video else None,
|
||||
"image_workflow": workflow_key,
|
||||
"prompt_prefix": prompt_prefix if prompt_prefix else ""
|
||||
"media_workflow": workflow_key,
|
||||
"prompt_prefix": prompt_prefix if prompt_prefix else "",
|
||||
"media_width": media_width,
|
||||
"media_height": media_height
|
||||
}
|
||||
|
||||
4
workflows/runninghub/video_wan2.2.json
Normal file
4
workflows/runninghub/video_wan2.2.json
Normal file
@@ -0,0 +1,4 @@
|
||||
{
|
||||
"source": "runninghub",
|
||||
"workflow_id": "1991693844100100097"
|
||||
}
|
||||
64
workflows/selfhost/tts_index2.json
Normal file
64
workflows/selfhost/tts_index2.json
Normal file
@@ -0,0 +1,64 @@
|
||||
{
|
||||
"3": {
|
||||
"inputs": {
|
||||
"text": "床前明月光,疑是地上霜。"
|
||||
},
|
||||
"class_type": "Text _O",
|
||||
"_meta": {
|
||||
"title": "$text.text!"
|
||||
}
|
||||
},
|
||||
"5": {
|
||||
"inputs": {
|
||||
"text": [
|
||||
"3",
|
||||
0
|
||||
],
|
||||
"mode": "Auto",
|
||||
"do_sample_mode": "on",
|
||||
"temperature": 0.8,
|
||||
"top_p": 0.9,
|
||||
"top_k": 30,
|
||||
"num_beams": 3,
|
||||
"repetition_penalty": 10,
|
||||
"length_penalty": 0,
|
||||
"max_mel_tokens": 1815,
|
||||
"max_tokens_per_sentence": 120,
|
||||
"seed": 4266796044,
|
||||
"reference_audio": [
|
||||
"12",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "IndexTTS2BaseNode",
|
||||
"_meta": {
|
||||
"title": "Index TTS 2 - Base"
|
||||
}
|
||||
},
|
||||
"8": {
|
||||
"inputs": {
|
||||
"filename_prefix": "audio/ComfyUI",
|
||||
"quality": "V0",
|
||||
"audioUI": "",
|
||||
"audio": [
|
||||
"5",
|
||||
0
|
||||
]
|
||||
},
|
||||
"class_type": "SaveAudioMP3",
|
||||
"_meta": {
|
||||
"title": "Save Audio (MP3)"
|
||||
}
|
||||
},
|
||||
"12": {
|
||||
"inputs": {
|
||||
"audio": "小裴钱.wav",
|
||||
"start_time": 0,
|
||||
"duration": 0
|
||||
},
|
||||
"class_type": "VHS_LoadAudioUpload",
|
||||
"_meta": {
|
||||
"title": "$ref_audio.audio"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user