This commit is contained in:
li
2025-11-21 17:27:04 +08:00
18 changed files with 438 additions and 96 deletions

View File

@@ -46,16 +46,15 @@ RUN uv --version
COPY pyproject.toml uv.lock README.md ./ COPY pyproject.toml uv.lock README.md ./
COPY pixelle_video ./pixelle_video COPY pixelle_video ./pixelle_video
# Install Python dependencies using uv with configurable index URL # Create virtual environment and install dependencies
# Create uv.toml config file to force using the mirror (most reliable method) # Use -i flag to specify mirror when USE_CN_MIRROR=true
# Only create config when USE_CN_MIRROR=true, otherwise use default PyPI RUN export UV_HTTP_TIMEOUT=300 && \
RUN if [ "$USE_CN_MIRROR" = "true" ]; then \ uv venv && \
echo '[[index]]' > uv.toml && \ if [ "$USE_CN_MIRROR" = "true" ]; then \
echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> uv.toml && \ uv pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple; \
echo 'default = true' >> uv.toml; \ else \
fi && \ uv pip install -e .; \
export UV_HTTP_TIMEOUT=300 && \ fi
uv sync --frozen --no-dev
# Copy rest of application code # Copy rest of application code
COPY api ./api COPY api ./api
@@ -65,8 +64,8 @@ COPY templates ./templates
COPY workflows ./workflows COPY workflows ./workflows
COPY resources ./resources COPY resources ./resources
# Create output and data directories # Create output, data and temp directories
RUN mkdir -p /app/output /app/data RUN mkdir -p /app/output /app/data /app/temp
# Set environment variables for html2image to use chromium # Set environment variables for html2image to use chromium
ENV BROWSER_EXECUTABLE_PATH=/usr/bin/chromium ENV BROWSER_EXECUTABLE_PATH=/usr/bin/chromium

View File

@@ -76,12 +76,12 @@ async def list_tts_workflows(pixelle_video: PixelleVideoDep):
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@router.get("/workflows/image", response_model=WorkflowListResponse) @router.get("/workflows/media", response_model=WorkflowListResponse)
async def list_image_workflows(pixelle_video: PixelleVideoDep): async def list_media_workflows(pixelle_video: PixelleVideoDep):
""" """
List available image generation workflows List available media workflows (both image and video)
Returns list of image workflows from both RunningHub and self-hosted sources. Returns list of all media workflows from both RunningHub and self-hosted sources.
Example response: Example response:
```json ```json
@@ -94,13 +94,41 @@ async def list_image_workflows(pixelle_video: PixelleVideoDep):
"path": "workflows/runninghub/image_flux.json", "path": "workflows/runninghub/image_flux.json",
"key": "runninghub/image_flux.json", "key": "runninghub/image_flux.json",
"workflow_id": "123456" "workflow_id": "123456"
},
{
"name": "video_wan2.1.json",
"display_name": "video_wan2.1.json - Runninghub",
"source": "runninghub",
"path": "workflows/runninghub/video_wan2.1.json",
"key": "runninghub/video_wan2.1.json",
"workflow_id": "123457"
} }
] ]
} }
``` ```
""" """
try: try:
# Get all workflows from media service (image generation is handled by media service) # Get all workflows from media service (includes both image and video)
all_workflows = pixelle_video.media.list_workflows()
media_workflows = [WorkflowInfo(**wf) for wf in all_workflows]
return WorkflowListResponse(workflows=media_workflows)
except Exception as e:
logger.error(f"List media workflows error: {e}")
raise HTTPException(status_code=500, detail=str(e))
# Keep old endpoint for backward compatibility
@router.get("/workflows/image", response_model=WorkflowListResponse)
async def list_image_workflows(pixelle_video: PixelleVideoDep):
"""
List available image workflows (deprecated, use /workflows/media instead)
This endpoint is kept for backward compatibility but will filter to image_ workflows only.
"""
try:
all_workflows = pixelle_video.media.list_workflows() all_workflows = pixelle_video.media.list_workflows()
# Filter to image workflows only (filename starts with "image_") # Filter to image workflows only (filename starts with "image_")

View File

@@ -63,6 +63,17 @@ async def generate_video_sync(
try: try:
logger.info(f"Sync video generation: {request_body.text[:50]}...") logger.info(f"Sync video generation: {request_body.text[:50]}...")
# Auto-determine media_width and media_height from template meta tags (required)
if not request_body.frame_template:
raise ValueError("frame_template is required to determine media size")
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import resolve_template_path
template_path = resolve_template_path(request_body.frame_template)
generator = HTMLFrameGenerator(template_path)
media_width, media_height = generator.get_media_size()
logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
# Build video generation parameters # Build video generation parameters
video_params = { video_params = {
"text": request_body.text, "text": request_body.text,
@@ -73,8 +84,9 @@ async def generate_video_sync(
"max_narration_words": request_body.max_narration_words, "max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words, "min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words, "max_image_prompt_words": request_body.max_image_prompt_words,
# Note: image_width and image_height are now auto-determined from template "media_width": media_width,
"image_workflow": request_body.image_workflow, "media_height": media_height,
"media_workflow": request_body.media_workflow,
"video_fps": request_body.video_fps, "video_fps": request_body.video_fps,
"frame_template": request_body.frame_template, "frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix, "prompt_prefix": request_body.prompt_prefix,
@@ -150,6 +162,17 @@ async def generate_video_async(
# Define async execution function # Define async execution function
async def execute_video_generation(): async def execute_video_generation():
"""Execute video generation in background""" """Execute video generation in background"""
# Auto-determine media_width and media_height from template meta tags (required)
if not request_body.frame_template:
raise ValueError("frame_template is required to determine media size")
from pixelle_video.services.frame_html import HTMLFrameGenerator
from pixelle_video.utils.template_util import resolve_template_path
template_path = resolve_template_path(request_body.frame_template)
generator = HTMLFrameGenerator(template_path)
media_width, media_height = generator.get_media_size()
logger.debug(f"Auto-determined media size from template: {media_width}x{media_height}")
# Build video generation parameters # Build video generation parameters
video_params = { video_params = {
"text": request_body.text, "text": request_body.text,
@@ -160,8 +183,9 @@ async def generate_video_async(
"max_narration_words": request_body.max_narration_words, "max_narration_words": request_body.max_narration_words,
"min_image_prompt_words": request_body.min_image_prompt_words, "min_image_prompt_words": request_body.min_image_prompt_words,
"max_image_prompt_words": request_body.max_image_prompt_words, "max_image_prompt_words": request_body.max_image_prompt_words,
# Note: image_width and image_height are now auto-determined from template "media_width": media_width,
"image_workflow": request_body.image_workflow, "media_height": media_height,
"media_workflow": request_body.media_workflow,
"video_fps": request_body.video_fps, "video_fps": request_body.video_fps,
"frame_template": request_body.frame_template, "frame_template": request_body.frame_template,
"prompt_prefix": request_body.prompt_prefix, "prompt_prefix": request_body.prompt_prefix,

View File

@@ -56,9 +56,9 @@ class VideoGenerateRequest(BaseModel):
min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words") min_image_prompt_words: int = Field(30, ge=10, le=100, description="Min image prompt words")
max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words") max_image_prompt_words: int = Field(60, ge=10, le=200, description="Max image prompt words")
# === Image Parameters === # === Media Parameters ===
# Note: image_width and image_height are now auto-determined from template meta tags # Note: media_width and media_height are auto-determined from template meta tags
image_workflow: Optional[str] = Field(None, description="Custom image workflow") media_workflow: Optional[str] = Field(None, description="Custom media workflow (image or video)")
# === Video Parameters === # === Video Parameters ===
video_fps: int = Field(30, ge=15, le=60, description="Video FPS") video_fps: int = Field(30, ge=15, le=60, description="Video FPS")

View File

@@ -23,6 +23,10 @@ from typing import List, Optional, Dict, Any
class StoryboardConfig: class StoryboardConfig:
"""Storyboard configuration parameters""" """Storyboard configuration parameters"""
# Required parameters (must come first in dataclass)
media_width: int # Media width (image or video, required)
media_height: int # Media height (image or video, required)
# Task isolation # Task isolation
task_id: Optional[str] = None # Task ID for file isolation (auto-generated if None) task_id: Optional[str] = None # Task ID for file isolation (auto-generated if None)
@@ -42,10 +46,8 @@ class StoryboardConfig:
tts_speed: Optional[float] = None # TTS speed multiplier (0.5-2.0, 1.0 = normal) tts_speed: Optional[float] = None # TTS speed multiplier (0.5-2.0, 1.0 = normal)
ref_audio: Optional[str] = None # Reference audio for voice cloning (ComfyUI mode only) ref_audio: Optional[str] = None # Reference audio for voice cloning (ComfyUI mode only)
# Image parameters # Media workflow
image_width: int = 1024 media_workflow: Optional[str] = None # Media workflow filename (image or video, None = use default)
image_height: int = 1024
image_workflow: Optional[str] = None # Image workflow filename (None = use default)
# Frame template (includes size information in path) # Frame template (includes size information in path)
frame_template: str = "1080x1920/default.html" # Template path with size (e.g., "1080x1920/default.html") frame_template: str = "1080x1920/default.html" # Template path with size (e.g., "1080x1920/default.html")

View File

@@ -93,8 +93,8 @@ class CustomPipeline(BasePipeline):
tts_speed: float = 1.2, tts_speed: float = 1.2,
ref_audio: Optional[str] = None, ref_audio: Optional[str] = None,
image_workflow: Optional[str] = None, media_workflow: Optional[str] = None,
# Note: image_width and image_height are now auto-determined from template # Note: media_width and media_height are auto-determined from template
frame_template: Optional[str] = None, frame_template: Optional[str] = None,
video_fps: int = 30, video_fps: int = 30,
@@ -189,8 +189,8 @@ class CustomPipeline(BasePipeline):
# Read media size from template meta tags # Read media size from template meta tags
template_path = resolve_template_path(frame_template) template_path = resolve_template_path(frame_template)
generator = HTMLFrameGenerator(template_path) generator = HTMLFrameGenerator(template_path)
image_width, image_height = generator.get_media_size() media_width, media_height = generator.get_media_size()
logger.info(f"📐 Media size from template: {image_width}x{image_height}") logger.info(f"📐 Media size from template: {media_width}x{media_height}")
if template_type == "image": if template_type == "image":
logger.info(f"📸 Template requires image generation") logger.info(f"📸 Template requires image generation")
@@ -270,9 +270,9 @@ class CustomPipeline(BasePipeline):
tts_workflow=final_tts_workflow, # Use processed workflow tts_workflow=final_tts_workflow, # Use processed workflow
tts_speed=tts_speed, tts_speed=tts_speed,
ref_audio=ref_audio, ref_audio=ref_audio,
image_width=image_width, media_width=media_width,
image_height=image_height, media_height=media_height,
image_workflow=image_workflow, media_workflow=media_workflow,
frame_template=frame_template frame_template=frame_template
) )
@@ -387,7 +387,7 @@ class CustomPipeline(BasePipeline):
"tts_workflow": tts_workflow, "tts_workflow": tts_workflow,
"tts_speed": tts_speed, "tts_speed": tts_speed,
"ref_audio": ref_audio, "ref_audio": ref_audio,
"image_workflow": image_workflow, "media_workflow": media_workflow,
"frame_template": frame_template, "frame_template": frame_template,
"bgm_path": bgm_path, "bgm_path": bgm_path,
"bgm_volume": bgm_volume, "bgm_volume": bgm_volume,

View File

@@ -68,8 +68,10 @@ class StandardPipeline(BasePipeline):
async def __call__( async def __call__(
self, self,
# === Input === # === Input (Required) ===
text: str, text: str,
media_width: int, # Required: Media width (from template)
media_height: int, # Required: Media height (from template)
# === Processing Mode === # === Processing Mode ===
mode: Literal["generate", "fixed"] = "generate", mode: Literal["generate", "fixed"] = "generate",
@@ -95,10 +97,8 @@ class StandardPipeline(BasePipeline):
min_image_prompt_words: int = 30, min_image_prompt_words: int = 30,
max_image_prompt_words: int = 60, max_image_prompt_words: int = 60,
# === Image Parameters === # === Media Workflow ===
image_width: int = 1024, media_workflow: Optional[str] = None,
image_height: int = 1024,
image_workflow: Optional[str] = None,
# === Video Parameters === # === Video Parameters ===
video_fps: int = 30, video_fps: int = 30,
@@ -155,9 +155,9 @@ class StandardPipeline(BasePipeline):
min_image_prompt_words: Min image prompt length min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length max_image_prompt_words: Max image prompt length
image_width: Generated image width (default 1024) media_width: Media width (image or video, required)
image_height: Generated image height (default 1024) media_height: Media height (image or video, required)
image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default) media_workflow: Media workflow filename (image or video, e.g., "image_flux.json", "video_wan.json", None = use default)
video_fps: Video frame rate (default 30) video_fps: Video frame rate (default 30)
@@ -254,9 +254,9 @@ class StandardPipeline(BasePipeline):
tts_workflow=final_tts_workflow, # Use processed workflow tts_workflow=final_tts_workflow, # Use processed workflow
tts_speed=tts_speed, tts_speed=tts_speed,
ref_audio=ref_audio, ref_audio=ref_audio,
image_width=image_width, media_width=media_width,
image_height=image_height, media_height=media_height,
image_workflow=image_workflow, media_workflow=media_workflow,
frame_template=frame_template or "1080x1920/default.html", frame_template=frame_template or "1080x1920/default.html",
template_params=template_params # Custom template parameters template_params=template_params # Custom template parameters
) )
@@ -374,13 +374,13 @@ class StandardPipeline(BasePipeline):
# Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts) # Enable parallel if either TTS or Image uses RunningHub (most time-consuming parts)
is_runninghub = ( is_runninghub = (
(config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or (config.tts_workflow and config.tts_workflow.startswith("runninghub/")) or
(config.image_workflow and config.image_workflow.startswith("runninghub/")) (config.media_workflow and config.media_workflow.startswith("runninghub/"))
) )
if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1: if is_runninghub and RUNNING_HUB_PARALLEL_LIMIT > 1:
logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)") logger.info(f"🚀 Using parallel processing for RunningHub workflows (max {RUNNING_HUB_PARALLEL_LIMIT} concurrent)")
logger.info(f" TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}") logger.info(f" TTS: {'runninghub' if config.tts_workflow and config.tts_workflow.startswith('runninghub/') else 'local'}")
logger.info(f" Image: {'runninghub' if config.image_workflow and config.image_workflow.startswith('runninghub/') else 'local'}") logger.info(f" Media: {'runninghub' if config.media_workflow and config.media_workflow.startswith('runninghub/') else 'local'}")
semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT) semaphore = asyncio.Semaphore(RUNNING_HUB_PARALLEL_LIMIT)
completed_count = 0 completed_count = 0
@@ -541,7 +541,7 @@ class StandardPipeline(BasePipeline):
"tts_workflow": tts_workflow, "tts_workflow": tts_workflow,
"tts_speed": tts_speed, "tts_speed": tts_speed,
"ref_audio": ref_audio, "ref_audio": ref_audio,
"image_workflow": image_workflow, "media_workflow": media_workflow,
"prompt_prefix": prompt_prefix, "prompt_prefix": prompt_prefix,
"frame_template": frame_template, "frame_template": frame_template,
"template_params": template_params, "template_params": template_params,

View File

@@ -14,6 +14,10 @@
Frame processor - Process single frame through complete pipeline Frame processor - Process single frame through complete pipeline
Orchestrates: TTS → Image Generation → Frame Composition → Video Segment Orchestrates: TTS → Image Generation → Frame Composition → Video Segment
Key Feature:
- TTS-driven video duration: Audio duration from TTS is passed to video generation workflows
to ensure perfect sync between audio and video (no padding, no trimming needed)
""" """
from typing import Callable, Optional from typing import Callable, Optional
@@ -187,20 +191,29 @@ class FrameProcessor:
# Determine media type based on workflow # Determine media type based on workflow
# video_ prefix in workflow name indicates video generation # video_ prefix in workflow name indicates video generation
workflow_name = config.image_workflow or "" workflow_name = config.media_workflow or ""
is_video_workflow = "video_" in workflow_name.lower() is_video_workflow = "video_" in workflow_name.lower()
media_type = "video" if is_video_workflow else "image" media_type = "video" if is_video_workflow else "image"
logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})") logger.debug(f" → Media type: {media_type} (workflow: {workflow_name})")
# Call Media generation (with optional preset) # Build media generation parameters
media_result = await self.core.media( media_params = {
prompt=frame.image_prompt, "prompt": frame.image_prompt,
workflow=config.image_workflow, # Pass workflow from config (None = use default) "workflow": config.media_workflow, # Pass workflow from config (None = use default)
media_type=media_type, "media_type": media_type,
width=config.image_width, "width": config.media_width,
height=config.image_height "height": config.media_height
) }
# For video workflows: pass audio duration as target video duration
# This ensures video length matches audio length from the source
if is_video_workflow and frame.duration:
media_params["duration"] = frame.duration
logger.info(f" → Generating video with target duration: {frame.duration:.2f}s (from TTS audio)")
# Call Media generation
media_result = await self.core.media(**media_params)
# Store media type # Store media type
frame.media_type = media_result.media_type frame.media_type = media_result.media_type

View File

@@ -119,6 +119,7 @@ class MediaService(ComfyBaseService):
# Common workflow parameters # Common workflow parameters
width: Optional[int] = None, width: Optional[int] = None,
height: Optional[int] = None, height: Optional[int] = None,
duration: Optional[float] = None, # Video duration in seconds (for video workflows)
negative_prompt: Optional[str] = None, negative_prompt: Optional[str] = None,
steps: Optional[int] = None, steps: Optional[int] = None,
seed: Optional[int] = None, seed: Optional[int] = None,
@@ -140,6 +141,7 @@ class MediaService(ComfyBaseService):
runninghub_api_key: RunningHub API key (optional, overrides config) runninghub_api_key: RunningHub API key (optional, overrides config)
width: Media width width: Media width
height: Media height height: Media height
duration: Target video duration in seconds (only for video workflows, typically from TTS audio duration)
negative_prompt: Negative prompt negative_prompt: Negative prompt
steps: Sampling steps steps: Sampling steps
seed: Random seed seed: Random seed
@@ -203,6 +205,10 @@ class MediaService(ComfyBaseService):
workflow_params["width"] = width workflow_params["width"] = width
if height is not None: if height is not None:
workflow_params["height"] = height workflow_params["height"] = height
if duration is not None:
workflow_params["duration"] = duration
if media_type == "video":
logger.info(f"📏 Target video duration: {duration:.2f}s (from TTS audio)")
if negative_prompt is not None: if negative_prompt is not None:
workflow_params["negative_prompt"] = negative_prompt workflow_params["negative_prompt"] = negative_prompt
if steps is not None: if steps is not None:

View File

@@ -380,9 +380,9 @@ class PersistenceService:
"tts_workflow": config.tts_workflow, "tts_workflow": config.tts_workflow,
"tts_speed": config.tts_speed, "tts_speed": config.tts_speed,
"ref_audio": config.ref_audio, "ref_audio": config.ref_audio,
"image_width": config.image_width, "media_width": config.media_width,
"image_height": config.image_height, "media_height": config.media_height,
"image_workflow": config.image_workflow, "media_workflow": config.media_workflow,
"frame_template": config.frame_template, "frame_template": config.frame_template,
"template_params": config.template_params, "template_params": config.template_params,
} }
@@ -402,9 +402,9 @@ class PersistenceService:
tts_workflow=data.get("tts_workflow"), tts_workflow=data.get("tts_workflow"),
tts_speed=data.get("tts_speed"), tts_speed=data.get("tts_speed"),
ref_audio=data.get("ref_audio"), ref_audio=data.get("ref_audio"),
image_width=data.get("image_width", 1024), media_width=data.get("media_width", data.get("image_width", 1024)), # Backward compatibility
image_height=data.get("image_height", 1024), media_height=data.get("media_height", data.get("image_height", 1024)), # Backward compatibility
image_workflow=data.get("image_workflow"), media_workflow=data.get("media_workflow", data.get("image_workflow")), # Backward compatibility
frame_template=data.get("frame_template", "1080x1920/default.html"), frame_template=data.get("frame_template", "1080x1920/default.html"),
template_params=data.get("template_params"), template_params=data.get("template_params"),
) )

View File

@@ -27,6 +27,7 @@ Note: Requires FFmpeg to be installed on the system.
import os import os
import shutil import shutil
import tempfile import tempfile
import uuid
from pathlib import Path from pathlib import Path
from typing import List, Literal, Optional from typing import List, Literal, Optional
@@ -316,12 +317,16 @@ class VideoService:
audio_volume: float = 1.0, audio_volume: float = 1.0,
video_volume: float = 0.0, video_volume: float = 0.0,
pad_strategy: str = "freeze", # "freeze" (freeze last frame) or "black" (black screen) pad_strategy: str = "freeze", # "freeze" (freeze last frame) or "black" (black screen)
auto_adjust_duration: bool = True, # Automatically adjust video duration to match audio
duration_tolerance: float = 0.3, # Tolerance for video being longer than audio (seconds)
) -> str: ) -> str:
""" """
Merge audio with video, using the longer duration Merge audio with video with intelligent duration adjustment
The output video duration will be the maximum of video and audio duration. Automatically handles duration mismatches between video and audio:
If audio is longer than video, the video will be padded using the specified strategy. - If video < audio: Pad video to match audio (avoid black screen)
- If video > audio (within tolerance): Keep as-is (acceptable)
- If video > audio (exceeds tolerance): Trim video to match audio
Automatically handles videos with or without audio streams. Automatically handles videos with or without audio streams.
- If video has no audio: adds the audio track - If video has no audio: adds the audio track
@@ -339,6 +344,9 @@ class VideoService:
pad_strategy: Strategy to pad video if audio is longer pad_strategy: Strategy to pad video if audio is longer
- "freeze": Freeze last frame (default) - "freeze": Freeze last frame (default)
- "black": Fill with black screen - "black": Fill with black screen
auto_adjust_duration: Enable intelligent duration adjustment (default: True)
duration_tolerance: Tolerance for video being longer than audio in seconds (default: 0.3)
Videos within this tolerance won't be trimmed
Returns: Returns:
Path to the output video file Path to the output video file
@@ -361,6 +369,28 @@ class VideoService:
logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s") logger.info(f"Video duration: {video_duration:.2f}s, Audio duration: {audio_duration:.2f}s")
# Intelligent duration adjustment (if enabled)
if auto_adjust_duration:
diff = video_duration - audio_duration
if diff < 0:
# Video shorter than audio → Must pad to avoid black screen
logger.warning(f"⚠️ Video shorter than audio by {abs(diff):.2f}s, padding required")
video = self._pad_video_to_duration(video, audio_duration, pad_strategy)
video_duration = audio_duration # Update duration after padding
logger.info(f"📌 Padded video to {audio_duration:.2f}s")
elif diff > duration_tolerance:
# Video significantly longer than audio → Trim
logger.info(f"⚠️ Video longer than audio by {diff:.2f}s (tolerance: {duration_tolerance}s)")
video = self._trim_video_to_duration(video, audio_duration)
video_duration = audio_duration # Update duration after trimming
logger.info(f"✂️ Trimmed video to {audio_duration:.2f}s")
else: # 0 <= diff <= duration_tolerance
# Video slightly longer but within tolerance → Keep as-is
logger.info(f"✅ Duration acceptable: video={video_duration:.2f}s, audio={audio_duration:.2f}s (diff={diff:.2f}s)")
# Determine target duration (max of both) # Determine target duration (max of both)
target_duration = max(video_duration, audio_duration) target_duration = max(video_duration, audio_duration)
logger.info(f"Target output duration: {target_duration:.2f}s") logger.info(f"Target output duration: {target_duration:.2f}s")
@@ -382,9 +412,6 @@ class VideoService:
video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration) video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
else: # black else: # black
# Generate black frames for padding duration # Generate black frames for padding duration
from pixelle_video.utils.os_util import get_temp_path
import os
# Get video properties # Get video properties
probe = ffmpeg.probe(video) probe = ffmpeg.probe(video)
video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video') video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
@@ -395,7 +422,7 @@ class VideoService:
fps = fps_num / fps_den if fps_den != 0 else 30 fps = fps_num / fps_den if fps_den != 0 else 30
# Create black video for padding # Create black video for padding
black_video_path = get_temp_path(f"black_pad_{os.path.basename(output)}") black_video_path = self._get_unique_temp_path("black_pad", os.path.basename(output))
black_input = ffmpeg.input( black_input = ffmpeg.input(
f'color=c=black:s={width}x{height}:r={fps}', f'color=c=black:s={width}x{height}:r={fps}',
f='lavfi', f='lavfi',
@@ -778,6 +805,26 @@ class VideoService:
fade_in=0.0 fade_in=0.0
) )
def _get_unique_temp_path(self, prefix: str, original_filename: str) -> str:
    """
    Build a collision-resistant path inside the temp folder

    A short random token is placed between the prefix and the original
    filename so that concurrent operations on files with the same name
    do not write to the same temp file.

    Args:
        prefix: Leading tag for the temp file (e.g., "trimmed", "padded", "black_pad")
        original_filename: Filename kept as the trailing part of the temp name

    Returns:
        Temp file path whose final component is {prefix}_{token}_{original_filename},
        where token is the first 8 hex characters of a random UUID

    Example:
        >>> self._get_unique_temp_path("trimmed", "video.mp4")
        >>> # Returns: "temp/trimmed_a3f2d8c1_video.mp4"
    """
    from pixelle_video.utils.os_util import get_temp_path

    # Compose the file name first, then resolve it against the temp folder
    temp_name = f"{prefix}_{uuid.uuid4().hex[:8]}_{original_filename}"
    return get_temp_path(temp_name)
def _resolve_bgm_path(self, bgm_path: str) -> str: def _resolve_bgm_path(self, bgm_path: str) -> str:
""" """
Resolve BGM path (filename or custom path) with custom override support Resolve BGM path (filename or custom path) with custom override support
@@ -841,4 +888,120 @@ class VideoService:
except Exception as e: except Exception as e:
logger.warning(f"Failed to list BGM files: {e}") logger.warning(f"Failed to list BGM files: {e}")
return [] return []
def _trim_video_to_duration(self, video: str, target_duration: float) -> str:
    """
    Trim video to specified duration

    The input is read with a duration limit of ``target_duration`` seconds and
    written to a uniquely named temp file using stream copy (no re-encoding).

    Args:
        video: Input video file path
        target_duration: Target duration in seconds

    Returns:
        Path to trimmed video (temp file)

    Raises:
        RuntimeError: If FFmpeg execution fails
    """
    output = self._get_unique_temp_path("trimmed", os.path.basename(video))

    try:
        # Stream copy keeps trimming fast. The audio codec is "copy" regardless
        # of whether the input carries an audio stream, so probing the file
        # with has_audio_stream() first is unnecessary.
        (
            ffmpeg
            .input(video, t=target_duration)
            .output(output, vcodec='copy', acodec='copy')
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True, quiet=True)
        )
    except ffmpeg.Error as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"FFmpeg error trimming video: {error_msg}")
        # Chain the original error so the FFmpeg traceback is not lost
        raise RuntimeError(f"Failed to trim video: {error_msg}") from e

    return output
def _pad_video_to_duration(self, video: str, target_duration: float, pad_strategy: str = "freeze") -> str:
    """
    Pad video to specified duration by extending the last frame or adding black frames

    Only the video stream of the input is written to the padded file; the
    result is re-encoded with libx264.

    Args:
        video: Input video file path
        target_duration: Target duration in seconds
        pad_strategy: Padding strategy - "freeze" (freeze last frame); any other
            value is treated as "black" (black screen)

    Returns:
        Path to padded video (temp file), or the original ``video`` path
        unchanged when it is already at least ``target_duration`` long

    Raises:
        RuntimeError: If FFmpeg execution fails or the input has no video stream
    """
    pad_duration = target_duration - self._get_video_duration(video)
    if pad_duration <= 0:
        # No padding needed, return original
        return video

    # Allocate the temp path only once we know a new file will be written
    output = self._get_unique_temp_path("padded", os.path.basename(video))

    try:
        video_stream = ffmpeg.input(video).video

        if pad_strategy == "freeze":
            # Freeze last frame using tpad filter
            video_stream = video_stream.filter('tpad', stop_mode='clone', stop_duration=pad_duration)
        else:  # black
            # The black filler must match the source's frame size and rate
            probe = ffmpeg.probe(video)
            video_info = next(
                (s for s in probe['streams'] if s['codec_type'] == 'video'),
                None,
            )
            if video_info is None:
                # Avoid leaking a bare StopIteration to callers
                raise RuntimeError(f"Failed to pad video: no video stream found in {video}")

            width = int(video_info['width'])
            height = int(video_info['height'])
            # r_frame_rate is a "num/den" fraction string
            fps_num, fps_den = map(int, video_info['r_frame_rate'].split('/'))
            fps = fps_num / fps_den if fps_den != 0 else 30

            # Create black video for padding
            black_input = ffmpeg.input(
                f'color=c=black:s={width}x{height}:r={fps}',
                f='lavfi',
                t=pad_duration
            )

            # Concatenate original video with black padding
            video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)

        # Both strategies go through a filter graph, so the output is re-encoded
        (
            ffmpeg
            .output(
                video_stream,
                output,
                vcodec='libx264',
                preset='fast',
                crf=23
            )
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True, quiet=True)
        )
    except ffmpeg.Error as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"FFmpeg error padding video: {error_msg}")
        # Chain the original error so the FFmpeg traceback is not lost
        raise RuntimeError(f"Failed to pad video: {error_msg}") from e

    return output

View File

@@ -83,6 +83,8 @@ def get_temp_path(*paths: str) -> str:
""" """
Get path relative to Pixelle-Video temp folder Get path relative to Pixelle-Video temp folder
Ensures temp directory exists before returning path.
Args: Args:
*paths: Path components to join *paths: Path components to join
@@ -94,6 +96,10 @@ def get_temp_path(*paths: str) -> str:
# Returns: "/path/to/project/temp/audio.mp3" # Returns: "/path/to/project/temp/audio.mp3"
""" """
temp_path = get_root_path("temp") temp_path = get_root_path("temp")
# Ensure temp directory exists
os.makedirs(temp_path, exist_ok=True)
if paths: if paths:
return os.path.join(temp_path, *paths) return os.path.join(temp_path, *paths)
return temp_path return temp_path
@@ -102,6 +108,8 @@ def get_temp_path(*paths: str) -> str:
def get_data_path(*paths: str) -> str: def get_data_path(*paths: str) -> str:
""" """
Get path relative to Pixelle-Video data folder Get path relative to Pixelle-Video data folder
Ensures data directory exists before returning path.
Args: Args:
*paths: Path components to join *paths: Path components to join
@@ -114,6 +122,10 @@ def get_data_path(*paths: str) -> str:
# Returns: "/path/to/project/data/videos/output.mp4" # Returns: "/path/to/project/data/videos/output.mp4"
""" """
data_path = get_root_path("data") data_path = get_root_path("data")
# Ensure data directory exists
os.makedirs(data_path, exist_ok=True)
if paths: if paths:
return os.path.join(data_path, *paths) return os.path.join(data_path, *paths)
return data_path return data_path
@@ -122,6 +134,8 @@ def get_data_path(*paths: str) -> str:
def get_output_path(*paths: str) -> str: def get_output_path(*paths: str) -> str:
""" """
Get path relative to Pixelle-Video output folder Get path relative to Pixelle-Video output folder
Ensures output directory exists before returning path.
Args: Args:
*paths: Path components to join *paths: Path components to join
@@ -134,6 +148,10 @@ def get_output_path(*paths: str) -> str:
# Returns: "/path/to/project/output/video.mp4" # Returns: "/path/to/project/output/video.mp4"
""" """
output_path = get_root_path("output") output_path = get_root_path("output")
# Ensure output directory exists
os.makedirs(output_path, exist_ok=True)
if paths: if paths:
return os.path.join(output_path, *paths) return os.path.join(output_path, *paths)
return output_path return output_path

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "pixelle-video" name = "pixelle-video"
version = "0.1.6" version = "0.1.7"
description = "AI-powered video creation platform - Part of Pixelle ecosystem" description = "AI-powered video creation platform - Part of Pixelle ecosystem"
authors = [ authors = [
{name = "Pixelle.AI"} {name = "Pixelle.AI"}

2
uv.lock generated
View File

@@ -1664,7 +1664,7 @@ wheels = [
[[package]] [[package]]
name = "pixelle-video" name = "pixelle-video"
version = "0.1.6" version = "0.1.7"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "beautifulsoup4" }, { name = "beautifulsoup4" },

View File

@@ -58,7 +58,7 @@ def render_single_output(pixelle_video, video_params):
frame_template = video_params.get("frame_template") frame_template = video_params.get("frame_template")
custom_values_for_video = video_params.get("template_params", {}) custom_values_for_video = video_params.get("template_params", {})
workflow_key = video_params.get("image_workflow") workflow_key = video_params.get("media_workflow")
prompt_prefix = video_params.get("prompt_prefix", "") prompt_prefix = video_params.get("prompt_prefix", "")
with st.container(border=True): with st.container(border=True):
@@ -123,18 +123,20 @@ def render_single_output(pixelle_video, video_params):
progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete progress_bar.progress(min(int(event.progress * 100), 99)) # Cap at 99% until complete
# Generate video (directly pass parameters) # Generate video (directly pass parameters)
# Note: image_width and image_height are now auto-determined from template # Note: media_width and media_height are auto-determined from template
video_params = { video_params = {
"text": text, "text": text,
"mode": mode, "mode": mode,
"title": title if title else None, "title": title if title else None,
"n_scenes": n_scenes, "n_scenes": n_scenes,
"image_workflow": workflow_key, "media_workflow": workflow_key,
"frame_template": frame_template, "frame_template": frame_template,
"prompt_prefix": prompt_prefix, "prompt_prefix": prompt_prefix,
"bgm_path": bgm_path, "bgm_path": bgm_path,
"bgm_volume": bgm_volume if bgm_path else 0.2, "bgm_volume": bgm_volume if bgm_path else 0.2,
"progress_callback": update_progress, "progress_callback": update_progress,
"media_width": st.session_state.get('template_media_width'),
"media_height": st.session_state.get('template_media_height'),
} }
# Add TTS parameters based on mode # Add TTS parameters based on mode
@@ -245,12 +247,14 @@ def render_batch_output(pixelle_video, video_params):
shared_config = { shared_config = {
"title_prefix": video_params.get("title_prefix"), "title_prefix": video_params.get("title_prefix"),
"n_scenes": video_params.get("n_scenes") or 5, "n_scenes": video_params.get("n_scenes") or 5,
"image_workflow": video_params.get("image_workflow"), "media_workflow": video_params.get("media_workflow"),
"frame_template": video_params.get("frame_template"), "frame_template": video_params.get("frame_template"),
"prompt_prefix": video_params.get("prompt_prefix") or "", "prompt_prefix": video_params.get("prompt_prefix") or "",
"bgm_path": video_params.get("bgm_path"), "bgm_path": video_params.get("bgm_path"),
"bgm_volume": video_params.get("bgm_volume") or 0.2, "bgm_volume": video_params.get("bgm_volume") or 0.2,
"tts_inference_mode": video_params.get("tts_inference_mode") or "local", "tts_inference_mode": video_params.get("tts_inference_mode") or "local",
"media_width": video_params.get("media_width"),
"media_height": video_params.get("media_height"),
} }
# Add TTS parameters based on mode (only add non-None values) # Add TTS parameters based on mode (only add non-None values)
@@ -368,13 +372,28 @@ def render_batch_output(pixelle_video, video_params):
st.success(tr("batch.success_message")) st.success(tr("batch.success_message"))
st.info(tr("batch.view_in_history")) st.info(tr("batch.view_in_history"))
# Button to go to History page # Link to the History page: a plain HTML anchor styled as a button that opens /History in a new tab (no JavaScript involved)
if st.button( st.markdown(
f"📚 {tr('batch.goto_history')}", f"""
type="secondary", <a href="/History" target="_blank">
use_container_width=True <button style="
): width: 100%;
st.switch_page("pages/2_📚_History.py") padding: 0.5rem 1rem;
background-color: white;
color: rgb(49, 51, 63);
border: 1px solid rgba(49, 51, 63, 0.2);
border-radius: 0.5rem;
cursor: pointer;
font-size: 1rem;
font-weight: 400;
text-align: center;
">
📚 {tr('batch.goto_history')}
</button>
</a>
""",
unsafe_allow_html=True
)
# Show failed tasks if any # Show failed tasks if any
if batch_result["errors"]: if batch_result["errors"]:

View File

@@ -610,7 +610,7 @@ def render_style_config(pixelle_video):
workflow_options if workflow_options else ["No workflows found"], workflow_options if workflow_options else ["No workflows found"],
index=default_workflow_index, index=default_workflow_index,
label_visibility="collapsed", label_visibility="collapsed",
key="image_workflow_select" key="media_workflow_select"
) )
# Get the actual workflow key (e.g., "runninghub/image_flux.json") # Get the actual workflow key (e.g., "runninghub/image_flux.json")
@@ -621,14 +621,14 @@ def render_style_config(pixelle_video):
workflow_key = "runninghub/image_flux.json" # fallback workflow_key = "runninghub/image_flux.json" # fallback
# Get media size from template # Get media size from template
image_width = st.session_state.get('template_media_width', 1024) media_width = st.session_state.get('template_media_width')
image_height = st.session_state.get('template_media_height', 1024) media_height = st.session_state.get('template_media_height')
# Display media size info (read-only) # Display media size info (read-only)
if template_media_type == "video": if template_media_type == "video":
size_info_text = tr('style.video_size_info', width=image_width, height=image_height) size_info_text = tr('style.video_size_info', width=media_width, height=media_height)
else: else:
size_info_text = tr('style.image_size_info', width=image_width, height=image_height) size_info_text = tr('style.image_size_info', width=media_width, height=media_height)
st.info(f"📐 {size_info_text}") st.info(f"📐 {size_info_text}")
# Prompt prefix input # Prompt prefix input
@@ -679,8 +679,8 @@ def render_style_config(pixelle_video):
prompt=final_prompt, prompt=final_prompt,
workflow=workflow_key, workflow=workflow_key,
media_type=template_media_type, media_type=template_media_type,
width=int(image_width), width=int(media_width),
height=int(image_height) height=int(media_height)
)) ))
preview_media_path = media_result.url preview_media_path = media_result.url
@@ -725,8 +725,8 @@ def render_style_config(pixelle_video):
st.caption(tr("image.not_required_hint")) st.caption(tr("image.not_required_hint"))
# Get media size from template (not needed for generation in this branch, but still included in the params dict below) # Get media size from template (not needed for generation in this branch, but still included in the params dict below)
image_width = st.session_state.get('template_media_width', 1024) media_width = st.session_state.get('template_media_width')
image_height = st.session_state.get('template_media_height', 1024) media_height = st.session_state.get('template_media_height')
# Set default values for later use # Set default values for later use
workflow_key = None workflow_key = None
@@ -741,6 +741,8 @@ def render_style_config(pixelle_video):
"ref_audio": str(ref_audio_path) if ref_audio_path else None, "ref_audio": str(ref_audio_path) if ref_audio_path else None,
"frame_template": frame_template, "frame_template": frame_template,
"template_params": custom_values_for_video if custom_values_for_video else None, "template_params": custom_values_for_video if custom_values_for_video else None,
"image_workflow": workflow_key, "media_workflow": workflow_key,
"prompt_prefix": prompt_prefix if prompt_prefix else "" "prompt_prefix": prompt_prefix if prompt_prefix else "",
"media_width": media_width,
"media_height": media_height
} }

View File

@@ -0,0 +1,4 @@
{
"source": "runninghub",
"workflow_id": "1991693844100100097"
}

View File

@@ -0,0 +1,64 @@
{
"3": {
"inputs": {
"text": "床前明月光,疑是地上霜。"
},
"class_type": "Text _O",
"_meta": {
"title": "$text.text!"
}
},
"5": {
"inputs": {
"text": [
"3",
0
],
"mode": "Auto",
"do_sample_mode": "on",
"temperature": 0.8,
"top_p": 0.9,
"top_k": 30,
"num_beams": 3,
"repetition_penalty": 10,
"length_penalty": 0,
"max_mel_tokens": 1815,
"max_tokens_per_sentence": 120,
"seed": 4266796044,
"reference_audio": [
"12",
0
]
},
"class_type": "IndexTTS2BaseNode",
"_meta": {
"title": "Index TTS 2 - Base"
}
},
"8": {
"inputs": {
"filename_prefix": "audio/ComfyUI",
"quality": "V0",
"audioUI": "",
"audio": [
"5",
0
]
},
"class_type": "SaveAudioMP3",
"_meta": {
"title": "Save Audio (MP3)"
}
},
"12": {
"inputs": {
"audio": "小裴钱.wav",
"start_time": 0,
"duration": 0
},
"class_type": "VHS_LoadAudioUpload",
"_meta": {
"title": "$ref_audio.audio"
}
}
}