项目重命名: ReelForge => Pixelle-Video

This commit is contained in:
puke
2025-10-31 10:23:38 +08:00
parent dfdba73b74
commit 136514e466
60 changed files with 384 additions and 383 deletions

View File

@@ -0,0 +1,3 @@
"""
Pixelle-Video utilities
"""

View File

@@ -0,0 +1,285 @@
"""
OS utilities for file and path management
Provides utilities for managing paths and files in Pixelle-Video.
Inspired by Pixelle-MCP's os_util.py.
"""
import os
import random
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple, Literal
def get_pixelle_video_root_path() -> str:
"""
Get Pixelle-Video root path - current working directory
Returns:
Current working directory as string
"""
return str(Path.cwd())
def ensure_pixelle_video_root_path() -> str:
"""
Ensure Pixelle-Video root path exists and return the path
Returns:
Root path as string
"""
root_path = get_pixelle_video_root_path()
root_path_obj = Path(root_path)
output_dir = root_path_obj / 'output'
output_dir.mkdir(parents=True, exist_ok=True)
return root_path
def get_root_path(*paths: str) -> str:
"""
Get path relative to Pixelle-Video root path
Args:
*paths: Path components to join
Returns:
Absolute path as string
Example:
get_root_path("temp", "audio.mp3")
# Returns: "/path/to/project/temp/audio.mp3"
"""
root_path = ensure_pixelle_video_root_path()
if paths:
return os.path.join(root_path, *paths)
return root_path
def get_temp_path(*paths: str) -> str:
"""
Get path relative to Pixelle-Video temp folder
Args:
*paths: Path components to join
Returns:
Absolute path to temp directory or file
Example:
get_temp_path("audio.mp3")
# Returns: "/path/to/project/temp/audio.mp3"
"""
temp_path = get_root_path("temp")
if paths:
return os.path.join(temp_path, *paths)
return temp_path
def get_data_path(*paths: str) -> str:
"""
Get path relative to Pixelle-Video data folder
Args:
*paths: Path components to join
Returns:
Absolute path to data directory or file
Example:
get_data_path("videos", "output.mp4")
# Returns: "/path/to/project/data/videos/output.mp4"
"""
data_path = get_root_path("data")
if paths:
return os.path.join(data_path, *paths)
return data_path
def get_output_path(*paths: str) -> str:
"""
Get path relative to Pixelle-Video output folder
Args:
*paths: Path components to join
Returns:
Absolute path to output directory or file
Example:
get_output_path("video.mp4")
# Returns: "/path/to/project/output/video.mp4"
"""
output_path = get_root_path("output")
if paths:
return os.path.join(output_path, *paths)
return output_path
def save_bytes_to_file(data: bytes, file_path: str) -> str:
"""
Save bytes data to file
Creates parent directories if they don't exist.
Args:
data: Binary data to save
file_path: Target file path
Returns:
Absolute path of saved file
Example:
save_bytes_to_file(audio_data, get_temp_path("audio.mp3"))
"""
# Ensure parent directory exists
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# Write binary data
with open(file_path, "wb") as f:
f.write(data)
return os.path.abspath(file_path)
def ensure_dir(path: str) -> str:
"""
Ensure directory exists, create if not
Args:
path: Directory path
Returns:
Absolute path of directory
"""
os.makedirs(path, exist_ok=True)
return os.path.abspath(path)
# ========== Task Directory Management ==========
def create_task_id() -> str:
"""
Create unique task ID with timestamp + random suffix
Format: {timestamp}_{random_hex}
Example: "20251028_143052_ab3d"
Collision probability: < 0.0001% (65536 combinations per second)
Returns:
Task ID string
"""
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
random_suffix = f"{random.randint(0, 0xFFFF):04x}" # 4-digit hex (0000-ffff)
return f"{timestamp}_{random_suffix}"
def create_task_output_dir(task_id: Optional[str] = None) -> Tuple[str, str]:
"""
Create isolated output directory for single video generation task
Directory structure:
output/{task_id}/
├── final.mp4 # Final video output
├── frames/ # All frame-related files
│ ├── 01_audio.mp3
│ ├── 01_image.png
│ ├── 01_composed.png
│ ├── 01_segment.mp4
│ └── ...
└── metadata.json # Optional: task metadata
Args:
task_id: Optional task ID (auto-generated if None)
Returns:
(task_dir, task_id) tuple
Example:
>>> task_dir, task_id = create_task_output_dir()
>>> # task_dir = "/path/to/project/output/20251028_143052_ab3d"
>>> # task_id = "20251028_143052_ab3d"
"""
if task_id is None:
task_id = create_task_id()
task_dir = get_output_path(task_id)
frames_dir = os.path.join(task_dir, "frames")
# Create directories
os.makedirs(frames_dir, exist_ok=True)
return task_dir, task_id
def get_task_path(task_id: str, *paths: str) -> str:
"""
Get path within task directory
Args:
task_id: Task ID
*paths: Path components to join
Returns:
Absolute path within task directory
Example:
>>> get_task_path("20251028_143052_ab3d", "final.mp4")
>>> # Returns: "/path/to/project/output/20251028_143052_ab3d/final.mp4"
"""
task_dir = get_output_path(task_id)
if paths:
return os.path.join(task_dir, *paths)
return task_dir
def get_task_frame_path(
task_id: str,
frame_index: int,
file_type: Literal["audio", "image", "composed", "segment"]
) -> str:
"""
Get frame file path within task directory
Args:
task_id: Task ID
frame_index: Frame index (0-based internally, but filename starts from 01)
file_type: File type (audio/image/composed/segment)
Returns:
Absolute path to frame file
Example:
>>> get_task_frame_path("20251028_143052_ab3d", 0, "audio")
>>> # Returns: ".../output/20251028_143052_ab3d/frames/01_audio.mp3"
"""
ext_map = {
"audio": "mp3",
"image": "png",
"composed": "png",
"segment": "mp4"
}
# Frame number starts from 01 for better human readability
filename = f"{frame_index + 1:02d}_{file_type}.{ext_map[file_type]}"
return get_task_path(task_id, "frames", filename)
def get_task_final_video_path(task_id: str) -> str:
"""
Get final video path within task directory
Args:
task_id: Task ID
Returns:
Absolute path to final video
Example:
>>> get_task_final_video_path("20251028_143052_ab3d")
>>> # Returns: ".../output/20251028_143052_ab3d/final.mp4"
"""
return get_task_path(task_id, "final.mp4")

View File

@@ -0,0 +1,38 @@
"""
Prompt helper utilities
Simple utilities for building prompts with optional prefixes.
"""
def build_image_prompt(prompt: str, prefix: str = "") -> str:
"""
Build final image prompt with optional prefix
Args:
prompt: User's raw prompt
prefix: Optional prefix to add before the prompt
Returns:
Final prompt with prefix applied (if provided)
Examples:
>>> build_image_prompt("a cat", "")
'a cat'
>>> build_image_prompt("a cat", "anime style")
'anime style, a cat'
>>> build_image_prompt("a cat", " anime style ")
'anime style, a cat'
"""
prefix = prefix.strip() if prefix else ""
prompt = prompt.strip() if prompt else ""
if prefix and prompt:
return f"{prefix}, {prompt}"
elif prefix:
return prefix
else:
return prompt

View File

@@ -0,0 +1,330 @@
"""
Edge TTS Utility - Temporarily not used
This is the original edge-tts implementation, kept here for potential future use.
Currently, TTS service uses ComfyUI workflows only.
"""
import asyncio
import ssl
import random
import edge_tts as edge_tts_sdk
from loguru import logger
from aiohttp import WSServerHandshakeError, ClientResponseError
# Global flag for SSL verification (set to False for development only)
_SSL_VERIFY_ENABLED = False
# Retry configuration for Edge TTS (to handle 401 errors)
_RETRY_COUNT = 5 # Default retry count (increased from 3 to 5)
_RETRY_BASE_DELAY = 1.0 # Base retry delay in seconds (for exponential backoff)
_MAX_RETRY_DELAY = 10.0 # Maximum retry delay in seconds
# Rate limiting configuration
_REQUEST_DELAY = 0.5 # Minimum delay before each request (seconds)
_MAX_CONCURRENT_REQUESTS = 3 # Maximum concurrent requests
# Global semaphore for rate limiting
_request_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_REQUESTS)
async def edge_tts(
text: str,
voice: str = "[Chinese] zh-CN Yunjian",
rate: str = "+0%",
volume: str = "+0%",
pitch: str = "+0Hz",
output_path: str = None,
retry_count: int = _RETRY_COUNT,
retry_base_delay: float = _RETRY_BASE_DELAY,
) -> bytes:
"""
Convert text to speech using Microsoft Edge TTS
This service is free and requires no API key.
Supports 400+ voices across 100+ languages.
Returns audio data as bytes (MP3 format).
Includes automatic retry mechanism with exponential backoff and jitter
to handle 401 authentication errors and temporary network issues.
Also includes concurrent request limiting and rate limiting.
Args:
text: Text to convert to speech
voice: Voice ID (e.g., [Chinese] zh-CN Yunjian, [English] en-US Jenny)
rate: Speech rate (e.g., +0%, +50%, -20%)
volume: Speech volume (e.g., +0%, +50%, -20%)
pitch: Speech pitch (e.g., +0Hz, +10Hz, -5Hz)
output_path: Optional output file path to save audio
retry_count: Number of retries on failure (default: 5)
retry_base_delay: Base delay for exponential backoff (default: 1.0s)
Returns:
Audio data as bytes (MP3 format)
Popular Chinese voices:
- [Chinese] zh-CN Yunjian (male, default)
- [Chinese] zh-CN Xiaoxiao (female)
- [Chinese] zh-CN Yunxi (male)
- [Chinese] zh-CN Xiaoyi (female)
Popular English voices:
- [English] en-US Jenny (female)
- [English] en-US Guy (male)
- [English] en-GB Sonia (female, British)
Example:
audio_bytes = await edge_tts(
text="你好,世界!",
voice="[Chinese] zh-CN Yunjian",
rate="+20%"
)
"""
logger.debug(f"Calling Edge TTS with voice: {voice}, rate: {rate}, retry_count: {retry_count}")
# Use semaphore to limit concurrent requests
async with _request_semaphore:
# Add a small random delay before each request to avoid rate limiting
pre_delay = _REQUEST_DELAY + random.uniform(0, 0.3)
logger.debug(f"Waiting {pre_delay:.2f}s before request (rate limiting)")
await asyncio.sleep(pre_delay)
last_error = None
# Retry loop
for attempt in range(retry_count + 1): # +1 because first attempt is not a retry
if attempt > 0:
# Exponential backoff with jitter
# delay = base * (2 ^ attempt) + random jitter
exponential_delay = retry_base_delay * (2 ** (attempt - 1))
jitter = random.uniform(0, retry_base_delay)
retry_delay = min(exponential_delay + jitter, _MAX_RETRY_DELAY)
logger.info(f"🔄 Retrying Edge TTS (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay:.2f}s delay...")
await asyncio.sleep(retry_delay)
# Monkey patch ssl.create_default_context if SSL verification is disabled
if not _SSL_VERIFY_ENABLED:
if attempt == 0: # Only log warning once
logger.warning("SSL verification is disabled for development. This is NOT recommended for production!")
original_create_default_context = ssl.create_default_context
def create_unverified_context(*args, **kwargs):
ctx = original_create_default_context(*args, **kwargs)
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
return ctx
# Temporarily replace the function
ssl.create_default_context = create_unverified_context
try:
# Create communicate instance
communicate = edge_tts_sdk.Communicate(
text=text,
voice=voice,
rate=rate,
volume=volume,
pitch=pitch,
)
# Collect audio chunks
audio_chunks = []
async for chunk in communicate.stream():
if chunk["type"] == "audio":
audio_chunks.append(chunk["data"])
audio_data = b"".join(audio_chunks)
if attempt > 0:
logger.success(f"✅ Retry succeeded on attempt {attempt + 1}")
logger.info(f"Generated {len(audio_data)} bytes of audio data")
# Save to file if output_path is provided
if output_path:
with open(output_path, "wb") as f:
f.write(audio_data)
logger.info(f"Audio saved to: {output_path}")
return audio_data
except (WSServerHandshakeError, ClientResponseError) as e:
# Network/authentication errors - retry
last_error = e
error_code = getattr(e, 'status', 'unknown')
error_msg = str(e)
# Log more detailed information for 401 errors
if error_code == 401 or '401' in error_msg:
logger.warning(f"⚠️ Edge TTS 401 Authentication Error (attempt {attempt + 1}/{retry_count + 1})")
logger.debug(f"Error details: {error_msg}")
logger.debug(f"This is usually caused by rate limiting. Will retry with exponential backoff...")
else:
logger.warning(f"⚠️ Edge TTS error (attempt {attempt + 1}/{retry_count + 1}): {error_code} - {e}")
if attempt >= retry_count:
# Last attempt failed
logger.error(f"❌ All {retry_count + 1} attempts failed. Last error: {error_code}")
raise
# Otherwise, continue to next retry
except Exception as e:
# Other errors - don't retry, raise immediately
logger.error(f"Edge TTS error (non-retryable): {type(e).__name__} - {e}")
raise
finally:
# Restore original function if we patched it
if not _SSL_VERIFY_ENABLED:
ssl.create_default_context = original_create_default_context
# Should not reach here, but just in case
if last_error:
raise last_error
else:
raise RuntimeError("Edge TTS failed without error (unexpected)")
def get_audio_duration(audio_path: str) -> float:
"""
Get audio file duration in seconds
Args:
audio_path: Path to audio file
Returns:
Duration in seconds
"""
try:
# Try using ffmpeg-python
import ffmpeg
probe = ffmpeg.probe(audio_path)
duration = float(probe['format']['duration'])
return duration
except Exception as e:
logger.warning(f"Failed to get audio duration: {e}, using estimate")
# Fallback: estimate based on file size (very rough)
import os
file_size = os.path.getsize(audio_path)
# Assume ~16kbps for MP3, so 2KB per second
estimated_duration = file_size / 2000
return max(1.0, estimated_duration) # At least 1 second
async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry_base_delay: float = _RETRY_BASE_DELAY) -> list[str]:
"""
List all available voices for Edge TTS
Returns a list of voice IDs (ShortName).
Optionally filter by locale.
Includes automatic retry mechanism with exponential backoff and jitter
to handle network errors and rate limiting.
Args:
locale: Filter by locale (e.g., zh-CN, en-US, ja-JP)
retry_count: Number of retries on failure (default: 5)
retry_base_delay: Base delay for exponential backoff (default: 1.0s)
Returns:
List of voice IDs
Example:
# List all voices
voices = await list_voices()
# Returns: ['[Chinese] zh-CN Yunjian', '[Chinese] zh-CN Xiaoxiao', ...]
# List Chinese voices only
voices = await list_voices(locale="zh-CN")
# Returns: ['[Chinese] zh-CN Yunjian', '[Chinese] zh-CN Xiaoxiao', ...]
"""
logger.debug(f"Fetching Edge TTS voices, locale filter: {locale}, retry_count: {retry_count}")
# Use semaphore to limit concurrent requests
async with _request_semaphore:
# Add a small random delay before each request to avoid rate limiting
pre_delay = _REQUEST_DELAY + random.uniform(0, 0.3)
logger.debug(f"Waiting {pre_delay:.2f}s before request (rate limiting)")
await asyncio.sleep(pre_delay)
last_error = None
# Retry loop
for attempt in range(retry_count + 1):
if attempt > 0:
# Exponential backoff with jitter
exponential_delay = retry_base_delay * (2 ** (attempt - 1))
jitter = random.uniform(0, retry_base_delay)
retry_delay = min(exponential_delay + jitter, _MAX_RETRY_DELAY)
logger.info(f"🔄 Retrying list voices (attempt {attempt + 1}/{retry_count + 1}) after {retry_delay:.2f}s delay...")
await asyncio.sleep(retry_delay)
# Monkey patch SSL if verification is disabled
if not _SSL_VERIFY_ENABLED:
if attempt == 0: # Only log warning once
logger.warning("SSL verification is disabled for development. This is NOT recommended for production!")
original_create_default_context = ssl.create_default_context
def create_unverified_context(*args, **kwargs):
ctx = original_create_default_context(*args, **kwargs)
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
return ctx
ssl.create_default_context = create_unverified_context
try:
# Get all voices
voices = await edge_tts_sdk.list_voices()
# Filter by locale if specified
if locale:
voices = [v for v in voices if v["Locale"].startswith(locale)]
# Extract voice IDs (ShortName)
voice_ids = [voice["ShortName"] for voice in voices]
if attempt > 0:
logger.success(f"✅ Retry succeeded on attempt {attempt + 1}")
logger.info(f"Found {len(voice_ids)} voices" + (f" for locale '{locale}'" if locale else ""))
return voice_ids
except (WSServerHandshakeError, ClientResponseError) as e:
# Network/authentication errors - retry
last_error = e
error_code = getattr(e, 'status', 'unknown')
error_msg = str(e)
# Log more detailed information for 401 errors
if error_code == 401 or '401' in error_msg:
logger.warning(f"⚠️ Edge TTS 401 Authentication Error (list_voices attempt {attempt + 1}/{retry_count + 1})")
logger.debug(f"Error details: {error_msg}")
logger.debug(f"This is usually caused by rate limiting. Will retry with exponential backoff...")
else:
logger.warning(f"⚠️ List voices error (attempt {attempt + 1}/{retry_count + 1}): {error_code} - {e}")
if attempt >= retry_count:
logger.error(f"❌ All {retry_count + 1} attempts failed. Last error: {error_code}")
raise
except Exception as e:
# Other errors - don't retry, raise immediately
logger.error(f"List voices error (non-retryable): {type(e).__name__} - {e}")
raise
finally:
# Restore original function if we patched it
if not _SSL_VERIFY_ENABLED:
ssl.create_default_context = original_create_default_context
# Should not reach here, but just in case
if last_error:
raise last_error
else:
raise RuntimeError("List voices failed without error (unexpected)")