Compare commits
33 Commits
8db3693e08
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3d3aba3670 | ||
|
|
3a8ec576ee | ||
|
|
3f59b324ad | ||
|
|
6bf16936af | ||
|
|
c65b040fe3 | ||
|
|
be2639c596 | ||
|
|
c854bd80e0 | ||
|
|
90ceb76296 | ||
|
|
1b54552fec | ||
|
|
f19804facb | ||
|
|
bc077475c6 | ||
|
|
9675b9c23b | ||
|
|
92183b083b | ||
|
|
be216eacad | ||
|
|
8d82cf91d5 | ||
|
|
8c35b0066f | ||
|
|
44249889df | ||
|
|
49e667cc94 | ||
|
|
b3cf9e64e5 | ||
|
|
da98d0842a | ||
|
|
c0eb4ed320 | ||
|
|
2be9256c48 | ||
|
|
297f3ccda4 | ||
|
|
a3ab12e87c | ||
|
|
4d3c89a8f6 | ||
|
|
7da6ed6a74 | ||
|
|
bf5a2af4fd | ||
|
|
1d343e55ba | ||
|
|
2978622f7f | ||
|
|
b62fdb6958 | ||
|
|
f8b102c2e0 | ||
|
|
4b86803692 | ||
|
|
e29615a885 |
21
.env.example
Normal file
21
.env.example
Normal file
@@ -0,0 +1,21 @@
|
||||
# Pixelle-Video Environment Configuration
|
||||
# Copy this file to .env and customize as needed
|
||||
|
||||
# ============================================================================
|
||||
# Port Configuration
|
||||
# ============================================================================
|
||||
|
||||
# FastAPI Backend Port
|
||||
API_PORT=8000
|
||||
|
||||
# Next.js Editor Port
|
||||
EDITOR_PORT=3000
|
||||
|
||||
# Streamlit Web UI Port
|
||||
WEB_PORT=8501
|
||||
|
||||
# ============================================================================
|
||||
# Other Configuration
|
||||
# ============================================================================
|
||||
|
||||
# Add other environment variables here as needed
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -76,3 +76,5 @@ examples/
|
||||
repositories/
|
||||
|
||||
*.out
|
||||
.pids/
|
||||
.serena/
|
||||
|
||||
@@ -41,28 +41,37 @@ from api.schemas.editor import (
|
||||
ExportRequest,
|
||||
ExportResponse,
|
||||
ExportStatusResponse,
|
||||
AlignPromptRequest,
|
||||
AlignPromptResponse,
|
||||
)
|
||||
from fastapi import BackgroundTasks
|
||||
import asyncio
|
||||
import uuid as uuid_module
|
||||
import os
|
||||
|
||||
router = APIRouter(prefix="/editor", tags=["Editor"])
|
||||
|
||||
# Export task storage
|
||||
_export_tasks: dict = {}
|
||||
|
||||
# Get API port from environment
|
||||
API_PORT = os.getenv("API_PORT", "8000")
|
||||
|
||||
def _path_to_url(file_path: str, base_url: str = "http://localhost:8000") -> str:
|
||||
|
||||
def _path_to_url(file_path: str, base_url: str = None) -> str:
|
||||
"""Convert local file path to URL accessible through API"""
|
||||
if not file_path:
|
||||
return None
|
||||
|
||||
|
||||
if base_url is None:
|
||||
base_url = f"http://localhost:{API_PORT}"
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Normalize path separators
|
||||
file_path = file_path.replace("\\", "/")
|
||||
|
||||
|
||||
# Extract relative path from output directory
|
||||
parts = file_path.split("/")
|
||||
try:
|
||||
@@ -71,7 +80,7 @@ def _path_to_url(file_path: str, base_url: str = "http://localhost:8000") -> str
|
||||
relative_path = "/".join(relative_parts)
|
||||
except ValueError:
|
||||
relative_path = Path(file_path).name
|
||||
|
||||
|
||||
return f"{base_url}/api/files/{relative_path}"
|
||||
|
||||
|
||||
@@ -450,22 +459,69 @@ async def regenerate_frame_image(
|
||||
raise HTTPException(status_code=400, detail="No image prompt available")
|
||||
|
||||
try:
|
||||
# Import and use PixelleVideo core for image generation
|
||||
# Import and use PixelleVideo core services
|
||||
from api.dependencies import get_pixelle_video
|
||||
from pixelle_video.models.storyboard import StoryboardFrame, StoryboardConfig
|
||||
from api.routers.quality import _style_anchors
|
||||
|
||||
pixelle_video = get_pixelle_video()
|
||||
logger.debug(f"[REGEN-IMG] Starting image regeneration for frame {frame_id}")
|
||||
logger.debug(f"[REGEN-IMG] Original prompt: {prompt[:100]}...")
|
||||
|
||||
# Generate image using ComfyKit
|
||||
result = await pixelle_video.comfy(
|
||||
workflow="image_gen",
|
||||
prompt=prompt,
|
||||
task_id=storyboard_id,
|
||||
pixelle_video = await get_pixelle_video()
|
||||
|
||||
# Get style anchor prefix if available
|
||||
style_prefix = ""
|
||||
logger.debug(f"[REGEN-IMG] Checking style anchors for storyboard {storyboard_id}")
|
||||
logger.debug(f"[REGEN-IMG] Available style anchors: {list(_style_anchors.keys())}")
|
||||
|
||||
if storyboard_id in _style_anchors:
|
||||
style_data = _style_anchors[storyboard_id]
|
||||
style_prefix = style_data.get("style_prefix", "")
|
||||
logger.info(f"[REGEN-IMG] Found style anchor: {style_prefix[:80] if style_prefix else 'EMPTY'}...")
|
||||
else:
|
||||
logger.warning(f"[REGEN-IMG] No style anchor found for {storyboard_id}")
|
||||
|
||||
# Get character descriptions for prompt injection
|
||||
character_prefix = ""
|
||||
from api.routers.quality import _character_stores
|
||||
if storyboard_id in _character_stores:
|
||||
char_descriptions = []
|
||||
for char_data in _character_stores[storyboard_id].values():
|
||||
appearance = char_data.get("appearance_description", "")
|
||||
clothing = char_data.get("clothing_description", "")
|
||||
name = char_data.get("name", "character")
|
||||
|
||||
if appearance or clothing:
|
||||
parts = [f"{name}:"]
|
||||
if appearance:
|
||||
parts.append(appearance)
|
||||
if clothing:
|
||||
parts.append(f"wearing {clothing}")
|
||||
char_descriptions.append(" ".join(parts))
|
||||
|
||||
if char_descriptions:
|
||||
character_prefix = "Characters: " + "; ".join(char_descriptions) + ". "
|
||||
logger.info(f"[REGEN-IMG] Injecting character descriptions: {character_prefix[:80]}...")
|
||||
|
||||
# Apply style prefix and character descriptions to prompt
|
||||
final_prompt = ""
|
||||
if style_prefix:
|
||||
final_prompt += f"{style_prefix}, "
|
||||
if character_prefix:
|
||||
final_prompt += character_prefix
|
||||
final_prompt += prompt
|
||||
logger.info(f"[REGEN-IMG] Final prompt: {final_prompt[:120]}...")
|
||||
|
||||
# Use MediaService to generate image via RunningHub workflow
|
||||
# Use image_flux2 workflow (FLUX.1 Kontext model for better consistency)
|
||||
logger.debug(f"[REGEN-IMG] Calling pixelle_video.image with workflow=runninghub/image_flux2.json")
|
||||
result = await pixelle_video.image(
|
||||
prompt=final_prompt,
|
||||
media_type="image",
|
||||
workflow="runninghub/image_flux2.json",
|
||||
)
|
||||
|
||||
if result and result.get("images"):
|
||||
if result and result.url:
|
||||
# Download and save image
|
||||
image_url = result["images"][0]
|
||||
import aiohttp
|
||||
import os
|
||||
|
||||
@@ -473,17 +529,47 @@ async def regenerate_frame_image(
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
image_path = f"{output_dir}/frame_{frame_index}_regenerated.png"
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(image_url) as resp:
|
||||
if resp.status == 200:
|
||||
with open(image_path, 'wb') as f:
|
||||
f.write(await resp.read())
|
||||
# Check if URL is remote or local
|
||||
if result.url.startswith("http"):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(result.url) as resp:
|
||||
if resp.status == 200:
|
||||
with open(image_path, 'wb') as f:
|
||||
f.write(await resp.read())
|
||||
else:
|
||||
# Local file, copy it
|
||||
import shutil
|
||||
if os.path.exists(result.url):
|
||||
shutil.copy2(result.url, image_path)
|
||||
else:
|
||||
image_path = result.url
|
||||
|
||||
# Update frame
|
||||
target_frame["image_path"] = _path_to_url(image_path)
|
||||
_storyboard_cache[storyboard_id] = storyboard
|
||||
|
||||
logger.info(f"Regenerated image for frame {frame_id}")
|
||||
# Persist changes to storyboard.json
|
||||
try:
|
||||
from pixelle_video.services.persistence import PersistenceService
|
||||
persistence = PersistenceService()
|
||||
|
||||
# Load existing storyboard model
|
||||
storyboard_model = await persistence.load_storyboard(storyboard_id)
|
||||
if storyboard_model:
|
||||
# Update the specific frame's image_path
|
||||
for frame in storyboard_model.frames:
|
||||
if f"frame-{frame.index}" == frame_id:
|
||||
frame.image_path = image_path
|
||||
logger.debug(f"[PERSIST] Updated frame {frame_id} image_path in model")
|
||||
break
|
||||
|
||||
# Save back to JSON
|
||||
await persistence.save_storyboard(storyboard_id, storyboard_model)
|
||||
logger.info(f"[PERSIST] Saved storyboard to JSON for {storyboard_id}")
|
||||
except Exception as pe:
|
||||
logger.warning(f"[PERSIST] Failed to persist storyboard: {pe}")
|
||||
|
||||
logger.info(f"Regenerated image for frame {frame_id} via RunningHub")
|
||||
|
||||
return RegenerateImageResponse(
|
||||
image_path=target_frame["image_path"],
|
||||
@@ -542,7 +628,7 @@ async def regenerate_frame_audio(
|
||||
from api.dependencies import get_pixelle_video
|
||||
import os
|
||||
|
||||
pixelle_video = get_pixelle_video()
|
||||
pixelle_video = await get_pixelle_video()
|
||||
|
||||
# Create output path
|
||||
output_dir = f"output/{storyboard_id}"
|
||||
@@ -574,6 +660,31 @@ async def regenerate_frame_audio(
|
||||
storyboard["total_duration"] = sum(f.get("duration", 3.0) for f in frames)
|
||||
_storyboard_cache[storyboard_id] = storyboard
|
||||
|
||||
# Persist changes to storyboard.json
|
||||
try:
|
||||
from pixelle_video.services.persistence import PersistenceService
|
||||
persistence = PersistenceService()
|
||||
|
||||
# Load existing storyboard model
|
||||
storyboard_model = await persistence.load_storyboard(storyboard_id)
|
||||
if storyboard_model:
|
||||
# Update the specific frame's audio_path and duration
|
||||
for frame in storyboard_model.frames:
|
||||
if f"frame-{frame.index}" == frame_id:
|
||||
frame.audio_path = result_path
|
||||
frame.duration = duration
|
||||
logger.debug(f"[PERSIST] Updated frame {frame_id} audio_path in model")
|
||||
break
|
||||
|
||||
# Update total duration
|
||||
storyboard_model.total_duration = sum(f.duration or 3.0 for f in storyboard_model.frames)
|
||||
|
||||
# Save back to JSON
|
||||
await persistence.save_storyboard(storyboard_id, storyboard_model)
|
||||
logger.info(f"[PERSIST] Saved storyboard to JSON for {storyboard_id}")
|
||||
except Exception as pe:
|
||||
logger.warning(f"[PERSIST] Failed to persist storyboard: {pe}")
|
||||
|
||||
logger.info(f"Regenerated audio for frame {frame_id}, duration: {duration}s")
|
||||
|
||||
return RegenerateAudioResponse(
|
||||
@@ -590,6 +701,98 @@ async def regenerate_frame_audio(
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post(
|
||||
"/storyboard/{storyboard_id}/frames/{frame_id}/align-prompt",
|
||||
response_model=AlignPromptResponse
|
||||
)
|
||||
async def align_frame_prompt(
|
||||
storyboard_id: str = Path(..., description="Storyboard/task ID"),
|
||||
frame_id: str = Path(..., description="Frame ID"),
|
||||
request: AlignPromptRequest = None
|
||||
):
|
||||
"""
|
||||
Align image prompt with narration
|
||||
|
||||
Regenerates the image prompt based on the frame's narration using
|
||||
enhanced core imagery extraction for better semantic relevance.
|
||||
"""
|
||||
if storyboard_id not in _storyboard_cache:
|
||||
raise HTTPException(status_code=404, detail=f"Storyboard {storyboard_id} not found")
|
||||
|
||||
storyboard = _storyboard_cache[storyboard_id]
|
||||
frames = storyboard["frames"]
|
||||
|
||||
# Find frame
|
||||
target_frame = None
|
||||
for frame in frames:
|
||||
if frame["id"] == frame_id:
|
||||
target_frame = frame
|
||||
break
|
||||
|
||||
if not target_frame:
|
||||
raise HTTPException(status_code=404, detail=f"Frame {frame_id} not found")
|
||||
|
||||
# Get narration to use
|
||||
narration = request.narration if request and request.narration else target_frame.get("narration", "")
|
||||
|
||||
if not narration:
|
||||
raise HTTPException(status_code=400, detail="No narration text available")
|
||||
|
||||
try:
|
||||
from api.dependencies import get_pixelle_video
|
||||
|
||||
pixelle_video = await get_pixelle_video()
|
||||
|
||||
# Use LLM to generate aligned image prompt
|
||||
from pixelle_video.prompts import build_image_prompt_prompt
|
||||
|
||||
prompt = build_image_prompt_prompt(
|
||||
narrations=[narration],
|
||||
min_words=30,
|
||||
max_words=60
|
||||
)
|
||||
|
||||
response = await pixelle_video.llm(
|
||||
prompt=prompt,
|
||||
temperature=0.7,
|
||||
max_tokens=500
|
||||
)
|
||||
|
||||
# Parse response
|
||||
import json
|
||||
import re
|
||||
|
||||
# Try to extract JSON
|
||||
try:
|
||||
result = json.loads(response)
|
||||
except json.JSONDecodeError:
|
||||
# Try markdown code block
|
||||
match = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', response)
|
||||
if match:
|
||||
result = json.loads(match.group(1))
|
||||
else:
|
||||
raise ValueError("Failed to parse LLM response")
|
||||
|
||||
if "image_prompts" not in result or len(result["image_prompts"]) == 0:
|
||||
raise ValueError("No image prompts in response")
|
||||
|
||||
new_prompt = result["image_prompts"][0]
|
||||
|
||||
# Update frame
|
||||
target_frame["image_prompt"] = new_prompt
|
||||
_storyboard_cache[storyboard_id] = storyboard
|
||||
|
||||
logger.info(f"Aligned image prompt for frame {frame_id}")
|
||||
|
||||
return AlignPromptResponse(
|
||||
image_prompt=new_prompt,
|
||||
success=True
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Prompt alignment failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@router.post(
|
||||
"/storyboard/{storyboard_id}/frames/{frame_id}/inpaint",
|
||||
response_model=InpaintResponse
|
||||
@@ -755,8 +958,9 @@ async def export_video(
|
||||
for frame in sorted_frames:
|
||||
path = frame.get("video_segment_path", "")
|
||||
if path.startswith("http"):
|
||||
# Extract path from URL
|
||||
path = path.replace("http://localhost:8000/api/files/", "output/")
|
||||
# Extract path from URL (format: http://localhost:{port}/api/files/{relative_path})
|
||||
if "/api/files/" in path:
|
||||
path = "output/" + path.split("/api/files/")[-1]
|
||||
video_segments.append(path)
|
||||
|
||||
_export_tasks[task_id]["progress"] = 0.3
|
||||
|
||||
@@ -15,10 +15,13 @@ Provides endpoints for:
|
||||
- Quality gate evaluation
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Path, Body
|
||||
from fastapi import APIRouter, HTTPException, Path, Body, File, UploadFile, Query
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional
|
||||
from loguru import logger
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
router = APIRouter(prefix="/quality", tags=["Quality"])
|
||||
|
||||
@@ -45,6 +48,20 @@ class CharacterCreateRequest(BaseModel):
|
||||
clothing_description: str = Field("", description="Clothing description")
|
||||
distinctive_features: List[str] = Field(default_factory=list)
|
||||
character_type: str = Field("person")
|
||||
reference_image_path: Optional[str] = Field(None, description="Reference image path for VLM analysis")
|
||||
|
||||
|
||||
class CharacterAnalyzeRequest(BaseModel):
|
||||
"""Request to analyze a character image"""
|
||||
image_path: str = Field(..., description="Path to the reference image")
|
||||
|
||||
|
||||
class CharacterAnalyzeResponse(BaseModel):
|
||||
"""Response from character image analysis"""
|
||||
appearance_description: str = ""
|
||||
clothing_description: str = ""
|
||||
distinctive_features: List[str] = []
|
||||
prompt_description: str = "" # Combined description for prompt injection
|
||||
|
||||
|
||||
class ContentCheckRequest(BaseModel):
|
||||
@@ -115,20 +132,49 @@ async def create_character(
|
||||
storyboard_id: str = Path(..., description="Storyboard ID"),
|
||||
request: CharacterCreateRequest = Body(...)
|
||||
):
|
||||
"""Register a new character"""
|
||||
"""
|
||||
Register a new character
|
||||
|
||||
If reference_image_path is provided and appearance_description is empty,
|
||||
VLM will analyze the image to extract appearance descriptions automatically.
|
||||
"""
|
||||
import uuid
|
||||
|
||||
if storyboard_id not in _character_stores:
|
||||
_character_stores[storyboard_id] = {}
|
||||
|
||||
# Auto-analyze reference image if provided and no description
|
||||
appearance_desc = request.appearance_description
|
||||
clothing_desc = request.clothing_description
|
||||
distinctive = request.distinctive_features
|
||||
ref_image = request.reference_image_path
|
||||
|
||||
if ref_image and not appearance_desc:
|
||||
try:
|
||||
from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer
|
||||
analyzer = CharacterAnalyzer()
|
||||
result = await analyzer.analyze_reference_image(ref_image)
|
||||
|
||||
if result.appearance_description:
|
||||
appearance_desc = result.appearance_description
|
||||
if result.clothing_description:
|
||||
clothing_desc = result.clothing_description
|
||||
if result.distinctive_features:
|
||||
distinctive = result.distinctive_features
|
||||
|
||||
logger.info(f"Auto-analyzed character from image: {ref_image}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to auto-analyze character image: {e}")
|
||||
|
||||
char_id = f"char_{uuid.uuid4().hex[:8]}"
|
||||
character = CharacterSchema(
|
||||
id=char_id,
|
||||
name=request.name,
|
||||
appearance_description=request.appearance_description,
|
||||
clothing_description=request.clothing_description,
|
||||
distinctive_features=request.distinctive_features,
|
||||
appearance_description=appearance_desc,
|
||||
clothing_description=clothing_desc,
|
||||
distinctive_features=distinctive,
|
||||
character_type=request.character_type,
|
||||
reference_image=ref_image,
|
||||
)
|
||||
|
||||
_character_stores[storyboard_id][char_id] = character.model_dump()
|
||||
@@ -184,6 +230,75 @@ async def delete_character(
|
||||
return {"deleted": True}
|
||||
|
||||
|
||||
@router.post(
|
||||
"/characters/{storyboard_id}/analyze-image",
|
||||
response_model=CharacterAnalyzeResponse
|
||||
)
|
||||
async def analyze_character_image(
|
||||
storyboard_id: str = Path(..., description="Storyboard ID"),
|
||||
request: CharacterAnalyzeRequest = Body(...)
|
||||
):
|
||||
"""
|
||||
Analyze a character reference image using VLM
|
||||
|
||||
Extracts detailed appearance descriptions that can be used
|
||||
to maintain character consistency across frames.
|
||||
"""
|
||||
from pixelle_video.services.quality.character_analyzer import CharacterAnalyzer
|
||||
|
||||
logger.info(f"Analyzing character image for storyboard {storyboard_id}: {request.image_path}")
|
||||
|
||||
analyzer = CharacterAnalyzer()
|
||||
result = await analyzer.analyze_reference_image(request.image_path)
|
||||
|
||||
return CharacterAnalyzeResponse(
|
||||
appearance_description=result.appearance_description,
|
||||
clothing_description=result.clothing_description,
|
||||
distinctive_features=result.distinctive_features,
|
||||
prompt_description=result.to_prompt_description()
|
||||
)
|
||||
|
||||
|
||||
@router.post("/upload")
|
||||
async def upload_file(
|
||||
file: UploadFile = File(...),
|
||||
storyboard_id: str = Query(..., description="Storyboard ID"),
|
||||
type: str = Query("character", description="File type (character, reference)")
|
||||
):
|
||||
"""
|
||||
Upload a file for character reference or other purposes.
|
||||
|
||||
Returns the saved file path that can be used for analysis.
|
||||
"""
|
||||
try:
|
||||
# Create output directory
|
||||
output_dir = f"output/{storyboard_id}"
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Generate filename with timestamp
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
||||
ext = os.path.splitext(file.filename)[1] or ".png"
|
||||
filename = f"{type}_{timestamp}{ext}"
|
||||
file_path = os.path.join(output_dir, filename)
|
||||
|
||||
# Save file
|
||||
with open(file_path, "wb") as buffer:
|
||||
content = await file.read()
|
||||
buffer.write(content)
|
||||
|
||||
logger.info(f"Uploaded file to: {file_path}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"path": file_path,
|
||||
"file_path": file_path,
|
||||
"filename": filename
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to upload file: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# ============================================================
|
||||
# Content Filter Endpoints
|
||||
# ============================================================
|
||||
@@ -234,8 +349,9 @@ async def extract_style(
|
||||
# Convert URL to file path if needed
|
||||
actual_path = image_path
|
||||
if image_path.startswith("http"):
|
||||
# Extract path from URL like http://localhost:8000/api/files/...
|
||||
actual_path = image_path.replace("http://localhost:8000/api/files/", "output/")
|
||||
# Extract path from URL (format: http://localhost:{port}/api/files/{relative_path})
|
||||
if "/api/files/" in image_path:
|
||||
actual_path = "output/" + image_path.split("/api/files/")[-1]
|
||||
|
||||
# Check if file exists
|
||||
import os
|
||||
@@ -254,9 +370,14 @@ async def extract_style(
|
||||
return style_schema
|
||||
|
||||
from pixelle_video.services.quality.style_guard import StyleGuard
|
||||
from api.dependencies import get_pixelle_video
|
||||
|
||||
style_guard = StyleGuard()
|
||||
anchor = style_guard.extract_style_anchor(actual_path)
|
||||
# Get LLM service for VLM-based style extraction
|
||||
pixelle_video = await get_pixelle_video()
|
||||
llm_service = pixelle_video.llm if pixelle_video else None
|
||||
|
||||
style_guard = StyleGuard(llm_service=llm_service)
|
||||
anchor = await style_guard.extract_style_anchor(actual_path)
|
||||
|
||||
style_schema = StyleAnchorSchema(
|
||||
color_palette=anchor.color_palette,
|
||||
|
||||
@@ -144,3 +144,12 @@ class ExportStatusResponse(BaseModel):
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class AlignPromptRequest(BaseModel):
|
||||
"""Request to align image prompt with narration"""
|
||||
narration: Optional[str] = Field(None, description="Override narration text")
|
||||
|
||||
|
||||
class AlignPromptResponse(BaseModel):
|
||||
"""Response after aligning prompt"""
|
||||
image_prompt: str
|
||||
success: bool = True
|
||||
|
||||
@@ -17,6 +17,20 @@ llm:
|
||||
# DeepSeek: base_url: "https://api.deepseek.com" model: "deepseek-chat"
|
||||
# Ollama (Local): base_url: "http://localhost:11434/v1" model: "llama3.2"
|
||||
|
||||
# ==================== VLM Configuration (Vision Language Model) ====================
|
||||
# Used for character analysis and image understanding
|
||||
# If not configured, will try to use LLM config with vision model auto-detection
|
||||
vlm:
|
||||
provider: "qwen" # Options: qwen, glm, openai
|
||||
api_key: "" # Leave empty to use DASHSCOPE_API_KEY or VLM_API_KEY env var
|
||||
base_url: "" # Leave empty for auto-detection based on provider
|
||||
model: "" # Leave empty for default model based on provider
|
||||
|
||||
# VLM Provider presets:
|
||||
# Qwen (通义千问): provider: "qwen" model: "qwen-vl-plus" or "qwen-vl-max" or "qwen3-vl-plus"
|
||||
# GLM (智谱): provider: "glm" model: "glm-4v-flash" or "glm-4v"
|
||||
# OpenAI: provider: "openai" model: "gpt-4-vision-preview" or "gpt-4o"
|
||||
|
||||
# ==================== ComfyUI Configuration ====================
|
||||
comfyui:
|
||||
# Global ComfyUI settings
|
||||
|
||||
6
dev.sh
6
dev.sh
@@ -54,7 +54,7 @@ print_banner() {
|
||||
|
||||
start_api() {
|
||||
echo -e "${GREEN}🚀 Starting FastAPI Backend...${NC}"
|
||||
uv run python api/app.py --port $API_PORT --reload &
|
||||
API_PORT=$API_PORT uv run python api/app.py --port $API_PORT --reload &
|
||||
echo $! > "$PID_DIR/api.pid"
|
||||
echo -e " ${GREEN}✓${NC} API running at: ${YELLOW}http://localhost:$API_PORT${NC}"
|
||||
echo -e " ${GREEN}✓${NC} API Docs at: ${YELLOW}http://localhost:$API_PORT/docs${NC}"
|
||||
@@ -63,7 +63,7 @@ start_api() {
|
||||
start_editor() {
|
||||
echo -e "${GREEN}🎬 Starting Next.js Editor...${NC}"
|
||||
cd "$PROJECT_ROOT/frontend"
|
||||
PORT=$EDITOR_PORT npm run dev &
|
||||
API_PORT=$API_PORT PORT=$EDITOR_PORT npm run dev &
|
||||
echo $! > "$PID_DIR/editor.pid"
|
||||
cd "$PROJECT_ROOT"
|
||||
echo -e " ${GREEN}✓${NC} Editor running at: ${YELLOW}http://localhost:$EDITOR_PORT${NC}"
|
||||
@@ -71,7 +71,7 @@ start_editor() {
|
||||
|
||||
start_web() {
|
||||
echo -e "${GREEN}🌐 Starting Streamlit Web UI...${NC}"
|
||||
uv run streamlit run web/app.py --server.port $WEB_PORT &
|
||||
API_PORT=$API_PORT EDITOR_PORT=$EDITOR_PORT uv run streamlit run web/app.py --server.port $WEB_PORT &
|
||||
echo $! > "$PID_DIR/web.pid"
|
||||
echo -e " ${GREEN}✓${NC} Web UI running at: ${YELLOW}http://localhost:$WEB_PORT${NC}"
|
||||
}
|
||||
|
||||
54
docs/port-configuration.md
Normal file
54
docs/port-configuration.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# 端口配置说明
|
||||
|
||||
## 默认端口
|
||||
|
||||
| 服务 | 默认端口 | 说明 |
|
||||
|------|---------|------|
|
||||
| FastAPI 后端 | 8000 | API 服务和文档 |
|
||||
| Next.js 编辑器 | 3000 | 时间轴编辑器 |
|
||||
| Streamlit Web UI | 8501 | Web 界面 |
|
||||
|
||||
## 自定义端口
|
||||
|
||||
### 方式 1: 环境变量(临时)
|
||||
|
||||
```bash
|
||||
# 自定义所有端口
|
||||
API_PORT=8080 EDITOR_PORT=3001 WEB_PORT=8502 ./dev.sh
|
||||
|
||||
# 只自定义部分端口
|
||||
API_PORT=8080 ./dev.sh
|
||||
```
|
||||
|
||||
### 方式 2: .env 文件(持久)
|
||||
|
||||
1. 复制示例配置文件:
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
2. 编辑 `.env` 文件,修改端口:
|
||||
```bash
|
||||
API_PORT=8080
|
||||
EDITOR_PORT=3001
|
||||
WEB_PORT=8502
|
||||
```
|
||||
|
||||
3. 启动服务:
|
||||
```bash
|
||||
./dev.sh
|
||||
```
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **端口冲突**:确保选择的端口没有被其他程序占用
|
||||
2. **防火墙**:如需外部访问,请配置防火墙规则
|
||||
3. **前端重建**:修改端口后,Next.js 前端会自动重建(首次启动较慢)
|
||||
|
||||
## 验证端口配置
|
||||
|
||||
启动后访问以下地址确认服务正常:
|
||||
|
||||
- API 文档: `http://localhost:{API_PORT}/docs`
|
||||
- 编辑器: `http://localhost:{EDITOR_PORT}`
|
||||
- Web UI: `http://localhost:{WEB_PORT}`
|
||||
434
docs/工作流完整接入示例.md
Normal file
434
docs/工作流完整接入示例.md
Normal file
@@ -0,0 +1,434 @@
|
||||
# 工作流完整接入示例
|
||||
|
||||
# 📝 RunningHub AI 工作流交互使用手册(workflow 版本)
|
||||
|
||||
## 1. 功能概述
|
||||
|
||||
本脚本通过调用 RunningHub AI 平台的 OpenAPI,实现从本地加载工作流 JSON、修改节点信息、上传文件、提交任务并自动查询结果的全流程操作。
|
||||
|
||||
主要功能包括:
|
||||
|
||||
- 读取本地工作流配置(JSON 文件)
|
||||
- 生成可修改节点信息列表(nodeInfoList)
|
||||
- 根据节点类型(图片、文本等)修改节点值
|
||||
- 上传图片、音频、视频文件
|
||||
- 向 RunningHub 提交任务并实时查询状态
|
||||
- 输出最终生成结果的文件链接
|
||||
|
||||
✅ 适用于有自定义工作流(workflowId)的高级用户,可在不打开网页的情况下自动执行 AI 工作流。
|
||||
|
||||
---
|
||||
|
||||
## 2. 文件说明与主要函数
|
||||
|
||||
### 💡 主要文件
|
||||
|
||||
| 文件名 | 功能 |
|
||||
|-------------|------|
|
||||
| workflow.py | 主执行脚本 |
|
||||
| api.json | 从 RunningHub 下载的工作流配置文件(包含节点定义) |
|
||||
|
||||
### 🔧 核心函数介绍
|
||||
|
||||
| 函数名 | 功能描述 |
|
||||
|--------|----------|
|
||||
| load_json(file_path) | 从本地读取并解析工作流 JSON 文件 |
|
||||
| convert_to_node_info_list(data) | 将 JSON 格式转换为节点信息列表 |
|
||||
| upload_file(API_KEY, file_path) | 上传本地文件(image/audio/video)至 RunningHub |
|
||||
| submit_task(workflowId, node_info_list, API_KEY) | 提交任务,启动 AI 工作流执行 |
|
||||
| query_task_outputs(task_id, API_KEY) | 轮询任务执行状态并获取结果输出 |
|
||||
|
||||
---
|
||||
|
||||
## 3. 操作步骤详解
|
||||
|
||||
### Step 1️⃣:输入必要信息
|
||||
|
||||
运行脚本后,系统会提示输入以下信息:
|
||||
|
||||
```text
|
||||
请输入你的 api_key:
|
||||
```
|
||||
说明:在 RunningHub 控制台“API 调用”中可获得。
|
||||
示例:`0s2d1***********2n3mk4`
|
||||
```
|
||||
请输入 workflowId:
|
||||
```
|
||||
示例:`1980468315921559554`
|
||||
来源于链接末尾:https://www.runninghub.cn/workflow/1980237776367083521?source=workspace
|
||||
|
||||
然后输入本地工作流 JSON 文件路径:
|
||||
|
||||
```
|
||||
输入您的json文件地址(json文件一定要在自己的工作台中获得,获得途径为导出工作流api到本地):
|
||||
```
|
||||
示例:`C:\Users\Mayn\Downloads\api.json`
|
||||
|
||||
此时脚本会输出工作流中的所有节点信息:
|
||||
|
||||
```
|
||||
等待node_info_list生成(包含所有可修改的节点)
|
||||
{'3': {'inputs': {...}}, '4': {...}, '6': {...}, ...}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 2️⃣:查看并修改节点
|
||||
|
||||
脚本会提示:
|
||||
|
||||
```text
|
||||
请输入 nodeId(输入 'exit' 结束修改):
|
||||
```
|
||||
|
||||
输入节点 nodeId(如 10),脚本会展示该节点的所有字段:
|
||||
|
||||
```
|
||||
🧩 找到节点 10 的字段如下:
|
||||
(0, {'nodeId': '10', 'fieldName': 'image', 'fieldValue': 'xxx.jpg'})
|
||||
```
|
||||
|
||||
接着输入要修改的字段名:
|
||||
|
||||
```
|
||||
请输入要修改的 fieldName:
|
||||
```
|
||||
示例:`image`
|
||||
|
||||
---
|
||||
|
||||
### Step 3️⃣:修改字段值
|
||||
|
||||
#### 📷 如果是文件类型(image/audio/video)
|
||||
|
||||
```
|
||||
请输入您本地image文件路径:
|
||||
```
|
||||
示例输入:`D:\R.jpg`
|
||||
|
||||
上传成功后:
|
||||
|
||||
```
|
||||
等待文件上传中
|
||||
上传结果: {'code': 0, 'msg': 'success', 'data': {'fileName': 'api/xxx.jpg', 'fileType': 'input'}}
|
||||
✅ 已更新 image fieldValue: api/xxx.jpg
|
||||
```
|
||||
|
||||
#### 📝 如果是文本或数值类型
|
||||
|
||||
```
|
||||
请输入新的 fieldValue (text):
|
||||
```
|
||||
示例输入:`1 girl in classroom`
|
||||
|
||||
返回:
|
||||
|
||||
```
|
||||
✅ 已更新 fieldValue: 1 girl in classroom
|
||||
```
|
||||
|
||||
> 可多次修改不同节点,输入 `exit` 结束。
|
||||
|
||||
---
|
||||
|
||||
### Step 4️⃣:提交任务
|
||||
|
||||
输入完成后,脚本自动提交任务:
|
||||
|
||||
```
|
||||
开始提交任务,请等待
|
||||
📌 提交任务返回: {'code': 0, 'msg': 'success', 'data': {...}}
|
||||
📝 taskId: 1980471280073846785
|
||||
✅ 无节点错误,任务提交成功。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 5️⃣:任务状态轮询
|
||||
|
||||
脚本每隔 5 秒查询任务状态:
|
||||
|
||||
```
|
||||
⏳ 任务运行中...
|
||||
⏳ 任务运行中...
|
||||
🎉 生成结果完成!
|
||||
```
|
||||
|
||||
如果任务失败,会打印详细原因:
|
||||
|
||||
```
|
||||
❌ 任务失败!
|
||||
节点 SaveImage 失败原因: 'str' object has no attribute 'shape'
|
||||
Traceback: [...]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 6️⃣:查看结果文件
|
||||
|
||||
任务成功后会输出生成文件链接:
|
||||
|
||||
```
|
||||
🎉 生成结果完成!
|
||||
[{'fileUrl': 'https://rh-images.xiaoyaoyou.com/f24a6365b08fa3bc02f55cd1f63e74a7/output/ComfyUI_00001_hnqxe_1761016156.png',
|
||||
'fileType': 'png',
|
||||
'taskCostTime': '35',
|
||||
'nodeId': '17'}]
|
||||
✅ 任务完成!
|
||||
```
|
||||
|
||||
打开 `fileUrl` 即可查看 AI 生成的图片。
|
||||
|
||||
## 4. 完整运行流程概览
|
||||
|
||||
1️⃣ 输入 API_KEY 和 workflowId
|
||||
2️⃣ 加载本地 JSON 工作流
|
||||
3️⃣ 自动生成可修改节点列表
|
||||
4️⃣ 修改所需节点参数
|
||||
5️⃣ 上传文件(如图片)
|
||||
6️⃣ 提交任务至 RunningHub
|
||||
7️⃣ 轮询任务状态
|
||||
8️⃣ 获取并打印生成结果链接
|
||||
|
||||
---
|
||||
|
||||
## 5. 示例输出结果
|
||||
|
||||
```
|
||||
请输入你的 api_key: a0fada**************b2ke21
|
||||
请输入 workflowId: ***8315921559***
|
||||
输入您的json文件地址(json文件一定要在自己的工作台中获得,获得途径为导出工作流api到本地):C:\Users\Mayn\Downloads\api.json
|
||||
```
|
||||
```
|
||||
🧩 找到节点 10 的字段如下:
|
||||
(0, {'nodeId': '10', 'fieldName': 'image', 'fieldValue': 'xxx.jpg'})
|
||||
✅ 已更新 image fieldValue: api/xxx.jpg
|
||||
```
|
||||
```
|
||||
开始提交任务,请等待
|
||||
📌 提交任务返回: {...}
|
||||
⏳ 任务运行中...
|
||||
🎉 生成结果完成!
|
||||
✅ 任务完成!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. 小贴士(Tips)
|
||||
|
||||
- 建议使用 Python 3.8+
|
||||
- 脚本可直接在终端运行:
|
||||
|
||||
```bash
|
||||
python workflow.py
|
||||
```
|
||||
|
||||
- Windows 用户注意文件路径需使用双反斜杠 `\\`
|
||||
- 若使用代理或云主机,请确保端口 443 可访问 `www.runninghub.cn`
|
||||
|
||||
```python
|
||||
import http.client
|
||||
import json
|
||||
import mimetypes
|
||||
from codecs import encode
|
||||
import time
|
||||
import os
|
||||
import requests
|
||||
API_HOST = "www.runninghub.cn"
|
||||
def load_json(file_path):
|
||||
# 打开并读取 JSON 文件
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f) # 将 JSON 内容解析为 Python 对象(dict 或 list)
|
||||
# 打印读取到的数据
|
||||
print(data)
|
||||
return data
|
||||
def convert_to_node_info_list(data):
|
||||
node_info_list = []
|
||||
|
||||
for node_id, node_content in data.items():
|
||||
inputs = node_content.get("inputs", {})
|
||||
for field_name, field_value in inputs.items():
|
||||
# 如果 field_value 是列表或字典,可以选择转换成字符串
|
||||
if isinstance(field_value, (list, dict)):
|
||||
field_value = json.dumps(field_value)
|
||||
else:
|
||||
field_value = str(field_value)
|
||||
|
||||
node_info_list.append({
|
||||
"nodeId": str(node_id),
|
||||
"fieldName": str(field_name),
|
||||
"fieldValue": field_value
|
||||
})
|
||||
return node_info_list
|
||||
def upload_file(API_KEY, file_path):
    """Upload a local file (image/audio/video) to the RunningHub platform.

    Args:
        API_KEY: RunningHub API key.
        file_path: Path to the local file to upload.

    Returns:
        The parsed JSON response as a dict (contains ``msg`` and
        ``data.fileName`` on success).

    Raises:
        requests.HTTPError: If the server returns an error status code.
        requests.Timeout: If the upload does not complete in time.
    """
    url = "https://www.runninghub.cn/task/openapi/upload"
    headers = {
        'Host': 'www.runninghub.cn'
    }
    data = {
        'apiKey': API_KEY,
        'fileType': 'input'
    }
    with open(file_path, 'rb') as f:
        files = {'file': f}
        # Fix: the original call had no timeout, so a dead connection
        # would hang the script forever. 120s allows for large media.
        response = requests.post(url, headers=headers, files=files,
                                 data=data, timeout=120)
    # Fix: fail fast on HTTP errors instead of attempting to JSON-decode
    # an error page (which raised a confusing decode error).
    response.raise_for_status()
    return response.json()
|
||||
# 1️⃣ 提交任务
|
||||
def submit_task(workflowId, node_info_list, API_KEY):
    """Submit a workflow task to RunningHub.

    Args:
        workflowId: Numeric workflow id taken from the workflow page URL.
        node_info_list: List of modified node entries
            ({"nodeId", "fieldName", "fieldValue"} dicts).
        API_KEY: RunningHub API key.

    Returns:
        The parsed JSON response as a dict (contains ``data.taskId`` on
        success).
    """
    # Consistency: use the shared API_HOST constant like query_task_outputs
    conn = http.client.HTTPSConnection(API_HOST)
    payload = json.dumps({
        "apiKey": API_KEY,
        "workflowId": workflowId,
        "nodeInfoList": node_info_list
    })
    headers = {
        'Host': API_HOST,
        'Content-Type': 'application/json'
    }
    try:
        conn.request("POST", "/task/openapi/create", payload, headers)
        res = conn.getresponse()
        raw = res.read()
    finally:
        # Fix: the original never closed the connection (resource leak);
        # the sibling query_task_outputs() does close its connection.
        conn.close()
    # Note: json.loads (not json.load) — we have bytes, not a file object
    data = json.loads(raw.decode("utf-8"))
    print(data)
    return data
|
||||
def query_task_outputs(task_id, API_KEY):
    """Query the output artifacts of a submitted RunningHub task.

    Returns the parsed JSON response (``code``, ``msg`` and ``data``
    fields describing the task status / result files).
    """
    body = json.dumps({
        "apiKey": API_KEY,
        "taskId": task_id
    })
    request_headers = {
        'Host': API_HOST,
        'Content-Type': 'application/json'
    }
    connection = http.client.HTTPSConnection(API_HOST)
    connection.request("POST", "/task/openapi/outputs", body, request_headers)
    response = connection.getresponse()
    parsed = json.loads(response.read().decode("utf-8"))
    connection.close()
    return parsed
|
||||
if __name__ == "__main__":
    # Interactive driver: collect credentials, let the user edit workflow
    # node fields, submit the task, then poll until completion or timeout.
    print("下面两个输入用于获得AI工作流所需要的信息,api_key为用户的密钥从api调用——进入控制台中获得,workflowId(此为示例,具体的workflowId为你所选择的AI工作流界面上方的链接https://www.runninghub.cn/workflow/1980237776367083521?source=workspace,最后的数字为workflowId)")
    Api_key = input("请输入你的 api_key: ").strip()
    workflowId = input("请输入 workflowId: ").strip()
    print("请您下载您的工作流API json到本地")
    file_path = input("输入您的json文件地址(json文件一定要在自己的工作台中获得,获得途径为导出工作流api到本地):").strip()
    print("等待node_info_list生成(包涵所有的可以修改的node节点)")
    # Load the exported workflow JSON and flatten it into editable entries
    data = load_json(file_path)
    node_info_list = convert_to_node_info_list(data)
    print(node_info_list)
    print("下面用户可以输入工作流可以修改的节点id:nodeId,以及对应的fileName,锁定具体的节点位置,在找到具体位置之后,输入您需要修改的fileValue信息完成信息的修改用户发送AI工作流请求")
    modified_nodes = []
    # Interactive edit loop: the user picks nodes/fields until typing 'exit'
    while True:
        node_id_input = input("请输入 nodeId(输入 'exit' 结束修改): ").strip()
        if node_id_input.lower() == "exit":
            break

        # Collect every field entry belonging to this nodeId
        node_fields = [n for n in node_info_list if n['nodeId'] == node_id_input]

        if not node_fields:
            print("❌ 未找到该 nodeId 对应的节点")
            continue

        print(f"\n🧩 找到节点 {node_id_input} 的字段如下:")
        # Prints (index, entry) tuples — matches the tutorial's sample output
        for field in enumerate(node_fields):
            print(field)

        # Let the user pick which field of the node to modify
        field_name_input = input("\n请输入要修改的 fieldName: ").strip()
        target_node = next(
            (f for f in node_fields if f['fieldName'] == field_name_input), None
        )

        if not target_node:
            print("❌ 未找到该 fieldName")
            continue

        print(f"选中字段: {target_node}")
        # Media fields require a file upload first; other fields are edited
        # directly as plain text
        if target_node['fieldName'] in ["image", "audio", "video"]:
            # NOTE: reuses (shadows) the earlier JSON file_path variable
            file_path = input(f"请输入您本地{target_node['fieldName']}文件路径: ").strip()
            print("等待文件上传中")
            upload_result = upload_file(Api_key, file_path)
            print("上传结果:", upload_result)
            # upload_file returns the already-parsed JSON response dict
            if upload_result and upload_result.get("msg") == "success":
                uploaded_file_name = upload_result.get("data", {}).get("fileName")
                if uploaded_file_name:
                    # Point the node at the server-side file name
                    target_node['fieldValue'] = uploaded_file_name
                    print(f"✅ 已更新 {target_node['fieldName']} fieldValue:", uploaded_file_name)
            else:
                print("❌ 上传失败或返回格式异常:", upload_result)
        else:
            # Non-media field: take the new value verbatim from the user
            new_value = input(f"请输入新的 fieldValue ({target_node['fieldName']}): ").strip()
            target_node['fieldValue'] = new_value
            print("✅ 已更新 fieldValue:", new_value)
        # Record the edit for the submission payload
        modified_nodes.append({
            "nodeId": target_node['nodeId'],
            "fieldName": target_node['fieldName'],
            "fieldValue": target_node['fieldValue']
        })
    print(modified_nodes)
    print("开始提交任务,请等待")
    # Submit the task with only the fields the user actually changed
    submit_result = submit_task(workflowId, modified_nodes,Api_key)
    print("📌 提交任务返回:", submit_result)
    if submit_result.get("code") != 0:
        print("❌ 提交任务失败:", submit_result)
        exit()
    task_id = submit_result["data"]["taskId"]
    print(f"📝 taskId: {task_id}")
    # promptTips is a JSON *string* embedded in the response; parse it to
    # surface any per-node validation errors
    prompt_tips_str = submit_result["data"].get("promptTips")
    if prompt_tips_str:
        try:
            prompt_tips = json.loads(prompt_tips_str)
            node_errors = prompt_tips.get("node_errors", {})
            if node_errors:
                print("⚠️ 节点错误信息如下:")
                for node_id, err in node_errors.items():
                    print(f" 节点 {node_id} 错误: {err}")
            else:
                print("✅ 无节点错误,任务提交成功。")
        except Exception as e:
            print("⚠️ 无法解析 promptTips:", e)
    else:
        print("⚠️ 未返回 promptTips 字段。")
    # Poll for results every 5 seconds, giving up after 10 minutes
    timeout = 600
    start_time = time.time()
    while True:
        outputs_result = query_task_outputs(task_id, Api_key)
        code = outputs_result.get("code")
        msg = outputs_result.get("msg")  # NOTE(review): read but unused below
        data = outputs_result.get("data")
        if code == 0 and data:  # success: outputs are available
            file_url = data[0].get("fileUrl")
            print("🎉 生成结果完成!")
            print(data)
            break
        elif code == 805:  # task failed on the server
            failed_reason = data.get("failedReason") if data else None
            print("❌ 任务失败!")
            if failed_reason:
                print(f"节点 {failed_reason.get('node_name')} 失败原因: {failed_reason.get('exception_message')}")
                print("Traceback:", failed_reason.get("traceback"))
            else:
                print(outputs_result)
            break
        elif code == 804 or code == 813:  # 804 = running, 813 = queued
            status_text = "运行中" if code == 804 else "排队中"
            print(f"⏳ 任务{status_text}...")
        else:
            print("⚠️ 未知状态:", outputs_result)
        # Timeout check: stop polling after `timeout` seconds
        if time.time() - start_time > timeout:
            print("⏰ 等待超时(超过10分钟),任务未完成。")
            break
        time.sleep(5)
    print("✅ 任务完成!")
|
||||
```
|
||||
@@ -2,10 +2,11 @@ import type { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
async rewrites() {
|
||||
const apiPort = process.env.API_PORT || '8000';
|
||||
return [
|
||||
{
|
||||
source: '/api/:path*',
|
||||
destination: 'http://localhost:8000/api/:path*',
|
||||
destination: `http://localhost:${apiPort}/api/:path*`,
|
||||
},
|
||||
]
|
||||
},
|
||||
|
||||
@@ -74,6 +74,9 @@ export default function EditorPage() {
|
||||
const [exportDownloadUrl, setExportDownloadUrl] = useState<string | null>(null)
|
||||
const [exportError, setExportError] = useState<string | null>(null)
|
||||
|
||||
// Save all state
|
||||
const [isSavingAll, setIsSavingAll] = useState(false)
|
||||
|
||||
useEffect(() => {
|
||||
async function loadStoryboard() {
|
||||
// Get storyboard_id from URL, default to demo-1
|
||||
@@ -160,12 +163,39 @@ export default function EditorPage() {
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-2">
|
||||
<Button variant="ghost" size="sm">
|
||||
<Button variant="ghost" size="sm" onClick={() => {
|
||||
console.log('[SETTINGS] Settings clicked - not implemented yet')
|
||||
alert('设置功能开发中...')
|
||||
}}>
|
||||
<Settings className="h-4 w-4 mr-2" />
|
||||
设置
|
||||
</Button>
|
||||
<Button variant="ghost" size="sm">
|
||||
<Save className="h-4 w-4 mr-2" />
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
disabled={isSavingAll}
|
||||
onClick={async () => {
|
||||
if (!storyboard) return
|
||||
console.log('[SAVE-ALL] Starting save all frames...')
|
||||
setIsSavingAll(true)
|
||||
try {
|
||||
// Save is handled automatically by updateFrame during edits
|
||||
// This button confirms the current state is synced
|
||||
console.log('[SAVE-ALL] Current storyboard state:', storyboard.frames.length, 'frames')
|
||||
alert('当前状态已保存!\n\n提示:编辑分镜后点击右侧「保存」按钮可保存单个分镜的修改。')
|
||||
} catch (err: any) {
|
||||
console.error('[SAVE-ALL] Error:', err)
|
||||
alert('保存失败: ' + err.message)
|
||||
} finally {
|
||||
setIsSavingAll(false)
|
||||
}
|
||||
}}
|
||||
>
|
||||
{isSavingAll ? (
|
||||
<Loader2 className="h-4 w-4 mr-2 animate-spin" />
|
||||
) : (
|
||||
<Save className="h-4 w-4 mr-2" />
|
||||
)}
|
||||
保存
|
||||
</Button>
|
||||
<ExportButton
|
||||
@@ -229,6 +259,7 @@ function SelectedFrameDetails() {
|
||||
const [isSaving, setIsSaving] = useState(false)
|
||||
const [isRegeneratingImage, setIsRegeneratingImage] = useState(false)
|
||||
const [isRegeneratingAudio, setIsRegeneratingAudio] = useState(false)
|
||||
const [isAligningPrompt, setIsAligningPrompt] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
|
||||
// Update local state when frame changes
|
||||
@@ -250,14 +281,19 @@ function SelectedFrameDetails() {
|
||||
const handleSave = async () => {
|
||||
if (!storyboard || !selectedFrame) return
|
||||
|
||||
console.log('[SAVE] Starting save for frame:', selectedFrame.id)
|
||||
console.log('[SAVE] Narration:', narration)
|
||||
console.log('[SAVE] Image Prompt:', imagePrompt?.slice(0, 50))
|
||||
|
||||
setIsSaving(true)
|
||||
setError(null)
|
||||
|
||||
try {
|
||||
await editorApi.updateFrame(storyboard.id, selectedFrame.id, {
|
||||
const result = await editorApi.updateFrame(storyboard.id, selectedFrame.id, {
|
||||
narration,
|
||||
image_prompt: imagePrompt,
|
||||
})
|
||||
console.log('[SAVE] API response:', result)
|
||||
|
||||
// Update local store
|
||||
updateFrame(selectedFrame.id, {
|
||||
@@ -265,8 +301,10 @@ function SelectedFrameDetails() {
|
||||
imagePrompt,
|
||||
})
|
||||
|
||||
console.log('[SAVE] Success!')
|
||||
setIsEditing(false)
|
||||
} catch (err: any) {
|
||||
console.error('[SAVE] Error:', err)
|
||||
setError(err.message || '保存失败')
|
||||
} finally {
|
||||
setIsSaving(false)
|
||||
@@ -276,6 +314,9 @@ function SelectedFrameDetails() {
|
||||
const handleRegenerateImage = async () => {
|
||||
if (!storyboard || !selectedFrame) return
|
||||
|
||||
console.log('[REGEN-IMG] Starting regenerate image for frame:', selectedFrame.id)
|
||||
console.log('[REGEN-IMG] Image prompt:', imagePrompt?.slice(0, 80))
|
||||
|
||||
setIsRegeneratingImage(true)
|
||||
setError(null)
|
||||
|
||||
@@ -285,12 +326,15 @@ function SelectedFrameDetails() {
|
||||
selectedFrame.id,
|
||||
imagePrompt
|
||||
)
|
||||
console.log('[REGEN-IMG] API response:', result)
|
||||
|
||||
// Update local store with new image path
|
||||
updateFrame(selectedFrame.id, {
|
||||
imagePath: result.image_path,
|
||||
})
|
||||
console.log('[REGEN-IMG] Success! New image path:', result.image_path)
|
||||
} catch (err: any) {
|
||||
console.error('[REGEN-IMG] Error:', err)
|
||||
setError(err.message || '重新生成图片失败')
|
||||
} finally {
|
||||
setIsRegeneratingImage(false)
|
||||
@@ -322,6 +366,31 @@ function SelectedFrameDetails() {
|
||||
}
|
||||
}
|
||||
|
||||
const handleAlignPrompt = async () => {
|
||||
if (!storyboard || !selectedFrame) return
|
||||
|
||||
setIsAligningPrompt(true)
|
||||
setError(null)
|
||||
|
||||
try {
|
||||
const result = await editorApi.alignPrompt(
|
||||
storyboard.id,
|
||||
selectedFrame.id,
|
||||
narration || selectedFrame.narration
|
||||
)
|
||||
|
||||
// Update local store with new image prompt
|
||||
updateFrame(selectedFrame.id, {
|
||||
imagePrompt: result.image_prompt,
|
||||
})
|
||||
setImagePrompt(result.image_prompt)
|
||||
} catch (err: any) {
|
||||
setError(err.message || '对齐提示词失败')
|
||||
} finally {
|
||||
setIsAligningPrompt(false)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{error && (
|
||||
@@ -434,6 +503,18 @@ function SelectedFrameDetails() {
|
||||
) : null}
|
||||
重新生成音频
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="outline"
|
||||
className="w-full"
|
||||
onClick={handleAlignPrompt}
|
||||
disabled={isAligningPrompt}
|
||||
>
|
||||
{isAligningPrompt ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin mr-2" />
|
||||
) : null}
|
||||
对齐提示词
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -88,6 +88,23 @@ export function PreviewPlayer() {
|
||||
|
||||
const selectedFrame = storyboard?.frames.find((f) => f.id === selectedFrameId)
|
||||
|
||||
// Sync currentTime when user selects a frame (not during playback)
|
||||
useEffect(() => {
|
||||
if (isPlaying || !storyboard?.frames.length || !selectedFrameId) return
|
||||
|
||||
// Calculate start time of selected frame
|
||||
let startTime = 0
|
||||
for (const frame of storyboard.frames) {
|
||||
if (frame.id === selectedFrameId) break
|
||||
startTime += frame.duration
|
||||
}
|
||||
|
||||
// Only update if different (avoid infinite loop)
|
||||
if (Math.abs(currentTime - startTime) > 0.1) {
|
||||
setCurrentTime(startTime)
|
||||
}
|
||||
}, [selectedFrameId, isPlaying])
|
||||
|
||||
// Audio playback sync
|
||||
useEffect(() => {
|
||||
if (!audioRef.current) return
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect } from 'react'
|
||||
import { useState, useEffect, useRef } from 'react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Plus, User, Trash2, Edit, Loader2 } from 'lucide-react'
|
||||
import { Plus, User, Trash2, Image, Loader2, Wand2 } from 'lucide-react'
|
||||
import { qualityApi, type Character } from '@/services/quality-api'
|
||||
|
||||
interface CharacterPanelProps {
|
||||
@@ -13,12 +13,14 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
const [characters, setCharacters] = useState<Character[]>([])
|
||||
const [isLoading, setIsLoading] = useState(false)
|
||||
const [isAdding, setIsAdding] = useState(false)
|
||||
const [editingId, setEditingId] = useState<string | null>(null)
|
||||
const [isAnalyzing, setIsAnalyzing] = useState(false)
|
||||
const fileInputRef = useRef<HTMLInputElement>(null)
|
||||
|
||||
// Form state
|
||||
const [name, setName] = useState('')
|
||||
const [appearance, setAppearance] = useState('')
|
||||
const [clothing, setClothing] = useState('')
|
||||
const [refImagePath, setRefImagePath] = useState('')
|
||||
|
||||
useEffect(() => {
|
||||
loadCharacters()
|
||||
@@ -36,8 +38,70 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
}
|
||||
}
|
||||
|
||||
const handleAnalyzeImage = async () => {
|
||||
if (!refImagePath) {
|
||||
alert('请先上传参考图片')
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
setIsAnalyzing(true)
|
||||
const result = await qualityApi.analyzeCharacterImage(storyboardId, refImagePath)
|
||||
|
||||
// Auto-fill form with VLM results
|
||||
if (result.appearance_description) {
|
||||
setAppearance(result.appearance_description)
|
||||
}
|
||||
if (result.clothing_description) {
|
||||
setClothing(result.clothing_description)
|
||||
}
|
||||
|
||||
console.log('[CHARACTER] VLM analysis result:', result)
|
||||
} catch (e) {
|
||||
console.error('Failed to analyze image:', e)
|
||||
alert('图片分析失败,请重试')
|
||||
} finally {
|
||||
setIsAnalyzing(false)
|
||||
}
|
||||
}
|
||||
|
||||
const handleImageUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const file = e.target.files?.[0]
|
||||
if (!file) return
|
||||
|
||||
try {
|
||||
// Upload to server - use quality API endpoint
|
||||
const formData = new FormData()
|
||||
formData.append('file', file)
|
||||
|
||||
const apiBase = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000/api'
|
||||
const response = await fetch(`${apiBase}/quality/upload?storyboard_id=${storyboardId}&type=character`, {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
})
|
||||
|
||||
if (response.ok) {
|
||||
const data = await response.json()
|
||||
setRefImagePath(data.path || data.file_path)
|
||||
console.log('[CHARACTER] Image uploaded:', data.path)
|
||||
} else {
|
||||
// Fallback: use local file path for demo
|
||||
const localPath = `output/${storyboardId}/character_${Date.now()}.png`
|
||||
setRefImagePath(localPath)
|
||||
console.log('[CHARACTER] Using fallback path:', localPath)
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to upload image:', e)
|
||||
// Fallback for demo
|
||||
setRefImagePath(`output/${storyboardId}/character_ref.png`)
|
||||
}
|
||||
}
|
||||
|
||||
const handleAdd = async () => {
|
||||
if (!name.trim()) return
|
||||
if (!name.trim()) {
|
||||
alert('请输入角色名称')
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
const newChar = await qualityApi.createCharacter(storyboardId, {
|
||||
@@ -46,6 +110,7 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
clothing_description: clothing,
|
||||
distinctive_features: [],
|
||||
character_type: 'person',
|
||||
reference_image_path: refImagePath || undefined,
|
||||
})
|
||||
setCharacters([...characters, newChar])
|
||||
resetForm()
|
||||
@@ -67,8 +132,8 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
setName('')
|
||||
setAppearance('')
|
||||
setClothing('')
|
||||
setRefImagePath('')
|
||||
setIsAdding(false)
|
||||
setEditingId(null)
|
||||
}
|
||||
|
||||
return (
|
||||
@@ -98,10 +163,10 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
key={char.id}
|
||||
className="flex items-center justify-between p-2 bg-muted/50 rounded text-sm"
|
||||
>
|
||||
<div>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="font-medium">{char.name}</div>
|
||||
{char.appearance_description && (
|
||||
<div className="text-xs text-muted-foreground truncate max-w-[150px]">
|
||||
<div className="text-xs text-muted-foreground truncate">
|
||||
{char.appearance_description}
|
||||
</div>
|
||||
)}
|
||||
@@ -109,7 +174,7 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="h-6 w-6"
|
||||
className="h-6 w-6 shrink-0"
|
||||
onClick={() => handleDelete(char.id)}
|
||||
>
|
||||
<Trash2 className="h-3 w-3 text-destructive" />
|
||||
@@ -119,7 +184,7 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
|
||||
{characters.length === 0 && !isAdding && (
|
||||
<p className="text-xs text-muted-foreground text-center py-2">
|
||||
暂无角色
|
||||
暂无角色,点击 + 添加
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
@@ -127,19 +192,54 @@ export function CharacterPanel({ storyboardId }: CharacterPanelProps) {
|
||||
|
||||
{isAdding && (
|
||||
<div className="space-y-2 pt-2 border-t">
|
||||
{/* Reference Image Upload */}
|
||||
<div className="flex items-center gap-2">
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
accept="image/*"
|
||||
onChange={handleImageUpload}
|
||||
className="hidden"
|
||||
/>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="flex-1"
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
>
|
||||
<Image className="h-4 w-4 mr-2" />
|
||||
{refImagePath ? '已上传参考图' : '上传参考图'}
|
||||
</Button>
|
||||
<Button
|
||||
variant="secondary"
|
||||
size="sm"
|
||||
onClick={handleAnalyzeImage}
|
||||
disabled={!refImagePath || isAnalyzing}
|
||||
>
|
||||
{isAnalyzing ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<>
|
||||
<Wand2 className="h-4 w-4 mr-1" />
|
||||
分析
|
||||
</>
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<input
|
||||
type="text"
|
||||
value={name}
|
||||
onChange={(e) => setName(e.target.value)}
|
||||
placeholder="角色名称"
|
||||
placeholder="角色名称 *"
|
||||
className="w-full p-2 text-sm border rounded bg-background"
|
||||
/>
|
||||
<input
|
||||
type="text"
|
||||
<textarea
|
||||
value={appearance}
|
||||
onChange={(e) => setAppearance(e.target.value)}
|
||||
placeholder="外貌描述"
|
||||
className="w-full p-2 text-sm border rounded bg-background"
|
||||
placeholder="外貌描述(可通过分析自动生成)"
|
||||
rows={2}
|
||||
className="w-full p-2 text-sm border rounded bg-background resize-none"
|
||||
/>
|
||||
<input
|
||||
type="text"
|
||||
|
||||
@@ -197,6 +197,31 @@ class EditorApiClient {
|
||||
return response.json()
|
||||
}
|
||||
|
||||
/**
|
||||
* Align image prompt with narration - regenerate prompt based on narration
|
||||
*/
|
||||
async alignPrompt(
|
||||
storyboardId: string,
|
||||
frameId: string,
|
||||
narration?: string
|
||||
): Promise<{ image_prompt: string; success: boolean }> {
|
||||
const response = await fetch(
|
||||
`${this.baseUrl}/editor/storyboard/${storyboardId}/frames/${frameId}/align-prompt`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ narration }),
|
||||
}
|
||||
)
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: response.statusText }))
|
||||
throw new Error(error.detail || `Failed to align prompt: ${response.statusText}`)
|
||||
}
|
||||
|
||||
return response.json()
|
||||
}
|
||||
|
||||
/**
|
||||
* Inpaint (局部重绘) image for a frame
|
||||
*/
|
||||
|
||||
@@ -20,6 +20,22 @@ export interface Character {
|
||||
reference_image?: string
|
||||
}
|
||||
|
||||
export interface CharacterAnalysisResult {
|
||||
appearance_description: string
|
||||
clothing_description: string
|
||||
distinctive_features: string[]
|
||||
prompt_description: string
|
||||
}
|
||||
|
||||
export interface CharacterCreateData {
|
||||
name: string
|
||||
appearance_description: string
|
||||
clothing_description: string
|
||||
distinctive_features: string[]
|
||||
character_type: string
|
||||
reference_image_path?: string
|
||||
}
|
||||
|
||||
export interface ContentCheckResult {
|
||||
passed: boolean
|
||||
category: 'safe' | 'sensitive' | 'blocked'
|
||||
@@ -65,7 +81,7 @@ class QualityApiClient {
|
||||
|
||||
async createCharacter(
|
||||
storyboardId: string,
|
||||
data: Omit<Character, 'id'>
|
||||
data: CharacterCreateData
|
||||
): Promise<Character> {
|
||||
const response = await fetch(
|
||||
`${this.baseUrl}/quality/characters/${storyboardId}`,
|
||||
@@ -110,6 +126,24 @@ class QualityApiClient {
|
||||
}
|
||||
}
|
||||
|
||||
async analyzeCharacterImage(
|
||||
storyboardId: string,
|
||||
imagePath: string
|
||||
): Promise<CharacterAnalysisResult> {
|
||||
const response = await fetch(
|
||||
`${this.baseUrl}/quality/characters/${storyboardId}/analyze-image`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ image_path: imagePath }),
|
||||
}
|
||||
)
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to analyze character image')
|
||||
}
|
||||
return response.json()
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Content Filter
|
||||
// ============================================================
|
||||
|
||||
@@ -29,7 +29,7 @@ Usage:
|
||||
if config_manager.validate():
|
||||
print("Config is valid!")
|
||||
"""
|
||||
from .schema import PixelleVideoConfig, LLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig, VideoSubConfig
|
||||
from .schema import PixelleVideoConfig, LLMConfig, VLMConfig, ComfyUIConfig, TTSSubConfig, ImageSubConfig, VideoSubConfig
|
||||
from .manager import ConfigManager
|
||||
from .loader import load_config_dict, save_config_dict
|
||||
|
||||
@@ -38,7 +38,8 @@ config_manager = ConfigManager()
|
||||
|
||||
__all__ = [
|
||||
"PixelleVideoConfig",
|
||||
"LLMConfig",
|
||||
"LLMConfig",
|
||||
"VLMConfig",
|
||||
"ComfyUIConfig",
|
||||
"TTSSubConfig",
|
||||
"ImageSubConfig",
|
||||
|
||||
@@ -26,6 +26,14 @@ class LLMConfig(BaseModel):
|
||||
model: str = Field(default="", description="LLM Model Name")
|
||||
|
||||
|
||||
class VLMConfig(BaseModel):
|
||||
"""VLM (Vision Language Model) configuration for character analysis"""
|
||||
provider: str = Field(default="qwen", description="VLM provider: qwen, glm, openai")
|
||||
api_key: str = Field(default="", description="VLM API Key")
|
||||
base_url: str = Field(default="", description="VLM API Base URL (auto-detected if empty)")
|
||||
model: str = Field(default="", description="VLM Model Name (defaults based on provider)")
|
||||
|
||||
|
||||
class TTSLocalConfig(BaseModel):
|
||||
"""Local TTS configuration (Edge TTS)"""
|
||||
voice: str = Field(default="zh-CN-YunjianNeural", description="Edge TTS voice ID")
|
||||
@@ -92,6 +100,7 @@ class PixelleVideoConfig(BaseModel):
|
||||
"""Pixelle-Video main configuration"""
|
||||
project_name: str = Field(default="Pixelle-Video", description="Project name")
|
||||
llm: LLMConfig = Field(default_factory=LLMConfig)
|
||||
vlm: VLMConfig = Field(default_factory=VLMConfig)
|
||||
comfyui: ComfyUIConfig = Field(default_factory=ComfyUIConfig)
|
||||
template: TemplateConfig = Field(default_factory=TemplateConfig)
|
||||
|
||||
|
||||
@@ -124,7 +124,13 @@ class StandardPipeline(LinearVideoPipeline):
|
||||
else: # fixed
|
||||
self._report_progress(ctx.progress_callback, "splitting_script", 0.05)
|
||||
split_mode = ctx.params.get("split_mode", "paragraph")
|
||||
ctx.narrations = await split_narration_script(text, split_mode=split_mode)
|
||||
target_segments = ctx.params.get("target_segments", 8)
|
||||
ctx.narrations = await split_narration_script(
|
||||
text,
|
||||
split_mode=split_mode,
|
||||
llm_service=self.llm if split_mode == "smart" else None,
|
||||
target_segments=target_segments
|
||||
)
|
||||
logger.info(f"✅ Split script into {len(ctx.narrations)} segments (mode={split_mode})")
|
||||
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
|
||||
|
||||
@@ -495,11 +501,26 @@ class StandardPipeline(LinearVideoPipeline):
|
||||
logger.warning("No task_id in storyboard, skipping persistence")
|
||||
return
|
||||
|
||||
# Build metadata
|
||||
input_with_title = ctx.params.copy()
|
||||
input_with_title["text"] = ctx.input_text # Ensure text is included
|
||||
if not input_with_title.get("title"):
|
||||
input_with_title["title"] = storyboard.title
|
||||
# Build metadata - filter out non-serializable objects
|
||||
clean_input = {}
|
||||
for key, value in ctx.params.items():
|
||||
# Skip non-serializable objects like CharacterMemory
|
||||
if key == "character_memory":
|
||||
# Convert to serializable dict if present
|
||||
if value is not None and hasattr(value, 'to_dict'):
|
||||
clean_input["character_memory"] = value.to_dict()
|
||||
elif key == "progress_callback":
|
||||
# Skip callback functions
|
||||
continue
|
||||
elif callable(value):
|
||||
# Skip any callable objects
|
||||
continue
|
||||
else:
|
||||
clean_input[key] = value
|
||||
|
||||
clean_input["text"] = ctx.input_text # Ensure text is included
|
||||
if not clean_input.get("title"):
|
||||
clean_input["title"] = storyboard.title
|
||||
|
||||
metadata = {
|
||||
"task_id": task_id,
|
||||
@@ -507,7 +528,7 @@ class StandardPipeline(LinearVideoPipeline):
|
||||
"completed_at": storyboard.completed_at.isoformat() if storyboard.completed_at else None,
|
||||
"status": "completed",
|
||||
|
||||
"input": input_with_title,
|
||||
"input": clean_input,
|
||||
|
||||
"result": {
|
||||
"video_path": result.video_path,
|
||||
|
||||
@@ -29,6 +29,13 @@ from pixelle_video.prompts.image_generation import (
|
||||
)
|
||||
from pixelle_video.prompts.style_conversion import build_style_conversion_prompt
|
||||
|
||||
# Paragraph merging (two-step: analysis + grouping)
|
||||
from pixelle_video.prompts.paragraph_merging import (
|
||||
build_paragraph_analysis_prompt,
|
||||
build_paragraph_grouping_prompt,
|
||||
build_paragraph_merging_prompt, # Legacy support
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
# Narration builders
|
||||
@@ -40,6 +47,11 @@ __all__ = [
|
||||
"build_image_prompt_prompt",
|
||||
"build_style_conversion_prompt",
|
||||
|
||||
# Paragraph merging (two-step)
|
||||
"build_paragraph_analysis_prompt",
|
||||
"build_paragraph_grouping_prompt",
|
||||
"build_paragraph_merging_prompt", # Legacy
|
||||
|
||||
# Image style presets
|
||||
"IMAGE_STYLE_PRESETS",
|
||||
"DEFAULT_IMAGE_STYLE",
|
||||
|
||||
@@ -58,31 +58,37 @@ Based on the existing video script, create corresponding **English** image promp
|
||||
# Input Content
|
||||
{narrations_json}
|
||||
|
||||
# ⭐ Core Imagery Extraction (Critical for Relevance)
|
||||
For EACH narration, you MUST:
|
||||
1. **Extract 2-3 core visual imagery/metaphors** that best represent the narration's meaning
|
||||
2. **Identify the emotional tone** (hopeful, melancholic, inspiring, etc.)
|
||||
3. **Determine concrete visual elements** that embody these abstract concepts
|
||||
|
||||
Example thought process:
|
||||
- Narration: "给自己一个不设限的探索时间"
|
||||
- Core Imagery: exploration, freedom, open paths
|
||||
- Emotional Tone: hopeful, adventurous
|
||||
- Visual Elements: open road, person looking at horizon, map with unmarked routes
|
||||
|
||||
# Output Requirements
|
||||
|
||||
## Image Prompt Specifications
|
||||
- Language: **Must use English** (for AI image generation models)
|
||||
- Description structure: scene + character action + emotion + symbolic elements
|
||||
- Description length: Ensure clear, complete, and creative descriptions (recommended 50-100 English words)
|
||||
- **REQUIRED Structure**: [Core imagery] + [Scene description] + [Character action] + [Emotional atmosphere]
|
||||
- Description length: 50-100 English words
|
||||
- **The image prompt MUST directly reflect the extracted core imagery from the narration**
|
||||
|
||||
## Visual Creative Requirements
|
||||
- Each image must accurately reflect the specific content and emotion of the corresponding narration
|
||||
- Use symbolic techniques to visualize abstract concepts (e.g., use paths to represent life choices, chains to represent constraints, etc.)
|
||||
- **Prioritize core visual metaphors** - the main visual elements must embody the narration's key message
|
||||
- Use symbolic techniques to visualize abstract concepts (e.g., paths=choices, chains=constraints, open doors=opportunities)
|
||||
- Scenes should express rich emotions and actions to enhance visual impact
|
||||
- Highlight themes through composition and element arrangement, avoid overly literal representations
|
||||
|
||||
## Key English Vocabulary Reference
|
||||
- Symbolic elements: symbolic elements
|
||||
- Expression: expression / facial expression
|
||||
- Action: action / gesture / movement
|
||||
- Scene: scene / setting
|
||||
- Atmosphere: atmosphere / mood
|
||||
|
||||
## Visual and Copy Coordination Principles
|
||||
- Images should serve the copy, becoming a visual extension of the copy content
|
||||
- Avoid visual elements unrelated to or contradicting the copy content
|
||||
- Choose visual presentation methods that best enhance the persuasiveness of the copy
|
||||
- Ensure the audience can quickly understand the core viewpoint of the copy through images
|
||||
## Visual and Narration Coordination Principles (Most Important)
|
||||
- **Direct semantic connection**: The main visual elements MUST represent the narration's core meaning
|
||||
- **Avoid decorative scenes**: Don't add unrelated beautiful scenery that doesn't support the message
|
||||
- **Ask yourself**: If someone saw only the image, could they guess what the narration is about?
|
||||
- **Test question**: What is the ONE THING this narration is about? Make sure that thing is visible in the image.
|
||||
|
||||
## Creative Guidance
|
||||
1. **Phenomenon Description Copy**: Use intuitive scenes to represent social phenomena
|
||||
@@ -97,8 +103,8 @@ Strictly output in the following JSON format, **image prompts must be in English
|
||||
```json
|
||||
{{
|
||||
"image_prompts": [
|
||||
"[detailed English image prompt following the style requirements]",
|
||||
"[detailed English image prompt following the style requirements]"
|
||||
"[Core imagery visible] + [Scene with semantic connection to narration] + [Character/action reflecting the message] + [Emotional atmosphere]",
|
||||
"[Next image prompt following the same structure]"
|
||||
]
|
||||
}}
|
||||
```
|
||||
@@ -109,14 +115,15 @@ Strictly output in the following JSON format, **image prompts must be in English
|
||||
3. Input is {{"narrations": [narration array]}} format, output is {{"image_prompts": [image prompt array]}} format
|
||||
4. **The output image_prompts array must contain exactly {narrations_count} elements, corresponding one-to-one with the input narrations array**
|
||||
5. **Image prompts must use English** (for AI image generation models)
|
||||
6. Image prompts must accurately reflect the specific content and emotion of the corresponding narration
|
||||
7. Each image must be creative and visually impactful, avoid being monotonous
|
||||
8. Ensure visual scenes can enhance the persuasiveness of the copy and audience understanding
|
||||
6. **⭐ Most Critical: Each image prompt must have DIRECT semantic relevance to its narration**
|
||||
7. Before writing each prompt, mentally extract the core visual metaphor from the narration
|
||||
8. Verify: Could someone understand the narration's message from the image alone?
|
||||
|
||||
Now, please create {narrations_count} corresponding **English** image prompts for the above {narrations_count} narrations. Only output JSON, no other content.
|
||||
"""
|
||||
|
||||
|
||||
|
||||
def build_image_prompt_prompt(
|
||||
narrations: List[str],
|
||||
min_words: int,
|
||||
|
||||
202
pixelle_video/prompts/paragraph_merging.py
Normal file
202
pixelle_video/prompts/paragraph_merging.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# Copyright (C) 2025 AIDC-AI
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Paragraph merging prompt
|
||||
|
||||
For intelligently merging short paragraphs into longer segments suitable for video storyboards.
|
||||
Uses a two-step approach: first analyze, then group.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
|
||||
# Step 1: Analyze text and recommend segment count
|
||||
PARAGRAPH_ANALYSIS_PROMPT = """# 任务定义
|
||||
你是一个专业的视频分镜规划师。请分析以下文本,推荐最佳分镜数量。
|
||||
|
||||
# 核心任务
|
||||
分析文本结构,根据以下原则推荐分镜数量:
|
||||
|
||||
## 分析原则
|
||||
1. **语义边界**:识别场景切换、话题转换、情绪变化点
|
||||
2. **叙事完整性**:保持对话回合完整(问-答不拆分)
|
||||
3. **时长控制**:每个分镜语音时长建议 15-45 秒(约 60-180 字)
|
||||
4. **视觉多样性**:确保分镜之间有足够的画面变化
|
||||
|
||||
## 文本信息
|
||||
- 总段落数:{total_paragraphs}
|
||||
- 预估总字数:{total_chars} 字
|
||||
- 预估总时长:{estimated_duration} 秒
|
||||
|
||||
## 输入段落预览
|
||||
{paragraphs_preview}
|
||||
|
||||
# 输出格式
|
||||
返回 JSON 格式的分析结果:
|
||||
|
||||
```json
|
||||
{{
|
||||
"recommended_segments": 8,
|
||||
"reasoning": "文本包含开场设定、分手对话、争吵升级、离别等多个场景切换点...",
|
||||
"scene_boundaries": [
|
||||
{{"after_paragraph": 3, "reason": "场景从背景介绍转入对话"}},
|
||||
{{"after_paragraph": 7, "reason": "对话情绪升级"}},
|
||||
...
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
# 重要提醒
|
||||
1. recommended_segments 应该在 3-15 之间
|
||||
2. 每个分镜平均字数建议 80-200 字
|
||||
3. scene_boundaries 标记主要的场景切换点,用于后续分组参考
|
||||
4. 只输出 JSON,不要添加其他解释
|
||||
"""
|
||||
|
||||
|
||||
# Step 2: Group paragraphs based on analysis
|
||||
PARAGRAPH_GROUPING_PROMPT = """# 任务定义
|
||||
你是一个专业的文本分段专家。根据分析结果,将段落分组。
|
||||
|
||||
# 核心任务
|
||||
将 {total_paragraphs} 个段落(编号 0 到 {max_index})分成 **{target_segments}** 个分组。
|
||||
|
||||
# 分析建议
|
||||
{analysis_hint}
|
||||
|
||||
# 分组原则
|
||||
1. **语义关联**:将描述同一场景、同一对话回合的段落放在一起
|
||||
2. **对话完整**:一轮完整的对话(问与答)应该在同一分组
|
||||
3. **场景统一**:同一时间、地点发生的事件应该在同一分组
|
||||
4. **长度均衡**:每个分组的字数尽量均衡(目标 80-200 字/分组)
|
||||
5. **顺序保持**:分组内段落必须连续
|
||||
|
||||
# 输入段落
|
||||
{paragraphs_preview}
|
||||
|
||||
# 输出格式
|
||||
返回 JSON 格式,包含每个分组的起始和结束索引(包含)。
|
||||
|
||||
```json
|
||||
{{
|
||||
"groups": [
|
||||
{{"start": 0, "end": 3}},
|
||||
{{"start": 4, "end": 7}},
|
||||
{{"start": 8, "end": 12}}
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
# 重要提醒
|
||||
1. 必须输出正好 {target_segments} 个分组
|
||||
2. 分组必须覆盖所有段落(从 0 到 {max_index})
|
||||
3. 每个分组的 start 必须等于上一个 end + 1
|
||||
4. 只输出 JSON,不要添加其他解释
|
||||
"""
|
||||
|
||||
|
||||
def build_paragraph_analysis_prompt(
|
||||
paragraphs: List[str],
|
||||
) -> str:
|
||||
"""
|
||||
Build prompt for analyzing text and recommending segment count
|
||||
|
||||
Args:
|
||||
paragraphs: List of original paragraphs
|
||||
|
||||
Returns:
|
||||
Formatted prompt for analysis
|
||||
"""
|
||||
# Calculate stats
|
||||
total_chars = sum(len(p) for p in paragraphs)
|
||||
# Estimate: ~250 chars/minute for Chinese speech
|
||||
estimated_duration = int(total_chars / 250 * 60)
|
||||
|
||||
# Create preview for each paragraph (first 50 chars)
|
||||
previews = []
|
||||
for i, para in enumerate(paragraphs):
|
||||
preview = para[:50].replace('\n', ' ')
|
||||
char_count = len(para)
|
||||
if len(para) > 50:
|
||||
preview += "..."
|
||||
previews.append(f"[{i}] ({char_count}字) {preview}")
|
||||
|
||||
paragraphs_preview = "\n".join(previews)
|
||||
|
||||
return PARAGRAPH_ANALYSIS_PROMPT.format(
|
||||
paragraphs_preview=paragraphs_preview,
|
||||
total_paragraphs=len(paragraphs),
|
||||
total_chars=total_chars,
|
||||
estimated_duration=estimated_duration
|
||||
)
|
||||
|
||||
|
||||
def build_paragraph_grouping_prompt(
|
||||
paragraphs: List[str],
|
||||
target_segments: int,
|
||||
analysis_result: dict = None,
|
||||
) -> str:
|
||||
"""
|
||||
Build prompt for grouping paragraphs based on analysis
|
||||
|
||||
Args:
|
||||
paragraphs: List of original paragraphs
|
||||
target_segments: Target number of segments (from analysis)
|
||||
analysis_result: Optional analysis result for context
|
||||
|
||||
Returns:
|
||||
Formatted prompt for grouping
|
||||
"""
|
||||
# Create preview with char counts
|
||||
previews = []
|
||||
for i, para in enumerate(paragraphs):
|
||||
preview = para[:50].replace('\n', ' ')
|
||||
char_count = len(para)
|
||||
if len(para) > 50:
|
||||
preview += "..."
|
||||
previews.append(f"[{i}] ({char_count}字) {preview}")
|
||||
|
||||
paragraphs_preview = "\n".join(previews)
|
||||
|
||||
# Build analysis hint if available
|
||||
analysis_hint = ""
|
||||
if analysis_result:
|
||||
if "reasoning" in analysis_result:
|
||||
analysis_hint += f"分析理由:{analysis_result['reasoning']}\n"
|
||||
if "scene_boundaries" in analysis_result:
|
||||
boundaries = [str(b.get("after_paragraph", "")) for b in analysis_result["scene_boundaries"]]
|
||||
analysis_hint += f"建议场景切换点(段落后):{', '.join(boundaries)}"
|
||||
|
||||
if not analysis_hint:
|
||||
analysis_hint = "无额外分析信息"
|
||||
|
||||
return PARAGRAPH_GROUPING_PROMPT.format(
|
||||
paragraphs_preview=paragraphs_preview,
|
||||
target_segments=target_segments,
|
||||
total_paragraphs=len(paragraphs),
|
||||
max_index=len(paragraphs) - 1,
|
||||
analysis_hint=analysis_hint
|
||||
)
|
||||
|
||||
|
||||
# Legacy support - keep original function name for backward compatibility
|
||||
def build_paragraph_merging_prompt(
|
||||
paragraphs: List[str],
|
||||
target_segments: int = 8,
|
||||
) -> str:
|
||||
"""
|
||||
Legacy function for backward compatibility.
|
||||
Now delegates to build_paragraph_grouping_prompt.
|
||||
"""
|
||||
return build_paragraph_grouping_prompt(paragraphs, target_segments)
|
||||
323
pixelle_video/services/quality/character_analyzer.py
Normal file
323
pixelle_video/services/quality/character_analyzer.py
Normal file
@@ -0,0 +1,323 @@
|
||||
# Copyright (C) 2025 AIDC-AI
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
CharacterAnalyzer - VLM-based character appearance extraction
|
||||
|
||||
Analyzes reference images to extract detailed character descriptions
|
||||
for maintaining visual consistency across video frames.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
|
||||
from loguru import logger
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
|
||||
@dataclass
|
||||
class CharacterAnalysisResult:
|
||||
"""Result of character image analysis"""
|
||||
|
||||
appearance_description: str = "" # Physical features
|
||||
clothing_description: str = "" # What they're wearing
|
||||
distinctive_features: List[str] = None # Unique identifying features
|
||||
|
||||
def __post_init__(self):
|
||||
if self.distinctive_features is None:
|
||||
self.distinctive_features = []
|
||||
|
||||
def to_prompt_description(self) -> str:
|
||||
"""Generate a prompt-ready character description"""
|
||||
parts = []
|
||||
|
||||
if self.appearance_description:
|
||||
parts.append(self.appearance_description)
|
||||
|
||||
if self.clothing_description:
|
||||
parts.append(f"wearing {self.clothing_description}")
|
||||
|
||||
if self.distinctive_features:
|
||||
features = ", ".join(self.distinctive_features)
|
||||
parts.append(f"with {features}")
|
||||
|
||||
return ", ".join(parts) if parts else ""
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"appearance_description": self.appearance_description,
|
||||
"clothing_description": self.clothing_description,
|
||||
"distinctive_features": self.distinctive_features,
|
||||
}
|
||||
|
||||
|
||||
class CharacterAnalyzer:
|
||||
"""
|
||||
VLM-based character appearance analyzer
|
||||
|
||||
Analyzes reference images to extract detailed character descriptions
|
||||
that can be injected into image generation prompts.
|
||||
|
||||
Example:
|
||||
>>> analyzer = CharacterAnalyzer()
|
||||
>>> result = await analyzer.analyze_reference_image("character.png")
|
||||
>>> print(result.appearance_description)
|
||||
"young woman with long black hair, round face, fair skin"
|
||||
>>> print(result.to_prompt_description())
|
||||
"young woman with long black hair, round face, fair skin, wearing blue hoodie, with round glasses"
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize CharacterAnalyzer"""
|
||||
pass
|
||||
|
||||
async def analyze_reference_image(
|
||||
self,
|
||||
image_path: str,
|
||||
) -> CharacterAnalysisResult:
|
||||
"""
|
||||
Analyze a reference image to extract character appearance
|
||||
|
||||
Args:
|
||||
image_path: Path to the reference image
|
||||
|
||||
Returns:
|
||||
CharacterAnalysisResult with extracted descriptions
|
||||
"""
|
||||
logger.info(f"Analyzing character reference image: {image_path}")
|
||||
|
||||
# Check if file exists
|
||||
if not os.path.exists(image_path):
|
||||
logger.warning(f"Image not found: {image_path}")
|
||||
return CharacterAnalysisResult()
|
||||
|
||||
try:
|
||||
# Read and encode image
|
||||
with open(image_path, "rb") as f:
|
||||
image_data = base64.b64encode(f.read()).decode("utf-8")
|
||||
|
||||
# Determine image type
|
||||
ext = os.path.splitext(image_path)[1].lower()
|
||||
media_type = "image/png" if ext == ".png" else "image/jpeg"
|
||||
|
||||
# VLM prompt for character analysis - optimized for storyboard consistency
|
||||
# Focus on CONSTANT features (identity), exclude VARIABLE features (pose/expression)
|
||||
analysis_prompt = """Analyze this character/person for VIDEO STORYBOARD consistency.
|
||||
|
||||
GOAL: Extract features that should remain CONSISTENT across different video frames.
|
||||
The output will be injected into image generation prompts for multiple scenes.
|
||||
|
||||
Extract ONLY these CONSTANT features:
|
||||
1. Identity: gender, approximate age group (child/young/middle-aged/elderly)
|
||||
2. Hair: color, length, style (NOT affected by wind/movement)
|
||||
3. Face: skin tone, face shape (NOT expressions)
|
||||
4. Clothing: type and colors (assume same outfit throughout video)
|
||||
5. Distinctive: glasses, accessories, tattoos, scars, unique marks
|
||||
|
||||
DO NOT include:
|
||||
- Expressions (smile, frown) - changes per scene
|
||||
- Poses/gestures - changes per scene
|
||||
- View angle - determined by scene composition
|
||||
- Lighting/shadows - scene-dependent
|
||||
- Background elements
|
||||
|
||||
Output JSON format (simple strings for direct prompt injection):
|
||||
{
|
||||
"identity": "elderly man" or "young woman" etc,
|
||||
"appearance": "short gray hair, light skin, round face",
|
||||
"clothing": "brown sweater vest over white shirt, dark trousers",
|
||||
"distinctive": ["round glasses", "silver watch"]
|
||||
}
|
||||
|
||||
Output ONLY the JSON, no explanation."""
|
||||
|
||||
# Build multimodal message
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": analysis_prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:{media_type};base64,{image_data}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
# Get VLM configuration
|
||||
# Priority: Environment variables > config.yaml > defaults
|
||||
from pixelle_video.config import config_manager
|
||||
|
||||
# VLM config from config.yaml (now part of PixelleVideoConfig)
|
||||
vlm_config = config_manager.config.vlm
|
||||
|
||||
# Environment variables override config.yaml
|
||||
vlm_provider = os.getenv("VLM_PROVIDER") or vlm_config.provider or "qwen"
|
||||
vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY") or vlm_config.api_key
|
||||
vlm_base_url = os.getenv("VLM_BASE_URL") or vlm_config.base_url
|
||||
vlm_model = os.getenv("VLM_MODEL") or vlm_config.model
|
||||
|
||||
# Configure based on provider
|
||||
if vlm_provider == "qwen":
|
||||
# 通义千问 Qwen VL
|
||||
vlm_base_url = vlm_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
vlm_model = vlm_model or "qwen-vl-plus" # or qwen-vl-max, qwen3-vl-plus
|
||||
logger.info(f"Using Qwen VL: model={vlm_model}")
|
||||
elif vlm_provider == "glm":
|
||||
# 智谱 GLM-4V
|
||||
from pixelle_video.config import config_manager
|
||||
llm_config = config_manager.config.llm
|
||||
vlm_api_key = vlm_api_key or llm_config.api_key
|
||||
vlm_base_url = vlm_base_url or llm_config.base_url
|
||||
vlm_model = vlm_model or "glm-4v-flash"
|
||||
logger.info(f"Using GLM VL: model={vlm_model}")
|
||||
else: # openai or other
|
||||
from pixelle_video.config import config_manager
|
||||
llm_config = config_manager.config.llm
|
||||
vlm_api_key = vlm_api_key or llm_config.api_key
|
||||
vlm_base_url = vlm_base_url or llm_config.base_url
|
||||
vlm_model = vlm_model or llm_config.model
|
||||
logger.info(f"Using {vlm_provider} VL: model={vlm_model}")
|
||||
|
||||
if not vlm_api_key:
|
||||
logger.error("No VLM API key configured. Set VLM_API_KEY or DASHSCOPE_API_KEY environment variable.")
|
||||
return CharacterAnalysisResult()
|
||||
|
||||
# Create OpenAI-compatible client
|
||||
client = AsyncOpenAI(
|
||||
api_key=vlm_api_key,
|
||||
base_url=vlm_base_url
|
||||
)
|
||||
|
||||
# Call VLM
|
||||
response = await client.chat.completions.create(
|
||||
model=vlm_model,
|
||||
messages=messages,
|
||||
temperature=0.3,
|
||||
max_tokens=2000
|
||||
)
|
||||
|
||||
vlm_response = response.choices[0].message.content if response.choices else None
|
||||
|
||||
if vlm_response:
|
||||
logger.debug(f"VLM character analysis response: {vlm_response[:150] if len(vlm_response) > 150 else vlm_response}...")
|
||||
else:
|
||||
logger.warning(f"VLM returned empty content. Full response: {response}")
|
||||
|
||||
# Parse response
|
||||
return self._parse_response(vlm_response)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Character analysis failed: {e}")
|
||||
return CharacterAnalysisResult()
|
||||
|
||||
def _parse_response(self, response: str) -> CharacterAnalysisResult:
|
||||
"""Parse VLM response into CharacterAnalysisResult"""
|
||||
if not response:
|
||||
logger.warning("Empty VLM response")
|
||||
return CharacterAnalysisResult()
|
||||
|
||||
# Log full response for debugging
|
||||
logger.debug(f"Full VLM response:\n{response}")
|
||||
|
||||
try:
|
||||
# Remove markdown code blocks if present
|
||||
cleaned = response.strip()
|
||||
if cleaned.startswith("```json"):
|
||||
cleaned = cleaned[7:]
|
||||
elif cleaned.startswith("```"):
|
||||
cleaned = cleaned[3:]
|
||||
if cleaned.endswith("```"):
|
||||
cleaned = cleaned[:-3]
|
||||
cleaned = cleaned.strip()
|
||||
|
||||
# Try to extract JSON from response
|
||||
match = re.search(r'\{[\s\S]*\}', cleaned)
|
||||
if match:
|
||||
json_str = match.group()
|
||||
logger.debug(f"Extracted JSON: {json_str[:200]}...")
|
||||
data = json.loads(json_str)
|
||||
else:
|
||||
logger.warning(f"No JSON found in response, trying direct parse")
|
||||
data = json.loads(cleaned)
|
||||
|
||||
# Handle nested JSON structures - flatten to strings
|
||||
# New field names: identity, appearance, clothing, distinctive
|
||||
identity = data.get("identity", "")
|
||||
appearance = data.get("appearance", "") or data.get("appearance_description", "")
|
||||
|
||||
if isinstance(appearance, dict):
|
||||
# Flatten nested object to descriptive string
|
||||
parts = []
|
||||
for key, value in appearance.items():
|
||||
if isinstance(value, dict):
|
||||
details = ", ".join(f"{k}: {v}" for k, v in value.items())
|
||||
parts.append(f"{key} ({details})")
|
||||
else:
|
||||
parts.append(f"{key}: {value}")
|
||||
appearance = "; ".join(parts)
|
||||
|
||||
# Combine identity + appearance for full description
|
||||
if identity and appearance:
|
||||
full_appearance = f"{identity}, {appearance}"
|
||||
else:
|
||||
full_appearance = identity or appearance
|
||||
|
||||
clothing = data.get("clothing", "") or data.get("clothing_description", "")
|
||||
if isinstance(clothing, dict):
|
||||
# Flatten nested clothing description
|
||||
parts = []
|
||||
for person, items in clothing.items():
|
||||
if isinstance(items, dict):
|
||||
details = ", ".join(f"{k}: {v}" for k, v in items.items())
|
||||
parts.append(f"{person} ({details})")
|
||||
else:
|
||||
parts.append(f"{person}: {items}")
|
||||
clothing = "; ".join(parts)
|
||||
|
||||
distinctive = data.get("distinctive", []) or data.get("distinctive_features", [])
|
||||
if not isinstance(distinctive, list):
|
||||
distinctive = [str(distinctive)]
|
||||
|
||||
result = CharacterAnalysisResult(
|
||||
appearance_description=full_appearance,
|
||||
clothing_description=clothing,
|
||||
distinctive_features=distinctive,
|
||||
)
|
||||
|
||||
logger.info(f"Character analysis extracted: {result.appearance_description[:80] if result.appearance_description else 'empty'}...")
|
||||
return result
|
||||
|
||||
except (json.JSONDecodeError, KeyError) as e:
|
||||
logger.warning(f"Failed to parse VLM response: {e}")
|
||||
logger.debug(f"Response that failed to parse: {response[:500]}")
|
||||
|
||||
# Try to use the raw response as appearance description (fallback)
|
||||
if response and 20 < len(response) < 500:
|
||||
# Clean up the response
|
||||
fallback = response.strip()
|
||||
if "```" in fallback:
|
||||
fallback = re.sub(r'```.*?```', '', fallback, flags=re.DOTALL).strip()
|
||||
if fallback:
|
||||
logger.info(f"Using raw response as appearance: {fallback[:80]}...")
|
||||
return CharacterAnalysisResult(
|
||||
appearance_description=fallback
|
||||
)
|
||||
|
||||
return CharacterAnalysisResult()
|
||||
@@ -558,6 +558,19 @@ class CharacterMemory:
|
||||
self._characters.clear()
|
||||
self._name_index.clear()
|
||||
logger.info("Character memory cleared")
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to JSON-serializable dictionary"""
|
||||
return {
|
||||
"characters": [char.to_dict() for char in self.characters],
|
||||
"config": {
|
||||
"auto_detect_characters": self.config.auto_detect_characters,
|
||||
"use_llm_detection": self.config.use_llm_detection,
|
||||
"inject_character_prompts": self.config.inject_character_prompts,
|
||||
"use_reference_images": self.config.use_reference_images,
|
||||
"enable_visual_features": self.config.enable_visual_features,
|
||||
}
|
||||
}
|
||||
|
||||
@property
|
||||
def feature_extractor(self):
|
||||
|
||||
@@ -164,28 +164,140 @@ class StyleGuard:
|
||||
async def _extract_with_vlm(self, image_path: str) -> StyleAnchor:
|
||||
"""Extract style using Vision Language Model"""
|
||||
try:
|
||||
# TODO: Implement VLM call when vision-capable LLM is integrated
|
||||
# For now, return a placeholder
|
||||
logger.debug("VLM style extraction: using placeholder (VLM not yet integrated)")
|
||||
if not self.llm_service:
|
||||
logger.warning("No LLM service available, using basic extraction")
|
||||
return self._extract_basic(image_path)
|
||||
|
||||
# Placeholder extraction based on common styles
|
||||
return StyleAnchor(
|
||||
art_style="consistent artistic",
|
||||
color_palette="harmonious colors",
|
||||
lighting="balanced",
|
||||
style_prefix="maintaining visual consistency, same artistic style as previous frames",
|
||||
reference_image=image_path,
|
||||
import base64
|
||||
import os
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
# Read and encode image
|
||||
if not os.path.exists(image_path):
|
||||
logger.warning(f"Image not found: {image_path}")
|
||||
return self._extract_basic(image_path)
|
||||
|
||||
with open(image_path, "rb") as f:
|
||||
image_data = base64.b64encode(f.read()).decode("utf-8")
|
||||
|
||||
# Determine image type
|
||||
ext = os.path.splitext(image_path)[1].lower()
|
||||
media_type = "image/png" if ext == ".png" else "image/jpeg"
|
||||
|
||||
# Style extraction prompt
|
||||
style_prompt = """Analyze this image and extract its visual style characteristics.
|
||||
|
||||
Provide a concise style description that could be used as a prefix for image generation prompts to maintain visual consistency.
|
||||
|
||||
Output format (JSON):
|
||||
{
|
||||
"art_style": "specific art style (e.g., oil painting, digital illustration, anime, photorealistic, watercolor, line art)",
|
||||
"color_palette": "dominant colors and mood (e.g., warm earth tones, vibrant neon, muted pastels)",
|
||||
"lighting": "lighting style (e.g., soft natural light, dramatic shadows, studio lighting)",
|
||||
"texture": "visual texture (e.g., smooth, grainy, brushstroke visible)",
|
||||
"style_prefix": "A complete prompt prefix combining all elements (30-50 words)"
|
||||
}
|
||||
|
||||
Focus on creating a specific, reproducible style_prefix that will generate visually consistent images."""
|
||||
|
||||
# Build multimodal message with image
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": style_prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:{media_type};base64,{image_data}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
# Get LLM config for VLM call
|
||||
from pixelle_video.config import config_manager
|
||||
llm_config = config_manager.config.llm
|
||||
|
||||
# Create OpenAI client directly for VLM call
|
||||
client = AsyncOpenAI(
|
||||
api_key=llm_config.api_key,
|
||||
base_url=llm_config.base_url
|
||||
)
|
||||
|
||||
# Call VLM with multimodal message
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model=llm_config.model,
|
||||
messages=messages,
|
||||
temperature=0.3,
|
||||
max_tokens=500
|
||||
)
|
||||
vlm_response = response.choices[0].message.content
|
||||
logger.debug(f"VLM style extraction response: {vlm_response[:100]}...")
|
||||
except Exception as e:
|
||||
logger.warning(f"VLM call failed, using basic extraction: {e}")
|
||||
return self._extract_basic(image_path)
|
||||
|
||||
# Parse response
|
||||
import json
|
||||
import re
|
||||
|
||||
try:
|
||||
# Try to extract JSON from response
|
||||
match = re.search(r'\{[\s\S]*\}', vlm_response)
|
||||
if match:
|
||||
data = json.loads(match.group())
|
||||
else:
|
||||
data = json.loads(vlm_response)
|
||||
|
||||
anchor = StyleAnchor(
|
||||
art_style=data.get("art_style", ""),
|
||||
color_palette=data.get("color_palette", ""),
|
||||
lighting=data.get("lighting", ""),
|
||||
texture=data.get("texture", ""),
|
||||
style_prefix=data.get("style_prefix", ""),
|
||||
reference_image=image_path,
|
||||
)
|
||||
|
||||
logger.info(f"VLM extracted style: {anchor.style_prefix[:80]}...")
|
||||
return anchor
|
||||
|
||||
except (json.JSONDecodeError, KeyError) as e:
|
||||
logger.warning(f"Failed to parse VLM response: {e}")
|
||||
# Use the raw response as style_prefix if it looks reasonable
|
||||
if len(vlm_response) < 200 and len(vlm_response) > 20:
|
||||
return StyleAnchor(
|
||||
style_prefix=vlm_response.strip(),
|
||||
reference_image=image_path,
|
||||
)
|
||||
return self._extract_basic(image_path)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"VLM style extraction failed: {e}")
|
||||
return self._extract_basic(image_path)
|
||||
|
||||
def _extract_basic(self, image_path: str) -> StyleAnchor:
|
||||
"""Basic style extraction without VLM"""
|
||||
# Return generic style anchor
|
||||
"""Basic style extraction without VLM - analyze filename for hints"""
|
||||
import os
|
||||
|
||||
filename = os.path.basename(image_path).lower()
|
||||
|
||||
# Try to infer style from filename or path
|
||||
style_hints = []
|
||||
|
||||
if "anime" in filename or "cartoon" in filename:
|
||||
style_hints.append("anime style illustration")
|
||||
elif "realistic" in filename or "photo" in filename:
|
||||
style_hints.append("photorealistic style")
|
||||
elif "sketch" in filename or "line" in filename:
|
||||
style_hints.append("sketch style, clean lines")
|
||||
else:
|
||||
style_hints.append("consistent visual style, high quality")
|
||||
|
||||
return StyleAnchor(
|
||||
style_prefix="consistent visual style",
|
||||
style_prefix=", ".join(style_hints),
|
||||
reference_image=image_path,
|
||||
)
|
||||
|
||||
|
||||
@@ -208,7 +208,9 @@ async def generate_narrations_from_content(
|
||||
|
||||
async def split_narration_script(
|
||||
script: str,
|
||||
split_mode: Literal["paragraph", "line", "sentence"] = "paragraph",
|
||||
split_mode: Literal["paragraph", "line", "sentence", "smart"] = "paragraph",
|
||||
llm_service = None,
|
||||
target_segments: int = 8,
|
||||
) -> List[str]:
|
||||
"""
|
||||
Split user-provided narration script into segments
|
||||
@@ -219,6 +221,9 @@ async def split_narration_script(
|
||||
- "paragraph": Split by double newline (\\n\\n), preserve single newlines within paragraphs
|
||||
- "line": Split by single newline (\\n), each line is a segment
|
||||
- "sentence": Split by sentence-ending punctuation (。.!?!?)
|
||||
- "smart": First split by paragraph, then use LLM to intelligently merge related paragraphs
|
||||
llm_service: LLM service instance (required for "smart" mode)
|
||||
target_segments: Target number of segments for "smart" mode (default: 8)
|
||||
|
||||
Returns:
|
||||
List of narration segments
|
||||
@@ -227,7 +232,31 @@ async def split_narration_script(
|
||||
|
||||
narrations = []
|
||||
|
||||
if split_mode == "paragraph":
|
||||
if split_mode == "smart":
|
||||
# Smart mode: first split by paragraph, then merge intelligently
|
||||
if llm_service is None:
|
||||
raise ValueError("llm_service is required for 'smart' split mode")
|
||||
|
||||
# Step 1: Split by paragraph first
|
||||
paragraphs = re.split(r'\n\s*\n', script)
|
||||
paragraphs = [p.strip() for p in paragraphs if p.strip()]
|
||||
logger.info(f" Initial split: {len(paragraphs)} paragraphs")
|
||||
|
||||
# Step 2: Merge intelligently using LLM
|
||||
# If target_segments is None, merge_paragraphs_smart will auto-analyze
|
||||
if target_segments is not None and len(paragraphs) <= target_segments:
|
||||
# No need to merge if already within target
|
||||
logger.info(f" Paragraphs count ({len(paragraphs)}) <= target ({target_segments}), no merge needed")
|
||||
narrations = paragraphs
|
||||
else:
|
||||
narrations = await merge_paragraphs_smart(
|
||||
llm_service=llm_service,
|
||||
paragraphs=paragraphs,
|
||||
target_segments=target_segments # Can be None for auto-analysis
|
||||
)
|
||||
logger.info(f"✅ Smart split: {len(paragraphs)} paragraphs -> {len(narrations)} segments")
|
||||
|
||||
elif split_mode == "paragraph":
|
||||
# Split by double newline (paragraph mode)
|
||||
# Preserve single newlines within paragraphs
|
||||
paragraphs = re.split(r'\n\s*\n', script)
|
||||
@@ -266,6 +295,150 @@ async def split_narration_script(
|
||||
return narrations
|
||||
|
||||
|
||||
async def merge_paragraphs_smart(
    llm_service,
    paragraphs: List[str],
    target_segments: int = None,  # Optional: auto-analyzed when None
    max_retries: int = 3,
) -> List[str]:
    """
    Use an LLM to intelligently merge paragraphs based on semantic relevance.

    Two-step approach:
    1. If target_segments is None, ask the LLM to recommend an optimal count,
       clamped to the 3..15 range. If analysis keeps failing, fall back to a
       simple heuristic (len(paragraphs) // 3, clamped to 3..12).
    2. Ask the LLM to group the paragraphs into that many index ranges, then
       join each range's paragraphs with blank lines.

    Args:
        llm_service: Async callable invoked as
            ``await llm_service(prompt=..., temperature=..., max_tokens=...)``
            and returning the raw LLM text response.
        paragraphs: Original paragraphs, in document order.
        target_segments: Target number of merged segments
            (auto-analyzed if None).
        max_retries: Maximum retry attempts for each LLM step.

    Returns:
        List of merged paragraph texts (each a ``"\\n\\n"``-joined range).

    Raises:
        Exception: Re-raises the last grouping error if every grouping
            attempt fails (analysis failures never raise — they fall back).
    """
    from pixelle_video.prompts import (
        build_paragraph_analysis_prompt,
        build_paragraph_grouping_prompt,
    )

    # ========================================
    # Step 1: Analyze and recommend segment count (if not provided)
    # ========================================
    analysis_result = None
    if target_segments is None:
        logger.info(f"Analyzing {len(paragraphs)} paragraphs to recommend segment count...")

        analysis_prompt = build_paragraph_analysis_prompt(paragraphs)

        for attempt in range(1, max_retries + 1):
            try:
                response = await llm_service(
                    prompt=analysis_prompt,
                    temperature=0.3,
                    max_tokens=1500
                )

                logger.debug(f"Analysis response length: {len(response)} chars")

                result = _parse_json(response)

                if "recommended_segments" not in result:
                    raise KeyError("Missing 'recommended_segments' in analysis")

                # Coerce to int (LLMs occasionally return "8" or 8.0) and
                # clamp the recommendation to a sane range of 3..15 segments.
                # A non-numeric value raises here and is handled as a retry.
                target_segments = max(3, min(15, int(result["recommended_segments"])))
                analysis_result = result

                reasoning = result.get("reasoning", "N/A")
                logger.info(f"✅ Analysis complete: recommended {target_segments} segments")
                logger.info(f"   Reasoning: {reasoning[:100]}...")
                break

            except Exception as e:
                logger.error(f"Analysis attempt {attempt} failed: {e}")
                if attempt >= max_retries:
                    # Fallback heuristic: roughly one segment per 3 paragraphs.
                    target_segments = max(3, min(12, len(paragraphs) // 3))
                    logger.warning(f"Using fallback: {target_segments} segments (paragraphs/3)")
                    analysis_result = None
                    break
                logger.info("Retrying analysis...")
    else:
        logger.info(f"Using provided target: {target_segments} segments")

    # ========================================
    # Step 2: Group paragraphs into target_segments index ranges
    # ========================================
    logger.info(f"Grouping {len(paragraphs)} paragraphs into {target_segments} segments...")

    grouping_prompt = build_paragraph_grouping_prompt(
        paragraphs=paragraphs,
        target_segments=target_segments,
        analysis_result=analysis_result
    )

    for attempt in range(1, max_retries + 1):
        try:
            response = await llm_service(
                prompt=grouping_prompt,
                temperature=0.3,
                max_tokens=2000
            )

            logger.debug(f"Grouping response length: {len(response)} chars")

            result = _parse_json(response)

            if "groups" not in result:
                raise KeyError("Invalid response format: missing 'groups'")

            groups = result["groups"]

            # Validate count; a mismatch is only tolerated on the last attempt
            # (better a slightly-off segment count than no result at all).
            if len(groups) != target_segments:
                logger.warning(
                    f"Grouping attempt {attempt}: expected {target_segments} groups, got {len(groups)}"
                )
                if attempt < max_retries:
                    continue
                logger.warning(f"Accepting {len(groups)} groups after {max_retries} attempts")

            # Validate each group's boundaries. Raising here converts a bad
            # response into a retry (or the final re-raise).
            for i, group in enumerate(groups):
                if "start" not in group or "end" not in group:
                    raise ValueError(f"Group {i} missing 'start' or 'end'")
                if group["start"] > group["end"]:
                    raise ValueError(f"Group {i} has invalid range: start > end")
                if group["start"] < 0 or group["end"] >= len(paragraphs):
                    raise ValueError(f"Group {i} has out-of-bounds indices")

            # Warn (don't fail) when the ranges are not a contiguous partition
            # of the paragraphs: overlapping groups duplicate text in the
            # output and gaps silently drop paragraphs.
            next_start = 0
            contiguous = True
            for group in sorted(groups, key=lambda g: g["start"]):
                if group["start"] != next_start:
                    contiguous = False
                    logger.warning(
                        f"Groups are not contiguous near index {group['start']} "
                        f"(expected {next_start}); output may drop or duplicate paragraphs"
                    )
                    break
                next_start = group["end"] + 1
            if contiguous and next_start != len(paragraphs):
                logger.warning(
                    f"Groups cover only {next_start} of {len(paragraphs)} paragraphs"
                )

            # Merge each group's paragraph range into one segment.
            merged = [
                "\n\n".join(paragraphs[group["start"]:group["end"] + 1])
                for group in groups
            ]

            logger.info(f"✅ Successfully merged into {len(merged)} segments")
            return merged

        except Exception as e:
            logger.error(f"Grouping attempt {attempt} failed: {e}")
            if attempt >= max_retries:
                raise
            logger.info("Retrying grouping...")

    # Unreachable: the loop above either returns or re-raises on the final
    # attempt; kept as a defensive fallback.
    return paragraphs
|
||||
|
||||
|
||||
async def generate_image_prompts(
|
||||
llm_service,
|
||||
narrations: List[str],
|
||||
@@ -489,8 +662,8 @@ def _parse_json(text: str) -> dict:
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try to find any JSON object in the text
|
||||
json_pattern = r'\{[^{}]*(?:"narrations"|"image_prompts")\s*:\s*\[[^\]]*\][^{}]*\}'
|
||||
# Try to find any JSON object with known keys (including analysis keys)
|
||||
json_pattern = r'\{[^{}]*(?:"narrations"|"image_prompts"|"video_prompts"|"merged_paragraphs"|"groups"|"recommended_segments"|"scene_boundaries")\s*:\s*[^{}]*\}'
|
||||
match = re.search(json_pattern, text, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
@@ -498,6 +671,17 @@ def _parse_json(text: str) -> dict:
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try to find any JSON object that looks like it contains an array
|
||||
# This is a more aggressive fallback for complex nested arrays
|
||||
json_start = text.find('{')
|
||||
json_end = text.rfind('}')
|
||||
if json_start != -1 and json_end != -1 and json_end > json_start:
|
||||
potential_json = text[json_start:json_end + 1]
|
||||
try:
|
||||
return json.loads(potential_json)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# If all fails, raise error
|
||||
raise json.JSONDecodeError("No valid JSON found", text, 0)
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ def render_content_input():
|
||||
"paragraph": tr("split.mode_paragraph"),
|
||||
"line": tr("split.mode_line"),
|
||||
"sentence": tr("split.mode_sentence"),
|
||||
"smart": tr("split.mode_smart"),
|
||||
}
|
||||
split_mode = st.selectbox(
|
||||
tr("split.mode_label"),
|
||||
@@ -73,8 +74,16 @@ def render_content_input():
|
||||
index=0, # Default to paragraph mode
|
||||
help=tr("split.mode_help")
|
||||
)
|
||||
|
||||
# Show info for smart mode (auto-detect segment count)
|
||||
if split_mode == "smart":
|
||||
st.info(tr("split.smart_auto_hint"))
|
||||
target_segments = None # Auto-detect
|
||||
else:
|
||||
target_segments = None # Not used for other modes
|
||||
else:
|
||||
split_mode = "paragraph" # Default for generate mode (not used)
|
||||
target_segments = None
|
||||
|
||||
# Title input (optional for both modes)
|
||||
title = st.text_input(
|
||||
@@ -105,7 +114,8 @@ def render_content_input():
|
||||
"text": text,
|
||||
"title": title,
|
||||
"n_scenes": n_scenes,
|
||||
"split_mode": split_mode
|
||||
"split_mode": split_mode,
|
||||
"target_segments": target_segments
|
||||
}
|
||||
|
||||
else:
|
||||
|
||||
@@ -26,6 +26,10 @@ from web.utils.async_helpers import run_async
|
||||
from pixelle_video.models.progress import ProgressEvent
|
||||
from pixelle_video.config import config_manager
|
||||
|
||||
# Get ports from environment
|
||||
API_PORT = os.getenv("API_PORT", "8000")
|
||||
EDITOR_PORT = os.getenv("EDITOR_PORT", "3000")
|
||||
|
||||
|
||||
def render_output_preview(pixelle_video, video_params):
|
||||
"""Render output preview section (right column)"""
|
||||
@@ -48,6 +52,7 @@ def render_single_output(pixelle_video, video_params):
|
||||
title = video_params.get("title")
|
||||
n_scenes = video_params.get("n_scenes", 5)
|
||||
split_mode = video_params.get("split_mode", "paragraph")
|
||||
target_segments = video_params.get("target_segments", 8)
|
||||
bgm_path = video_params.get("bgm_path")
|
||||
bgm_volume = video_params.get("bgm_volume", 0.2)
|
||||
|
||||
@@ -112,6 +117,7 @@ def render_single_output(pixelle_video, video_params):
|
||||
"title": title if title else None,
|
||||
"n_scenes": n_scenes,
|
||||
"split_mode": split_mode,
|
||||
"target_segments": target_segments,
|
||||
"media_workflow": workflow_key,
|
||||
"frame_template": frame_template,
|
||||
"prompt_prefix": prompt_prefix,
|
||||
@@ -135,7 +141,7 @@ def render_single_output(pixelle_video, video_params):
|
||||
|
||||
# Submit to async API
|
||||
response = requests.post(
|
||||
"http://localhost:8000/api/video/generate/async",
|
||||
f"http://localhost:{API_PORT}/api/video/generate/async",
|
||||
json=api_payload,
|
||||
timeout=30
|
||||
)
|
||||
@@ -218,6 +224,7 @@ def render_single_output(pixelle_video, video_params):
|
||||
"title": title if title else None,
|
||||
"n_scenes": n_scenes,
|
||||
"split_mode": split_mode,
|
||||
"target_segments": target_segments,
|
||||
"media_workflow": workflow_key,
|
||||
"frame_template": frame_template,
|
||||
"prompt_prefix": prompt_prefix,
|
||||
@@ -309,7 +316,7 @@ def render_single_output(pixelle_video, video_params):
|
||||
pass
|
||||
|
||||
if task_id:
|
||||
editor_url = f"http://localhost:3000/editor?storyboard_id={task_id}"
|
||||
editor_url = f"http://localhost:{EDITOR_PORT}/editor?storyboard_id={task_id}"
|
||||
st.markdown(
|
||||
f'''
|
||||
<a href="{editor_url}" target="_blank" style="text-decoration: none;">
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
"split.mode_paragraph": "📄 By Paragraph (\\n\\n)",
|
||||
"split.mode_line": "📝 By Line (\\n)",
|
||||
"split.mode_sentence": "✂️ By Sentence (。.!?)",
|
||||
"split.mode_smart": "🧠 Smart Merge (AI Grouping)",
|
||||
"split.smart_auto_hint": "🤖 AI will analyze text structure, recommend optimal segment count, and intelligently merge related paragraphs (dialogues, same scene)",
|
||||
"input.content": "Content",
|
||||
"input.content_placeholder": "Used directly without modification (split by strategy below)\nExample:\nHello everyone, today I'll share three study tips.\n\nThe first tip is focus training, meditate for 10 minutes daily.\n\nThe second tip is active recall, review immediately after learning.",
|
||||
"input.content_help": "Provide your own content for video generation",
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
"split.mode_paragraph": "📄 按段落(\\n\\n)",
|
||||
"split.mode_line": "📝 按行(\\n)",
|
||||
"split.mode_sentence": "✂️ 按句号(。.!?)",
|
||||
"split.mode_smart": "🧠 智能合并(AI 分组)",
|
||||
"split.smart_auto_hint": "🤖 AI 将自动分析文本结构,推荐最佳分镜数量,并智能合并相关段落(对话、同一场景)",
|
||||
"input.content": "内容",
|
||||
"input.content_placeholder": "直接使用,不做改写(根据下方分割方式切分)\n例如:\n大家好,今天跟你分享三个学习技巧。\n\n第一个技巧是专注力训练,每天冥想10分钟。\n\n第二个技巧是主动回忆,学完立即复述。",
|
||||
"input.content_help": "提供您自己的内容用于视频生成",
|
||||
|
||||
@@ -33,6 +33,9 @@ from web.components.header import render_header
|
||||
from web.i18n import tr
|
||||
from web.utils.async_helpers import run_async
|
||||
|
||||
# Get ports from environment
|
||||
EDITOR_PORT = os.getenv("EDITOR_PORT", "3000")
|
||||
|
||||
# Page config
|
||||
st.set_page_config(
|
||||
page_title="History - Pixelle-Video",
|
||||
@@ -363,7 +366,7 @@ def render_task_detail_modal(task_id: str, pixelle_video):
|
||||
)
|
||||
|
||||
# Open in Editor button
|
||||
editor_url = f"http://localhost:3000/editor?storyboard_id={task_id}"
|
||||
editor_url = f"http://localhost:{EDITOR_PORT}/editor?storyboard_id={task_id}"
|
||||
st.markdown(
|
||||
f'''
|
||||
<a href="{editor_url}" target="_blank" style="text-decoration: none;">
|
||||
|
||||
@@ -22,6 +22,7 @@ Features:
|
||||
import streamlit as st
|
||||
import requests
|
||||
import time
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
from web.i18n import tr, get_language
|
||||
@@ -33,8 +34,12 @@ st.set_page_config(
|
||||
layout="wide",
|
||||
)
|
||||
|
||||
# Get ports from environment
|
||||
API_PORT = os.getenv("API_PORT", "8000")
|
||||
EDITOR_PORT = os.getenv("EDITOR_PORT", "3000")
|
||||
|
||||
# API endpoint
|
||||
API_BASE = "http://localhost:8000/api"
|
||||
API_BASE = f"http://localhost:{API_PORT}/api"
|
||||
|
||||
|
||||
def get_all_tasks():
|
||||
@@ -183,7 +188,7 @@ def render_task_card(task):
|
||||
with col_a:
|
||||
st.success("✨ 视频生成成功")
|
||||
with col_b:
|
||||
editor_url = f"http://localhost:3000/editor?storyboard_id={task_id}"
|
||||
editor_url = f"http://localhost:{EDITOR_PORT}/editor?storyboard_id={task_id}"
|
||||
st.markdown(
|
||||
f'''
|
||||
<a href="{editor_url}" target="_blank" style="text-decoration: none;">
|
||||
|
||||
Reference in New Issue
Block a user