feat(P1): Add align-prompt feature for better text-image relevance
This commit is contained in:
@@ -41,6 +41,8 @@ from api.schemas.editor import (
|
||||
ExportRequest,
|
||||
ExportResponse,
|
||||
ExportStatusResponse,
|
||||
AlignPromptRequest,
|
||||
AlignPromptResponse,
|
||||
)
|
||||
from fastapi import BackgroundTasks
|
||||
import asyncio
|
||||
@@ -598,6 +600,98 @@ async def regenerate_frame_audio(
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _extract_first_image_prompt(response):
    """Return the first image prompt contained in an LLM response.

    The response is parsed as JSON directly; if that fails, the first
    Markdown code fence (optionally tagged ``json``) is parsed instead.

    Raises:
        ValueError: if no JSON can be located, if the decoded value is not
            an object, or if ``image_prompts`` is not a non-empty list whose
            first element is a non-blank string.
        json.JSONDecodeError: if the fenced block is not valid JSON.
    """
    import json
    import re

    try:
        result = json.loads(response)
    except json.JSONDecodeError:
        # Fall back to a fenced block such as ```json ... ```
        match = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', response)
        if not match:
            raise ValueError("Failed to parse LLM response")
        result = json.loads(match.group(1))

    # A bare JSON string/number/array is valid JSON but not the expected object.
    if not isinstance(result, dict):
        raise ValueError("Failed to parse LLM response")

    prompts = result.get("image_prompts")
    # Require a real list: indexing a plain string here would silently
    # return its first character as the "prompt".
    if not isinstance(prompts, list) or not prompts:
        raise ValueError("No image prompts in response")

    first_prompt = prompts[0]
    if not isinstance(first_prompt, str) or not first_prompt.strip():
        raise ValueError("No image prompts in response")

    return first_prompt


@router.post(
    "/storyboard/{storyboard_id}/frames/{frame_id}/align-prompt",
    response_model=AlignPromptResponse
)
async def align_frame_prompt(
    storyboard_id: str = Path(..., description="Storyboard/task ID"),
    frame_id: str = Path(..., description="Frame ID"),
    request: AlignPromptRequest = None
):
    """
    Align image prompt with narration

    Regenerates the image prompt based on the frame's narration using
    enhanced core imagery extraction for better semantic relevance.

    The narration comes from ``request.narration`` when provided and
    non-empty, otherwise from the frame's stored ``narration``. On success
    the frame's ``image_prompt`` is overwritten with the new prompt.

    Raises:
        HTTPException 404: storyboard or frame not found.
        HTTPException 400: no (non-blank) narration text available.
        HTTPException 500: LLM call failed or its response was unusable.
    """
    if storyboard_id not in _storyboard_cache:
        raise HTTPException(status_code=404, detail=f"Storyboard {storyboard_id} not found")

    storyboard = _storyboard_cache[storyboard_id]
    frames = storyboard["frames"]

    # Find frame
    target_frame = next((frame for frame in frames if frame["id"] == frame_id), None)
    if not target_frame:
        raise HTTPException(status_code=404, detail=f"Frame {frame_id} not found")

    # Get narration to use: explicit override first, stored narration otherwise
    narration = request.narration if request and request.narration else target_frame.get("narration", "")

    # Whitespace-only text carries nothing to align against, so treat it as missing
    if not narration or not str(narration).strip():
        raise HTTPException(status_code=400, detail="No narration text available")

    try:
        from api.dependencies import get_pixelle_video

        pixelle_video = await get_pixelle_video()

        # Use LLM to generate aligned image prompt
        from pixelle_video.prompts import build_image_prompt_prompt

        prompt = build_image_prompt_prompt(
            narrations=[narration],
            min_words=30,
            max_words=60
        )

        response = await pixelle_video.llm(
            prompt=prompt,
            temperature=0.7,
            max_tokens=500
        )

        # Validate before mutating the frame so a bad LLM reply cannot
        # leave a corrupted prompt behind.
        new_prompt = _extract_first_image_prompt(response)

        # Update frame
        target_frame["image_prompt"] = new_prompt
        # Re-assign in case the cache writes through on item assignment
        # (NOTE(review): redundant if _storyboard_cache is a plain dict — confirm).
        _storyboard_cache[storyboard_id] = storyboard

        logger.info(f"Aligned image prompt for frame {frame_id}")

        return AlignPromptResponse(
            image_prompt=new_prompt,
            success=True
        )

    except Exception as e:
        logger.error(f"Prompt alignment failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@router.post(
|
||||
"/storyboard/{storyboard_id}/frames/{frame_id}/inpaint",
|
||||
response_model=InpaintResponse
|
||||
|
||||
@@ -144,3 +144,12 @@ class ExportStatusResponse(BaseModel):
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class AlignPromptRequest(BaseModel):
    """Request to align image prompt with narration"""

    # Optional caller-supplied narration; None when the field is omitted.
    narration: Optional[str] = Field(
        default=None,
        description="Override narration text",
    )
|
||||
|
||||
|
||||
class AlignPromptResponse(BaseModel):
    """Response after aligning prompt"""

    # Required: the image prompt returned to the caller.
    image_prompt: str

    # Outcome flag; True unless the caller sets it otherwise.
    success: bool = Field(default=True)
|
||||
|
||||
Reference in New Issue
Block a user