feat(P1): Add align-prompt feature for better text-image relevance
This commit is contained in:
@@ -41,6 +41,8 @@ from api.schemas.editor import (
|
||||
ExportRequest,
|
||||
ExportResponse,
|
||||
ExportStatusResponse,
|
||||
AlignPromptRequest,
|
||||
AlignPromptResponse,
|
||||
)
|
||||
from fastapi import BackgroundTasks
|
||||
import asyncio
|
||||
@@ -598,6 +600,98 @@ async def regenerate_frame_audio(
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _extract_first_image_prompt(llm_response):
    """Return the first image prompt found in an LLM reply.

    The reply is first decoded as JSON as-is; if that fails, the first
    Markdown code fence (optionally tagged ``json``) is decoded instead.
    The decoded value must be an object whose ``"image_prompts"`` entry is
    a non-empty list starting with a non-blank string.

    Raises:
        ValueError: if the reply cannot be decoded or does not have the
            expected shape.
    """
    # Local imports keep this helper self-contained; the surrounding module
    # is not known to import these at file level.
    import json
    import re

    try:
        parsed = json.loads(llm_response)
    except json.JSONDecodeError:
        fenced = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', llm_response)
        if not fenced:
            raise ValueError("Failed to parse LLM response") from None
        try:
            parsed = json.loads(fenced.group(1))
        except json.JSONDecodeError as exc:
            # Report the same message as the no-fence case instead of
            # leaking decoder internals such as "Expecting value: line 1 ...".
            raise ValueError("Failed to parse LLM response") from exc

    # Valid JSON that is a list or a scalar is not a usable reply.
    if not isinstance(parsed, dict):
        raise ValueError("Failed to parse LLM response")

    prompts = parsed.get("image_prompts")
    if not isinstance(prompts, list) or not prompts:
        raise ValueError("No image prompts in response")

    first_prompt = prompts[0]
    if not isinstance(first_prompt, str) or not first_prompt.strip():
        raise ValueError("No image prompts in response")

    return first_prompt


@router.post(
    "/storyboard/{storyboard_id}/frames/{frame_id}/align-prompt",
    response_model=AlignPromptResponse
)
async def align_frame_prompt(
    storyboard_id: str = Path(..., description="Storyboard/task ID"),
    frame_id: str = Path(..., description="Frame ID"),
    request: AlignPromptRequest = None
):
    """
    Align image prompt with narration

    Builds an image-prompt request from the frame's narration (or from the
    narration override in the request body), sends it to the LLM, stores the
    first returned prompt on the cached frame as ``image_prompt`` and returns it.

    Raises:
        HTTPException 404: the storyboard is not cached or the frame ID is
            not among its frames.
        HTTPException 400: neither the request nor the frame supplies
            non-blank narration text.
        HTTPException 500: the LLM call fails or its reply cannot be parsed.
    """
    if storyboard_id not in _storyboard_cache:
        raise HTTPException(status_code=404, detail=f"Storyboard {storyboard_id} not found")

    storyboard = _storyboard_cache[storyboard_id]

    # Find frame
    target_frame = next(
        (frame for frame in storyboard["frames"] if frame["id"] == frame_id),
        None,
    )
    if target_frame is None:
        raise HTTPException(status_code=404, detail=f"Frame {frame_id} not found")

    # A non-empty override in the request body wins over the stored narration.
    narration = request.narration if request and request.narration else target_frame.get("narration", "")

    # Whitespace-only text carries nothing for the LLM to work with.
    if not narration or not narration.strip():
        raise HTTPException(status_code=400, detail="No narration text available")

    try:
        # Imported lazily, as in the original handler.
        # NOTE(review): presumably to avoid an import cycle - confirm.
        from api.dependencies import get_pixelle_video

        pixelle_video = await get_pixelle_video()

        # Use LLM to generate aligned image prompt
        from pixelle_video.prompts import build_image_prompt_prompt

        prompt = build_image_prompt_prompt(
            narrations=[narration],
            min_words=30,
            max_words=60
        )

        response = await pixelle_video.llm(
            prompt=prompt,
            temperature=0.7,
            max_tokens=500
        )

        new_prompt = _extract_first_image_prompt(response)

        # Update frame
        target_frame["image_prompt"] = new_prompt
        # Reassigned even though the frame was mutated in place.
        # NOTE(review): only needed if the cache is not a plain dict - confirm.
        _storyboard_cache[storyboard_id] = storyboard

        logger.info(f"Aligned image prompt for frame {frame_id}")

        return AlignPromptResponse(
            image_prompt=new_prompt,
            success=True
        )

    except HTTPException:
        # Never rewrap a deliberate HTTP error as a generic 500.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Prompt alignment failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@router.post(
|
||||
"/storyboard/{storyboard_id}/frames/{frame_id}/inpaint",
|
||||
response_model=InpaintResponse
|
||||
|
||||
@@ -144,3 +144,12 @@ class ExportStatusResponse(BaseModel):
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class AlignPromptRequest(BaseModel):
    """Request body for aligning a frame's image prompt with its narration."""

    # Optional narration text; defaults to None when the caller omits it.
    narration: Optional[str] = Field(
        default=None,
        description="Override narration text",
    )
|
||||
|
||||
|
||||
class AlignPromptResponse(BaseModel):
    """Result payload returned once an image prompt has been aligned."""

    # The image prompt text produced by the alignment.
    image_prompt: str

    # Success flag; True unless the caller sets it otherwise.
    success: bool = True
|
||||
|
||||
@@ -229,6 +229,7 @@ function SelectedFrameDetails() {
|
||||
const [isSaving, setIsSaving] = useState(false)
|
||||
const [isRegeneratingImage, setIsRegeneratingImage] = useState(false)
|
||||
const [isRegeneratingAudio, setIsRegeneratingAudio] = useState(false)
|
||||
const [isAligningPrompt, setIsAligningPrompt] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
|
||||
// Update local state when frame changes
|
||||
@@ -322,6 +323,31 @@ function SelectedFrameDetails() {
|
||||
}
|
||||
}
|
||||
|
||||
// Ask the backend for an image prompt aligned with the narration, then mirror
// the returned prompt into both the frame store and the local input state.
const handleAlignPrompt = async () => {
  // Nothing to do without both a storyboard and a selected frame.
  if (!(storyboard && selectedFrame)) return

  setIsAligningPrompt(true)
  setError(null)

  try {
    // The locally edited narration takes precedence over the frame's own.
    const aligned = await editorApi.alignPrompt(
      storyboard.id,
      selectedFrame.id,
      narration || selectedFrame.narration
    )
    const alignedPrompt = aligned.image_prompt

    // Update local store with new image prompt
    updateFrame(selectedFrame.id, { imagePrompt: alignedPrompt })
    setImagePrompt(alignedPrompt)
  } catch (err: any) {
    setError(err.message || '对齐提示词失败')
  } finally {
    // Always clear the busy flag, whether the request succeeded or failed.
    setIsAligningPrompt(false)
  }
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{error && (
|
||||
@@ -434,6 +460,18 @@ function SelectedFrameDetails() {
|
||||
) : null}
|
||||
重新生成音频
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="outline"
|
||||
className="w-full"
|
||||
onClick={handleAlignPrompt}
|
||||
disabled={isAligningPrompt}
|
||||
>
|
||||
{isAligningPrompt ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin mr-2" />
|
||||
) : null}
|
||||
对齐提示词
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -197,6 +197,31 @@ class EditorApiClient {
|
||||
return response.json()
|
||||
}
|
||||
|
||||
/**
 * Align image prompt with narration - regenerate prompt based on narration
 *
 * Sends a POST to the frame's `align-prompt` endpoint with `{ narration }`
 * as the JSON body and resolves with the parsed JSON response.
 *
 * @param storyboardId - Storyboard ID; URL-encoded before being placed in the path.
 * @param frameId - Frame ID; URL-encoded before being placed in the path.
 * @param narration - Optional narration text sent in the request body.
 * @throws Error when the response status is not OK. The message is the
 *   response's `detail` (serialized to JSON when it is not a string), or a
 *   fallback built from the HTTP status text.
 */
async alignPrompt(
  storyboardId: string,
  frameId: string,
  narration?: string
): Promise<{ image_prompt: string; success: boolean }> {
  // Encode path segments so IDs containing reserved characters cannot
  // change the shape of the URL.
  const url =
    `${this.baseUrl}/editor/storyboard/${encodeURIComponent(storyboardId)}` +
    `/frames/${encodeURIComponent(frameId)}/align-prompt`

  const response = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ narration }),
  })

  if (!response.ok) {
    const error = await response.json().catch(() => ({ detail: response.statusText }))
    // `detail` is not always a string (validation errors can carry a list of
    // objects); serialize it so the message is not "[object Object]".
    const detail = error?.detail
    const message =
      typeof detail === 'string' ? detail : detail ? JSON.stringify(detail) : ''
    throw new Error(message || `Failed to align prompt: ${response.statusText}`)
  }

  return response.json()
}
|
||||
|
||||
/**
|
||||
* Inpaint (局部重绘) image for a frame
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user