feat(P1): Add align-prompt feature for better text-image relevance

This commit is contained in:
empty
2026-01-06 23:29:41 +08:00
parent 2978622f7f
commit 1d343e55ba
4 changed files with 166 additions and 0 deletions

View File

@@ -41,6 +41,8 @@ from api.schemas.editor import (
ExportRequest, ExportRequest,
ExportResponse, ExportResponse,
ExportStatusResponse, ExportStatusResponse,
AlignPromptRequest,
AlignPromptResponse,
) )
from fastapi import BackgroundTasks from fastapi import BackgroundTasks
import asyncio import asyncio
@@ -598,6 +600,98 @@ async def regenerate_frame_audio(
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
def _extract_first_image_prompt(response):
    """
    Extract the first image prompt from a raw LLM reply

    The reply is parsed as JSON directly; if that fails, the contents of the
    first markdown code fence (optionally tagged ``json``) are parsed instead.
    The parsed value must be an object whose ``image_prompts`` entry is a
    non-empty list starting with a non-empty string.

    Raises:
        ValueError: if no JSON can be located, or the parsed payload does not
            have the expected shape.
    """
    import json
    import re

    try:
        result = json.loads(response)
    except json.JSONDecodeError:
        # Fall back to JSON wrapped in a markdown code block
        match = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', response)
        if not match:
            raise ValueError("Failed to parse LLM response")
        result = json.loads(match.group(1))

    # A bare JSON string/list/number is valid JSON but not a usable payload
    prompts = result.get("image_prompts") if isinstance(result, dict) else None
    if not isinstance(prompts, list) or not prompts:
        raise ValueError("No image prompts in response")

    first_prompt = prompts[0]
    if not isinstance(first_prompt, str) or not first_prompt.strip():
        raise ValueError("Invalid image prompt in response")
    return first_prompt


@router.post(
    "/storyboard/{storyboard_id}/frames/{frame_id}/align-prompt",
    response_model=AlignPromptResponse
)
async def align_frame_prompt(
    storyboard_id: str = Path(..., description="Storyboard/task ID"),
    frame_id: str = Path(..., description="Frame ID"),
    request: AlignPromptRequest = None
):
    """
    Align image prompt with narration

    Regenerates the image prompt for one frame from narration text: the
    narration supplied in the request body if present and non-empty,
    otherwise the frame's own stored narration. The prompt is built with
    ``build_image_prompt_prompt`` and sent to the LLM; the first returned
    image prompt is stored on the cached frame and returned.

    Raises:
        HTTPException: 404 if the storyboard or frame is not in the cache,
            400 if no narration text is available, 500 if prompt generation
            or response parsing fails.
    """
    if storyboard_id not in _storyboard_cache:
        raise HTTPException(status_code=404, detail=f"Storyboard {storyboard_id} not found")

    storyboard = _storyboard_cache[storyboard_id]
    frames = storyboard["frames"]

    # Find frame
    target_frame = next((frame for frame in frames if frame["id"] == frame_id), None)
    if not target_frame:
        raise HTTPException(status_code=404, detail=f"Frame {frame_id} not found")

    # Get narration to use
    narration = request.narration if request and request.narration else target_frame.get("narration", "")
    if not narration:
        raise HTTPException(status_code=400, detail="No narration text available")

    try:
        from api.dependencies import get_pixelle_video
        pixelle_video = await get_pixelle_video()

        # Use LLM to generate aligned image prompt
        from pixelle_video.prompts import build_image_prompt_prompt

        prompt = build_image_prompt_prompt(
            narrations=[narration],
            min_words=30,
            max_words=60
        )
        response = await pixelle_video.llm(
            prompt=prompt,
            temperature=0.7,
            max_tokens=500
        )

        # Validate the LLM output *before* touching the cached frame, so a
        # malformed reply can never leave a non-string prompt in the cache
        new_prompt = _extract_first_image_prompt(response)

        # Update frame
        target_frame["image_prompt"] = new_prompt
        _storyboard_cache[storyboard_id] = storyboard

        logger.info(f"Aligned image prompt for frame {frame_id}")
        return AlignPromptResponse(
            image_prompt=new_prompt,
            success=True
        )
    except Exception as e:
        logger.error(f"Prompt alignment failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post( @router.post(
"/storyboard/{storyboard_id}/frames/{frame_id}/inpaint", "/storyboard/{storyboard_id}/frames/{frame_id}/inpaint",
response_model=InpaintResponse response_model=InpaintResponse

View File

@@ -144,3 +144,12 @@ class ExportStatusResponse(BaseModel):
error: Optional[str] = None error: Optional[str] = None
class AlignPromptRequest(BaseModel):
    """Request body for aligning a frame's image prompt with narration"""

    # Optional caller-supplied narration; defaults to None when omitted
    narration: Optional[str] = Field(
        default=None,
        description="Override narration text",
    )
class AlignPromptResponse(BaseModel):
    """Response after aligning prompt"""

    # Required: the image prompt produced by the alignment call
    image_prompt: str = Field(..., description="Regenerated image prompt")
    # Defaults to True when not set explicitly
    success: bool = Field(True, description="Whether the prompt alignment succeeded")

View File

@@ -229,6 +229,7 @@ function SelectedFrameDetails() {
const [isSaving, setIsSaving] = useState(false) const [isSaving, setIsSaving] = useState(false)
const [isRegeneratingImage, setIsRegeneratingImage] = useState(false) const [isRegeneratingImage, setIsRegeneratingImage] = useState(false)
const [isRegeneratingAudio, setIsRegeneratingAudio] = useState(false) const [isRegeneratingAudio, setIsRegeneratingAudio] = useState(false)
const [isAligningPrompt, setIsAligningPrompt] = useState(false)
const [error, setError] = useState<string | null>(null) const [error, setError] = useState<string | null>(null)
// Update local state when frame changes // Update local state when frame changes
@@ -322,6 +323,31 @@ function SelectedFrameDetails() {
} }
} }
// Ask the editor API for a fresh image prompt for the selected frame and
// mirror the result into both the shared store and the local prompt state.
const handleAlignPrompt = async () => {
  if (!(storyboard && selectedFrame)) {
    return
  }

  const frameId = selectedFrame.id
  // Use the component's current `narration` value, falling back to the
  // frame's own narration when that value is empty
  const narrationText = narration || selectedFrame.narration

  setIsAligningPrompt(true)
  setError(null)

  try {
    const { image_prompt: alignedPrompt } = await editorApi.alignPrompt(
      storyboard.id,
      frameId,
      narrationText
    )

    // Update local store with new image prompt
    updateFrame(frameId, { imagePrompt: alignedPrompt })
    setImagePrompt(alignedPrompt)
  } catch (err: any) {
    setError(err.message || '对齐提示词失败')
  } finally {
    // Always clear the busy flag, whether the call succeeded or failed
    setIsAligningPrompt(false)
  }
}
return ( return (
<div className="space-y-4"> <div className="space-y-4">
{error && ( {error && (
@@ -434,6 +460,18 @@ function SelectedFrameDetails() {
) : null} ) : null}
</Button> </Button>
<Button
size="sm"
variant="outline"
className="w-full"
onClick={handleAlignPrompt}
disabled={isAligningPrompt}
>
{isAligningPrompt ? (
<Loader2 className="h-4 w-4 animate-spin mr-2" />
) : null}
</Button>
</div> </div>
)} )}
</div> </div>

View File

@@ -197,6 +197,31 @@ class EditorApiClient {
return response.json() return response.json()
} }
/**
 * Align image prompt with narration - regenerate prompt based on narration
 *
 * Sends a POST with body `{ narration }` to the frame's `align-prompt`
 * endpoint and resolves with the parsed JSON response.
 *
 * @param storyboardId - Storyboard ID used in the request path
 * @param frameId - Frame ID used in the request path
 * @param narration - Optional narration text sent in the request body
 * @throws Error carrying the server's `detail` (stringified when it is not
 *   a plain string), or a status-text fallback when no detail is available
 */
async alignPrompt(
  storyboardId: string,
  frameId: string,
  narration?: string
): Promise<{ image_prompt: string; success: boolean }> {
  const response = await fetch(
    `${this.baseUrl}/editor/storyboard/${storyboardId}/frames/${frameId}/align-prompt`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ narration }),
    }
  )

  if (!response.ok) {
    // A non-JSON error body falls back to the HTTP status text
    const error = await response.json().catch(() => ({ detail: response.statusText }))
    // `detail` is not always a string (request-validation errors return a
    // list of objects); passing that to `new Error` would yield
    // "[object Object]", so serialise it instead. Also tolerate a null body.
    const detail = error && error.detail
    const message =
      typeof detail === 'string' ? detail : detail ? JSON.stringify(detail) : ''
    throw new Error(message || `Failed to align prompt: ${response.statusText}`)
  }

  return response.json()
}
/** /**
* Inpaint (局部重绘) image for a frame * Inpaint (局部重绘) image for a frame
*/ */