feat(P1): Add align-prompt feature for better text-image relevance
This commit is contained in:
@@ -41,6 +41,8 @@ from api.schemas.editor import (
|
|||||||
ExportRequest,
|
ExportRequest,
|
||||||
ExportResponse,
|
ExportResponse,
|
||||||
ExportStatusResponse,
|
ExportStatusResponse,
|
||||||
|
AlignPromptRequest,
|
||||||
|
AlignPromptResponse,
|
||||||
)
|
)
|
||||||
from fastapi import BackgroundTasks
|
from fastapi import BackgroundTasks
|
||||||
import asyncio
|
import asyncio
|
||||||
@@ -598,6 +600,98 @@ async def regenerate_frame_audio(
|
|||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/storyboard/{storyboard_id}/frames/{frame_id}/align-prompt",
    response_model=AlignPromptResponse
)
async def align_frame_prompt(
    storyboard_id: str = Path(..., description="Storyboard/task ID"),
    frame_id: str = Path(..., description="Frame ID"),
    request: AlignPromptRequest = None
):
    """
    Align image prompt with narration

    Regenerates the image prompt based on the frame's narration using
    enhanced core imagery extraction for better semantic relevance.

    The narration comes from the request body when one is supplied and
    non-empty, otherwise from the cached frame. On success the cached frame's
    ``image_prompt`` is overwritten with the newly generated prompt.

    Raises:
        HTTPException: 404 if the storyboard or the frame is unknown,
            400 if no narration text is available, 500 if the LLM call fails
            or its reply cannot be turned into a usable prompt.
    """
    if storyboard_id not in _storyboard_cache:
        raise HTTPException(status_code=404, detail=f"Storyboard {storyboard_id} not found")

    storyboard = _storyboard_cache[storyboard_id]
    frames = storyboard["frames"]

    # Find frame
    target_frame = next((frame for frame in frames if frame["id"] == frame_id), None)
    if target_frame is None:
        raise HTTPException(status_code=404, detail=f"Frame {frame_id} not found")

    # Get narration to use: an explicit override wins over the stored narration
    narration = request.narration if request and request.narration else target_frame.get("narration", "")

    if not narration:
        raise HTTPException(status_code=400, detail="No narration text available")

    try:
        # Imported lazily, as in the original handler.
        from api.dependencies import get_pixelle_video
        from pixelle_video.prompts import build_image_prompt_prompt

        pixelle_video = await get_pixelle_video()

        # Use LLM to generate aligned image prompt
        prompt = build_image_prompt_prompt(
            narrations=[narration],
            min_words=30,
            max_words=60
        )

        response = await pixelle_video.llm(
            prompt=prompt,
            temperature=0.7,
            max_tokens=500
        )

        # Validate the reply *before* touching the cache so a malformed
        # answer can never leave a non-string prompt stored on the frame.
        new_prompt = _extract_first_image_prompt(response)

        # Update frame
        target_frame["image_prompt"] = new_prompt
        _storyboard_cache[storyboard_id] = storyboard

        logger.info(f"Aligned image prompt for frame {frame_id}")

        return AlignPromptResponse(
            image_prompt=new_prompt,
            success=True
        )

    except HTTPException:
        # Never re-wrap a deliberate HTTP error as a generic 500.
        raise
    except Exception as e:
        logger.error(f"Prompt alignment failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


def _extract_first_image_prompt(response):
    """Return the first image prompt contained in an LLM reply.

    The reply is parsed as JSON directly; if that fails, the contents of the
    first Markdown code fence (optionally tagged ``json``) are parsed instead.
    The parsed value must be an object whose ``image_prompts`` entry is a
    non-empty list starting with a non-blank string.

    Raises:
        ValueError: if no JSON can be located, if the fenced text is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError``), or if the
            payload does not carry a usable first prompt.
    """
    import json
    import re

    try:
        result = json.loads(response)
    except json.JSONDecodeError:
        # Try markdown code block
        match = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', response)
        if not match:
            raise ValueError("Failed to parse LLM response")
        result = json.loads(match.group(1))

    prompts = result.get("image_prompts") if isinstance(result, dict) else None
    if not isinstance(prompts, list) or not prompts:
        raise ValueError("No image prompts in response")

    first_prompt = prompts[0]
    if not isinstance(first_prompt, str) or not first_prompt.strip():
        raise ValueError("No image prompts in response")

    return first_prompt
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/storyboard/{storyboard_id}/frames/{frame_id}/inpaint",
|
"/storyboard/{storyboard_id}/frames/{frame_id}/inpaint",
|
||||||
response_model=InpaintResponse
|
response_model=InpaintResponse
|
||||||
|
|||||||
@@ -144,3 +144,12 @@ class ExportStatusResponse(BaseModel):
|
|||||||
error: Optional[str] = None
|
error: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class AlignPromptRequest(BaseModel):
    """Request to align image prompt with narration"""

    # Optional narration text supplied by the caller; defaults to None when
    # the client does not send an override.
    narration: Optional[str] = Field(
        default=None,
        description="Override narration text",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class AlignPromptResponse(BaseModel):
    """Response after aligning prompt"""

    # The image prompt returned to the client (required).
    image_prompt: str

    # Success flag; True unless the caller sets it otherwise.
    success: bool = True
|
||||||
|
|||||||
@@ -229,6 +229,7 @@ function SelectedFrameDetails() {
|
|||||||
const [isSaving, setIsSaving] = useState(false)
|
const [isSaving, setIsSaving] = useState(false)
|
||||||
const [isRegeneratingImage, setIsRegeneratingImage] = useState(false)
|
const [isRegeneratingImage, setIsRegeneratingImage] = useState(false)
|
||||||
const [isRegeneratingAudio, setIsRegeneratingAudio] = useState(false)
|
const [isRegeneratingAudio, setIsRegeneratingAudio] = useState(false)
|
||||||
|
const [isAligningPrompt, setIsAligningPrompt] = useState(false)
|
||||||
const [error, setError] = useState<string | null>(null)
|
const [error, setError] = useState<string | null>(null)
|
||||||
|
|
||||||
// Update local state when frame changes
|
// Update local state when frame changes
|
||||||
@@ -322,6 +323,31 @@ function SelectedFrameDetails() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const handleAlignPrompt = async () => {
  // Nothing to do until a storyboard is loaded and a frame is selected.
  if (!storyboard || !selectedFrame) return

  setIsAligningPrompt(true)
  setError(null)

  try {
    // Prefer the locally edited narration; fall back to the frame's own text.
    const narrationText = narration || selectedFrame.narration
    const { image_prompt: alignedPrompt } = await editorApi.alignPrompt(
      storyboard.id,
      selectedFrame.id,
      narrationText
    )

    // Push the new prompt into the store and into the local editing field.
    updateFrame(selectedFrame.id, { imagePrompt: alignedPrompt })
    setImagePrompt(alignedPrompt)
  } catch (err: any) {
    setError(err.message || '对齐提示词失败')
  } finally {
    // Always clear the busy flag so the button becomes usable again.
    setIsAligningPrompt(false)
  }
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
{error && (
|
{error && (
|
||||||
@@ -434,6 +460,18 @@ function SelectedFrameDetails() {
|
|||||||
) : null}
|
) : null}
|
||||||
重新生成音频
|
重新生成音频
|
||||||
</Button>
|
</Button>
|
||||||
|
<Button
|
||||||
|
size="sm"
|
||||||
|
variant="outline"
|
||||||
|
className="w-full"
|
||||||
|
onClick={handleAlignPrompt}
|
||||||
|
disabled={isAligningPrompt}
|
||||||
|
>
|
||||||
|
{isAligningPrompt ? (
|
||||||
|
<Loader2 className="h-4 w-4 animate-spin mr-2" />
|
||||||
|
) : null}
|
||||||
|
对齐提示词
|
||||||
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -197,6 +197,31 @@ class EditorApiClient {
|
|||||||
return response.json()
|
return response.json()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Align image prompt with narration - regenerate prompt based on narration.
 *
 * Sends a POST to the frame's `align-prompt` endpoint with `{ narration }`
 * as the JSON body (the key is dropped by `JSON.stringify` when `narration`
 * is undefined).
 *
 * @param storyboardId - ID of the storyboard that owns the frame
 * @param frameId - ID of the frame whose image prompt is regenerated
 * @param narration - optional narration text to send with the request
 * @returns the parsed JSON response body
 * @throws Error when the response status is not OK; the message is the
 *   server's `detail` (serialised to JSON when it is not a plain string)
 *   or a fallback containing the HTTP status text
 */
async alignPrompt(
  storyboardId: string,
  frameId: string,
  narration?: string
): Promise<{ image_prompt: string; success: boolean }> {
  const response = await fetch(
    `${this.baseUrl}/editor/storyboard/${storyboardId}/frames/${frameId}/align-prompt`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ narration }),
    }
  )

  if (!response.ok) {
    // The error body may be missing or not JSON; fall back to the status text.
    const error = await response.json().catch(() => ({ detail: response.statusText }))
    // Guard against a literal `null` body before reading `detail`.
    const detail = error && error.detail
    // Validation errors can carry `detail` as a list of objects; serialise it
    // so the Error message stays readable instead of "[object Object]".
    const message =
      typeof detail === 'string' ? detail : detail ? JSON.stringify(detail) : ''
    throw new Error(message || `Failed to align prompt: ${response.statusText}`)
  }

  return response.json()
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inpaint (局部重绘) image for a frame
|
* Inpaint (局部重绘) image for a frame
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user