From 1d343e55ba7b50d27ef0104ce370c8fb2cec9a45 Mon Sep 17 00:00:00 2001
From: empty
Date: Tue, 6 Jan 2026 23:29:41 +0800
Subject: [PATCH] feat(P1): Add align-prompt feature for better text-image relevance

---
 api/routers/editor.py               | 120 +++++++++++++++++++++++++++++++++++++
 api/schemas/editor.py               |   9 +++
 frontend/src/app/editor/page.tsx    |  38 ++++++++++++
 frontend/src/services/editor-api.ts |  25 ++++++++
 4 files changed, 192 insertions(+)

diff --git a/api/routers/editor.py b/api/routers/editor.py
index e2cc3a2..6707c1d 100644
--- a/api/routers/editor.py
+++ b/api/routers/editor.py
@@ -41,6 +41,8 @@ from api.schemas.editor import (
     ExportRequest,
     ExportResponse,
     ExportStatusResponse,
+    AlignPromptRequest,
+    AlignPromptResponse,
 )
 from fastapi import BackgroundTasks
 import asyncio
@@ -598,6 +600,124 @@ async def regenerate_frame_audio(
         raise HTTPException(status_code=500, detail=str(e))
 
 
+def _extract_first_image_prompt(response: str) -> str:
+    """
+    Extract the first image prompt from a raw LLM response
+
+    Accepts bare JSON or JSON wrapped in a markdown code block and expects
+    an object shaped like {"image_prompts": ["...", ...]}.
+
+    Raises:
+        ValueError: If the response cannot be parsed or holds no usable prompt.
+    """
+    import json
+    import re
+
+    try:
+        result = json.loads(response)
+    except json.JSONDecodeError:
+        # Try markdown code block
+        match = re.search(r'```(?:json)?\s*([\s\S]+?)\s*```', response)
+        if not match:
+            raise ValueError("Failed to parse LLM response")
+        try:
+            result = json.loads(match.group(1))
+        except json.JSONDecodeError as e:
+            raise ValueError("Failed to parse LLM response") from e
+
+    prompts = result.get("image_prompts") if isinstance(result, dict) else None
+    if not isinstance(prompts, list) or len(prompts) == 0:
+        raise ValueError("No image prompts in response")
+
+    first_prompt = prompts[0]
+    if not isinstance(first_prompt, str) or not first_prompt.strip():
+        # Reject null/blank/non-string entries instead of storing them
+        raise ValueError("No image prompts in response")
+
+    return first_prompt
+
+
+@router.post(
+    "/storyboard/{storyboard_id}/frames/{frame_id}/align-prompt",
+    response_model=AlignPromptResponse
+)
+async def align_frame_prompt(
+    storyboard_id: str = Path(..., description="Storyboard/task ID"),
+    frame_id: str = Path(..., description="Frame ID"),
+    request: AlignPromptRequest = None
+):
+    """
+    Align image prompt with narration
+
+    Asks the LLM for a new image prompt built from the frame's narration and
+    stores it on the cached frame. The request body is optional; a non-blank
+    narration in it overrides the narration stored on the frame.
+
+    Raises:
+        HTTPException: 404 if the storyboard or frame is not found, 400 if no
+            narration text is available, 500 if prompt generation fails.
+    """
+    if storyboard_id not in _storyboard_cache:
+        raise HTTPException(status_code=404, detail=f"Storyboard {storyboard_id} not found")
+
+    storyboard = _storyboard_cache[storyboard_id]
+
+    # Find frame
+    target_frame = next(
+        (frame for frame in storyboard["frames"] if frame["id"] == frame_id),
+        None
+    )
+    if not target_frame:
+        raise HTTPException(status_code=404, detail=f"Frame {frame_id} not found")
+
+    # Get narration to use: request override first, then the stored narration.
+    # Whitespace-only text counts as missing so it is never sent to the LLM.
+    override = request.narration if request else None
+    narration = (override or "").strip() or (target_frame.get("narration") or "").strip()
+    if not narration:
+        raise HTTPException(status_code=400, detail="No narration text available")
+
+    try:
+        from api.dependencies import get_pixelle_video
+        from pixelle_video.prompts import build_image_prompt_prompt
+
+        pixelle_video = await get_pixelle_video()
+
+        # Use LLM to generate aligned image prompt
+        prompt = build_image_prompt_prompt(
+            narrations=[narration],
+            min_words=30,
+            max_words=60
+        )
+
+        response = await pixelle_video.llm(
+            prompt=prompt,
+            temperature=0.7,
+            max_tokens=500
+        )
+
+        # Validate before updating the frame so a malformed LLM reply
+        # cannot leave a non-string prompt in the cache
+        new_prompt = _extract_first_image_prompt(response)
+
+        # Update frame
+        target_frame["image_prompt"] = new_prompt
+        _storyboard_cache[storyboard_id] = storyboard
+
+        logger.info(f"Aligned image prompt for frame {frame_id}")
+
+        return AlignPromptResponse(
+            image_prompt=new_prompt,
+            success=True
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Prompt alignment failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
 @router.post(
     "/storyboard/{storyboard_id}/frames/{frame_id}/inpaint",
     response_model=InpaintResponse
diff --git a/api/schemas/editor.py b/api/schemas/editor.py
index c6ff412..010b634 100644
--- a/api/schemas/editor.py
+++ b/api/schemas/editor.py
@@ -144,3 +144,12 @@ class ExportStatusResponse(BaseModel):
     error: Optional[str] = None
 
 
+class AlignPromptRequest(BaseModel):
+    """Request to align image prompt with narration; every field is optional"""
+    narration: Optional[str] = Field(None, description="Override narration text")
+
+
+class AlignPromptResponse(BaseModel):
+    """Response after aligning prompt"""
+    image_prompt: str  # the newly generated image prompt stored on the frame
+    success: bool = True  # the align-prompt route reports failures as HTTP errors instead
diff --git a/frontend/src/app/editor/page.tsx b/frontend/src/app/editor/page.tsx
index beffee8..3ea1468 100644
--- a/frontend/src/app/editor/page.tsx
+++ b/frontend/src/app/editor/page.tsx
@@ -229,6 +229,7 @@ function SelectedFrameDetails() {
   const [isSaving, setIsSaving] = useState(false)
   const [isRegeneratingImage, setIsRegeneratingImage] = useState(false)
   const [isRegeneratingAudio, setIsRegeneratingAudio] = useState(false)
+  const [isAligningPrompt, setIsAligningPrompt] = useState(false)
   const [error, setError] = useState(null)
 
   // Update local state when frame changes
@@ -322,6 +323,31 @@ function SelectedFrameDetails() {
     }
   }
 
+  const handleAlignPrompt = async () => {
+    if (!storyboard || !selectedFrame) return // nothing selected, nothing to align
+
+    setIsAligningPrompt(true)
+    setError(null)
+    // Send the in-scope narration value when non-empty, else the selected frame's narration
+    try {
+      const result = await editorApi.alignPrompt(
+        storyboard.id,
+        selectedFrame.id,
+        narration || selectedFrame.narration
+      )
+
+      // Update the store and the local prompt field with the new image prompt
+      updateFrame(selectedFrame.id, {
+        imagePrompt: result.image_prompt,
+      })
+      setImagePrompt(result.image_prompt)
+    } catch (err: any) {
+      setError(err.message || '对齐提示词失败') // fallback text: "failed to align prompt"
+    } finally {
+      setIsAligningPrompt(false) // always clear the busy flag, on success or failure
+    }
+  }
+
   return (
{error && ( @@ -434,6 +460,18 @@ function SelectedFrameDetails() { ) : null} 重新生成音频 +
      )}
diff --git a/frontend/src/services/editor-api.ts b/frontend/src/services/editor-api.ts
index d2ffaf2..5674d99 100644
--- a/frontend/src/services/editor-api.ts
+++ b/frontend/src/services/editor-api.ts
@@ -197,6 +197,31 @@ class EditorApiClient {
     return response.json()
   }
 
+  /**
+   * Align image prompt with narration: POSTs the optional narration override as JSON to the frame's align-prompt endpoint and resolves with the parsed response body
+   */
+  async alignPrompt(
+    storyboardId: string,
+    frameId: string,
+    narration?: string
+  ): Promise<{ image_prompt: string; success: boolean }> {
+    const response = await fetch(
+      `${this.baseUrl}/editor/storyboard/${storyboardId}/frames/${frameId}/align-prompt`,
+      {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ narration }),
+      }
+    )
+    // Non-OK status: throw with the error body's "detail" field, or the HTTP status text when the body is not JSON
+    if (!response.ok) {
+      const error = await response.json().catch(() => ({ detail: response.statusText }))
+      throw new Error(error.detail || `Failed to align prompt: ${response.statusText}`)
+    }
+
+    return response.json()
+  }
+
   /**
    * Inpaint (局部重绘) image for a frame
    */