分镜支持视频功能

This commit is contained in:
puke
2025-11-11 20:38:31 +08:00
parent cf9321feac
commit 0e2b6b17d0
17 changed files with 1225 additions and 321 deletions

View File

@@ -0,0 +1,61 @@
# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Media generation result models
"""
from typing import Literal, Optional
from pydantic import BaseModel, Field
class MediaResult(BaseModel):
    """
    Result of a media-generating workflow run.

    A single model covers both image and video outputs from ComfyUI
    workflows; ``media_type`` tells the consumer which kind was produced.

    Attributes:
        media_type: Kind of media generated, either "image" or "video".
        url: URL or path pointing at the generated media.
        duration: Length in seconds. Intended for video results and left
            as None for images (this pairing is a convention of the
            model, it is not validated here).

    Examples:
        # Image result
        MediaResult(media_type="image", url="http://example.com/image.png")

        # Video result
        MediaResult(media_type="video", url="http://example.com/video.mp4", duration=5.2)
    """

    # Required: restricted to the two literal values by the annotation.
    media_type: Literal["image", "video"] = Field(
        ...,
        description="Type of generated media",
    )

    # Required: location of the generated file.
    url: str = Field(
        ...,
        description="URL or path to the generated media file",
    )

    # Optional: defaults to None when the caller supplies no duration.
    duration: Optional[float] = Field(
        default=None,
        description="Duration in seconds (only applicable for video)",
    )

    @property
    def is_image(self) -> bool:
        """True when ``media_type`` is "image"."""
        kind = self.media_type
        return kind == "image"

    @property
    def is_video(self) -> bool:
        """True when ``media_type`` is "video"."""
        kind = self.media_type
        return kind == "video"

View File

@@ -57,16 +57,18 @@ class StoryboardFrame:
"""Single storyboard frame"""
index: int # Frame index (0-based)
narration: str # Narration text
image_prompt: str # Image generation prompt
image_prompt: str # Image generation prompt (can be None for text-only or video)
# Generated resource paths
audio_path: Optional[str] = None # Audio file path
image_path: Optional[str] = None # Original image path
composed_image_path: Optional[str] = None # Composed image path (with subtitles)
video_segment_path: Optional[str] = None # Video segment path
audio_path: Optional[str] = None # Audio file path (narration)
media_type: Optional[str] = None # Media type: "image" or "video" (None if no media)
image_path: Optional[str] = None # Original image path (for image type)
video_path: Optional[str] = None # Original video path (for video type, before composition)
composed_image_path: Optional[str] = None # Composed image path (with subtitles, for image type)
video_segment_path: Optional[str] = None # Final video segment path
# Metadata
duration: float = 0.0 # Audio duration (seconds)
duration: float = 0.0 # Frame duration (seconds, from audio or video)
created_at: Optional[datetime] = None
def __post_init__(self):