Develop the WebUI feature for generating videos from video assets

puke
2025-12-04 15:23:13 +08:00
parent 5c52696e6f
commit 007a39c03a
6 changed files with 172 additions and 79 deletions

View File

@@ -472,7 +472,9 @@ Generate the video script now:"""
context.narrations = all_narrations
# Get template dimensions
template_name = "1080x1920/image_pure.html"
# Use asset_default.html template which supports both image and video assets
# (conditionally shows background image or provides transparent overlay)
template_name = "1080x1920/asset_default.html"
# Extract dimensions from template name (e.g., "1080x1920")
try:
dims = template_name.split("/")[0].split("x")
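For reference, the dimension parsing above reduces to a small helper; a minimal sketch in Python (the fallback size is an assumption, not taken from this diff):

    # Sketch of the template-name convention: "WIDTHxHEIGHT/name.html".
    def parse_template_dims(template_name: str) -> tuple[int, int]:
        try:
            width, height = template_name.split("/")[0].split("x")
            return int(width), int(height)
        except ValueError:
            return 1080, 1920  # hypothetical fallback, not from the diff

    assert parse_template_dims("1080x1920/asset_default.html") == (1080, 1920)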
@@ -524,9 +526,20 @@ Generate the video script now:"""
created_at=datetime.now()
)
# Store matched asset path in the frame
frame.image_path = scene["matched_asset"]
frame.media_type = "image"
# Get asset path and determine actual media type from asset_index
asset_path = scene["matched_asset"]
asset_metadata = self.asset_index.get(asset_path, {})
asset_type = asset_metadata.get("type", "image") # Default to image if not found
# Set media type and path based on actual asset type
if asset_type == "video":
frame.media_type = "video"
frame.video_path = asset_path
logger.debug(f"Scene {i}: Using video asset: {Path(asset_path).name}")
else:
frame.media_type = "image"
frame.image_path = asset_path
logger.debug(f"Scene {i}: Using image asset: {Path(asset_path).name}")
# Store scene info for later audio generation
frame._scene_data = scene # Temporary storage for multi-narration
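Taken together, the hunk means each frame now carries exactly one media path, chosen by the asset's indexed type. A minimal sketch of the dispatch, assuming asset_index maps paths to {"type": ...} dicts as the lookup above implies (Frame is a stand-in for the pipeline's frame model):

    from dataclasses import dataclass

    @dataclass
    class Frame:
        media_type: str | None = None
        image_path: str | None = None
        video_path: str | None = None

    def assign_media(frame: Frame, asset_path: str, asset_index: dict) -> None:
        asset_type = asset_index.get(asset_path, {}).get("type", "image")
        if asset_type == "video":
            frame.media_type, frame.video_path = "video", asset_path
        else:  # default to image when the asset is missing from the index
            frame.media_type, frame.image_path = "image", asset_path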

View File

@@ -73,8 +73,8 @@ class FrameProcessor:
frame_num = frame.index + 1
# Determine if this frame needs image generation
# If image_path is already set (e.g. asset-based pipeline), we consider it "needs image" but skip generation
has_existing_image = frame.image_path is not None
# If image_path or video_path is already set (e.g. asset-based pipeline), we consider it "has existing media" but skip generation
has_existing_media = frame.image_path is not None or frame.video_path is not None
needs_generation = frame.image_prompt is not None
try:
@@ -93,7 +93,6 @@ class FrameProcessor:
else:
logger.debug(f" 1/4: Using existing audio: {frame.audio_path}")
# Step 2: Generate media (image or video, conditional)
# Step 2: Generate media (image or video, conditional)
if needs_generation:
if progress_callback:
@@ -106,8 +105,12 @@ class FrameProcessor:
action="media"
))
await self._step_generate_media(frame, config)
elif has_existing_image:
logger.debug(f" 2/4: Using existing image: {frame.image_path}")
elif has_existing_media:
# Log appropriate message based on media type
if frame.video_path:
logger.debug(f" 2/4: Using existing video: {frame.video_path}")
else:
logger.debug(f" 2/4: Using existing image: {frame.image_path}")
else:
frame.image_path = None
frame.media_type = None
@@ -117,7 +120,7 @@ class FrameProcessor:
if progress_callback:
progress_callback(ProgressEvent(
event_type="frame_step",
progress=0.50 if (needs_generation or has_existing_image) else 0.33,
progress=0.50 if (needs_generation or has_existing_media) else 0.33,
frame_current=frame_num,
frame_total=total_frames,
step=3,
@@ -129,7 +132,7 @@ class FrameProcessor:
if progress_callback:
progress_callback(ProgressEvent(
event_type="frame_step",
progress=0.75 if (needs_generation or has_existing_image) else 0.67,
progress=0.75 if (needs_generation or has_existing_media) else 0.67,
frame_current=frame_num,
frame_total=total_frames,
step=4,
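Both fraction changes follow the same rule: a frame with a media step reports progress out of four steps, a frame without one out of three, which is where the 0.50/0.33 and 0.75/0.67 pairs come from. A minimal sketch of one plausible generalization of the hard-coded values, covering the step-3 and step-4 events shown above:

    def step_progress(step: int, has_media_step: bool) -> float:
        # Progress reported as a step begins: completed steps / total steps.
        if has_media_step:
            return (step - 1) / 4   # steps 3, 4 -> 0.50, 0.75
        return (step - 2) / 3       # media step skipped -> 0.33, 0.67

    assert step_progress(3, True) == 0.50
    assert round(step_progress(4, False), 2) == 0.67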
@@ -313,12 +316,14 @@ class FrameProcessor:
# Generate frame using HTML (size is auto-parsed from template path)
generator = HTMLFrameGenerator(template_path)
logger.debug(f"Generating frame with image: '{frame.image_path}' (type: {type(frame.image_path)})")
# Use video_path for video media, image_path for images
media_path = frame.video_path if frame.media_type == "video" else frame.image_path
logger.debug(f"Generating frame with media: '{media_path}' (type: {frame.media_type})")
composed_path = await generator.generate_frame(
title=storyboard.title,
text=frame.narration,
image=frame.image_path,
image=media_path, # HTMLFrameGenerator handles both image and video paths
ext=ext,
output_path=output_path
)
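With both paths funneled through one variable, the composer stays media-agnostic; the temp_video_with_overlay cleanup in the next hunk suggests that for video frames the composed output is later applied to the video segment as an overlay. The selection itself is just:

    def select_media_path(frame) -> str | None:
        # Video frames carry video_path; image frames (and legacy frames
        # whose media_type is None) fall back to image_path.
        return frame.video_path if frame.media_type == "video" else frame.image_path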
@@ -372,7 +377,8 @@ class FrameProcessor:
os.unlink(temp_video_with_overlay)
elif frame.media_type == "image" or frame.media_type is None:
# Image workflow: create video from image + audio
# Image workflow: Use composed image directly
# The asset_default.html template includes the image in the composition
logger.debug(f" → Using image-based composition")
segment_path = video_service.create_video_from_image(

View File

@@ -32,9 +32,9 @@ class VideoAnalysisService(ComfyBaseService):
Uses ComfyKit to execute video understanding workflows.
Returns detailed textual descriptions of video content.
Convention: workflows follow {source}/video_understanding.json pattern
- runninghub/video_understanding.json (default, cloud-based)
- selfhost/video_understanding.json (local ComfyUI, future)
Convention: workflows follow {source}/analyse_video.json pattern
- runninghub/analyse_video.json (default, cloud-based)
- selfhost/analyse_video.json (local ComfyUI, future)
Usage:
# Use default (runninghub cloud)
@@ -50,7 +50,7 @@ class VideoAnalysisService(ComfyBaseService):
workflows = pixelle_video.video_analysis.list_workflows()
"""
WORKFLOW_PREFIX = "video_understanding"
WORKFLOW_PREFIX = "analyse_video"
WORKFLOWS_DIR = "workflows"
def __init__(self, config: dict, core=None):
@@ -114,8 +114,8 @@ class VideoAnalysisService(ComfyBaseService):
# 2. Resolve workflow path using convention
if workflow is None:
# Use standardized naming: {source}/video_understanding.json
workflow = resolve_workflow_path("video_understanding", source)
# Use standardized naming: {source}/analyse_video.json
workflow = resolve_workflow_path("analyse_video", source)
logger.info(f"Using {source} workflow: {workflow}")
# 3. Resolve workflow (returns structured info)
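The rename is purely conventional; resolve_workflow_path just joins the pieces. A minimal sketch, assuming the workflows/ root implied by WORKFLOWS_DIR above:

    from pathlib import Path

    def resolve_workflow_path(name: str, source: str = "runninghub") -> Path:
        # {WORKFLOWS_DIR}/{source}/{name}.json, per the docstring convention.
        return Path("workflows") / source / f"{name}.json"

    assert resolve_workflow_path("analyse_video") == Path("workflows/runninghub/analyse_video.json")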

View File

@@ -1,5 +1,6 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="template:media-width" content="1024">
@@ -9,17 +10,16 @@
margin: 0;
padding: 0;
}
body {
margin: 0;
padding: 0;
width: 1080px;
height: 1920px;
font-family: 'PingFang SC', 'Source Han Sans', 'Microsoft YaHei', sans-serif;
background: #000;
overflow: hidden;
}
.page-container {
width: 1080px;
height: 1920px;
@@ -27,7 +27,10 @@
overflow: hidden;
}
/* 1. Background Image Layer (backing image) */
/* 1. Background Media Layer
- For image assets: displays the image
- For video assets: hidden (the video is composited in a later step)
*/
.background-layer {
position: absolute;
top: 0;
@@ -44,10 +47,15 @@
display: block;
}
/* Hide background layer when no image (video mode) */
.background-layer:empty {
display: none;
}
/* 2. Gradient Overlay
Ensures text readability regardless of image brightness
Ensures text readability regardless of background brightness
Top: Darker for Title
Middle: Transparent for Image visibility
Middle: Transparent for Media visibility
Bottom: Darker for Subtitles
*/
.gradient-overlay {
@@ -57,13 +65,11 @@
width: 100%;
height: 100%;
z-index: 1;
background: linear-gradient(
to bottom,
rgba(0,0,0,0.6) 0%,
rgba(0,0,0,0.1) 25%,
rgba(0,0,0,0.1) 60%,
rgba(0,0,0,0.8) 100%
);
background: linear-gradient(to bottom,
rgba(0, 0, 0, 0.6) 0%,
rgba(0, 0, 0, 0.1) 25%,
rgba(0, 0, 0, 0.1) 60%,
rgba(0, 0, 0, 0.8) 100%);
}
/* 3. Content Layer */
@@ -72,7 +78,8 @@
z-index: 2;
width: 100%;
height: 100%;
padding: 120px 80px 0px 80px; /* Top, Right, Bottom, Left */
padding: 120px 80px 0px 80px;
/* Top, Right, Bottom, Left */
box-sizing: border-box;
display: flex;
flex-direction: column;
@@ -85,7 +92,7 @@
font-size: 80px;
font-weight: 700;
line-height: 1.2;
text-shadow: 0 4px 12px rgba(0,0,0,0.5);
text-shadow: 0 4px 12px rgba(0, 0, 0, 0.5);
margin-bottom: 40px;
text-align: center;
}
@@ -110,16 +117,20 @@
font-weight: 500;
line-height: 1.6;
text-align: center;
text-shadow: 0 2px 8px rgba(0,0,0,0.6);
text-shadow: 0 2px 8px rgba(0, 0, 0, 0.6);
backdrop-filter: blur(4px);
}
</style>
</head>
<body>
<div class="page-container">
<!-- Background Image -->
<div class="background-layer">
<img src="{{image}}" alt="Background">
<!-- Background Media Layer
- For image assets: contains <img> tag
- For video assets: empty (hidden by CSS)
-->
<div class="background-layer" id="bg-layer">
<!-- Image will be inserted here for image assets only -->
</div>
<!-- Shadow Overlay for Text Readability -->
@@ -141,5 +152,23 @@
</div>
</div>
</div>
<script>
// Conditionally add image if provided
(function () {
var imageUrl = "{{image}}";
var bgLayer = document.getElementById('bg-layer');
// Only add img tag if image URL is provided and not empty
if (imageUrl && imageUrl.trim() !== "" && imageUrl !== "None") {
var img = document.createElement('img');
img.src = imageUrl;
img.alt = "Background";
bgLayer.appendChild(img);
}
// Otherwise, bg-layer stays empty and gets hidden by CSS
})();
</script>
</body>
</html>
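End to end, the generator substitutes {{image}} once and the inline script decides whether to attach an <img>. A minimal sketch of the substitution side, assuming plain string replacement (HTMLFrameGenerator itself is not part of this diff):

    def render_template(template_html: str, image: str | None) -> str:
        # An empty value leaves imageUrl falsy in the inline script, so no
        # <img> is appended and .background-layer:empty can hide the layer.
        return template_html.replace("{{image}}", image or "")

One caveat worth verifying: browsers count whitespace and the placeholder comment inside #bg-layer as child nodes, so the :empty rule may not fire as written; the script-appended <img> path is unaffected either way.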

View File

@@ -1,41 +1,4 @@
{
"3": {
"inputs": {
"model": "microsoft/Florence-2-large",
"precision": "fp16",
"attention": "sdpa",
"convert_to_safetensors": false
},
"class_type": "DownloadAndLoadFlorence2Model",
"_meta": {
"title": "DownloadAndLoadFlorence2Model"
}
},
"4": {
"inputs": {
"text_input": "",
"task": "more_detailed_caption",
"fill_mask": true,
"keep_model_loaded": false,
"max_new_tokens": 1024,
"num_beams": 3,
"do_sample": true,
"output_mask_select": "",
"seed": 853848678279928,
"image": [
"5",
0
],
"florence2_model": [
"3",
0
]
},
"class_type": "Florence2Run",
"_meta": {
"title": "Florence2Run"
}
},
"5": {
"inputs": {
"image": "06.JPG"
@@ -47,15 +10,34 @@
},
"6": {
"inputs": {
"text": "The image shows a white cat sitting on a black and white striped stool against a white wall. The cat is wearing a blue knitted sweater and is looking directly at the camera with a curious expression. Its ears are perked up and its eyes are wide open, giving it an alert and inquisitive look. The background is plain white, making the cat the focal point of the image.",
"anything": [
"4",
2
"7",
0
]
},
"class_type": "easy showAnything",
"_meta": {
"title": "Show Any"
}
},
"7": {
"inputs": {
"model_name": "Qwen3-VL-8B-Instruct",
"quantization": "None (FP16)",
"attention_mode": "auto",
"preset_prompt": "🖼️ Detailed Description",
"custom_prompt": "",
"max_tokens": 512,
"keep_model_loaded": true,
"seed": 1,
"image": [
"5",
0
]
},
"class_type": "AILab_QwenVL",
"_meta": {
"title": "QwenVL"
}
}
}
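The simplification replaces the Florence-2 pair (nodes 3 and 4) with a single AILab_QwenVL node (7) fed by the existing image-input node (5). A caller only needs to patch the input filename before submitting the graph; a minimal sketch, with the actual submission client left out (it is not shown in this diff):

    import json

    def prepare_image_workflow(workflow_path: str, image_name: str) -> dict:
        with open(workflow_path, encoding="utf-8") as f:
            graph = json.load(f)
        graph["5"]["inputs"]["image"] = image_name  # image-input node above
        return graph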

View File

@@ -0,0 +1,63 @@
{
"9": {
"inputs": {
"prompt": "详细描述这个视频500字以内"
},
"class_type": "CR Prompt Text",
"_meta": {
"title": "⚙️ CR Prompt Text"
}
},
"14": {
"inputs": {
"video": "01_segment.mp4",
"force_rate": 0,
"custom_width": 0,
"custom_height": 0,
"frame_load_cap": 0,
"skip_first_frames": 0,
"select_every_nth": 1,
"format": "AnimateDiff"
},
"class_type": "VHS_LoadVideo",
"_meta": {
"title": "$video.video"
}
},
"18": {
"inputs": {
"text": "这是一个静态插画风格的宣传图或信息图表。画面中央是一位坐在办公桌前、面带微笑的年轻人,他正专注地使用笔记本电脑工作。他的身后是一扇大窗户,窗外透进温暖柔和的光线,营造出温馨舒适的居家氛围;窗边挂着浅色窗帘,墙上悬挂着一个黑色圆形时钟,指针指向约六点十分——暗示下班后的时间段。\n\n从年轻人头顶上方飘落许多金色硬币每枚都印有美元符号“$”,象征被动收入源源不断流入。桌子两侧各摆放一盆高大的绿植(叶片宽大),增添自然气息与生活感。桌面简洁整洁,仅放一台银灰色笔记本和一个小花瓶作为装饰。\n\n图片顶部用粗体黑字写着标题“如何新增被动收入”。底部则有一句引言式文案“下班后时间其实能创造新收入”强调利用业余时间实现财务自由的可能性。左下角标注了创作者信息@Pixelle.AI并注明其为开源多模态AI创意代理工具右下角显示作品类型“Pixelle-Video”。\n\n整体色调以米黄、灰棕为主配以橙金点缀视觉上既专业又不失亲和力适合用于财经类内容推广或个人理财教育场景。构图平衡对称突出主题的同时传递积极向上的价值观——即通过智慧投资时间和技能在非正式工时也能收获财富回报。",
"anything": [
"19",
0
]
},
"class_type": "easy showAnything",
"_meta": {
"title": "Show Any"
}
},
"19": {
"inputs": {
"model_name": "Qwen3-VL-8B-Instruct",
"quantization": "None (FP16)",
"attention_mode": "auto",
"preset_prompt": "📹 Video Summary",
"custom_prompt": [
"9",
0
],
"max_tokens": 1024,
"keep_model_loaded": true,
"seed": 582488656,
"video": [
"14",
0
]
},
"class_type": "AILab_QwenVL",
"_meta": {
"title": "QwenVL"
}
}
}
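Note the _meta.title of node 14, "$video.video": it reads like a parameter-binding convention in which a $name.field title maps a caller-supplied argument onto that node's input. A minimal sketch of that interpretation (an assumption about the convention, not documented ComfyKit behavior):

    def bind_params(graph: dict, **params) -> dict:
        # Under this reading, "$video.video" binds params["video"] to
        # graph["14"]["inputs"]["video"].
        for node in graph.values():
            title = node.get("_meta", {}).get("title", "")
            if title.startswith("$") and "." in title:
                name, field = title[1:].split(".", 1)
                if name in params:
                    node["inputs"][field] = params[name]
        return graph

Usage under that assumption: bind_params(graph, video="01_segment.mp4") before submission.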