模板中媒体尺寸改为预置方案

2025-11-12 17:19:06 +08:00
parent 64dcca204e
commit 7443cbf9c2
31 changed files with 576 additions and 90 deletions
--- a/pixelle_video/pipelines/custom.py
+++ b/pixelle_video/pipelines/custom.py
@@ -92,8 +92,7 @@ class CustomPipeline(BasePipeline):
        ref_audio: Optional[str] = None,
        
        image_workflow: Optional[str] = None,
-        image_width: int = 1024,
-        image_height: int = 1024,
+        # Note: image_width and image_height are now auto-determined from template
        
        frame_template: Optional[str] = None,
        video_fps: int = 30,
@@ -161,6 +160,10 @@ class CustomPipeline(BasePipeline):
        generator = HTMLFrameGenerator(template_path)
        template_requires_image = generator.requires_image()
        
+        # Read media size from template meta tags
+        image_width, image_height = generator.get_media_size()
+        logger.info(f"📐 Media size from template: {image_width}x{image_height}")
+        
        if template_requires_image:
            logger.info(f"📸 Template requires image generation")
        else:
--- a/pixelle_video/pipelines/standard.py
+++ b/pixelle_video/pipelines/standard.py
@@ -94,8 +94,7 @@ class StandardPipeline(BasePipeline):
        max_image_prompt_words: int = 60,
        
        # === Image Parameters ===
-        image_width: int = 1024,
-        image_height: int = 1024,
+        # Note: image_width and image_height are now auto-determined from template meta tags
        image_workflow: Optional[str] = None,
        
        # === Video Parameters ===
@@ -151,9 +150,8 @@ class StandardPipeline(BasePipeline):
            min_image_prompt_words: Min image prompt length
            max_image_prompt_words: Max image prompt length
            
-            image_width: Generated image width (default 1024)
-            image_height: Generated image height (default 1024)
            image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)
+                           Note: Image/video size is now auto-determined from template meta tags
            
            video_fps: Video frame rate (default 30)
            
@@ -239,6 +237,16 @@ class StandardPipeline(BasePipeline):
            template_config = self.core.config.get("template", {})
            frame_template = template_config.get("default_template", "1080x1920/default.html")
        
+        # Read media size from template meta tags
+        from pixelle_video.services.frame_html import HTMLFrameGenerator
+        from pixelle_video.utils.template_util import resolve_template_path
+        
+        template_path = resolve_template_path(frame_template)
+        temp_generator = HTMLFrameGenerator(template_path)
+        image_width, image_height = temp_generator.get_media_size()
+        
+        logger.info(f"📐 Media size from template: {image_width}x{image_height}")
+        
        # Create storyboard config
        config = StoryboardConfig(
            task_id=task_id,
--- a/pixelle_video/services/frame_html.py
+++ b/pixelle_video/services/frame_html.py
@@ -141,6 +141,58 @@ class HTMLFrameGenerator:
        logger.debug(f"Template loaded: {len(content)} chars")
        return content
    
+    def _parse_media_size_from_meta(self) -> tuple[Optional[int], Optional[int]]:
+        """
+        Parse media size from meta tags in template
+        
+        Looks for meta tags:
+        - <meta name="template:media-width" content="1024">
+        - <meta name="template:media-height" content="1024">
+        
+        Returns:
+            Tuple of (width, height), or (None, None) if either tag is missing, non-positive, or unparsable
+        """
+        from bs4 import BeautifulSoup
+        
+        try:
+            soup = BeautifulSoup(self.template, 'html.parser')
+            
+            # Find width and height meta tags
+            width_meta = soup.find('meta', attrs={'name': 'template:media-width'})
+            height_meta = soup.find('meta', attrs={'name': 'template:media-height'})
+            
+            if width_meta and height_meta:
+                width = int(width_meta.get('content', 0))
+                height = int(height_meta.get('content', 0))
+                
+                if width > 0 and height > 0:
+                    logger.debug(f"Found media size in meta tags: {width}x{height}")
+                    return width, height
+            
+            return None, None
+            
+        except Exception as e:
+            logger.warning(f"Failed to parse media size from meta tags: {e}")
+            return None, None
+    
+    def get_media_size(self) -> tuple[int, int]:
+        """
+        Get media size for image/video generation
+        
+        Returns the media size from the template meta tags, or 1024x1024 if none is found.
+        
+        Returns:
+            Tuple of (width, height)
+        """
+        media_width, media_height = self._parse_media_size_from_meta()
+        
+        if media_width and media_height:
+            return media_width, media_height
+        
+        # Fallback to default if not specified (should not happen with properly configured templates)
+        logger.warning(f"No media size meta tags found in template {self.template_path}, using fallback 1024x1024")
+        return 1024, 1024
+    
    def parse_template_parameters(self) -> Dict[str, Dict[str, Any]]:
        """
        Parse custom parameters from HTML template
--- a/pixelle_video/services/video.py
+++ b/pixelle_video/services/video.py
@@ -224,20 +224,43 @@ class VideoService:
                   -map "[v]" -map "[a]" output.mp4
        """
        try:
-            inputs = [ffmpeg.input(v) for v in videos]
-            (
-                ffmpeg
-                .concat(*inputs, v=1, a=1)
-                .output(output)
-                .overwrite_output()
-                .run(capture_stdout=True, capture_stderr=True)
+            # Build filter_complex string manually
+            n = len(videos)
+            
+            # Build input stream labels: [0:v][0:a][1:v][1:a]...
+            stream_spec = "".join([f"[{i}:v][{i}:a]" for i in range(n)])
+            filter_complex = f"{stream_spec}concat=n={n}:v=1:a=1[v][a]"
+            
+            # Build ffmpeg command
+            cmd = ['ffmpeg']
+            for video in videos:
+                cmd.extend(['-i', video])
+            cmd.extend([
+                '-filter_complex', filter_complex,
+                '-map', '[v]',
+                '-map', '[a]',
+                '-y',  # Overwrite output
+                output
+            ])
+            
+            # Run command
+            import subprocess
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                check=True
            )
+            
            logger.success(f"Videos concatenated successfully: {output}")
            return output
-        except ffmpeg.Error as e:
-            error_msg = e.stderr.decode() if e.stderr else str(e)
+        except subprocess.CalledProcessError as e:
+            error_msg = e.stderr if e.stderr else str(e)
            logger.error(f"FFmpeg concat filter error: {error_msg}")
            raise RuntimeError(f"Failed to concatenate videos: {error_msg}")
+        except Exception as e:
+            logger.error(f"Concatenation error: {e}")
+            raise RuntimeError(f"Failed to concatenate videos: {e}")
    
    def _get_video_duration(self, video: str) -> float:
        """Get video duration in seconds"""
@@ -382,10 +405,17 @@ class VideoService:
                # Concatenate original video with black padding
                video_stream = ffmpeg.concat(video_stream, black_input.video, v=1, a=0)
        
-        # Prepare audio stream
+        # Prepare audio stream (pad if needed to match target duration)
        input_audio = ffmpeg.input(audio)
        audio_stream = input_audio.audio.filter('volume', audio_volume)
        
+        # Pad audio with silence if video is longer
+        if video_duration > audio_duration:
+            pad_duration = video_duration - audio_duration
+            logger.info(f"Video is longer, padding audio with {pad_duration:.2f}s silence")
+            # Use apad to add silence at the end
+            audio_stream = audio_stream.filter('apad', whole_dur=target_duration)
+        
        if not video_has_audio:
            logger.info(f"Video has no audio stream, adding audio track")
            # Video is silent, just add the audio
@@ -398,8 +428,7 @@ class VideoService:
                        output,
                        vcodec='libx264',  # Re-encode video if padded
                        acodec='aac',
-                        audio_bitrate='192k',
-                        t=target_duration  # Trim to target duration
+                        audio_bitrate='192k'
                    )
                    .overwrite_output()
                    .run(capture_stdout=True, capture_stderr=True)
@@ -426,8 +455,7 @@ class VideoService:
                        output,
                        vcodec='libx264',  # Re-encode video if padded
                        acodec='aac',
-                        audio_bitrate='192k',
-                        t=target_duration  # Trim to target duration
+                        audio_bitrate='192k'
                    )
                    .overwrite_output()
                    .run(capture_stdout=True, capture_stderr=True)
@@ -452,8 +480,7 @@ class VideoService:
                        output,
                        vcodec='libx264',  # Re-encode video if padded
                        acodec='aac',
-                        audio_bitrate='192k',
-                        t=target_duration  # Trim to target duration
+                        audio_bitrate='192k'
                    )
                    .overwrite_output()
                    .run(capture_stdout=True, capture_stderr=True)