AI-Video/pixelle_video/pipelines/standard.py

"""
Standard Video Generation Pipeline

Standard workflow for generating short videos from topic or fixed script.
This is the default pipeline that replicates the original VideoGeneratorService logic.
"""

from datetime import datetime
from pathlib import Path
from typing import Optional, Callable, Literal

from loguru import logger

from pixelle_video.pipelines.base import BasePipeline
from pixelle_video.models.progress import ProgressEvent
from pixelle_video.models.storyboard import (
    Storyboard,
    StoryboardFrame,
    StoryboardConfig,
    ContentMetadata,
    VideoGenerationResult
)
from pixelle_video.utils.content_generators import (
    generate_title,
    generate_narrations_from_topic,
    split_narration_script,
    generate_image_prompts,
)


class StandardPipeline(BasePipeline):
    """
    Standard video generation pipeline

    Workflow:
    1. Generate/determine title
    2. Generate narrations (from topic or split fixed script)
    3. Generate image prompts for each narration
    4. For each frame:
       - Generate audio (TTS)
       - Generate image
       - Compose frame with template
       - Create video segment
    5. Concatenate all segments
    6. Add BGM (optional)

    Supports two modes:
    - "generate": LLM generates narrations from topic
    - "fixed": Use provided script as-is (each line = one narration)
    """

    async def __call__(
        self,
        # === Input ===
        text: str,

        # === Processing Mode ===
        mode: Literal["generate", "fixed"] = "generate",

        # === Optional Title ===
        title: Optional[str] = None,

        # === Basic Config ===
        n_scenes: int = 5,  # Only used in generate mode; ignored in fixed mode
        voice_id: str = "[Chinese] zh-CN Yunjian",
        tts_workflow: Optional[str] = None,
        tts_speed: float = 1.2,
        ref_audio: Optional[str] = None,  # Reference audio for voice cloning
        output_path: Optional[str] = None,

        # === LLM Parameters ===
        min_narration_words: int = 5,
        max_narration_words: int = 20,
        min_image_prompt_words: int = 30,
        max_image_prompt_words: int = 60,

        # === Image Parameters ===
        image_width: int = 1024,
        image_height: int = 1024,
        image_workflow: Optional[str] = None,

        # === Video Parameters ===
        video_fps: int = 30,

        # === Frame Template (determines video size) ===
        frame_template: Optional[str] = None,

        # === Image Style ===
        prompt_prefix: Optional[str] = None,

        # === BGM Parameters ===
        bgm_path: Optional[str] = None,
        bgm_volume: float = 0.2,
        bgm_mode: Literal["once", "loop"] = "loop",

        # === Advanced Options ===
        content_metadata: Optional[ContentMetadata] = None,
        progress_callback: Optional[Callable[[ProgressEvent], None]] = None,
    ) -> VideoGenerationResult:
        """
        Generate short video from text input

        Args:
            text: Text input (required)
                  - For generate mode: topic/theme (e.g., "如何提高学习效率")
                  - For fixed mode: complete narration script (each line is a narration)

            mode: Processing mode (default "generate")
                  - "generate": LLM generates narrations from topic, creates n_scenes
                  - "fixed": Use existing script as-is, each line becomes a narration

                  Note: In fixed mode, n_scenes is ignored (uses actual line count)

            title: Video title (optional)
                   - If provided, use it as the video title
                   - If not provided:
                     * generate mode → use text as title
                     * fixed mode → LLM generates title from script

            n_scenes: Number of storyboard scenes (default 5)
                      Only effective in generate mode; ignored in fixed mode

            voice_id: TTS voice ID (default "[Chinese] zh-CN Yunjian")
            tts_workflow: TTS workflow filename (e.g., "tts_edge.json", None = use default)
            tts_speed: TTS speed multiplier (1.0 = normal, 1.2 = 20% faster, default 1.2)
            ref_audio: Reference audio path for voice cloning (optional)
            output_path: Output video path (auto-generated if None)

            min_narration_words: Min narration length (generate mode only)
            max_narration_words: Max narration length (generate mode only)
            min_image_prompt_words: Min image prompt length
            max_image_prompt_words: Max image prompt length

            image_width: Generated image width (default 1024)
            image_height: Generated image height (default 1024)
            image_workflow: Image workflow filename (e.g., "image_flux.json", None = use default)

            video_fps: Video frame rate (default 30)

            frame_template: HTML template path with size (None = use default "1080x1920/default.html")
                           Format: "SIZExSIZE/template.html" (e.g., "1080x1920/default.html", "1920x1080/modern.html")
                           Video size is automatically determined from template path

            prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
                          e.g., "anime style, vibrant colors" or "" for no prefix

            bgm_path: BGM path (filename like "default.mp3", custom path, or None)
            bgm_volume: BGM volume 0.0-1.0 (default 0.2)
            bgm_mode: BGM mode "once" or "loop" (default "loop")

            content_metadata: Content metadata (optional, for display)
            progress_callback: Progress callback function(ProgressEvent)

        Returns:
            VideoGenerationResult with video path and metadata
        """
        # ========== Step 0: Process text and determine title ==========
        logger.info(f"🚀 Starting StandardPipeline in '{mode}' mode")
        logger.info(f"   Text length: {len(text)} chars")

        # Determine final title
        if title:
            final_title = title
            logger.info(f"   Title: '{title}' (user-specified)")
        else:
            self._report_progress(progress_callback, "generating_title", 0.01)
            if mode == "generate":
                final_title = await generate_title(self.llm, text, strategy="auto")
                logger.info(f"   Title: '{final_title}' (auto-generated)")
            else:  # fixed
                final_title = await generate_title(self.llm, text, strategy="llm")
                logger.info(f"   Title: '{final_title}' (LLM-generated)")

        # ========== Step 0.5: Create isolated task directory ==========
        from pixelle_video.utils.os_util import (
            create_task_output_dir,
            get_task_final_video_path
        )

        task_dir, task_id = create_task_output_dir()
        logger.info(f"📁 Task directory created: {task_dir}")
        logger.info(f"   Task ID: {task_id}")

        # Determine final video path
        user_specified_output = None
        if output_path is None:
            output_path = get_task_final_video_path(task_id)
        else:
            user_specified_output = output_path
            output_path = get_task_final_video_path(task_id)
            logger.info(f"   Will copy final video to: {user_specified_output}")

        # Create storyboard config
        config = StoryboardConfig(
            task_id=task_id,
            n_storyboard=n_scenes,
            min_narration_words=min_narration_words,
            max_narration_words=max_narration_words,
            min_image_prompt_words=min_image_prompt_words,
            max_image_prompt_words=max_image_prompt_words,
            video_fps=video_fps,
            voice_id=voice_id,
            tts_workflow=tts_workflow,
            tts_speed=tts_speed,
            ref_audio=ref_audio,
            image_width=image_width,
            image_height=image_height,
            image_workflow=image_workflow,
            frame_template=frame_template or "1080x1920/default.html"
        )

        # Create storyboard
        storyboard = Storyboard(
            title=final_title,
            config=config,
            content_metadata=content_metadata,
            created_at=datetime.now()
        )

        try:
            # ========== Step 1: Generate/Split narrations ==========
            if mode == "generate":
                self._report_progress(progress_callback, "generating_narrations", 0.05)
                narrations = await generate_narrations_from_topic(
                    self.llm,
                    topic=text,
                    n_scenes=n_scenes,
                    min_words=min_narration_words,
                    max_words=max_narration_words
                )
                logger.info(f"✅ Generated {len(narrations)} narrations")
            else:  # fixed
                self._report_progress(progress_callback, "splitting_script", 0.05)
                narrations = await split_narration_script(text)
                logger.info(f"✅ Split script into {len(narrations)} segments (by lines)")
                logger.info(f"   Note: n_scenes={n_scenes} is ignored in fixed mode")

            # ========== Step 2: Generate image prompts ==========
            self._report_progress(progress_callback, "generating_image_prompts", 0.15)

            # Override prompt_prefix if provided
            original_prefix = None
            if prompt_prefix is not None:
                image_config = self.core.config.get("comfyui", {}).get("image", {})
                original_prefix = image_config.get("prompt_prefix")
                image_config["prompt_prefix"] = prompt_prefix
                logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")

            try:
                # Create progress callback wrapper for image prompt generation
                def image_prompt_progress(completed: int, total: int, message: str):
                    batch_progress = completed / total if total > 0 else 0
                    overall_progress = 0.15 + (batch_progress * 0.15)
                    self._report_progress(
                        progress_callback,
                        "generating_image_prompts",
                        overall_progress,
                        extra_info=message
                    )

                # Generate base image prompts
                base_image_prompts = await generate_image_prompts(
                    self.llm,
                    narrations=narrations,
                    min_words=min_image_prompt_words,
                    max_words=max_image_prompt_words,
                    progress_callback=image_prompt_progress
                )

                # Apply prompt prefix
                from pixelle_video.utils.prompt_helper import build_image_prompt
                image_config = self.core.config.get("comfyui", {}).get("image", {})
                prompt_prefix_to_use = prompt_prefix if prompt_prefix is not None else image_config.get("prompt_prefix", "")

                image_prompts = []
                for base_prompt in base_image_prompts:
                    final_prompt = build_image_prompt(base_prompt, prompt_prefix_to_use)
                    image_prompts.append(final_prompt)

            finally:
                # Restore original prompt_prefix
                if original_prefix is not None:
                    image_config["prompt_prefix"] = original_prefix

            logger.info(f"✅ Generated {len(image_prompts)} image prompts")

            # ========== Step 3: Create frames ==========
            for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
                frame = StoryboardFrame(
                    index=i,
                    narration=narration,
                    image_prompt=image_prompt,
                    created_at=datetime.now()
                )
                storyboard.frames.append(frame)

            # ========== Step 4: Process each frame ==========
            for i, frame in enumerate(storyboard.frames):
                base_progress = 0.2
                frame_range = 0.6
                per_frame_progress = frame_range / len(storyboard.frames)

                # Create frame-specific progress callback
                def frame_progress_callback(event: ProgressEvent):
                    overall_progress = base_progress + (per_frame_progress * i) + (per_frame_progress * event.progress)
                    if progress_callback:
                        adjusted_event = ProgressEvent(
                            event_type=event.event_type,
                            progress=overall_progress,
                            frame_current=event.frame_current,
                            frame_total=event.frame_total,
                            step=event.step,
                            action=event.action
                        )
                        progress_callback(adjusted_event)

                # Report frame start
                self._report_progress(
                    progress_callback,
                    "processing_frame",
                    base_progress + (per_frame_progress * i),
                    frame_current=i+1,
                    frame_total=len(storyboard.frames)
                )

                processed_frame = await self.core.frame_processor(
                    frame=frame,
                    storyboard=storyboard,
                    config=config,
                    total_frames=len(storyboard.frames),
                    progress_callback=frame_progress_callback
                )
                storyboard.total_duration += processed_frame.duration
                logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)")

            # ========== Step 5: Concatenate videos ==========
            self._report_progress(progress_callback, "concatenating", 0.85)
            segment_paths = [frame.video_segment_path for frame in storyboard.frames]

            from pixelle_video.services.video import VideoService
            video_service = VideoService()

            final_video_path = video_service.concat_videos(
                videos=segment_paths,
                output=output_path,
                bgm_path=bgm_path,
                bgm_volume=bgm_volume,
                bgm_mode=bgm_mode
            )

            storyboard.final_video_path = final_video_path
            storyboard.completed_at = datetime.now()

            # Copy to user-specified path if provided
            if user_specified_output:
                import shutil
                Path(user_specified_output).parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(final_video_path, user_specified_output)
                logger.info(f"📹 Final video copied to: {user_specified_output}")
                final_video_path = user_specified_output
                storyboard.final_video_path = user_specified_output

            logger.success(f"🎬 Video generation completed: {final_video_path}")

            # ========== Step 6: Create result ==========
            self._report_progress(progress_callback, "completed", 1.0)

            video_path_obj = Path(final_video_path)
            file_size = video_path_obj.stat().st_size

            result = VideoGenerationResult(
                video_path=final_video_path,
                storyboard=storyboard,
                duration=storyboard.total_duration,
                file_size=file_size
            )

            logger.info(f"✅ Generated video: {final_video_path}")
            logger.info(f"   Duration: {storyboard.total_duration:.2f}s")
            logger.info(f"   Size: {file_size / (1024*1024):.2f} MB")
            logger.info(f"   Frames: {len(storyboard.frames)}")

            return result

        except Exception as e:
            logger.error(f"❌ Video generation failed: {e}")
            raise