# reelforge/services/video_generator.py
"""
Video Generator Service
End-to-end service for generating short videos from content.
"""
from datetime import datetime
from pathlib import Path
from typing import Optional, Callable, Literal
from loguru import logger
from reelforge.models.progress import ProgressEvent
from reelforge.models.storyboard import (
Storyboard,
StoryboardFrame,
StoryboardConfig,
ContentMetadata,
VideoGenerationResult
)
class VideoGeneratorService:
"""
Video generation service
Orchestrates the complete pipeline:
1. Generate narrations (LLM)
2. Generate image prompts (LLM)
3. Process each frame (TTS + Image + Compose + Video)
4. Concatenate all segments
5. Add BGM (optional)
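
    Minimal usage sketch (illustrative; `core` is an initialized ReelForgeCore):
        service = VideoGeneratorService(core)
        result = await service(text="...", mode="generate")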
"""
def __init__(self, reelforge_core):
"""
Initialize video generator service
Args:
reelforge_core: ReelForgeCore instance
"""
self.core = reelforge_core
async def __call__(
self,
# === Input ===
text: str,
# === Processing Mode ===
mode: Literal["generate", "fixed"] = "generate",
# === Optional Title ===
title: Optional[str] = None,
# === Basic Config ===
n_scenes: int = 5, # Only used in generate mode; ignored in fixed mode
voice_id: str = "zh-CN-YunjianNeural",
output_path: Optional[str] = None,
# === LLM Parameters ===
min_narration_words: int = 5,
max_narration_words: int = 20,
min_image_prompt_words: int = 30,
max_image_prompt_words: int = 60,
# === Image Parameters ===
image_width: int = 1024,
image_height: int = 1024,
image_preset: Optional[str] = None,
# === Video Parameters ===
video_width: int = 1080,
video_height: int = 1920,
video_fps: int = 30,
# === Frame Template ===
frame_template: Optional[str] = None,
# === Image Style ===
prompt_prefix: Optional[str] = None,
# === BGM Parameters ===
bgm_path: Optional[str] = None,
bgm_volume: float = 0.2,
bgm_mode: Literal["once", "loop"] = "loop",
# === Advanced Options ===
content_metadata: Optional[ContentMetadata] = None,
progress_callback: Optional[Callable[[ProgressEvent], None]] = None,
) -> VideoGenerationResult:
"""
Generate short video from text input
Args:
text: Text input (required)
- For generate mode: topic/theme (e.g., "如何提高学习效率")
- For fixed mode: complete narration script (will be split into frames)
mode: Processing mode (default "generate")
- "generate": LLM generates narrations from topic/theme, creates n_scenes
- "fixed": Split existing script into frames, preserves original text
Note: In fixed mode, n_scenes is ignored (uses actual split count)
title: Video title (optional)
- If provided, use it as the video title
- If not provided:
* generate mode → use text as title
* fixed mode → LLM generates title from script
n_scenes: Number of storyboard scenes (default 5)
Only effective in generate mode; ignored in fixed mode
voice_id: TTS voice ID (default "zh-CN-YunjianNeural")
output_path: Output video path (auto-generated if None)
min_narration_words: Min narration length (generate mode only)
max_narration_words: Max narration length (generate mode only)
min_image_prompt_words: Min image prompt length
max_image_prompt_words: Max image prompt length
image_width: Generated image width (default 1024)
image_height: Generated image height (default 1024)
image_preset: Image workflow preset (e.g., "flux", "sdxl", None = use default)
video_width: Final video width (default 1080)
video_height: Final video height (default 1920)
video_fps: Video frame rate (default 30)
frame_template: HTML template filename or path (None = use PIL)
e.g., "default.html", "modern.html", "neon.html", or custom path
prompt_prefix: Image prompt prefix (overrides config.yaml if provided)
e.g., "anime style, vibrant colors" or "" for no prefix
bgm_path: BGM path (filename like "default.mp3", custom path, or None)
bgm_volume: BGM volume 0.0-1.0 (default 0.2)
bgm_mode: BGM mode "once" or "loop" (default "loop")
content_metadata: Content metadata (optional, for display)
            progress_callback: Progress callback function(event: ProgressEvent)
Returns:
VideoGenerationResult with video path and metadata
Examples:
# Generate mode: LLM creates narrations from topic
>>> result = await reelforge.generate_video(
... text="如何在信息爆炸时代保持深度思考",
... mode="generate",
... n_scenes=5,
... bgm_path="default"
... )
# Fixed mode: Use existing script (split by paragraphs)
>>> script = '''大家好,今天跟你分享三个学习技巧
...
            ... 第一个技巧是专注力训练，每天冥想10分钟
...
... 第二个技巧是主动回忆,学完立即复述'''
>>> result = await reelforge.generate_video(
... text=script,
... mode="fixed",
... title="三个学习技巧"
... )
# Fixed mode: Use existing script (split by sentences)
>>> result = await reelforge.generate_video(
... text="第一点是专注。第二点是复述。第三点是重复。",
... mode="fixed"
... )
>>> print(result.video_path)
"""
# ========== Step 0: Process text and determine title ==========
logger.info(f"🚀 Starting video generation in '{mode}' mode")
logger.info(f" Text length: {len(text)} chars")
# Determine final title (priority: user-specified > auto-generated)
if title:
# User specified title, use it directly
final_title = title
logger.info(f" Title: '{title}' (user-specified)")
else:
# Auto-generate title based on mode
if mode == "generate":
# Use text as title (it's a topic/theme)
final_title = text[:20] if len(text) > 20 else text
logger.info(f" Title: '{final_title}' (from text)")
else: # fixed
# Generate title from script using LLM
self._report_progress(progress_callback, "generating_title", 0.01)
final_title = await self._generate_title_from_content(text)
logger.info(f" Title: '{final_title}' (LLM-generated)")
# Auto-generate output path if not provided
if output_path is None:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
# Use first 10 chars of final_title for filename
safe_name = final_title[:10].replace('/', '_').replace(' ', '_')
output_path = f"output/{timestamp}_{safe_name}.mp4"
# Ensure output directory exists
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
# Create storyboard config
config = StoryboardConfig(
n_storyboard=n_scenes,
min_narration_words=min_narration_words,
max_narration_words=max_narration_words,
min_image_prompt_words=min_image_prompt_words,
max_image_prompt_words=max_image_prompt_words,
video_width=video_width,
video_height=video_height,
video_fps=video_fps,
voice_id=voice_id,
image_width=image_width,
image_height=image_height,
image_preset=image_preset,
frame_template=frame_template
)
# Create storyboard
storyboard = Storyboard(
topic=final_title, # Use final_title as video title
config=config,
content_metadata=content_metadata,
created_at=datetime.now()
)
# Store storyboard in core for access in storyboard processor
self.core._current_storyboard = storyboard
try:
# ========== Step 1: Generate/Split narrations ==========
if mode == "generate":
# Generate narrations using LLM
self._report_progress(progress_callback, "generating_narrations", 0.05)
narrations = await self.core.narration_generator.generate_narrations(
config=config,
source_type="topic",
content_metadata=None,
topic=text,
content=None
)
logger.info(f"✅ Generated {len(narrations)} narrations")
else: # fixed
# Split fixed script using LLM (preserves original text)
self._report_progress(progress_callback, "splitting_script", 0.05)
narrations = await self._split_narration_script(text, config)
logger.info(f"✅ Split script into {len(narrations)} segments")
logger.info(f" Note: n_scenes={n_scenes} is ignored in fixed mode")
# Step 2: Generate image prompts
self._report_progress(progress_callback, "generating_image_prompts", 0.15)
# Override prompt_prefix if provided (temporarily modify config)
original_prefix = None
if prompt_prefix is not None:
image_config = self.core.config.get("image", {})
original_prefix = image_config.get("prompt_prefix")
image_config["prompt_prefix"] = prompt_prefix
logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
try:
# Create progress callback wrapper for image prompt generation (15%-30% range)
def image_prompt_progress(completed: int, total: int, message: str):
# Map batch progress to 15%-30% range
batch_progress = completed / total if total > 0 else 0
overall_progress = 0.15 + (batch_progress * 0.15) # 15% -> 30%
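                    # e.g. completed=2, total=4 -> 0.15 + 0.5 * 0.15 = 0.225 (22.5% overall)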
self._report_progress(
progress_callback,
"generating_image_prompts",
overall_progress,
extra_info=message
)
image_prompts = await self.core.image_prompt_generator.generate_image_prompts(
narrations=narrations,
config=config,
progress_callback=image_prompt_progress
)
            finally:
                # Restore original prompt_prefix (remove the override if no prefix existed before)
                if prompt_prefix is not None:
                    if original_prefix is not None:
                        image_config["prompt_prefix"] = original_prefix
                    else:
                        image_config.pop("prompt_prefix", None)
logger.info(f"✅ Generated {len(image_prompts)} image prompts")
# Step 3: Create frames
for i, (narration, image_prompt) in enumerate(zip(narrations, image_prompts)):
frame = StoryboardFrame(
index=i,
narration=narration,
image_prompt=image_prompt,
created_at=datetime.now()
)
storyboard.frames.append(frame)
# Step 4: Process each frame
for i, frame in enumerate(storyboard.frames):
# Calculate fine-grained progress for this frame
base_progress = 0.2 # Frames processing starts at 20%
frame_range = 0.6 # Frames processing takes 60% (20%-80%)
per_frame_progress = frame_range / len(storyboard.frames)
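                # e.g. with 5 frames, each frame spans 0.6 / 5 = 0.12 of overall progress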
# Create frame-specific progress callback
def frame_progress_callback(event: ProgressEvent):
"""Report sub-step progress within current frame"""
# Calculate overall progress: base + previous frames + current frame progress
overall_progress = base_progress + (per_frame_progress * i) + (per_frame_progress * event.progress)
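                    # e.g. frame i=1 of 5 at 50% sub-progress -> 0.2 + 0.12 + 0.06 = 0.38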
# Forward the event with adjusted overall progress
if progress_callback:
adjusted_event = ProgressEvent(
event_type=event.event_type,
progress=overall_progress,
frame_current=event.frame_current,
frame_total=event.frame_total,
step=event.step,
action=event.action
)
progress_callback(adjusted_event)
# Report frame start
self._report_progress(
progress_callback,
"processing_frame",
base_progress + (per_frame_progress * i),
frame_current=i+1,
frame_total=len(storyboard.frames)
)
processed_frame = await self.core.storyboard_processor.process_frame(
frame=frame,
config=config,
total_frames=len(storyboard.frames),
progress_callback=frame_progress_callback
)
storyboard.total_duration += processed_frame.duration
logger.info(f"✅ Frame {i+1} completed ({processed_frame.duration:.2f}s)")
# Step 5: Concatenate videos
self._report_progress(progress_callback, "concatenating", 0.85)
segment_paths = [frame.video_segment_path for frame in storyboard.frames]
from reelforge.services.video import VideoService
video_service = VideoService()
final_video_path = video_service.concat_videos(
videos=segment_paths,
output=output_path,
bgm_path=bgm_path,
bgm_volume=bgm_volume,
bgm_mode=bgm_mode
)
storyboard.final_video_path = final_video_path
storyboard.completed_at = datetime.now()
logger.success(f"🎬 Video generation completed: {final_video_path}")
# Step 6: Create result
self._report_progress(progress_callback, "finalizing", 1.0)
video_path_obj = Path(final_video_path)
file_size = video_path_obj.stat().st_size
result = VideoGenerationResult(
video_path=final_video_path,
storyboard=storyboard,
duration=storyboard.total_duration,
file_size=file_size
)
logger.info(f"✅ Generated video: {final_video_path}")
logger.info(f" Duration: {storyboard.total_duration:.2f}s")
logger.info(f" Size: {file_size / (1024*1024):.2f} MB")
logger.info(f" Frames: {len(storyboard.frames)}")
return result
except Exception as e:
logger.error(f"❌ Video generation failed: {e}")
raise
def _report_progress(
self,
callback: Optional[Callable[[ProgressEvent], None]],
event_type: str,
progress: float,
**kwargs
):
"""
Report progress via callback
Args:
callback: Progress callback function
event_type: Type of progress event
progress: Progress value (0.0-1.0)
**kwargs: Additional event-specific parameters (frame_current, frame_total, etc.)
"""
        if callback:
            event = ProgressEvent(event_type=event_type, progress=progress, **kwargs)
            callback(event)
        logger.debug(f"Progress: {progress*100:.0f}% - {event_type}")
def _parse_json(self, text: str) -> dict:
"""
Parse JSON from text, with fallback to extract JSON from markdown code blocks
Args:
text: Text containing JSON
Returns:
Parsed JSON dict
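        Example (illustrative):
            >>> self._parse_json('```json\n{"narrations": ["第一点"]}\n```')
            {'narrations': ['第一点']}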
"""
import json
import re
# Try direct parsing first
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Try to extract JSON from markdown code block
json_pattern = r'```(?:json)?\s*([\s\S]+?)\s*```'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(1))
except json.JSONDecodeError:
pass
# Try to find any JSON object in the text (flexible pattern for narrations)
json_pattern = r'\{[^{}]*"narrations"\s*:\s*\[[^\]]*\][^{}]*\}'
match = re.search(json_pattern, text, re.DOTALL)
if match:
try:
return json.loads(match.group(0))
except json.JSONDecodeError:
pass
# If all fails, raise error
raise json.JSONDecodeError("No valid JSON found", text, 0)
async def _split_narration_script(self, script: str, config: StoryboardConfig) -> list[str]:
"""
Split user-provided narration script into segments (programmatic splitting).
Priority:
        1. Split by major punctuation (newline, 。！？；)
        2. If a segment exceeds max_len, split by comma (，)
3. If still > max_len, keep original (no force split)
4. Merge segments < min_len with next segment
Args:
script: Fixed narration script
config: Storyboard configuration (for length guidelines)
Returns:
List of narration segments
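        Example (illustrative, with default length settings):
            "第一点是专注。第二点是复述。" -> ["第一点是专注", "第二点是复述"]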
"""
import re
min_len = config.min_narration_words
max_len = config.max_narration_words
logger.info(f"Splitting script (length: {len(script)} chars) with target: {min_len}-{max_len} chars")
# Step 1: Split by major punctuation (newline, period, exclamation, question mark, semicolon)
        major_delimiters = r'[\n。！？；]'
parts = re.split(f'({major_delimiters})', script)
# Reconstruct sentences (text only, remove trailing punctuation)
sentences = []
for i in range(0, len(parts)-1, 2):
text = parts[i].strip()
if text:
sentences.append(text)
# Handle last part if no delimiter
if len(parts) % 2 == 1 and parts[-1].strip():
sentences.append(parts[-1].strip())
logger.debug(f"After major split: {len(sentences)} sentences")
# Step 2: For segments > max_len, try splitting by comma
final_segments = []
for sentence in sentences:
sent_len = len(sentence)
# If within range or short, keep as is
if sent_len <= max_len:
final_segments.append(sentence)
continue
            # Too long: try splitting by comma (，)
            comma_parts = re.split(r'([，,])', sentence)  # capture commas so they stay attached to the text
sub_segments = []
current = ""
for part in comma_parts:
if part == '':
continue
if not current:
current = part
elif len(current + part) <= max_len:
current += part
else:
# Current segment is ready
if current:
sub_segments.append(current.strip())
current = part
# Add last segment
if current:
sub_segments.append(current.strip())
# If comma splitting worked (resulted in multiple segments), use it
if sub_segments and len(sub_segments) > 1:
final_segments.extend(sub_segments)
else:
# Keep original sentence even if > max_len
logger.debug(f"Keeping long segment ({sent_len} chars): {sentence[:30]}...")
final_segments.append(sentence)
# Step 3: Merge segments that are too short
merged_segments = []
i = 0
while i < len(final_segments):
segment = final_segments[i]
# If too short and not the last one, try merging with next
if len(segment) < min_len and i < len(final_segments) - 1:
next_segment = final_segments[i + 1]
merged = segment + "" + next_segment
# If merged result is within max_len, use it
if len(merged) <= max_len:
merged_segments.append(merged)
i += 2 # Skip next segment
continue
# Otherwise keep as is
merged_segments.append(segment)
i += 1
# Clean up
result = [s.strip() for s in merged_segments if s.strip()]
# Log statistics
lengths = [len(s) for s in result]
logger.info(f"Script split into {len(result)} segments")
if lengths:
logger.info(f" Min: {min(lengths)} chars, Max: {max(lengths)} chars, Avg: {sum(lengths)//len(lengths)} chars")
in_range = sum(1 for l in lengths if min_len <= l <= max_len)
too_short = sum(1 for l in lengths if l < min_len)
too_long = sum(1 for l in lengths if l > max_len)
logger.info(f" In range ({min_len}-{max_len}): {in_range}/{len(result)} ({in_range*100//len(result)}%)")
if too_short:
logger.info(f" Too short (< {min_len}): {too_short}/{len(result)} ({too_short*100//len(result)}%)")
if too_long:
logger.info(f" Too long (> {max_len}): {too_long}/{len(result)} ({too_long*100//len(result)}%)")
return result
async def _generate_title_from_content(self, content: str) -> str:
"""
Generate a short, attractive title from user content using LLM
Args:
content: User-provided content
Returns:
            Generated title (target is 10 characters or less; hard-capped at 20)
"""
from reelforge.prompts import build_title_generation_prompt
# Build prompt using template
prompt = build_title_generation_prompt(content, max_length=500)
# Call LLM to generate title
response = await self.core.llm(
prompt=prompt,
temperature=0.7,
max_tokens=50
)
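        # max_tokens=50 keeps the completion short, since only a brief title is expected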
# Clean up response
title = response.strip()
# Remove quotes if present
if title.startswith('"') and title.endswith('"'):
title = title[1:-1]
if title.startswith("'") and title.endswith("'"):
title = title[1:-1]
# Limit to 20 chars max (safety)
if len(title) > 20:
title = title[:20]
return title