AI-Video/pixelle_video/services/persistence.py

# Copyright (C) 2025 AIDC-AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Persistence Service

Handles task metadata and storyboard persistence to filesystem.
"""

import json
from pathlib import Path
from typing import List, Optional, Dict, Any
from datetime import datetime
from loguru import logger

from pixelle_video.models.storyboard import Storyboard, StoryboardFrame, StoryboardConfig, ContentMetadata


class PersistenceService:
    """
    Task persistence service using filesystem (JSON)

    File structure:
        output/
        └── {task_id}/
            ├── metadata.json          # Task metadata (input, result, config)
            ├── storyboard.json        # Storyboard data (frames, prompts)
            ├── final.mp4
            └── frames/
                ├── 01_audio.mp3
                ├── 01_image.png
                └── ...

    Usage:
        persistence = PersistenceService()

        # Save metadata
        await persistence.save_task_metadata(task_id, metadata)

        # Save storyboard
        await persistence.save_storyboard(task_id, storyboard)

        # Load task
        metadata = await persistence.load_task_metadata(task_id)
        storyboard = await persistence.load_storyboard(task_id)

        # List all tasks
        tasks = await persistence.list_tasks(status="completed", limit=50)
    """

    def __init__(self, output_dir: str = "output"):
        """
        Initialize persistence service

        Args:
            output_dir: Base output directory (default: "output")
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

        # Index file for fast listing
        self.index_file = self.output_dir / ".index.json"
        self._ensure_index()

    def get_task_dir(self, task_id: str) -> Path:
        """Get task directory path"""
        return self.output_dir / task_id

    def get_metadata_path(self, task_id: str) -> Path:
        """Get metadata.json path"""
        return self.get_task_dir(task_id) / "metadata.json"

    def get_storyboard_path(self, task_id: str) -> Path:
        """Get storyboard.json path"""
        return self.get_task_dir(task_id) / "storyboard.json"

    # ========================================================================
    # Metadata Operations
    # ========================================================================

    async def save_task_metadata(
        self,
        task_id: str,
        metadata: Dict[str, Any]
    ):
        """
        Save task metadata to filesystem

        Args:
            task_id: Task ID
            metadata: Metadata dict with structure:
                {
                    "task_id": str,
                    "created_at": str,
                    "completed_at": str (optional),
                    "status": str,
                    "input": dict,
                    "result": dict (optional),
                    "config": dict
                }
        """
        try:
            task_dir = self.get_task_dir(task_id)
            task_dir.mkdir(parents=True, exist_ok=True)

            metadata_path = self.get_metadata_path(task_id)

            # Ensure task_id is set
            metadata["task_id"] = task_id

            # Convert datetime objects to ISO format strings
            if "created_at" in metadata and isinstance(metadata["created_at"], datetime):
                metadata["created_at"] = metadata["created_at"].isoformat()
            if "completed_at" in metadata and isinstance(metadata["completed_at"], datetime):
                metadata["completed_at"] = metadata["completed_at"].isoformat()

            with open(metadata_path, "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2, ensure_ascii=False)

            logger.debug(f"Saved task metadata: {task_id}")

            # Update index
            await self._update_index_for_task(task_id, metadata)

        except Exception as e:
            logger.error(f"Failed to save task metadata {task_id}: {e}")
            raise

    async def load_task_metadata(self, task_id: str) -> Optional[Dict[str, Any]]:
        """
        Load task metadata from filesystem

        Args:
            task_id: Task ID

        Returns:
            Metadata dict or None if not found
        """
        try:
            metadata_path = self.get_metadata_path(task_id)

            if not metadata_path.exists():
                return None

            with open(metadata_path, "r", encoding="utf-8") as f:
                metadata = json.load(f)

            return metadata

        except Exception as e:
            logger.error(f"Failed to load task metadata {task_id}: {e}")
            return None

    async def update_task_status(
        self,
        task_id: str,
        status: str,
        error: Optional[str] = None
    ):
        """
        Update task status in metadata

        Args:
            task_id: Task ID
            status: New status (pending, running, completed, failed, cancelled)
            error: Error message (optional, for failed status)
        """
        try:
            metadata = await self.load_task_metadata(task_id)
            if not metadata:
                logger.warning(f"Cannot update status: task {task_id} not found")
                return

            metadata["status"] = status

            if status in ["completed", "failed", "cancelled"]:
                metadata["completed_at"] = datetime.now().isoformat()

            if error:
                metadata["error"] = error

            await self.save_task_metadata(task_id, metadata)

        except Exception as e:
            logger.error(f"Failed to update task status {task_id}: {e}")

    # ========================================================================
    # Storyboard Operations
    # ========================================================================

    async def save_storyboard(
        self,
        task_id: str,
        storyboard: Storyboard
    ):
        """
        Save storyboard to filesystem

        Args:
            task_id: Task ID
            storyboard: Storyboard instance
        """
        try:
            task_dir = self.get_task_dir(task_id)
            task_dir.mkdir(parents=True, exist_ok=True)

            storyboard_path = self.get_storyboard_path(task_id)

            # Convert storyboard to dict
            storyboard_dict = self._storyboard_to_dict(storyboard)

            with open(storyboard_path, "w", encoding="utf-8") as f:
                json.dump(storyboard_dict, f, indent=2, ensure_ascii=False)

            logger.debug(f"Saved storyboard: {task_id}")

        except Exception as e:
            logger.error(f"Failed to save storyboard {task_id}: {e}")
            raise

    async def load_storyboard(self, task_id: str) -> Optional[Storyboard]:
        """
        Load storyboard from filesystem

        Args:
            task_id: Task ID

        Returns:
            Storyboard instance or None if not found
        """
        try:
            storyboard_path = self.get_storyboard_path(task_id)

            if not storyboard_path.exists():
                return None

            with open(storyboard_path, "r", encoding="utf-8") as f:
                storyboard_dict = json.load(f)

            # Convert dict to storyboard
            storyboard = self._dict_to_storyboard(storyboard_dict)

            return storyboard

        except Exception as e:
            logger.error(f"Failed to load storyboard {task_id}: {e}")
            return None

    # ========================================================================
    # Task Listing & Querying
    # ========================================================================

    async def list_tasks(
        self,
        status: Optional[str] = None,
        limit: int = 50,
        offset: int = 0
    ) -> List[Dict[str, Any]]:
        """
        List tasks with optional filtering

        Args:
            status: Filter by status (pending, running, completed, failed, cancelled)
            limit: Maximum number of tasks to return
            offset: Number of tasks to skip

        Returns:
            List of metadata dicts, sorted by created_at descending
        """
        try:
            tasks = []

            # Scan all task directories
            for task_dir in self.output_dir.iterdir():
                if not task_dir.is_dir():
                    continue

                metadata_path = task_dir / "metadata.json"
                if not metadata_path.exists():
                    continue

                try:
                    with open(metadata_path, "r", encoding="utf-8") as f:
                        metadata = json.load(f)

                    # Filter by status
                    if status and metadata.get("status") != status:
                        continue

                    tasks.append(metadata)

                except Exception as e:
                    logger.warning(f"Failed to load metadata from {task_dir}: {e}")
                    continue

            # Sort by created_at descending
            tasks.sort(key=lambda t: t.get("created_at", ""), reverse=True)

            # Apply pagination
            return tasks[offset:offset + limit]

        except Exception as e:
            logger.error(f"Failed to list tasks: {e}")
            return []

    async def task_exists(self, task_id: str) -> bool:
        """Check if task exists"""
        return self.get_task_dir(task_id).exists()

    async def delete_task(self, task_id: str):
        """
        Delete task directory and all files

        Args:
            task_id: Task ID
        """
        try:
            task_dir = self.get_task_dir(task_id)

            if task_dir.exists():
                import shutil
                shutil.rmtree(task_dir)
                logger.info(f"Deleted task: {task_id}")

        except Exception as e:
            logger.error(f"Failed to delete task {task_id}: {e}")
            raise

    # ========================================================================
    # Serialization Helpers
    # ========================================================================

    def _storyboard_to_dict(self, storyboard: Storyboard) -> Dict[str, Any]:
        """Convert Storyboard to dict for JSON serialization"""
        return {
            "title": storyboard.title,
            "config": self._config_to_dict(storyboard.config),
            "frames": [self._frame_to_dict(frame) for frame in storyboard.frames],
            "content_metadata": self._content_metadata_to_dict(storyboard.content_metadata) if storyboard.content_metadata else None,
            "final_video_path": storyboard.final_video_path,
            "total_duration": storyboard.total_duration,
            "created_at": storyboard.created_at.isoformat() if storyboard.created_at else None,
            "completed_at": storyboard.completed_at.isoformat() if storyboard.completed_at else None,
        }

    def _dict_to_storyboard(self, data: Dict[str, Any]) -> Storyboard:
        """Convert dict to Storyboard instance"""
        return Storyboard(
            title=data["title"],
            config=self._dict_to_config(data["config"]),
            frames=[self._dict_to_frame(frame_data) for frame_data in data["frames"]],
            content_metadata=self._dict_to_content_metadata(data["content_metadata"]) if data.get("content_metadata") else None,
            final_video_path=data.get("final_video_path"),
            total_duration=data.get("total_duration", 0.0),
            created_at=datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None,
            completed_at=datetime.fromisoformat(data["completed_at"]) if data.get("completed_at") else None,
        )

    def _config_to_dict(self, config: StoryboardConfig) -> Dict[str, Any]:
        """Convert StoryboardConfig to dict"""
        return {
            "task_id": config.task_id,
            "n_storyboard": config.n_storyboard,
            "min_narration_words": config.min_narration_words,
            "max_narration_words": config.max_narration_words,
            "min_image_prompt_words": config.min_image_prompt_words,
            "max_image_prompt_words": config.max_image_prompt_words,
            "video_fps": config.video_fps,
            "tts_inference_mode": config.tts_inference_mode,
            "voice_id": config.voice_id,
            "tts_workflow": config.tts_workflow,
            "tts_speed": config.tts_speed,
            "ref_audio": config.ref_audio,
            "media_width": config.media_width,
            "media_height": config.media_height,
            "media_workflow": config.media_workflow,
            "frame_template": config.frame_template,
            "template_params": config.template_params,
        }

    def _dict_to_config(self, data: Dict[str, Any]) -> StoryboardConfig:
        """Convert dict to StoryboardConfig"""
        return StoryboardConfig(
            task_id=data.get("task_id"),
            n_storyboard=data.get("n_storyboard", 5),
            min_narration_words=data.get("min_narration_words", 5),
            max_narration_words=data.get("max_narration_words", 20),
            min_image_prompt_words=data.get("min_image_prompt_words", 30),
            max_image_prompt_words=data.get("max_image_prompt_words", 60),
            video_fps=data.get("video_fps", 30),
            tts_inference_mode=data.get("tts_inference_mode", "local"),
            voice_id=data.get("voice_id"),
            tts_workflow=data.get("tts_workflow"),
            tts_speed=data.get("tts_speed"),
            ref_audio=data.get("ref_audio"),
            media_width=data.get("media_width", data.get("image_width", 1024)),  # Backward compatibility
            media_height=data.get("media_height", data.get("image_height", 1024)),  # Backward compatibility
            media_workflow=data.get("media_workflow", data.get("image_workflow")),  # Backward compatibility
            frame_template=data.get("frame_template", "1080x1920/default.html"),
            template_params=data.get("template_params"),
        )

    def _frame_to_dict(self, frame: StoryboardFrame) -> Dict[str, Any]:
        """Convert StoryboardFrame to dict"""
        return {
            "index": frame.index,
            "narration": frame.narration,
            "image_prompt": frame.image_prompt,
            "audio_path": frame.audio_path,
            "media_type": frame.media_type,
            "image_path": frame.image_path,
            "video_path": frame.video_path,
            "composed_image_path": frame.composed_image_path,
            "video_segment_path": frame.video_segment_path,
            "duration": frame.duration,
            "created_at": frame.created_at.isoformat() if frame.created_at else None,
        }

    def _dict_to_frame(self, data: Dict[str, Any]) -> StoryboardFrame:
        """Convert dict to StoryboardFrame"""
        return StoryboardFrame(
            index=data["index"],
            narration=data["narration"],
            image_prompt=data["image_prompt"],
            audio_path=data.get("audio_path"),
            media_type=data.get("media_type"),
            image_path=data.get("image_path"),
            video_path=data.get("video_path"),
            composed_image_path=data.get("composed_image_path"),
            video_segment_path=data.get("video_segment_path"),
            duration=data.get("duration", 0.0),
            created_at=datetime.fromisoformat(data["created_at"]) if data.get("created_at") else None,
        )

    def _content_metadata_to_dict(self, metadata: ContentMetadata) -> Dict[str, Any]:
        """Convert ContentMetadata to dict"""
        return {
            "title": metadata.title,
            "author": metadata.author,
            "subtitle": metadata.subtitle,
            "genre": metadata.genre,
            "summary": metadata.summary,
            "publication_year": metadata.publication_year,
            "cover_url": metadata.cover_url,
        }

    def _dict_to_content_metadata(self, data: Dict[str, Any]) -> ContentMetadata:
        """Convert dict to ContentMetadata"""
        return ContentMetadata(
            title=data["title"],
            author=data.get("author"),
            subtitle=data.get("subtitle"),
            genre=data.get("genre"),
            summary=data.get("summary"),
            publication_year=data.get("publication_year"),
            cover_url=data.get("cover_url"),
        )

    # ========================================================================
    # Index Management (for fast listing)
    # ========================================================================

    def _ensure_index(self):
        """Ensure index file exists, create if not"""
        if not self.index_file.exists():
            self._save_index({"version": "1.0", "tasks": []})

    def _load_index(self) -> Dict[str, Any]:
        """Load index from file"""
        try:
            with open(self.index_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            logger.error(f"Failed to load index: {e}")
            return {"version": "1.0", "tasks": []}

    def _save_index(self, index_data: Dict[str, Any]):
        """Save index to file"""
        try:
            index_data["last_updated"] = datetime.now().isoformat()
            with open(self.index_file, "w", encoding="utf-8") as f:
                json.dump(index_data, f, ensure_ascii=False, indent=2)
        except Exception as e:
            logger.error(f"Failed to save index: {e}")

    async def _update_index_for_task(self, task_id: str, metadata: Dict[str, Any]):
        """Update index entry for a specific task"""
        index = self._load_index()

        # Try to get title from multiple sources
        title = metadata.get("input", {}).get("title")
        if not title or title == "":
            # Try to get title from storyboard if input title is empty
            storyboard = await self.load_storyboard(task_id)
            if storyboard and storyboard.title:
                title = storyboard.title
            else:
                # Fall back to using input text preview
                input_text = metadata.get("input", {}).get("text", "")
                if input_text:
                    # Use first 30 characters of input text as title
                    title = input_text[:30] + ("..." if len(input_text) > 30 else "")
                else:
                    title = "Untitled"

        # Extract key info for index
        index_entry = {
            "task_id": task_id,
            "created_at": metadata.get("created_at"),
            "completed_at": metadata.get("completed_at"),
            "status": metadata.get("status", "unknown"),
            "title": title,
            "duration": metadata.get("result", {}).get("duration", 0),
            "n_frames": metadata.get("result", {}).get("n_frames", 0),
            "file_size": metadata.get("result", {}).get("file_size", 0),
            "video_path": metadata.get("result", {}).get("video_path"),
        }

        # Update or append
        tasks = index.get("tasks", [])
        existing_idx = next((i for i, t in enumerate(tasks) if t["task_id"] == task_id), None)

        if existing_idx is not None:
            tasks[existing_idx] = index_entry
        else:
            tasks.append(index_entry)

        index["tasks"] = tasks
        self._save_index(index)

    async def rebuild_index(self):
        """Rebuild index by scanning all task directories"""
        logger.info("Rebuilding task index...")
        index = {"version": "1.0", "tasks": []}

        # Scan all directories
        for task_dir in self.output_dir.iterdir():
            if not task_dir.is_dir() or task_dir.name.startswith("."):
                continue

            task_id = task_dir.name
            metadata = await self.load_task_metadata(task_id)

            if metadata:
                # Try to get title from multiple sources
                title = metadata.get("input", {}).get("title")
                if not title or title == "":
                    # Try to get title from storyboard if input title is empty
                    storyboard = await self.load_storyboard(task_id)
                    if storyboard and storyboard.title:
                        title = storyboard.title
                    else:
                        # Fall back to using input text preview
                        input_text = metadata.get("input", {}).get("text", "")
                        if input_text:
                            # Use first 30 characters of input text as title
                            title = input_text[:30] + ("..." if len(input_text) > 30 else "")
                        else:
                            title = "Untitled"

                # Add to index
                index["tasks"].append({
                    "task_id": task_id,
                    "created_at": metadata.get("created_at"),
                    "completed_at": metadata.get("completed_at"),
                    "status": metadata.get("status", "unknown"),
                    "title": title,
                    "duration": metadata.get("result", {}).get("duration", 0),
                    "n_frames": metadata.get("result", {}).get("n_frames", 0),
                    "file_size": metadata.get("result", {}).get("file_size", 0),
                    "video_path": metadata.get("result", {}).get("video_path"),
                })

        self._save_index(index)
        logger.info(f"Index rebuilt: {len(index['tasks'])} tasks")

    # ========================================================================
    # Paginated Listing
    # ========================================================================

    async def list_tasks_paginated(
        self,
        page: int = 1,
        page_size: int = 20,
        status: Optional[str] = None,
        sort_by: str = "created_at",
        sort_order: str = "desc"
    ) -> Dict[str, Any]:
        """
        List tasks with pagination

        Args:
            page: Page number (1-indexed)
            page_size: Items per page
            status: Filter by status (optional)
            sort_by: Sort field (created_at, completed_at, title, duration)
            sort_order: Sort order (asc, desc)

        Returns:
            {
                "tasks": [...],          # List of task summaries
                "total": 100,            # Total matching tasks
                "page": 1,               # Current page
                "page_size": 20,         # Items per page
                "total_pages": 5         # Total pages
            }
        """
        index = self._load_index()
        tasks = index.get("tasks", [])

        # Filter by status
        if status:
            tasks = [t for t in tasks if t.get("status") == status]

        # Sort
        reverse = (sort_order == "desc")
        if sort_by in ["created_at", "completed_at"]:
            tasks.sort(
                key=lambda t: datetime.fromisoformat(t.get(sort_by, "1970-01-01T00:00:00")),
                reverse=reverse
            )
        elif sort_by in ["title", "duration", "n_frames"]:
            tasks.sort(key=lambda t: t.get(sort_by, ""), reverse=reverse)

        # Paginate
        total = len(tasks)
        total_pages = (total + page_size - 1) // page_size
        start_idx = (page - 1) * page_size
        end_idx = start_idx + page_size
        page_tasks = tasks[start_idx:end_idx]

        return {
            "tasks": page_tasks,
            "total": total,
            "page": page,
            "page_size": page_size,
            "total_pages": total_pages,
        }

    # ========================================================================
    # Statistics
    # ========================================================================

    async def get_statistics(self) -> Dict[str, Any]:
        """
        Get statistics about all tasks

        Returns:
            {
                "total_tasks": 100,
                "completed": 95,
                "failed": 5,
                "total_duration": 3600.5,  # seconds
                "total_size": 1024000000,  # bytes
            }
        """
        index = self._load_index()
        tasks = index.get("tasks", [])

        stats = {
            "total_tasks": len(tasks),
            "completed": len([t for t in tasks if t.get("status") == "completed"]),
            "failed": len([t for t in tasks if t.get("status") == "failed"]),
            "total_duration": sum(t.get("duration", 0) for t in tasks),
            "total_size": sum(t.get("file_size", 0) for t in tasks),
        }

        return stats

    # ========================================================================
    # Delete Task
    # ========================================================================

    async def delete_task(self, task_id: str) -> bool:
        """
        Delete a task and all its files

        Args:
            task_id: Task ID to delete

        Returns:
            True if successful, False otherwise
        """
        try:
            import shutil

            task_dir = self.get_task_dir(task_id)
            if task_dir.exists():
                shutil.rmtree(task_dir)
                logger.info(f"Deleted task directory: {task_dir}")

            # Update index
            index = self._load_index()
            tasks = index.get("tasks", [])
            tasks = [t for t in tasks if t["task_id"] != task_id]
            index["tasks"] = tasks
            self._save_index(index)

            return True
        except Exception as e:
            logger.error(f"Failed to delete task {task_id}: {e}")
            return False