to-live-photo/Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift

//
//  WholeImageProcessor.swift
//  LivePhotoCore
//
//  Processes images for Real-ESRGAN model with fixed 512x512 input.
//  Handles scaling, padding, and cropping to preserve original aspect ratio.
//

import CoreGraphics
import CoreVideo
import Foundation
import os

/// Processes a whole image through the Real-ESRGAN model in a single pass.
///
/// The model works on a fixed square input (`RealESRGANProcessor.inputSize`,
/// 512x512) and produces a fixed square output (`RealESRGANProcessor.outputSize`,
/// 2048x2048). To preserve the aspect ratio of arbitrary images, the input is
/// scaled to fit the square, centered on a black background, enhanced, and the
/// content area is then cropped back out of the model output.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Upper bound for either dimension of the final image (~4K).
    private static let maxOutputDimension = 4320

    /// Process an image through the AI model
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback, invoked with values from 0.1 to 1.0
    ///     as the individual steps complete
    /// - Returns: Enhanced image with original aspect ratio preserved
    /// - Throws: `AIEnhanceError.inputImageInvalid` when the input cannot be prepared,
    ///   `AIEnhanceError.inferenceError` when the model output cannot be turned into
    ///   an image, and any error thrown by the pixel-buffer conversion or by `processor`.
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale and pad to the model's square input size
        let (paddedImage, scaleFactor, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop padding and scale to target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            scaleFactor: scaleFactor,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Private Helpers

    /// Placement of the original content inside the padded square.
    ///
    /// Offsets are expressed in image (top-left origin) pixel coordinates so they
    /// can be used directly with `CGImage.cropping(to:)` after multiplying by the
    /// model scale.
    private struct PaddingInfo {
        let paddedX: Int      // X offset (from the left edge) of the content in the padded image
        let paddedY: Int      // Y offset (from the top edge) of the content in the padded image
        let scaledWidth: Int  // Width of original content after scaling
        let scaledHeight: Int // Height of original content after scaling
    }

    /// Prepare the model input: scale the image to fit the `inputSize` x `inputSize`
    /// square while preserving aspect ratio, then center it on a black background.
    /// - Parameter image: Source image.
    /// - Returns: The padded square image, the scale that was applied to the source,
    ///   and the placement of the content inside the square.
    /// - Throws: `AIEnhanceError.inputImageInvalid` if the image has a zero dimension
    ///   or the bitmap context / image cannot be created.
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize

        // A zero dimension would make the scale infinite/NaN, and converting such a
        // value to Int traps at runtime.
        guard image.width > 0, image.height > 0 else {
            throw AIEnhanceError.inputImageInvalid
        }

        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)

        // Calculate scale to fit within inputSize x inputSize
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )

        // Round instead of truncating: floating-point error can leave the dominant
        // dimension at e.g. 511.99999, which truncation would turn into 511.
        // Clamp so that extreme aspect ratios never collapse a side to 0 pixels.
        let scaledWidth = min(inputSize, max(1, Int((originalWidth * scale).rounded())))
        let scaledHeight = min(inputSize, max(1, Int((originalHeight * scale).rounded())))

        // Calculate padding to center the image (left and top offsets)
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2

        logger.info("Scaling \(Int(originalWidth))x\(Int(originalHeight)) -> \(scaledWidth)x\(scaledHeight), padding: (\(paddingX), \(paddingY))")

        // The bitmap layout below is 8-bit RGBX, which only works with an RGB color
        // space. Fall back to device RGB for grayscale/indexed/CMYK sources instead
        // of failing to create the context.
        let colorSpace: CGColorSpace
        if let sourceSpace = image.colorSpace, sourceSpace.model == .rgb {
            colorSpace = sourceSpace
        } else {
            colorSpace = CGColorSpaceCreateDeviceRGB()
        }

        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Fill with black (or neutral color)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))

        // Draw scaled image centered.
        // CGContext has its origin at the bottom-left, while the crop performed on
        // the model output uses top-left image coordinates. Convert the top offset
        // into a bottom-origin Y so the content really starts `paddingY` rows from
        // the top even when the vertical padding is an odd number of pixels.
        let drawY = inputSize - paddingY - scaledHeight
        let drawRect = CGRect(x: paddingX, y: drawY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)

        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }

        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )

        return (paddedImage, scale, paddingInfo)
    }

    /// Extract the enhanced content area from the model output and scale it to the
    /// final size.
    ///
    /// The final size is the original size multiplied by the model scale, reduced
    /// proportionally if either side would exceed `maxOutputDimension`.
    /// - Parameters:
    ///   - outputImage: Square image produced from the model output.
    ///   - originalWidth: Width of the image originally passed in.
    ///   - originalHeight: Height of the image originally passed in.
    ///   - scaleFactor: Scale applied while preparing the input (not used in the
    ///     computation below).
    ///   - paddingInfo: Placement of the content inside the padded input.
    /// - Returns: The cropped (and, if necessary, resampled) image.
    /// - Throws: `AIEnhanceError.inferenceError` if cropping or rendering fails.
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        scaleFactor: CGFloat,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor

        // Calculate crop region in output image (padding info multiplied by the model scale)
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale

        logger.info("Cropping output at (\(cropX), \(cropY)) size \(cropWidth)x\(cropHeight)")

        // Crop the content area
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        // Calculate final target size (model scale times original, capped while
        // preserving aspect ratio)
        let maxDimension = Self.maxOutputDimension
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale

        let targetWidth: Int
        let targetHeight: Int

        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            // Both dimensions fit within limit
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            // Scale down to fit within maxDimension while preserving aspect ratio.
            // Round (rather than truncate) and clamp to 1...maxDimension so the
            // short side of a very elongated image never becomes 0.
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = min(maxDimension, max(1, Int((Double(idealWidth) * scale).rounded())))
            targetHeight = min(maxDimension, max(1, Int((Double(idealHeight) * scale).rounded())))
        }

        // If cropped image is already the right size, return it
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        // Scale to target size
        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }

        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }

        logger.info("Final image size: \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    /// Create a CGImage from tightly packed 8-bit RGBX pixel data (4 bytes per
    /// pixel, last byte ignored).
    /// - Parameters:
    ///   - pixels: Pixel bytes, at least `width * height * 4` long.
    ///   - width: Image width in pixels.
    ///   - height: Image height in pixels.
    /// - Throws: `AIEnhanceError.inferenceError` if the buffer is too small for the
    ///   requested size or the image cannot be created.
    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        // Reject undersized buffers up front; otherwise rendering the image would
        // read past the end of the provided data.
        guard width > 0, height > 0, pixels.count >= width * height * 4 else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        return image
    }
}

// Backward-compatibility alias: existing references to `TiledImageProcessor`
// resolve to `WholeImageProcessor`.
typealias TiledImageProcessor = WholeImageProcessor