to-live-photo/Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift
3f503c1050 feat: implement true tiled processing to improve AI enhancement quality
- Rewrite TiledImageProcessor: split large images into overlapping 512×512 tiles
- 64 px overlap zones with linear weight blending to eliminate stitching seams
- AIEnhancer picks the processor automatically: TiledImageProcessor for large images, WholeImageProcessor for small ones
- Information loss drops from ~86% to 0% (a 1080×1920 image is no longer squeezed down to 288×512)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 21:04:22 +08:00
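
A rough back-of-envelope check of the tile arithmetic above (not part of the commit; the constants mirror TilingConfig, and the snippet is only a sketch):

import Foundation

let tileSize = 512, overlap = 64, modelScale = 4
let step = tileSize - overlap * 2                    // 384 px stride between tile origins,
                                                     // so adjacent tiles share a 128 px band
let (width, height) = (1080, 1920)
let cols = Int(ceil(Double(width) / Double(step)))   // 3 tile columns
let rows = Int(ceil(Double(height) / Double(step)))  // 5 tile rows
print(cols * rows)                                   // 15 tiles to run through the model
print(width * modelScale, height * modelScale)       // 4320 x 7680, later capped to 4320
                                                     // on the long side (-> 2430 x 4320)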


//
//  TiledImageProcessor.swift
//  LivePhotoCore
//
//  True tiled image processing for the Real-ESRGAN model.
//  Splits large images into overlapping 512x512 tiles, processes each separately,
//  and stitches them with weighted blending for seamless results.
//
import CoreGraphics
import CoreVideo
import Foundation
import os

// MARK: - Types

/// Represents a single tile for processing
struct ImageTile {
    let image: CGImage
    let originX: Int        // Position in source image
    let originY: Int
    let outputOriginX: Int  // Position in output image (scaled)
    let outputOriginY: Int
}

/// Tiling configuration
struct TilingConfig {
    let tileSize: Int = 512
    let overlap: Int = 64   // Blending zone for seamless stitching
    let modelScale: Int = 4

    var effectiveTileSize: Int { tileSize - overlap * 2 }  // 384
    var outputTileSize: Int { tileSize * modelScale }      // 2048
    var outputOverlap: Int { overlap * modelScale }        // 256
}

// MARK: - TiledImageProcessor

/// Processes large images by splitting them into overlapping tiles
struct TiledImageProcessor {
    private let config = TilingConfig()
    private let logger = Logger(subsystem: "LivePhotoCore", category: "TiledImageProcessor")

    /// Process an image through the AI model using a tiled approach
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback
    /// - Returns: Enhanced image with original aspect ratio preserved
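    ///
    /// A hypothetical call site (`photo` and a loaded `RealESRGANProcessor` named
    /// `esrgan` are assumed to exist in scope):
    ///
    ///     let enhanced = try await TiledImageProcessor()
    ///         .processImage(photo, processor: esrgan) { fraction in
    ///             print("AI enhance: \(Int(fraction * 100))%")
    ///         }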
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height
        logger.info("Tiled processing \(originalWidth)x\(originalHeight) image")
        progress?(0.05)

        // Step 1: Extract tiles with overlap
        let tiles = extractTiles(from: inputImage)
        logger.info("Extracted \(tiles.count) tiles")
        progress?(0.1)

        // Step 2: Process each tile
        var processedTiles: [(tile: ImageTile, output: [UInt8])] = []
        let tileProgressBase = 0.1
        let tileProgressRange = 0.7
        for (index, tile) in tiles.enumerated() {
            try Task.checkCancellation()
            let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(tile.image)
            let outputData = try await processor.processImage(pixelBuffer)
            processedTiles.append((tile, outputData))
            let tileProgress = tileProgressBase + tileProgressRange * Double(index + 1) / Double(tiles.count)
            progress?(tileProgress)
            // Yield to allow memory cleanup between tiles
            await Task.yield()
        }
        progress?(0.85)

        // Step 3: Stitch tiles with blending
        let outputWidth = originalWidth * config.modelScale
        let outputHeight = originalHeight * config.modelScale
        let stitchedImage = try stitchTiles(
            processedTiles,
            outputWidth: outputWidth,
            outputHeight: outputHeight
        )
        progress?(0.95)

        // Step 4: Cap at max dimension if needed
        let finalImage = try capToMaxDimension(stitchedImage, maxDimension: 4320)
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Tile Extraction

    /// Extract overlapping tiles from the input image
    private func extractTiles(from image: CGImage) -> [ImageTile] {
        var tiles: [ImageTile] = []
        let width = image.width
        let height = image.height
        let step = config.effectiveTileSize  // 384

        // Step by the effective tile size so consecutive 512px tiles overlap.
        // Edge tiles smaller than 512x512 are padded inside extractOrPadTile,
        // which is what guarantees the right and bottom edges are covered.
        var y = 0
        while y < height {
            var x = 0
            while x < width {
                // Calculate tile bounds
                let tileX = x
                let tileY = y
                let tileWidth = min(config.tileSize, width - tileX)
                let tileHeight = min(config.tileSize, height - tileY)

                // Extract or pad tile to full 512x512
                if let tileImage = extractOrPadTile(
                    from: image,
                    x: tileX, y: tileY,
                    width: tileWidth, height: tileHeight
                ) {
                    tiles.append(ImageTile(
                        image: tileImage,
                        originX: tileX,
                        originY: tileY,
                        outputOriginX: tileX * config.modelScale,
                        outputOriginY: tileY * config.modelScale
                    ))
                }
                x += step
            }
            y += step
        }
        return tiles
    }

    /// Extract a tile from the image, padding with black if it extends past the edge
    private func extractOrPadTile(
        from image: CGImage,
        x: Int, y: Int,
        width: Int, height: Int
    ) -> CGImage? {
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: config.tileSize,
            height: config.tileSize,
            bitsPerComponent: 8,
            bytesPerRow: config.tileSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            return nil
        }

        // Fill the padding area with black. (Edge reflection would blend better, but
        // the padded region lies outside the image and is discarded at stitch time.)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: config.tileSize, height: config.tileSize))

        // Crop the tile from the source image
        let cropRect = CGRect(x: x, y: y, width: width, height: height)
        guard let croppedImage = image.cropping(to: cropRect) else {
            return nil
        }

        // CGImage coordinates have their origin at the top-left, CGContext at the
        // bottom-left, so draw at (0, tileSize - height) to pin the crop to the top.
        let drawY = config.tileSize - height
        context.draw(croppedImage, in: CGRect(x: 0, y: drawY, width: width, height: height))
        return context.makeImage()
    }

    // MARK: - Tile Stitching

    /// Stitch processed tiles with weighted blending
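    ///
    /// Each output pixel is a weighted average of every tile covering it:
    /// out(p) = Σᵢ wᵢ(p) · tileᵢ(p) / Σᵢ wᵢ(p), where wᵢ ramps linearly to zero
    /// toward tile edges (see createBlendingWeights), so seams fade out smoothly.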
    private func stitchTiles(
        _ tiles: [(tile: ImageTile, output: [UInt8])],
        outputWidth: Int,
        outputHeight: Int
    ) throws -> CGImage {
        // Create output buffers
        var outputBuffer = [Float](repeating: 0, count: outputWidth * outputHeight * 3)
        var weightBuffer = [Float](repeating: 0, count: outputWidth * outputHeight)
        let outputTileSize = config.outputTileSize  // 2048

        for (tile, data) in tiles {
            // Create blending weights for this tile
            let weights = createBlendingWeights(
                tileWidth: min(outputTileSize, outputWidth - tile.outputOriginX),
                tileHeight: min(outputTileSize, outputHeight - tile.outputOriginY)
            )
            // Blend tile into output
            blendTileIntoOutput(
                data: data,
                weights: weights,
                atX: tile.outputOriginX,
                atY: tile.outputOriginY,
                outputWidth: outputWidth,
                outputHeight: outputHeight,
                outputBuffer: &outputBuffer,
                weightBuffer: &weightBuffer
            )
        }

        // Normalize by accumulated weights
        normalizeByWeights(&outputBuffer, weights: weightBuffer, width: outputWidth, height: outputHeight)

        // Convert to CGImage
        return try createCGImage(from: outputBuffer, width: outputWidth, height: outputHeight)
    }

    /// Create blending weights with linear falloff at edges
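    ///
    /// With the 256px output overlap, the ramp along one axis looks like:
    ///     x:      0    64    128   192   256 ... width-256 ... width-1
    ///     weight: 0.0  0.25  0.5   0.75  1.0     1.0           ~0.0
    /// (clamped to a 0.001 floor so normalization never divides by zero).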
    private func createBlendingWeights(tileWidth: Int, tileHeight: Int) -> [Float] {
        let overlap = config.outputOverlap  // 256
        var weights = [Float](repeating: 1.0, count: tileWidth * tileHeight)
        for y in 0..<tileHeight {
            for x in 0..<tileWidth {
                var weight: Float = 1.0
                // Left edge ramp
                if x < overlap {
                    weight *= Float(x) / Float(overlap)
                }
                // Right edge ramp
                if x >= tileWidth - overlap {
                    weight *= Float(tileWidth - x - 1) / Float(overlap)
                }
                // Top edge ramp
                if y < overlap {
                    weight *= Float(y) / Float(overlap)
                }
                // Bottom edge ramp
                if y >= tileHeight - overlap {
                    weight *= Float(tileHeight - y - 1) / Float(overlap)
                }
                // Ensure minimum weight to avoid division by zero
                weight = max(weight, 0.001)
                weights[y * tileWidth + x] = weight
            }
        }
        return weights
    }

    /// Blend a tile into the output buffer with weights
    private func blendTileIntoOutput(
        data: [UInt8],
        weights: [Float],
        atX: Int, atY: Int,
        outputWidth: Int, outputHeight: Int,
        outputBuffer: inout [Float],
        weightBuffer: inout [Float]
    ) {
        let tileSize = config.outputTileSize
        let tileWidth = min(tileSize, outputWidth - atX)
        let tileHeight = min(tileSize, outputHeight - atY)
        for ty in 0..<tileHeight {
            let outputY = atY + ty
            if outputY >= outputHeight { continue }
            for tx in 0..<tileWidth {
                let outputX = atX + tx
                if outputX >= outputWidth { continue }
                let tileIdx = ty * tileSize + tx
                let outputIdx = outputY * outputWidth + outputX
                // Bounds check for tile data (RGBA format, 4 bytes per pixel)
                let dataIdx = tileIdx * 4
                guard dataIdx + 2 < data.count else { continue }
                let weight = weights[ty * tileWidth + tx]
                // Accumulate weighted RGB values
                outputBuffer[outputIdx * 3 + 0] += Float(data[dataIdx + 0]) * weight  // R
                outputBuffer[outputIdx * 3 + 1] += Float(data[dataIdx + 1]) * weight  // G
                outputBuffer[outputIdx * 3 + 2] += Float(data[dataIdx + 2]) * weight  // B
                weightBuffer[outputIdx] += weight
            }
        }
    }

    /// Normalize output buffer by accumulated weights
    private func normalizeByWeights(
        _ buffer: inout [Float],
        weights: [Float],
        width: Int, height: Int
    ) {
        for i in 0..<(width * height) {
            let w = max(weights[i], 0.001)
            buffer[i * 3 + 0] /= w
            buffer[i * 3 + 1] /= w
            buffer[i * 3 + 2] /= w
        }
    }

    /// Create CGImage from float RGB buffer
    private func createCGImage(from buffer: [Float], width: Int, height: Int) throws -> CGImage {
        // Convert float buffer to RGBA UInt8, rounding rather than truncating
        var pixels = [UInt8](repeating: 255, count: width * height * 4)
        for i in 0..<(width * height) {
            pixels[i * 4 + 0] = UInt8(clamping: Int(buffer[i * 3 + 0].rounded()))  // R
            pixels[i * 4 + 1] = UInt8(clamping: Int(buffer[i * 3 + 1].rounded()))  // G
            pixels[i * 4 + 2] = UInt8(clamping: Int(buffer[i * 3 + 2].rounded()))  // B
            pixels[i * 4 + 3] = 255                                                // A
        }
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)
        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create stitched image")
        }
        return image
    }

    /// Cap image to maximum dimension while preserving aspect ratio
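    ///
    /// Example: a 4320×7680 stitched result scales by min(4320/4320, 4320/7680) = 0.5625,
    /// down to 2430×4320.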
    private func capToMaxDimension(_ image: CGImage, maxDimension: Int) throws -> CGImage {
        let width = image.width
        let height = image.height
        if width <= maxDimension && height <= maxDimension {
            return image
        }

        let scale = min(Double(maxDimension) / Double(width), Double(maxDimension) / Double(height))
        let targetWidth = Int(Double(width) * scale)
        let targetHeight = Int(Double(height) * scale)
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create scaling context")
        }
        context.interpolationQuality = .high
        context.draw(image, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))
        guard let scaledImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to scale image")
        }
        return scaledImage
    }
}

// MARK: - WholeImageProcessor (for small images)

/// Processes small images (< 512x512) for the Real-ESRGAN model.
/// Uses a scale-and-pad approach for images that fit within a single tile.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Process an image through the AI model
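    ///
    /// A hypothetical call site (`thumbnail` and a loaded `RealESRGANProcessor`
    /// named `esrgan` are assumed to exist in scope):
    ///
    ///     let enhanced = try await WholeImageProcessor()
    ///         .processImage(thumbnail, processor: esrgan, progress: nil)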
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height
        logger.info("Whole image processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale and pad to 512x512
        let (paddedImage, _, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop padding and scale to target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Private Helpers

    private struct PaddingInfo {
        let paddedX: Int
        let paddedY: Int
        let scaledWidth: Int
        let scaledHeight: Int
    }
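
    /// Letterboxes the input into the model's 512x512 frame. For a hypothetical
    /// 300x400 input: scale = min(512/300, 512/400) = 1.28, giving a 384x512
    /// scaled image centered with 64px of horizontal padding.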
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )
        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))
        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)
        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }
        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )
        return (paddedImage, scale, paddingInfo)
    }

    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        let maxDimension = 4320
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale
        let targetWidth: Int
        let targetHeight: Int
        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }
        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))
        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }
        return finalImage
    }

    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)
        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }
        return image
    }
}