//
//  WholeImageProcessor.swift
//  LivePhotoCore
//
//  Processes images for the Real-ESRGAN model with fixed 512x512 input.
//  Handles scaling, padding, and cropping to preserve the original aspect ratio.
//

import CoreGraphics
import CoreVideo
import Foundation
import os

/// Processes images for the Real-ESRGAN model.
/// The model requires fixed 512x512 input and outputs 2048x2048.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Process an image through the AI model
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback
    /// - Returns: Enhanced image with the original aspect ratio preserved
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale and pad to 512x512
        let (paddedImage, scaleFactor, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop padding and scale to target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            scaleFactor: scaleFactor,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Private Helpers

    /// Padding information for later extraction
    private struct PaddingInfo {
        let paddedX: Int       // X offset of original content in padded image
        let paddedY: Int       // Y offset of original content in padded image
        let scaledWidth: Int   // Width of original content after scaling
        let scaledHeight: Int  // Height of original content after scaling
    }

    /// Prepare input image: scale to fit within the model's 512x512 input while
    /// preserving aspect ratio, then pad to a square.
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)

        // Calculate scale to fit within inputSize x inputSize
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )
        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)

        // Calculate padding to center the image
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2

        logger.info("Scaling \(Int(originalWidth))x\(Int(originalHeight)) -> \(scaledWidth)x\(scaledHeight), padding: (\(paddingX), \(paddingY))")
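
        // Worked example (hypothetical 4032x3024 source, assuming inputSize == 512):
        // scale = min(512/4032, 512/3024) ≈ 0.127, so the content scales to 512x384
        // and is letterboxed with paddingX = 0, paddingY = (512 - 384) / 2 = 64.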
        // Create padded context
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Fill with black (or neutral color)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))

        // Draw the scaled image centered.
        // Note: CGContext's origin is at the bottom-left, but because the content
        // is centered the padding is symmetric, so no Y flip is needed.
        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)

        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }

        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )

        return (paddedImage, scale, paddingInfo)
    }

    /// Extract the enhanced content area and scale to the final size
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        scaleFactor: CGFloat,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor

        // Calculate the crop region in the output image (4x the padding info)
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale

        logger.info("Cropping output at (\(cropX), \(cropY)) size \(cropWidth)x\(cropHeight)")

        // Crop the content area
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        // Calculate the final target size: 4x the original, capped at a reasonable
        // limit while preserving aspect ratio
        let maxDimension = 4320 // Cap at ~4K
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale

        let targetWidth: Int
        let targetHeight: Int
        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            // Both dimensions fit within the limit
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            // Scale down to fit within maxDimension while preserving aspect ratio
            let scale = min(
                Double(maxDimension) / Double(idealWidth),
                Double(maxDimension) / Double(idealHeight)
            )
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }
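
        // Worked example (continuing the hypothetical 4032x3024 source, assuming
        // modelScale == 4): the ideal output is 16128x12096, which exceeds the
        // 4320 cap, so scale = 4320/16128 ≈ 0.268 and the target is roughly 4320x3240.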
        // If the cropped image is already the right size, return it
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        // Scale to target size
        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }

        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }

        logger.info("Final image size: \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    /// Create a CGImage from RGBA pixel data
    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard let provider = CGDataProvider(data: Data(pixels) as CFData),
              let image = CGImage(
                  width: width,
                  height: height,
                  bitsPerComponent: 8,
                  bitsPerPixel: 32,
                  bytesPerRow: width * 4,
                  space: colorSpace,
                  bitmapInfo: bitmapInfo,
                  provider: provider,
                  decode: nil,
                  shouldInterpolate: true,
                  intent: .defaultIntent
              )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        return image
    }
}

// Keep the old name as a typealias for compatibility
typealias TiledImageProcessor = WholeImageProcessor
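
// Usage sketch (illustrative only; assumes RealESRGANProcessor has a plain
// initializer, that AIEnhanceProgress is a (Double) -> Void closure, and that
// `sourceImage` is an existing CGImage — none of these are confirmed here):
//
//     let enhancer = WholeImageProcessor()
//     let enhanced = try await enhancer.processImage(
//         sourceImage,
//         processor: RealESRGANProcessor(),
//         progress: { fraction in
//             print("Enhance progress: \(Int(fraction * 100))%")
//         }
//     )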