//
//  TiledImageProcessor.swift
//  LivePhotoCore
//
//  True tiled image processing for the Real-ESRGAN model.
//  Splits large images into overlapping 512x512 tiles, processes each tile
//  separately, and stitches the results with weighted blending for seamless output.
//

import CoreGraphics
import CoreVideo
import Foundation
import os

// MARK: - Types

/// Represents a single tile for processing
struct ImageTile {
    let image: CGImage
    let originX: Int        // Position in source image
    let originY: Int
    let outputOriginX: Int  // Position in output image (scaled)
    let outputOriginY: Int
}

/// Tiling configuration
struct TilingConfig {
    let tileSize: Int = 512
    let overlap: Int = 64   // Blending zone for seamless stitching
    let modelScale: Int = 4

    var effectiveTileSize: Int { tileSize - overlap * 2 }  // 384
    var outputTileSize: Int { tileSize * modelScale }      // 2048
    var outputOverlap: Int { overlap * modelScale }        // 256
}

// MARK: - TiledImageProcessor

/// Processes large images by splitting them into tiles
struct TiledImageProcessor {
    private let config = TilingConfig()
    private let logger = Logger(subsystem: "LivePhotoCore", category: "TiledImageProcessor")

    /// Process an image through the AI model using the tiled approach
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback
    /// - Returns: Enhanced image with the original aspect ratio preserved
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Tiled processing \(originalWidth)x\(originalHeight) image")
        progress?(0.05)

        // Step 1: Extract tiles with overlap
        let tiles = extractTiles(from: inputImage)
        logger.info("Extracted \(tiles.count) tiles")
        progress?(0.1)

        // Step 2: Process each tile
        var processedTiles: [(tile: ImageTile, output: [UInt8])] = []
        let tileProgressBase = 0.1
        let tileProgressRange = 0.7

        for (index, tile) in tiles.enumerated() {
            try Task.checkCancellation()

            let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(tile.image)
            let outputData = try await processor.processImage(pixelBuffer)
            processedTiles.append((tile, outputData))

            let tileProgress = tileProgressBase + tileProgressRange * Double(index + 1) / Double(tiles.count)
            progress?(tileProgress)

            // Yield to allow memory cleanup between tiles
            await Task.yield()
        }
        progress?(0.85)

        // Step 3: Stitch tiles with blending
        let outputWidth = originalWidth * config.modelScale
        let outputHeight = originalHeight * config.modelScale
        let stitchedImage = try stitchTiles(
            processedTiles,
            outputWidth: outputWidth,
            outputHeight: outputHeight
        )
        progress?(0.95)

        // Step 4: Cap at the maximum output dimension if needed
        let finalImage = try capToMaxDimension(stitchedImage, maxDimension: 4320)
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }
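    // Worked example (illustrative, derived from the TilingConfig constants
    // above): tile origins advance by effectiveTileSize = 384 px, so a
    // 1920x1080 input yields x origins {0, 384, 768, 1152, 1536} and
    // y origins {0, 384, 768}, a 5x3 grid of 15 tiles. Edge tiles are padded
    // up to the full 512x512 before inference and clamped back to the image
    // bounds during stitching.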
    // MARK: - Tile Extraction

    /// Extract overlapping tiles from the input image
    private func extractTiles(from image: CGImage) -> [ImageTile] {
        var tiles: [ImageTile] = []
        let width = image.width
        let height = image.height
        let step = config.effectiveTileSize  // 384

        // Because step (384) is smaller than tileSize (512), consecutive tiles
        // overlap by 128 px, and the clamped, padded edge tiles cover the
        // right and bottom borders without extra bookkeeping.
        var y = 0
        while y < height {
            var x = 0
            while x < width {
                // Calculate tile bounds
                let tileX = x
                let tileY = y
                let tileWidth = min(config.tileSize, width - tileX)
                let tileHeight = min(config.tileSize, height - tileY)

                // Extract or pad the tile to the full 512x512
                let tileImage = extractOrPadTile(
                    from: image,
                    x: tileX,
                    y: tileY,
                    width: tileWidth,
                    height: tileHeight
                )

                if let tileImage = tileImage {
                    tiles.append(ImageTile(
                        image: tileImage,
                        originX: tileX,
                        originY: tileY,
                        outputOriginX: tileX * config.modelScale,
                        outputOriginY: tileY * config.modelScale
                    ))
                }

                x += step
            }
            y += step
        }

        return tiles
    }

    /// Extract a tile from the image, padding with black if the tile extends
    /// past the image edge. (Edge reflection would blend better at the borders
    /// but is not implemented here.)
    private func extractOrPadTile(
        from image: CGImage,
        x: Int,
        y: Int,
        width: Int,
        height: Int
    ) -> CGImage? {
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()

        guard let context = CGContext(
            data: nil,
            width: config.tileSize,
            height: config.tileSize,
            bitsPerComponent: 8,
            bytesPerRow: config.tileSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            return nil
        }

        // Fill the padding area with black
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: config.tileSize, height: config.tileSize))

        // Crop the tile from the source image
        let cropRect = CGRect(x: x, y: y, width: width, height: height)
        guard let croppedImage = image.cropping(to: cropRect) else {
            return nil
        }

        // CGImage coordinates have their origin at the top-left, CGContext at
        // the bottom-left, so draw at (0, tileSize - height) to place the crop
        // at the top of the tile.
        let drawY = config.tileSize - height
        context.draw(croppedImage, in: CGRect(x: 0, y: drawY, width: width, height: height))

        return context.makeImage()
    }
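    // Worked example (illustrative): a bottom-edge tile that is only 300 px
    // tall is drawn at drawY = 512 - 300 = 212 in the flipped CGContext, so
    // the source rows occupy the top of the tile and the lower 212 rows stay
    // black padding. Those padded rows never reach the output:
    // blendTileIntoOutput clamps each tile to the output image bounds.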
    // MARK: - Tile Stitching

    /// Stitch processed tiles with weighted blending
    private func stitchTiles(
        _ tiles: [(tile: ImageTile, output: [UInt8])],
        outputWidth: Int,
        outputHeight: Int
    ) throws -> CGImage {
        // Create output buffers
        var outputBuffer = [Float](repeating: 0, count: outputWidth * outputHeight * 3)
        var weightBuffer = [Float](repeating: 0, count: outputWidth * outputHeight)

        let outputTileSize = config.outputTileSize  // 2048

        for (tile, data) in tiles {
            // Create blending weights for this tile
            let weights = createBlendingWeights(
                tileWidth: min(outputTileSize, outputWidth - tile.outputOriginX),
                tileHeight: min(outputTileSize, outputHeight - tile.outputOriginY)
            )

            // Blend the tile into the output
            blendTileIntoOutput(
                data: data,
                weights: weights,
                atX: tile.outputOriginX,
                atY: tile.outputOriginY,
                outputWidth: outputWidth,
                outputHeight: outputHeight,
                outputBuffer: &outputBuffer,
                weightBuffer: &weightBuffer
            )
        }

        // Normalize by the accumulated weights
        normalizeByWeights(&outputBuffer, weights: weightBuffer, width: outputWidth, height: outputHeight)

        // Convert to CGImage
        return try createCGImage(from: outputBuffer, width: outputWidth, height: outputHeight)
    }

    /// Create blending weights with a linear falloff at the tile edges
    private func createBlendingWeights(tileWidth: Int, tileHeight: Int) -> [Float] {
        let overlap = config.outputOverlap  // 256
        var weights = [Float](repeating: 1.0, count: tileWidth * tileHeight)

        for y in 0..<tileHeight {
            for x in 0..<tileWidth {
                var weight: Float = 1.0

                // Left edge ramp
                if x < overlap {
                    weight *= Float(x) / Float(overlap)
                }
                // Right edge ramp
                if x >= tileWidth - overlap {
                    weight *= Float(tileWidth - x - 1) / Float(overlap)
                }
                // Top edge ramp
                if y < overlap {
                    weight *= Float(y) / Float(overlap)
                }
                // Bottom edge ramp
                if y >= tileHeight - overlap {
                    weight *= Float(tileHeight - y - 1) / Float(overlap)
                }

                // Ensure a minimum weight to avoid division by zero
                weight = max(weight, 0.001)
                weights[y * tileWidth + x] = weight
            }
        }

        return weights
    }
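    // Worked example (illustrative): with outputOverlap = 256, an output pixel
    // 128 px inside a tile's left edge gets weight 128/256 = 0.5 from that
    // tile, while the same pixel lies more than 256 px from the neighboring
    // tile's edge and gets weight 1.0 there. Accumulation stores
    // 0.5*a + 1.0*b with a total weight of 1.5; normalizeByWeights divides by
    // 1.5, producing a smooth ramp between the two tile predictions across the
    // overlap band instead of a visible seam.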
    /// Blend a tile into the output buffer with weights
    private func blendTileIntoOutput(
        data: [UInt8],
        weights: [Float],
        atX: Int,
        atY: Int,
        outputWidth: Int,
        outputHeight: Int,
        outputBuffer: inout [Float],
        weightBuffer: inout [Float]
    ) {
        let tileSize = config.outputTileSize
        let tileWidth = min(tileSize, outputWidth - atX)
        let tileHeight = min(tileSize, outputHeight - atY)

        for ty in 0..<tileHeight {
            let outputY = atY + ty
            if outputY >= outputHeight { continue }

            for tx in 0..<tileWidth {
                let outputX = atX + tx
                if outputX >= outputWidth { continue }

                let tileIdx = ty * tileSize + tx
                let outputIdx = outputY * outputWidth + outputX

                // Bounds check for tile data (RGBA format, 4 bytes per pixel)
                let dataIdx = tileIdx * 4
                guard dataIdx + 2 < data.count else { continue }

                let weight = weights[ty * tileWidth + tx]

                // Accumulate weighted RGB values
                outputBuffer[outputIdx * 3 + 0] += Float(data[dataIdx + 0]) * weight  // R
                outputBuffer[outputIdx * 3 + 1] += Float(data[dataIdx + 1]) * weight  // G
                outputBuffer[outputIdx * 3 + 2] += Float(data[dataIdx + 2]) * weight  // B
                weightBuffer[outputIdx] += weight
            }
        }
    }

    /// Normalize the output buffer by the accumulated weights
    private func normalizeByWeights(
        _ buffer: inout [Float],
        weights: [Float],
        width: Int,
        height: Int
    ) {
        for i in 0..<(width * height) {
            let w = max(weights[i], 0.001)
            buffer[i * 3 + 0] /= w
            buffer[i * 3 + 1] /= w
            buffer[i * 3 + 2] /= w
        }
    }

    /// Create a CGImage from a float RGB buffer
    private func createCGImage(from buffer: [Float], width: Int, height: Int) throws -> CGImage {
        // Convert the float buffer to RGBA UInt8
        var pixels = [UInt8](repeating: 255, count: width * height * 4)
        for i in 0..<(width * height) {
            pixels[i * 4 + 0] = UInt8(clamping: Int(buffer[i * 3 + 0]))  // R
            pixels[i * 4 + 1] = UInt8(clamping: Int(buffer[i * 3 + 1]))  // G
            pixels[i * 4 + 2] = UInt8(clamping: Int(buffer[i * 3 + 2]))  // B
            pixels[i * 4 + 3] = 255                                      // A
        }

        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard let provider = CGDataProvider(data: Data(pixels) as CFData),
              let image = CGImage(
                  width: width,
                  height: height,
                  bitsPerComponent: 8,
                  bitsPerPixel: 32,
                  bytesPerRow: width * 4,
                  space: colorSpace,
                  bitmapInfo: bitmapInfo,
                  provider: provider,
                  decode: nil,
                  shouldInterpolate: true,
                  intent: .defaultIntent
              )
        else {
            throw AIEnhanceError.inferenceError("Failed to create stitched image")
        }

        return image
    }

    /// Cap the image to a maximum dimension while preserving aspect ratio
    private func capToMaxDimension(_ image: CGImage, maxDimension: Int) throws -> CGImage {
        let width = image.width
        let height = image.height

        if width <= maxDimension && height <= maxDimension {
            return image
        }

        let scale = min(Double(maxDimension) / Double(width), Double(maxDimension) / Double(height))
        let targetWidth = Int(Double(width) * scale)
        let targetHeight = Int(Double(height) * scale)

        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create scaling context")
        }

        context.interpolationQuality = .high
        context.draw(image, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let scaledImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to scale image")
        }

        return scaledImage
    }
}

// MARK: - WholeImageProcessor (for small images)

/// Processes small images (< 512x512) for the Real-ESRGAN model.
/// Uses a scale-and-pad approach for images that fit within a single tile.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Process an image through the AI model
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Whole-image processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale and pad to 512x512
        let (paddedImage, _, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert the output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop the padding and scale to the target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Private Helpers

    private struct PaddingInfo {
        let paddedX: Int
        let paddedY: Int
        let scaledWidth: Int
        let scaledHeight: Int
    }

    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)

        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )
        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2

        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }

        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))

        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)

        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }

        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )

        return (paddedImage, scale, paddingInfo)
    }
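    // Worked example (illustrative, assuming RealESRGANProcessor.inputSize = 512
    // and scaleFactor = 4, consistent with the 512x512 tile size and 4x model
    // scale used above): a 400x300 input scales by min(512/400, 512/300) = 1.28
    // to 512x384, leaving paddingY = (512 - 384) / 2 = 64 px black bands above
    // and below. extractAndScaleOutput crops those bands back out at 4x scale,
    // 256 px each, after inference.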
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale

        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        let maxDimension = 4320
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale

        let targetWidth: Int
        let targetHeight: Int
        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }

        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }

        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }

        return finalImage
    }

    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard let provider = CGDataProvider(data: Data(pixels) as CFData),
              let image = CGImage(
                  width: width,
                  height: height,
                  bitsPerComponent: 8,
                  bitsPerPixel: 32,
                  bytesPerRow: width * 4,
                  space: colorSpace,
                  bitmapInfo: bitmapInfo,
                  provider: provider,
                  decode: nil,
                  shouldInterpolate: true,
                  intent: .defaultIntent
              )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        return image
    }
}
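// Usage sketch (illustrative; `model`, `input`, and the 512x512 dispatch
// threshold are assumptions based on the doc comments above, not an API
// defined in this file):
//
//     let model: RealESRGANProcessor = ...  // configured elsewhere in LivePhotoCore
//     let output: CGImage
//     if input.width <= 512 && input.height <= 512 {
//         output = try await WholeImageProcessor().processImage(input, processor: model, progress: nil)
//     } else {
//         output = try await TiledImageProcessor().processImage(input, processor: model, progress: nil)
//     }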