//
//  RealESRGANProcessor.swift
//  LivePhotoCore
//
//  Core ML inference logic for the Real-ESRGAN model.
//  This model requires a fixed 512x512 input and outputs 2048x2048.
//

import Accelerate
import CoreML
import CoreVideo
import Foundation
import os

/// Real-ESRGAN Core ML model processor.
/// Note: this model has a fixed input size of 512x512.
actor RealESRGANProcessor {
    private var model: MLModel?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "RealESRGANProcessor")

    /// Fixed input size required by the model (512x512)
    static let inputSize: Int = 512

    /// Scale factor (4x for Real-ESRGAN x4plus)
    static let scaleFactor: Int = 4

    /// Output size (inputSize * scaleFactor = 2048)
    static let outputSize: Int = inputSize * scaleFactor  // 2048

    init() {}

    /// Load the Core ML model from the bundle.
    func loadModel() async throws {
        guard model == nil else {
            logger.debug("Model already loaded")
            return
        }

        logger.info("Loading Real-ESRGAN Core ML model...")

        // Try to find the model in a bundle
        let modelName = "RealESRGAN_x4plus"
        var modelURL: URL?

        // Try the SPM bundle first
        #if SWIFT_PACKAGE
        if let url = Bundle.module.url(forResource: modelName, withExtension: "mlmodelc") {
            modelURL = url
        } else if let url = Bundle.module.url(forResource: modelName, withExtension: "mlpackage") {
            modelURL = url
        }
        #endif

        // Fall back to the main bundle
        if modelURL == nil {
            if let url = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") {
                modelURL = url
            } else if let url = Bundle.main.url(forResource: modelName, withExtension: "mlpackage") {
                modelURL = url
            }
        }

        guard let url = modelURL else {
            logger.error("Model file not found: \(modelName)")
            throw AIEnhanceError.modelNotFound
        }

        logger.info("Found model at: \(url.path)")

        // Configure the model for optimal performance
        let config = MLModelConfiguration()
        config.computeUnits = .all  // Use the Neural Engine when available

        do {
            model = try await MLModel.load(contentsOf: url, configuration: config)
            logger.info("Model loaded successfully")
        } catch {
            logger.error("Failed to load model: \(error.localizedDescription)")
            throw AIEnhanceError.modelLoadFailed(error.localizedDescription)
        }
    }

    /// Unload the model from memory.
    func unloadModel() {
        model = nil
        logger.info("Model unloaded from memory")
    }

    /// Process a 512x512 image through the model.
    /// - Parameter pixelBuffer: Input image as a CVPixelBuffer (must be 512x512, BGRA format)
    /// - Returns: Enhanced image as an RGBA byte array (2048x2048)
    func processImage(_ pixelBuffer: CVPixelBuffer) async throws -> [UInt8] {
        guard let model else {
            throw AIEnhanceError.modelNotFound
        }

        // Verify the input size
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        guard width == Self.inputSize, height == Self.inputSize else {
            throw AIEnhanceError.inferenceError(
                "Invalid input size \(width)x\(height), expected \(Self.inputSize)x\(Self.inputSize)"
            )
        }

        // Check for cancellation
        try Task.checkCancellation()

        logger.info("Running inference on \(width)x\(height) image...")

        // Bridge the synchronous prediction API into async
        // (MLModel prediction is thread-safe)
        let output: [UInt8] = try await withCheckedThrowingContinuation { continuation in
            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    // Create the input feature from the pixel buffer
                    // (MLFeatureValue(pixelBuffer:) does not throw)
                    let inputFeature = MLFeatureValue(pixelBuffer: pixelBuffer)
                    let inputProvider = try MLDictionaryFeatureProvider(
                        dictionary: ["input": inputFeature]
                    )

                    // Run inference synchronously
                    let prediction = try model.prediction(from: inputProvider)

                    // Extract the output from the model. The model publishes
                    // its result as "activation_out", which can be either a
                    // MultiArray or an Image.
                    let rgbaData: [UInt8]
                    if let outputValue = prediction.featureValue(for: "activation_out") {
                        if let multiArray = outputValue.multiArrayValue {
                            // Output is an MLMultiArray with shape [C, H, W]
                            self.logger.info("Output is MultiArray: \(multiArray.shape)")
                            rgbaData = try self.multiArrayToRGBA(multiArray)
                        } else if let outputBuffer = outputValue.imageBufferValue {
                            // Output is a CVPixelBuffer (image)
                            let outWidth = CVPixelBufferGetWidth(outputBuffer)
                            let outHeight = CVPixelBufferGetHeight(outputBuffer)
                            self.logger.info("Output is Image: \(outWidth)x\(outHeight)")
                            rgbaData = try ImageFormatConverter.pixelBufferToRGBAData(outputBuffer)
                        } else {
                            continuation.resume(throwing: AIEnhanceError.inferenceError(
                                "Cannot extract data from model output"
                            ))
                            return
                        }
                    } else {
                        continuation.resume(throwing: AIEnhanceError.inferenceError(
                            "Model output 'activation_out' not found"
                        ))
                        return
                    }

                    continuation.resume(returning: rgbaData)
                } catch let error as AIEnhanceError {
                    continuation.resume(throwing: error)
                } catch {
                    continuation.resume(throwing: AIEnhanceError.inferenceError(error.localizedDescription))
                }
            }
        }

        return output
    }

    /// Convert an MLMultiArray [C, H, W] to an RGBA byte array.
    /// Nonisolated so it can be called synchronously from the background prediction queue.
    /// - Parameter multiArray: Output from the model with shape [3, H, W] (RGB channels)
    /// - Returns: RGBA byte array with count H * W * 4
    nonisolated private func multiArrayToRGBA(_ multiArray: MLMultiArray) throws -> [UInt8] {
        let shape = multiArray.shape.map { $0.intValue }

        // Expect shape [3, H, W] for RGB
        guard shape.count == 3, shape[0] == 3 else {
            throw AIEnhanceError.inferenceError(
                "Unexpected output shape: \(shape), expected [3, H, W]"
            )
        }

        let channels = shape[0]
        let height = shape[1]
        let width = shape[2]
        logger.info("Converting MultiArray \(channels)x\(height)x\(width) to RGBA")

        // Output array: RGBA format, alpha preinitialized to 255 (opaque)
        var rgbaData = [UInt8](repeating: 255, count: width * height * 4)

        // Get a typed pointer to the MultiArray's float data
        let dataPointer = multiArray.dataPointer.assumingMemoryBound(to: Float32.self)
        let channelStride = height * width

        // Convert CHW (channel-first) to RGBA (interleaved).
        // This loop assumes the model emits values in [0, 1]; they are scaled
        // to [0, 255] and clamped to guard against slight overshoot.
        for y in 0..<height {
            for x in 0..<width {
                let pixelIndex = y * width + x
                let dstIndex = pixelIndex * 4
                for c in 0..<3 {
                    let value = dataPointer[c * channelStride + pixelIndex]
                    rgbaData[dstIndex + c] = UInt8(max(0, min(255, value * 255)))
                }
                // Alpha stays at 255
            }
        }

        return rgbaData
    }
}
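
// MARK: - Usage sketch (illustrative addition; the function name below is an
// assumption, not part of the original API)
//
// A minimal sketch of driving the processor end to end, assuming the caller
// already holds a 512x512 BGRA CVPixelBuffer from earlier in the pipeline.
func upscaleOnce(_ input: CVPixelBuffer) async throws -> [UInt8] {
    let processor = RealESRGANProcessor()
    try await processor.loadModel()                     // locate and load the bundled model
    let rgba = try await processor.processImage(input)  // 2048 x 2048 x 4 RGBA bytes
    await processor.unloadModel()                       // free the model once finished
    return rgba
}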