#!/usr/bin/env python3 """ Real-ESRGAN x2plus PyTorch to Core ML Conversion Script Requirements: pip install torch torchvision coremltools pillow numpy Usage: 1. Download RealESRGAN_x2plus.pth from: https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth 2. Place the .pth file in this scripts/ directory 3. Run: python convert_realesrgan_to_coreml.py 4. Output: RealESRGAN_x2plus.mlpackage in ../Sources/LivePhotoCore/Resources/ Note: The model processes tiles of 128x128 pixels. For larger images, the app will tile and stitch. """ import torch import torch.nn as nn import coremltools as ct from coremltools.models.neural_network import quantization_utils import numpy as np from pathlib import Path import sys # ============================================================================ # Real-ESRGAN Model Architecture (RRDBNet) # Simplified version matching the official implementation # ============================================================================ def make_layer(block, n_layers, **kwargs): layers = [] for _ in range(n_layers): layers.append(block(**kwargs)) return nn.Sequential(*layers) class ResidualDenseBlock(nn.Module): """Residual Dense Block used in RRDB.""" def __init__(self, num_feat=64, num_grow_ch=32): super(ResidualDenseBlock, self).__init__() self.conv1 = nn.Conv2d(num_feat, num_grow_ch, 3, 1, 1) self.conv2 = nn.Conv2d(num_feat + num_grow_ch, num_grow_ch, 3, 1, 1) self.conv3 = nn.Conv2d(num_feat + 2 * num_grow_ch, num_grow_ch, 3, 1, 1) self.conv4 = nn.Conv2d(num_feat + 3 * num_grow_ch, num_grow_ch, 3, 1, 1) self.conv5 = nn.Conv2d(num_feat + 4 * num_grow_ch, num_feat, 3, 1, 1) self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) def forward(self, x): x1 = self.lrelu(self.conv1(x)) x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1))) x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1))) x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1))) x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) return x5 * 0.2 + x class RRDB(nn.Module): """Residual in Residual Dense Block.""" def __init__(self, num_feat, num_grow_ch=32): super(RRDB, self).__init__() self.rdb1 = ResidualDenseBlock(num_feat, num_grow_ch) self.rdb2 = ResidualDenseBlock(num_feat, num_grow_ch) self.rdb3 = ResidualDenseBlock(num_feat, num_grow_ch) def forward(self, x): out = self.rdb1(x) out = self.rdb2(out) out = self.rdb3(out) return out * 0.2 + x class RRDBNet(nn.Module): """Networks consisting of Residual in Residual Dense Block.""" def __init__(self, num_in_ch=3, num_out_ch=3, scale=2, num_feat=64, num_block=23, num_grow_ch=32): super(RRDBNet, self).__init__() self.scale = scale # First conv self.conv_first = nn.Conv2d(num_in_ch, num_feat, 3, 1, 1) # Body (RRDB blocks) self.body = make_layer(RRDB, num_block, num_feat=num_feat, num_grow_ch=num_grow_ch) self.conv_body = nn.Conv2d(num_feat, num_feat, 3, 1, 1) # Upsampling self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) if scale == 4: self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) def forward(self, x): feat = self.conv_first(x) body_feat = self.conv_body(self.body(feat)) feat = feat + body_feat # Upsample feat = self.lrelu(self.conv_up1(nn.functional.interpolate(feat, scale_factor=2, mode='nearest'))) if self.scale == 4: feat = self.lrelu(self.conv_up2(nn.functional.interpolate(feat, scale_factor=2, mode='nearest'))) out = self.conv_last(self.lrelu(self.conv_hr(feat))) return out def convert_to_coreml( weights_path: str, output_dir: str, tile_size: int = 128, scale: int = 2, quantize: bool = True ): """ Convert Real-ESRGAN PyTorch weights to Core ML format. Args: weights_path: Path to .pth weights file output_dir: Output directory for .mlpackage tile_size: Input tile size (128 recommended for memory efficiency) scale: Upscale factor (2 or 4) quantize: Apply INT8 quantization to reduce model size """ print(f"Loading PyTorch model from: {weights_path}") # Initialize model model = RRDBNet( num_in_ch=3, num_out_ch=3, scale=scale, num_feat=64, num_block=23, num_grow_ch=32 ) # Load weights state_dict = torch.load(weights_path, map_location='cpu') # Handle different weight formats if 'params_ema' in state_dict: state_dict = state_dict['params_ema'] elif 'params' in state_dict: state_dict = state_dict['params'] model.load_state_dict(state_dict, strict=True) model.eval() print(f"Model loaded successfully. Scale: {scale}x") # Trace the model print(f"Tracing model with input size: {tile_size}x{tile_size}") example_input = torch.rand(1, 3, tile_size, tile_size) with torch.no_grad(): traced_model = torch.jit.trace(model, example_input) # Convert to Core ML print("Converting to Core ML...") output_size = tile_size * scale mlmodel = ct.convert( traced_model, inputs=[ ct.ImageType( name="input", shape=(1, 3, tile_size, tile_size), color_layout=ct.colorlayout.RGB, scale=1.0 / 255.0, # Normalize to [0, 1] ) ], outputs=[ ct.ImageType( name="output", color_layout=ct.colorlayout.RGB, scale=255.0, # Denormalize back to [0, 255] ) ], minimum_deployment_target=ct.target.iOS17, compute_units=ct.ComputeUnit.ALL, # Enable Neural Engine convert_to="mlprogram", # Use ML Program format for iOS 17+ ) # Set model metadata mlmodel.author = "Real-ESRGAN (xinntao) / Converted for Live Photo Maker" mlmodel.license = "BSD 3-Clause License" mlmodel.short_description = f"Real-ESRGAN x{scale} super-resolution model. Input: {tile_size}x{tile_size} RGB image tile. Output: {output_size}x{output_size} enhanced tile." mlmodel.version = "1.0" # Apply quantization if requested if quantize: print("Applying INT8 quantization...") # Note: For mlprogram format, use compression instead of quantization_utils # This is a simplified approach; full quantization requires calibration data pass # Skip quantization for now - mlprogram doesn't support simple quantization # Save output_path = Path(output_dir) / f"RealESRGAN_x{scale}plus.mlpackage" print(f"Saving to: {output_path}") mlmodel.save(str(output_path)) # Print model info spec = mlmodel.get_spec() print(f"\n=== Model Info ===") print(f"Input: {spec.description.input[0].name}") print(f"Output: {spec.description.output[0].name}") print(f"File size: {sum(f.stat().st_size for f in output_path.rglob('*') if f.is_file()) / 1024 / 1024:.2f} MB") return output_path def verify_conversion(mlpackage_path: str, weights_path: str, tile_size: int = 128): """ Verify the Core ML conversion by comparing outputs. """ print("\n=== Verifying Conversion ===") try: import coremltools as ct from PIL import Image # Load Core ML model mlmodel = ct.models.MLModel(mlpackage_path) # Create test input test_input = np.random.randint(0, 255, (tile_size, tile_size, 3), dtype=np.uint8) test_image = Image.fromarray(test_input, mode='RGB') # Run Core ML inference coreml_output = mlmodel.predict({'input': test_image}) print(f"Core ML inference successful!") print(f"Output shape: {coreml_output['output'].size}") return True except Exception as e: print(f"Verification failed: {e}") return False if __name__ == "__main__": script_dir = Path(__file__).parent weights_path = script_dir / "RealESRGAN_x2plus.pth" output_dir = script_dir.parent / "Sources" / "LivePhotoCore" / "Resources" # Check if weights exist if not weights_path.exists(): print(f"ERROR: Weights file not found at: {weights_path}") print("\nPlease download the weights file from:") print("https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth") print(f"\nAnd place it in: {script_dir}") sys.exit(1) # Create output directory output_dir.mkdir(parents=True, exist_ok=True) # Convert mlpackage_path = convert_to_coreml( weights_path=str(weights_path), output_dir=str(output_dir), tile_size=128, scale=2, quantize=False # Quantization handled separately ) # Verify verify_conversion(str(mlpackage_path), str(weights_path)) print("\n=== Conversion Complete ===") print(f"Output: {mlpackage_path}") print("\nNext steps:") print("1. Open Xcode and add the .mlpackage to your project") print("2. Xcode will compile it to .mlmodelc automatically") print("3. Or compile manually: xcrun coremlcompiler compile RealESRGAN_x2plus.mlpackage .")