feat: M2-M4 完成,添加 AI 增强、设计系统、App Store 准备

新增功能:
- AI 超分辨率模块 (Real-ESRGAN Core ML)
- Soft UI 设计系统 (DesignSystem.swift)
- 设置页、隐私政策页、引导页
- 最近作品管理器

App Store 准备:
- 完善截图 (iPhone 6.7"/6.5", iPad 12.9")
- App Store 元数据文档
- 修复应用图标 alpha 通道
- 更新显示名称为 Live Photo Studio

工程配置:
- 配置 Git LFS 跟踪 mlmodel 文件
- 添加 Claude skill 开发指南
- 更新 .gitignore 规则

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
empty
2025-12-16 10:24:31 +08:00
parent 64cdb82459
commit 5aba93e967
46 changed files with 5279 additions and 421 deletions

View File

@@ -0,0 +1,207 @@
//
// AIEnhancer.swift
// LivePhotoCore
//
// AI super-resolution enhancement using Real-ESRGAN Core ML model.
//
import CoreGraphics
import CoreML
import Foundation
import os
// MARK: - Configuration

/// Options controlling AI super-resolution.
public struct AIEnhanceConfig: Codable, Sendable, Hashable {
    /// Preset with enhancement turned off.
    public static let disabled = AIEnhanceConfig(enabled: false)

    /// Preset with enhancement turned on.
    public static let standard = AIEnhanceConfig(enabled: true)

    /// Whether AI super-resolution is enabled.
    public var enabled: Bool

    /// Creates a configuration.
    /// - Parameter enabled: Whether enhancement is enabled; defaults to `false`.
    public init(enabled: Bool = false) {
        self.enabled = enabled
    }
}
// MARK: - Result
/// Result of a single AI super-resolution pass, as produced by
/// `AIEnhancer.enhance(image:progress:)`.
public struct AIEnhanceResult: Sendable {
    /// The upscaled output image.
    public let enhancedImage: CGImage
    /// Pixel size of the input image before enhancement.
    public let originalSize: CGSize
    /// Pixel size of `enhancedImage` after enhancement.
    public let enhancedSize: CGSize
    /// Wall-clock processing time in milliseconds.
    public let processingTimeMs: Double
}
// MARK: - Errors

/// Failure modes surfaced by the AI enhancement pipeline.
public enum AIEnhanceError: Error, Sendable, LocalizedError {
    /// The Core ML model file was not found in any bundle.
    case modelNotFound
    /// The model file exists but Core ML could not load it.
    case modelLoadFailed(String)
    /// The input image was missing, malformed, or unconvertible.
    case inputImageInvalid
    /// The model ran but inference or output extraction failed.
    case inferenceError(String)
    /// There was not enough free memory to run the model.
    case memoryPressure
    /// The operation was cancelled before it completed.
    case cancelled
    /// The hardware does not meet the minimum requirements.
    case deviceNotSupported

    /// Human-readable description for each failure mode.
    public var errorDescription: String? {
        switch self {
        case .modelNotFound:
            return "AI model file not found in bundle"
        case .modelLoadFailed(let reason):
            return "Failed to load AI model: \(reason)"
        case .inputImageInvalid:
            return "Input image is invalid or cannot be processed"
        case .inferenceError(let reason):
            return "AI inference failed: \(reason)"
        case .memoryPressure:
            return "Not enough memory for AI processing"
        case .cancelled:
            return "AI enhancement was cancelled"
        case .deviceNotSupported:
            return "Device does not support AI enhancement"
        }
    }
}
// MARK: - Progress
/// Progress callback for AI enhancement.
/// - Parameter progress: Fraction complete, from 0.0 to 1.0.
public typealias AIEnhanceProgress = @Sendable (Double) -> Void
// MARK: - Main Actor
/// Actor that coordinates AI super-resolution processing.
///
/// Owns the lifecycle of the underlying `RealESRGANProcessor` (lazy load on
/// first use, explicit unload) and gates processing on device capability and
/// on the supplied `AIEnhanceConfig`.
public actor AIEnhancer {
    private let config: AIEnhanceConfig
    // Created lazily by `preloadModel()` or `enhance(image:progress:)`;
    // released by `unloadModel()`.
    private var processor: RealESRGANProcessor?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "AIEnhancer")
    /// Scale factor (4 for Real-ESRGAN x4plus)
    public static let scaleFactor: Int = 4
    /// Initialize with configuration
    /// - Parameter config: Enhancement options; defaults to `.standard` (enabled).
    public init(config: AIEnhanceConfig = .standard) {
        self.config = config
    }
    // MARK: - Device Capability
    /// Check if AI enhancement is available on this device.
    /// Requires iOS 17+ and at least 4 GB of physical memory.
    public static func isAvailable() -> Bool {
        // Require iOS 17+
        guard #available(iOS 17.0, *) else {
            return false
        }
        // Check device memory (require at least 4GB)
        let totalMemory = ProcessInfo.processInfo.physicalMemory
        let memoryGB = Double(totalMemory) / (1024 * 1024 * 1024)
        guard memoryGB >= 4.0 else {
            return false
        }
        // Neural Engine is available on A12+ (iPhone XS and later);
        // the iOS 17 requirement ensures A12+ is present.
        return true
    }
    // MARK: - Model Management
    /// Preload the model (call during app launch or settings change).
    /// Idempotent: a second call while loaded is a no-op.
    /// - Throws: `AIEnhanceError.deviceNotSupported` on unsupported hardware,
    ///   or whatever `RealESRGANProcessor.loadModel()` throws.
    public func preloadModel() async throws {
        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }
        guard processor == nil else {
            logger.debug("Model already loaded")
            return
        }
        logger.info("Preloading Real-ESRGAN model...")
        processor = RealESRGANProcessor()
        try await processor?.loadModel()
        logger.info("Model preloaded successfully")
    }
    /// Release model from memory. Safe to call when no model is loaded.
    public func unloadModel() async {
        await processor?.unloadModel()
        processor = nil
        logger.info("Model unloaded")
    }
    // MARK: - Enhancement
    /// Enhance a single image with AI super-resolution
    /// - Parameters:
    ///   - image: Input CGImage to enhance
    ///   - progress: Optional progress callback (0.0 to 1.0)
    /// - Returns: Enhanced result with metadata
    /// - Throws: `AIEnhanceError` when the config is disabled, the device is
    ///   unsupported, or any pipeline stage fails.
    public func enhance(
        image: CGImage,
        progress: AIEnhanceProgress? = nil
    ) async throws -> AIEnhanceResult {
        // NOTE(review): throwing `.inputImageInvalid` for a *disabled config*
        // is misleading to callers — consider a dedicated error case.
        guard config.enabled else {
            throw AIEnhanceError.inputImageInvalid
        }
        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }
        let startTime = CFAbsoluteTimeGetCurrent()
        let originalSize = CGSize(width: image.width, height: image.height)
        logger.info("Starting AI enhancement: \(image.width)x\(image.height)")
        // Ensure model is loaded (lazy path mirrors `preloadModel()`).
        if processor == nil {
            try await preloadModel()
        }
        guard let processor = processor else {
            throw AIEnhanceError.modelNotFound
        }
        // Process the whole image at once (no tiling — the model has a fixed
        // 512x512 input; see `RealESRGANProcessor.inputSize`).
        let wholeImageProcessor = WholeImageProcessor()
        let enhancedImage = try await wholeImageProcessor.processImage(
            image,
            processor: processor,
            progress: progress
        )
        let processingTime = (CFAbsoluteTimeGetCurrent() - startTime) * 1000
        let enhancedSize = CGSize(width: enhancedImage.width, height: enhancedImage.height)
        logger.info(
            "AI enhancement complete: \(Int(originalSize.width))x\(Int(originalSize.height)) -> \(Int(enhancedSize.width))x\(Int(enhancedSize.height)) in \(Int(processingTime))ms"
        )
        return AIEnhanceResult(
            enhancedImage: enhancedImage,
            originalSize: originalSize,
            enhancedSize: enhancedSize,
            processingTimeMs: processingTime
        )
    }
}

View File

@@ -0,0 +1,261 @@
//
// ImageFormatConverter.swift
// LivePhotoCore
//
// Utilities for converting between CGImage and CVPixelBuffer formats.
//
import Accelerate
import CoreGraphics
import CoreVideo
import Foundation
import VideoToolbox
/// Utilities for image format conversion.
///
/// Stateless helpers for moving image data between `CGImage`,
/// `CVPixelBuffer`, and raw interleaved RGBA byte arrays.
enum ImageFormatConverter {
    /// Convert CGImage to CVPixelBuffer for Core ML input
    /// - Parameters:
    ///   - image: Input CGImage
    ///   - pixelFormat: Output pixel format (default BGRA)
    /// - Returns: CVPixelBuffer ready for Core ML
    /// - Throws: `AIEnhanceError.inputImageInvalid` if the buffer or the
    ///   bitmap context cannot be created.
    static func cgImageToPixelBuffer(
        _ image: CGImage,
        pixelFormat: OSType = kCVPixelFormatType_32BGRA
    ) throws -> CVPixelBuffer {
        let width = image.width
        let height = image.height
        // Create pixel buffer
        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
            kCVPixelBufferMetalCompatibilityKey: true,
        ]
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            pixelFormat,
            attrs as CFDictionary,
            &pixelBuffer
        )
        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }
        // Lock buffer for writing
        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }
        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        // Create bitmap context to draw into pixel buffer.
        // premultipliedFirst + byteOrder32Little yields BGRA in memory,
        // matching the default `pixelFormat`.
        // NOTE(review): a caller-supplied non-BGRA `pixelFormat` would not
        // match this context layout — verify before passing other formats.
        guard
            let context = CGContext(
                data: baseAddress,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: bytesPerRow,
                space: colorSpace,
                bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
                    | CGBitmapInfo.byteOrder32Little.rawValue
            )
        else {
            throw AIEnhanceError.inputImageInvalid
        }
        // Draw image into context (this converts the format)
        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
        return buffer
    }
    /// Convert CVPixelBuffer to CGImage
    /// - Parameter pixelBuffer: Input pixel buffer
    /// - Returns: CGImage representation
    /// - Throws: `AIEnhanceError.inferenceError` if conversion fails.
    static func pixelBufferToCGImage(_ pixelBuffer: CVPixelBuffer) throws -> CGImage {
        var cgImage: CGImage?
        // The OSStatus result is not checked; a failure leaves `cgImage` nil,
        // which the guard below reports.
        VTCreateCGImageFromCVPixelBuffer(pixelBuffer, options: nil, imageOut: &cgImage)
        guard let image = cgImage else {
            throw AIEnhanceError.inferenceError("Failed to create CGImage from pixel buffer")
        }
        return image
    }
    /// Extract raw RGBA pixel data from CVPixelBuffer.
    /// Supports BGRA, RGBA, and ARGB sources; any other format throws.
    /// - Parameter pixelBuffer: Input pixel buffer
    /// - Returns: Array of RGBA bytes (width * height * 4, row-major, no row padding)
    /// - Throws: `AIEnhanceError.inferenceError` for unreadable buffers or
    ///   unsupported pixel formats.
    static func pixelBufferToRGBAData(_ pixelBuffer: CVPixelBuffer) throws -> [UInt8] {
        CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        // Source rows may carry padding, so rows are addressed via bytesPerRow.
        let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
        guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
            throw AIEnhanceError.inferenceError("Cannot access pixel buffer data")
        }
        let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
        // Handle BGRA format (most common from Core ML)
        if pixelFormat == kCVPixelFormatType_32BGRA {
            return convertBGRAToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }
        // Handle RGBA format: already in target channel order, copy row by row.
        if pixelFormat == kCVPixelFormatType_32RGBA {
            var result = [UInt8](repeating: 0, count: width * height * 4)
            for y in 0..<height {
                let srcRow = baseAddress.advanced(by: y * bytesPerRow)
                let dstOffset = y * width * 4
                memcpy(&result[dstOffset], srcRow, width * 4)
            }
            return result
        }
        // Handle ARGB format
        if pixelFormat == kCVPixelFormatType_32ARGB {
            return convertARGBToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }
        throw AIEnhanceError.inferenceError("Unsupported pixel format: \(pixelFormat)")
    }
    /// Create CVPixelBuffer from raw RGBA data
    /// - Parameters:
    ///   - rgbaData: RGBA pixel data
    ///   - width: Image width
    ///   - height: Image height
    /// - Returns: CVPixelBuffer in BGRA format (channels swizzled on copy)
    /// - Throws: `AIEnhanceError.inputImageInvalid` if the buffer cannot be
    ///   created or mapped.
    static func rgbaDataToPixelBuffer(
        _ rgbaData: [UInt8],
        width: Int,
        height: Int
    ) throws -> CVPixelBuffer {
        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
        ]
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32BGRA,
            attrs as CFDictionary,
            &pixelBuffer
        )
        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }
        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }
        // Destination rows may be padded, so index rows via bytesPerRow.
        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        // Convert RGBA to BGRA while copying.
        // NOTE(review): this per-pixel loop (and the helpers below) could use
        // Accelerate's vImagePermuteChannels_ARGB8888 — Accelerate is already
        // imported — if profiling shows the swizzle is hot.
        for y in 0..<height {
            let dstRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let srcOffset = y * width * 4
            for x in 0..<width {
                let srcIdx = srcOffset + x * 4
                let dstIdx = x * 4
                // RGBA -> BGRA swap
                dstRow[dstIdx + 0] = rgbaData[srcIdx + 2] // B
                dstRow[dstIdx + 1] = rgbaData[srcIdx + 1] // G
                dstRow[dstIdx + 2] = rgbaData[srcIdx + 0] // R
                dstRow[dstIdx + 3] = rgbaData[srcIdx + 3] // A
            }
        }
        return buffer
    }
    // MARK: - Private Helpers
    /// Reorder BGRA source bytes into a tightly-packed RGBA array
    /// (drops any source row padding).
    private static func convertBGRAToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)
        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4
            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4
                // BGRA -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 2] // R
                result[dstIdx + 1] = srcRow[srcIdx + 1] // G
                result[dstIdx + 2] = srcRow[srcIdx + 0] // B
                result[dstIdx + 3] = srcRow[srcIdx + 3] // A
            }
        }
        return result
    }
    /// Reorder ARGB source bytes into a tightly-packed RGBA array
    /// (drops any source row padding).
    private static func convertARGBToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)
        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4
            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4
                // ARGB -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 1] // R
                result[dstIdx + 1] = srcRow[srcIdx + 2] // G
                result[dstIdx + 2] = srcRow[srcIdx + 3] // B
                result[dstIdx + 3] = srcRow[srcIdx + 0] // A
            }
        }
        return result
    }
}

View File

@@ -0,0 +1,213 @@
//
// RealESRGANProcessor.swift
// LivePhotoCore
//
// Core ML inference logic for Real-ESRGAN model.
// This model requires fixed 512x512 input and outputs 2048x2048.
//
import Accelerate
import CoreML
import CoreVideo
import Foundation
import os
/// Real-ESRGAN Core ML model processor.
/// Note: This model has a fixed input size of 512x512 and a 2048x2048 output.
actor RealESRGANProcessor {
    private var model: MLModel?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "RealESRGANProcessor")
    /// Fixed input size required by the model (512x512)
    static let inputSize: Int = 512
    /// Scale factor (4x for Real-ESRGAN x4plus)
    static let scaleFactor: Int = 4
    /// Output size (inputSize * scaleFactor = 2048)
    static let outputSize: Int = inputSize * scaleFactor // 2048
    init() {}
    /// Load Core ML model from bundle.
    ///
    /// Looks for `RealESRGAN_x4plus.mlmodelc` (compiled) first, then
    /// `.mlpackage`, in the SPM resource bundle and then the main bundle.
    /// Idempotent: a second call while loaded is a no-op.
    /// - Throws: `AIEnhanceError.modelNotFound` if no model file is present,
    ///   `AIEnhanceError.modelLoadFailed` if Core ML fails to load it.
    func loadModel() async throws {
        guard model == nil else {
            logger.debug("Model already loaded")
            return
        }
        logger.info("Loading Real-ESRGAN Core ML model...")
        // Try to find model in bundle
        let modelName = "RealESRGAN_x4plus"
        var modelURL: URL?
        // Try SPM bundle first
        #if SWIFT_PACKAGE
        if let url = Bundle.module.url(forResource: modelName, withExtension: "mlmodelc") {
            modelURL = url
        } else if let url = Bundle.module.url(forResource: modelName, withExtension: "mlpackage") {
            modelURL = url
        }
        #endif
        // Try main bundle
        if modelURL == nil {
            if let url = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") {
                modelURL = url
            } else if let url = Bundle.main.url(forResource: modelName, withExtension: "mlpackage") {
                modelURL = url
            }
        }
        guard let url = modelURL else {
            logger.error("Model file not found: \(modelName)")
            throw AIEnhanceError.modelNotFound
        }
        logger.info("Found model at: \(url.path)")
        // Configure model for optimal performance
        let config = MLModelConfiguration()
        config.computeUnits = .all // Use Neural Engine when available
        do {
            model = try await MLModel.load(contentsOf: url, configuration: config)
            logger.info("Model loaded successfully")
        } catch {
            logger.error("Failed to load model: \(error.localizedDescription)")
            throw AIEnhanceError.modelLoadFailed(error.localizedDescription)
        }
    }
    /// Unload model from memory. Safe to call when no model is loaded.
    func unloadModel() {
        model = nil
        logger.info("Model unloaded from memory")
    }
    /// Process a 512x512 image through the model.
    /// - Parameter pixelBuffer: Input image as CVPixelBuffer (must be 512x512, BGRA format)
    /// - Returns: Enhanced image as RGBA data array (2048x2048)
    /// - Throws: `AIEnhanceError.modelNotFound` if the model is not loaded,
    ///   `AIEnhanceError.inferenceError` on prediction/conversion failure,
    ///   `CancellationError` if the task was cancelled before inference.
    func processImage(_ pixelBuffer: CVPixelBuffer) async throws -> [UInt8] {
        guard let model else {
            throw AIEnhanceError.modelNotFound
        }
        // Verify input size up front — the model's input shape is fixed.
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        guard width == Self.inputSize, height == Self.inputSize else {
            throw AIEnhanceError.inferenceError(
                "Invalid input size \(width)x\(height), expected \(Self.inputSize)x\(Self.inputSize)"
            )
        }
        // Cooperative cancellation before the expensive inference starts.
        try Task.checkCancellation()
        logger.info("Running inference on \(width)x\(height) image...")
        // Run inference on a background queue (MLModel prediction is
        // thread-safe). The continuation is resumed exactly once on every
        // path below. `multiArrayToRGBA` is `nonisolated` so it can be
        // invoked synchronously from this non-actor context.
        let output: [UInt8] = try await withCheckedThrowingContinuation { continuation in
            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    // Create input feature from pixel buffer
                    let inputFeature = try MLFeatureValue(pixelBuffer: pixelBuffer)
                    let inputProvider = try MLDictionaryFeatureProvider(
                        dictionary: ["input": inputFeature]
                    )
                    // Run inference synchronously
                    let prediction = try model.prediction(from: inputProvider)
                    // Extract output from model.
                    // The model outputs to "activation_out" as either MultiArray or Image.
                    let rgbaData: [UInt8]
                    if let outputValue = prediction.featureValue(for: "activation_out") {
                        if let multiArray = outputValue.multiArrayValue {
                            // Output is MLMultiArray with shape [C, H, W]
                            self.logger.info("Output is MultiArray: \(multiArray.shape)")
                            rgbaData = try self.multiArrayToRGBA(multiArray)
                        } else if let outputBuffer = outputValue.imageBufferValue {
                            // Output is CVPixelBuffer (image)
                            let outWidth = CVPixelBufferGetWidth(outputBuffer)
                            let outHeight = CVPixelBufferGetHeight(outputBuffer)
                            self.logger.info("Output is Image: \(outWidth)x\(outHeight)")
                            rgbaData = try ImageFormatConverter.pixelBufferToRGBAData(outputBuffer)
                        } else {
                            continuation.resume(throwing: AIEnhanceError.inferenceError(
                                "Cannot extract data from model output"
                            ))
                            return
                        }
                    } else {
                        continuation.resume(throwing: AIEnhanceError.inferenceError(
                            "Model output 'activation_out' not found"
                        ))
                        return
                    }
                    continuation.resume(returning: rgbaData)
                } catch let error as AIEnhanceError {
                    continuation.resume(throwing: error)
                } catch {
                    continuation.resume(throwing: AIEnhanceError.inferenceError(error.localizedDescription))
                }
            }
        }
        return output
    }
    /// Convert an `MLMultiArray` in planar CHW layout to an interleaved RGBA
    /// byte array.
    ///
    /// `nonisolated` so it can be called synchronously from the background
    /// inference queue without hopping onto the actor; it touches only the
    /// immutable `logger`, never actor state.
    /// - Parameter multiArray: Output from the model with shape [3, H, W]
    ///   (a leading batch dimension of 1, i.e. [1, 3, H, W], is tolerated).
    /// - Returns: RGBA byte array of length H * W * 4, alpha fixed at 255.
    /// - Throws: `AIEnhanceError.inferenceError` for an unexpected data type
    ///   or shape.
    private nonisolated func multiArrayToRGBA(_ multiArray: MLMultiArray) throws -> [UInt8] {
        // The pointer below is reinterpreted as Float32; verify the element
        // type first so float16/double models fail loudly instead of
        // producing garbage pixels.
        guard multiArray.dataType == .float32 else {
            throw AIEnhanceError.inferenceError(
                "Unexpected output data type: \(multiArray.dataType), expected float32"
            )
        }
        var shape = multiArray.shape.map { $0.intValue }
        // Tolerate a leading batch dimension of 1: [1, 3, H, W] -> [3, H, W].
        // The flattened memory layout is identical, so indexing is unchanged.
        if shape.count == 4, shape[0] == 1 {
            shape.removeFirst()
        }
        // Expect shape [3, H, W] for RGB
        guard shape.count == 3, shape[0] == 3 else {
            throw AIEnhanceError.inferenceError(
                "Unexpected output shape: \(shape), expected [3, H, W]"
            )
        }
        let channels = shape[0]
        let height = shape[1]
        let width = shape[2]
        logger.info("Converting MultiArray \(channels)x\(height)x\(width) to RGBA")
        // Output array: RGBA format, alpha pre-filled with 255.
        var rgbaData = [UInt8](repeating: 255, count: width * height * 4)
        // Get pointer to MultiArray data (safe: dataType checked above).
        let dataPointer = multiArray.dataPointer.assumingMemoryBound(to: Float32.self)
        let channelStride = height * width
        // Convert CHW (channel-first, planar) to RGBA (interleaved).
        // Values are clamped to [0, 1] then scaled to [0, 255].
        for y in 0..<height {
            for x in 0..<width {
                let pixelIndex = y * width + x
                let rgbaIndex = pixelIndex * 4
                // Read RGB values from CHW layout
                let r = dataPointer[0 * channelStride + pixelIndex]
                let g = dataPointer[1 * channelStride + pixelIndex]
                let b = dataPointer[2 * channelStride + pixelIndex]
                rgbaData[rgbaIndex + 0] = UInt8(clamping: Int(max(0, min(1, r)) * 255))
                rgbaData[rgbaIndex + 1] = UInt8(clamping: Int(max(0, min(1, g)) * 255))
                rgbaData[rgbaIndex + 2] = UInt8(clamping: Int(max(0, min(1, b)) * 255))
                rgbaData[rgbaIndex + 3] = 255 // Alpha
            }
        }
        return rgbaData
    }
}

View File

@@ -0,0 +1,240 @@
//
// WholeImageProcessor.swift
// LivePhotoCore
//
// Processes images for Real-ESRGAN model with fixed 512x512 input.
// Handles scaling, padding, and cropping to preserve original aspect ratio.
//
import CoreGraphics
import CoreVideo
import Foundation
import os
/// Processes images for the Real-ESRGAN model
/// The model requires fixed 512x512 input and outputs 2048x2048
///
/// Pipeline: scale + pad the input to the model's square size, run
/// inference, then crop the padding away and scale to the final size.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")
    /// Process an image through the AI model
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback
    /// - Returns: Enhanced image with original aspect ratio preserved
    /// - Throws: `AIEnhanceError` if any pipeline stage fails.
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height
        logger.info("Processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)
        // Step 1: Scale and pad to 512x512
        let (paddedImage, scaleFactor, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)
        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)
        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)
        // Step 4: Convert output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)
        // Step 5: Crop padding and scale to target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            scaleFactor: scaleFactor,
            paddingInfo: paddingInfo
        )
        progress?(1.0)
        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }
    // MARK: - Private Helpers
    /// Padding information for later extraction
    private struct PaddingInfo {
        let paddedX: Int // X offset of original content in padded image
        let paddedY: Int // Y offset of original content in padded image
        let scaledWidth: Int // Width of original content after scaling
        let scaledHeight: Int // Height of original content after scaling
    }
    /// Prepare input image: scale to fit the model's 512x512 input
    /// (`RealESRGANProcessor.inputSize`) while preserving aspect ratio,
    /// then pad to a square with black borders.
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)
        // Calculate scale to fit within inputSize x inputSize
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )
        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)
        // Calculate padding to center the image
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2
        logger.info("Scaling \(Int(originalWidth))x\(Int(originalHeight)) -> \(scaledWidth)x\(scaledHeight), padding: (\(paddingX), \(paddingY))")
        // Create padded context
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        // NOTE(review): a 4-byte-per-pixel RGB context is assumed here; a
        // grayscale/CMYK source color space may make CGContext init fail
        // (surfacing as `.inputImageInvalid`) — verify with such inputs.
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }
        // Fill with black (or neutral color)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))
        // Draw scaled image centered.
        // CGContext's origin is bottom-left; no Y-flip is performed because
        // the padding is computed symmetrically, so centered placement is
        // origin-agnostic. NOTE(review): when (inputSize - scaledHeight) is
        // odd, integer division makes this bottom-origin offset differ by
        // 1px from the top-origin crop in extractAndScaleOutput — verify.
        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)
        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }
        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )
        return (paddedImage, scale, paddingInfo)
    }
    /// Extract the enhanced content area and scale to final size
    /// - Throws: `AIEnhanceError.inferenceError` if cropping or scaling fails.
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        scaleFactor: CGFloat,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor
        // Calculate crop region in output image (4x the padding info)
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale
        logger.info("Cropping output at (\(cropX), \(cropY)) size \(cropWidth)x\(cropHeight)")
        // Crop the content area
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }
        // Calculate final target size (4x original, capped at a dimension limit while preserving aspect ratio)
        let maxDimension = 4320 // Cap the longest output side at 4320 px
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale
        let targetWidth: Int
        let targetHeight: Int
        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            // Both dimensions fit within limit
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            // Scale down to fit within maxDimension while preserving aspect ratio
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }
        // If cropped image is already the right size, return it
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }
        // Scale to target size
        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }
        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))
        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }
        logger.info("Final image size: \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }
    /// Create CGImage from RGBA pixel data
    /// - Parameters:
    ///   - pixels: Tightly-packed RGBA bytes (width * height * 4)
    ///   - width: Image width in pixels
    ///   - height: Image height in pixels
    /// - Throws: `AIEnhanceError.inferenceError` if image creation fails.
    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        // noneSkipLast: the alpha byte written by the converter is ignored.
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)
        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }
        return image
    }
}
// Keep the old name as a typealias for compatibility
typealias TiledImageProcessor = WholeImageProcessor