feat: M2-M4 完成,添加 AI 增强、设计系统、App Store 准备
新增功能: - AI 超分辨率模块 (Real-ESRGAN Core ML) - Soft UI 设计系统 (DesignSystem.swift) - 设置页、隐私政策页、引导页 - 最近作品管理器 App Store 准备: - 完善截图 (iPhone 6.7"/6.5", iPad 12.9") - App Store 元数据文档 - 修复应用图标 alpha 通道 - 更新显示名称为 Live Photo Studio 工程配置: - 配置 Git LFS 跟踪 mlmodel 文件 - 添加 Claude skill 开发指南 - 更新 .gitignore 规则 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
207
Sources/LivePhotoCore/AIEnhancer/AIEnhancer.swift
Normal file
207
Sources/LivePhotoCore/AIEnhancer/AIEnhancer.swift
Normal file
@@ -0,0 +1,207 @@
|
||||
//
|
||||
// AIEnhancer.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// AI super-resolution enhancement using Real-ESRGAN Core ML model.
|
||||
//
|
||||
|
||||
import CoreGraphics
|
||||
import CoreML
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
// MARK: - Configuration
|
||||
|
||||
/// Configuration for AI super-resolution enhancement.
public struct AIEnhanceConfig: Codable, Sendable, Hashable {
    /// Whether AI super-resolution is applied.
    public var enabled: Bool

    /// Preset with enhancement turned off.
    public static let disabled = AIEnhanceConfig(enabled: false)

    /// Preset with enhancement turned on.
    public static let standard = AIEnhanceConfig(enabled: true)

    /// Create a configuration.
    /// - Parameter enabled: Whether enhancement is on; defaults to `false`.
    public init(enabled: Bool = false) {
        self.enabled = enabled
    }
}
|
||||
|
||||
// MARK: - Result
|
||||
|
||||
/// AI enhancement result.
///
/// Carries the upscaled image together with before/after dimensions and the
/// wall-clock processing time measured by `AIEnhancer.enhance(image:progress:)`.
/// No public initializer is declared, so instances are created only inside
/// this module (via the internal memberwise initializer).
public struct AIEnhanceResult: Sendable {
    /// Enhanced (upscaled) image produced by the model pipeline.
    public let enhancedImage: CGImage

    /// Size of the input image in pixels, before enhancement.
    public let originalSize: CGSize

    /// Size of `enhancedImage` in pixels.
    public let enhancedSize: CGSize

    /// End-to-end processing time in milliseconds (includes lazy model load
    /// on the first call — see `AIEnhancer.enhance`).
    public let processingTimeMs: Double
}
|
||||
|
||||
// MARK: - Errors
|
||||
|
||||
/// Failure modes of the AI enhancement pipeline.
public enum AIEnhanceError: Error, Sendable, LocalizedError {
    case modelNotFound
    case modelLoadFailed(String)
    case inputImageInvalid
    case inferenceError(String)
    case memoryPressure
    case cancelled
    case deviceNotSupported

    /// Human-readable description for each failure mode.
    public var errorDescription: String? {
        switch self {
        case .modelNotFound:
            return "AI model file not found in bundle"
        case .modelLoadFailed(let reason):
            return "Failed to load AI model: \(reason)"
        case .inputImageInvalid:
            return "Input image is invalid or cannot be processed"
        case .inferenceError(let reason):
            return "AI inference failed: \(reason)"
        case .memoryPressure:
            return "Not enough memory for AI processing"
        case .cancelled:
            return "AI enhancement was cancelled"
        case .deviceNotSupported:
            return "Device does not support AI enhancement"
        }
    }
}
|
||||
|
||||
// MARK: - Progress

/// Progress callback for AI enhancement.
/// `@Sendable` because it may be invoked from concurrent contexts
/// (it is forwarded into the processing pipeline across actor boundaries).
/// - Parameter progress: Fraction complete, from 0.0 to 1.0.
public typealias AIEnhanceProgress = @Sendable (Double) -> Void
|
||||
|
||||
// MARK: - Main Actor

/// AI enhancement actor for super-resolution processing.
///
/// Lazily loads the Real-ESRGAN Core ML model on first use and runs the
/// scale → infer → crop pipeline via `WholeImageProcessor`.
public actor AIEnhancer {
    private let config: AIEnhanceConfig

    /// Underlying Core ML processor; nil until loaded (see `preloadModel()`).
    private var processor: RealESRGANProcessor?

    private let logger = Logger(subsystem: "LivePhotoCore", category: "AIEnhancer")

    /// Scale factor (4 for Real-ESRGAN x4plus)
    public static let scaleFactor: Int = 4

    /// Initialize with configuration.
    /// - Parameter config: Enhancement settings; defaults to `.standard` (enabled).
    public init(config: AIEnhanceConfig = .standard) {
        self.config = config
    }

    // MARK: - Device Capability

    /// Check if AI enhancement is available on this device.
    ///
    /// Requires iOS 17+ and at least 4 GB of physical memory.
    public static func isAvailable() -> Bool {
        // Require iOS 17+
        guard #available(iOS 17.0, *) else {
            return false
        }

        // Check device memory (require at least 4GB) for the model plus
        // intermediate pixel buffers.
        let totalMemory = ProcessInfo.processInfo.physicalMemory
        let memoryGB = Double(totalMemory) / (1024 * 1024 * 1024)
        guard memoryGB >= 4.0 else {
            return false
        }

        // Neural Engine is available on A12+ (iPhone XS and later);
        // the iOS 17 requirement ensures A12+ is present.
        return true
    }

    // MARK: - Model Management

    /// Preload the model (call during app launch or settings change).
    /// - Throws: `AIEnhanceError.deviceNotSupported` on unsupported hardware,
    ///   or a load error propagated from `RealESRGANProcessor.loadModel()`.
    public func preloadModel() async throws {
        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }

        guard processor == nil else {
            logger.debug("Model already loaded")
            return
        }

        logger.info("Preloading Real-ESRGAN model...")

        processor = RealESRGANProcessor()
        try await processor?.loadModel()

        logger.info("Model preloaded successfully")
    }

    /// Release model from memory (e.g. under memory pressure or when the
    /// feature is toggled off). A later `enhance` call reloads it lazily.
    public func unloadModel() async {
        await processor?.unloadModel()
        processor = nil
        logger.info("Model unloaded")
    }

    // MARK: - Enhancement

    /// Enhance a single image with AI super-resolution.
    /// - Parameters:
    ///   - image: Input CGImage to enhance
    ///   - progress: Optional progress callback (0.0 to 1.0)
    /// - Returns: Enhanced result with metadata
    /// - Throws: `AIEnhanceError` when disabled, unsupported, or inference fails.
    public func enhance(
        image: CGImage,
        progress: AIEnhanceProgress? = nil
    ) async throws -> AIEnhanceResult {
        // NOTE(review): throwing .inputImageInvalid for a *disabled* config is
        // semantically misleading; a dedicated case would be clearer, but
        // adding one is source-breaking for callers that switch exhaustively.
        guard config.enabled else {
            throw AIEnhanceError.inputImageInvalid
        }

        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }

        let startTime = CFAbsoluteTimeGetCurrent()
        let originalSize = CGSize(width: image.width, height: image.height)

        logger.info("Starting AI enhancement: \(image.width)x\(image.height)")

        // Ensure model is loaded (lazy first-use path).
        if processor == nil {
            try await preloadModel()
        }

        guard let processor else {
            throw AIEnhanceError.modelNotFound
        }

        // Process the whole frame in one pass — no tiling. The model takes a
        // fixed 512x512 input (RealESRGANProcessor.inputSize); scaling and
        // padding are handled inside WholeImageProcessor.
        let wholeImageProcessor = WholeImageProcessor()

        let enhancedImage = try await wholeImageProcessor.processImage(
            image,
            processor: processor,
            progress: progress
        )

        let processingTime = (CFAbsoluteTimeGetCurrent() - startTime) * 1000
        let enhancedSize = CGSize(width: enhancedImage.width, height: enhancedImage.height)

        logger.info(
            "AI enhancement complete: \(Int(originalSize.width))x\(Int(originalSize.height)) -> \(Int(enhancedSize.width))x\(Int(enhancedSize.height)) in \(Int(processingTime))ms"
        )

        return AIEnhanceResult(
            enhancedImage: enhancedImage,
            originalSize: originalSize,
            enhancedSize: enhancedSize,
            processingTimeMs: processingTime
        )
    }
}
|
||||
261
Sources/LivePhotoCore/AIEnhancer/ImageFormatConverter.swift
Normal file
261
Sources/LivePhotoCore/AIEnhancer/ImageFormatConverter.swift
Normal file
@@ -0,0 +1,261 @@
|
||||
//
|
||||
// ImageFormatConverter.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Utilities for converting between CGImage and CVPixelBuffer formats.
|
||||
//
|
||||
|
||||
import Accelerate
|
||||
import CoreGraphics
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import VideoToolbox
|
||||
|
||||
/// Utilities for converting between CGImage, CVPixelBuffer, and the raw RGBA
/// byte arrays used by the Core ML pipeline.
enum ImageFormatConverter {
    /// Convert CGImage to CVPixelBuffer for Core ML input.
    /// - Parameters:
    ///   - image: Input CGImage
    ///   - pixelFormat: Output pixel format (default BGRA)
    /// - Returns: CVPixelBuffer ready for Core ML
    /// - Throws: `AIEnhanceError.inputImageInvalid` if the buffer or drawing
    ///   context cannot be created.
    static func cgImageToPixelBuffer(
        _ image: CGImage,
        pixelFormat: OSType = kCVPixelFormatType_32BGRA
    ) throws -> CVPixelBuffer {
        let width = image.width
        let height = image.height

        // Create pixel buffer (Metal-compatible so downstream stages can use it).
        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
            kCVPixelBufferMetalCompatibilityKey: true,
        ]

        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            pixelFormat,
            attrs as CFDictionary,
            &pixelBuffer
        )

        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Lock buffer for writing; unlock on every exit path.
        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }

        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        let colorSpace = CGColorSpaceCreateDeviceRGB()

        // Create a bitmap context that renders directly into the pixel buffer.
        // premultipliedFirst + byteOrder32Little corresponds to BGRA in memory.
        guard
            let context = CGContext(
                data: baseAddress,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: bytesPerRow,
                space: colorSpace,
                bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
                    | CGBitmapInfo.byteOrder32Little.rawValue
            )
        else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Draw image into context (this performs the format conversion).
        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))

        return buffer
    }

    /// Convert CVPixelBuffer to CGImage.
    /// - Parameter pixelBuffer: Input pixel buffer
    /// - Returns: CGImage representation
    /// - Throws: `AIEnhanceError.inferenceError` if VideoToolbox cannot create
    ///   the image.
    static func pixelBufferToCGImage(_ pixelBuffer: CVPixelBuffer) throws -> CGImage {
        var cgImage: CGImage?
        // VTCreateCGImageFromCVPixelBuffer reports failure via its OSStatus
        // return value; check it in addition to the out-parameter so the
        // error code is not silently dropped.
        let status = VTCreateCGImageFromCVPixelBuffer(pixelBuffer, options: nil, imageOut: &cgImage)

        guard status == noErr, let image = cgImage else {
            throw AIEnhanceError.inferenceError(
                "Failed to create CGImage from pixel buffer (status \(status))"
            )
        }

        return image
    }

    /// Extract raw RGBA pixel data from CVPixelBuffer.
    /// Supports BGRA, RGBA, and ARGB source formats; anything else throws.
    /// - Parameter pixelBuffer: Input pixel buffer
    /// - Returns: Array of RGBA bytes, tightly packed (width * height * 4)
    static func pixelBufferToRGBAData(_ pixelBuffer: CVPixelBuffer) throws -> [UInt8] {
        CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)

        guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
            throw AIEnhanceError.inferenceError("Cannot access pixel buffer data")
        }

        let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)

        // Handle BGRA format (most common from Core ML)
        if pixelFormat == kCVPixelFormatType_32BGRA {
            return convertBGRAToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }

        // Handle RGBA format: already in target channel order, copy row by row
        // (rows may be padded, so copy width * 4 bytes per row, not bytesPerRow).
        if pixelFormat == kCVPixelFormatType_32RGBA {
            var result = [UInt8](repeating: 0, count: width * height * 4)
            for y in 0..<height {
                let srcRow = baseAddress.advanced(by: y * bytesPerRow)
                let dstOffset = y * width * 4
                memcpy(&result[dstOffset], srcRow, width * 4)
            }
            return result
        }

        // Handle ARGB format
        if pixelFormat == kCVPixelFormatType_32ARGB {
            return convertARGBToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }

        throw AIEnhanceError.inferenceError("Unsupported pixel format: \(pixelFormat)")
    }

    /// Create CVPixelBuffer (BGRA) from raw RGBA data.
    /// - Parameters:
    ///   - rgbaData: RGBA pixel data; must contain at least width * height * 4 bytes
    ///   - width: Image width
    ///   - height: Image height
    /// - Returns: CVPixelBuffer
    /// - Throws: `AIEnhanceError.inputImageInvalid` if the data is too small
    ///   or the buffer cannot be created.
    static func rgbaDataToPixelBuffer(
        _ rgbaData: [UInt8],
        width: Int,
        height: Int
    ) throws -> CVPixelBuffer {
        // Validate up front — the per-pixel copy below indexes rgbaData
        // directly and would read out of bounds on undersized input.
        guard rgbaData.count >= width * height * 4 else {
            throw AIEnhanceError.inputImageInvalid
        }

        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
        ]

        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32BGRA,
            attrs as CFDictionary,
            &pixelBuffer
        )

        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }

        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }

        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)

        // Convert RGBA to BGRA while copying into the (possibly row-padded) buffer.
        for y in 0..<height {
            let dstRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let srcOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = srcOffset + x * 4
                let dstIdx = x * 4

                // RGBA -> BGRA swap
                dstRow[dstIdx + 0] = rgbaData[srcIdx + 2] // B
                dstRow[dstIdx + 1] = rgbaData[srcIdx + 1] // G
                dstRow[dstIdx + 2] = rgbaData[srcIdx + 0] // R
                dstRow[dstIdx + 3] = rgbaData[srcIdx + 3] // A
            }
        }

        return buffer
    }

    // MARK: - Private Helpers

    /// Reorder BGRA rows into a tightly packed RGBA array.
    private static func convertBGRAToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)

        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4

                // BGRA -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 2] // R
                result[dstIdx + 1] = srcRow[srcIdx + 1] // G
                result[dstIdx + 2] = srcRow[srcIdx + 0] // B
                result[dstIdx + 3] = srcRow[srcIdx + 3] // A
            }
        }

        return result
    }

    /// Reorder ARGB rows into a tightly packed RGBA array.
    private static func convertARGBToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)

        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4

                // ARGB -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 1] // R
                result[dstIdx + 1] = srcRow[srcIdx + 2] // G
                result[dstIdx + 2] = srcRow[srcIdx + 3] // B
                result[dstIdx + 3] = srcRow[srcIdx + 0] // A
            }
        }

        return result
    }
}
|
||||
213
Sources/LivePhotoCore/AIEnhancer/RealESRGANProcessor.swift
Normal file
213
Sources/LivePhotoCore/AIEnhancer/RealESRGANProcessor.swift
Normal file
@@ -0,0 +1,213 @@
|
||||
//
|
||||
// RealESRGANProcessor.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Core ML inference logic for Real-ESRGAN model.
|
||||
// This model requires fixed 512x512 input and outputs 2048x2048.
|
||||
//
|
||||
|
||||
import Accelerate
|
||||
import CoreML
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// Real-ESRGAN Core ML model processor.
/// Note: this model has a fixed input size of 512x512 and upscales 4x.
actor RealESRGANProcessor {
    private var model: MLModel?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "RealESRGANProcessor")

    /// Fixed input size required by the model (512x512)
    static let inputSize: Int = 512

    /// Scale factor (4x for Real-ESRGAN x4plus)
    static let scaleFactor: Int = 4

    /// Output size (inputSize * scaleFactor = 2048)
    static let outputSize: Int = inputSize * scaleFactor // 2048

    init() {}

    /// Load the Core ML model from the SPM or main bundle.
    ///
    /// Search order: Bundle.module (SPM) before Bundle.main, and compiled
    /// .mlmodelc before raw .mlpackage in each.
    /// - Throws: `.modelNotFound` if no model file exists,
    ///   `.modelLoadFailed` if Core ML fails to load it.
    func loadModel() async throws {
        guard model == nil else {
            logger.debug("Model already loaded")
            return
        }

        logger.info("Loading Real-ESRGAN Core ML model...")

        let modelName = "RealESRGAN_x4plus"
        var modelURL: URL?

        // Try SPM bundle first
        #if SWIFT_PACKAGE
        if let url = Bundle.module.url(forResource: modelName, withExtension: "mlmodelc") {
            modelURL = url
        } else if let url = Bundle.module.url(forResource: modelName, withExtension: "mlpackage") {
            modelURL = url
        }
        #endif

        // Fall back to the main bundle
        if modelURL == nil {
            if let url = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") {
                modelURL = url
            } else if let url = Bundle.main.url(forResource: modelName, withExtension: "mlpackage") {
                modelURL = url
            }
        }

        guard let url = modelURL else {
            logger.error("Model file not found: \(modelName)")
            throw AIEnhanceError.modelNotFound
        }

        logger.info("Found model at: \(url.path)")

        // Configure model for optimal performance
        let config = MLModelConfiguration()
        config.computeUnits = .all // Use Neural Engine when available

        do {
            model = try await MLModel.load(contentsOf: url, configuration: config)
            logger.info("Model loaded successfully")
        } catch {
            logger.error("Failed to load model: \(error.localizedDescription)")
            throw AIEnhanceError.modelLoadFailed(error.localizedDescription)
        }
    }

    /// Unload model from memory.
    func unloadModel() {
        model = nil
        logger.info("Model unloaded from memory")
    }

    /// Process a 512x512 image through the model.
    /// - Parameter pixelBuffer: Input image as CVPixelBuffer (must be 512x512, BGRA format)
    /// - Returns: Enhanced image as RGBA data array (2048x2048)
    /// - Throws: `.modelNotFound` if the model is not loaded,
    ///   `.inferenceError` on size mismatch or Core ML failure.
    func processImage(_ pixelBuffer: CVPixelBuffer) async throws -> [UInt8] {
        guard let model else {
            throw AIEnhanceError.modelNotFound
        }

        // Verify input size — the model's input shape is fixed.
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        guard width == Self.inputSize, height == Self.inputSize else {
            throw AIEnhanceError.inferenceError(
                "Invalid input size \(width)x\(height), expected \(Self.inputSize)x\(Self.inputSize)"
            )
        }

        // Check for cancellation before committing to an expensive inference.
        try Task.checkCancellation()

        logger.info("Running inference on \(width)x\(height) image...")

        // Run inference off the actor on a background queue (MLModel
        // prediction is thread-safe) and bridge back via a continuation,
        // which must resume exactly once on every path below.
        let output: [UInt8] = try await withCheckedThrowingContinuation { continuation in
            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    // Create input feature from pixel buffer
                    let inputFeature = try MLFeatureValue(pixelBuffer: pixelBuffer)
                    let inputProvider = try MLDictionaryFeatureProvider(
                        dictionary: ["input": inputFeature]
                    )

                    // Run inference synchronously on this queue.
                    let prediction = try model.prediction(from: inputProvider)

                    // Extract output from model.
                    // The model outputs to "activation_out" as either MultiArray or Image.
                    let rgbaData: [UInt8]

                    if let outputValue = prediction.featureValue(for: "activation_out") {
                        if let multiArray = outputValue.multiArrayValue {
                            // Output is MLMultiArray with shape [C, H, W]
                            self.logger.info("Output is MultiArray: \(multiArray.shape)")
                            rgbaData = try self.multiArrayToRGBA(multiArray)
                        } else if let outputBuffer = outputValue.imageBufferValue {
                            // Output is CVPixelBuffer (image)
                            let outWidth = CVPixelBufferGetWidth(outputBuffer)
                            let outHeight = CVPixelBufferGetHeight(outputBuffer)
                            self.logger.info("Output is Image: \(outWidth)x\(outHeight)")
                            rgbaData = try ImageFormatConverter.pixelBufferToRGBAData(outputBuffer)
                        } else {
                            continuation.resume(throwing: AIEnhanceError.inferenceError(
                                "Cannot extract data from model output"
                            ))
                            return
                        }
                    } else {
                        continuation.resume(throwing: AIEnhanceError.inferenceError(
                            "Model output 'activation_out' not found"
                        ))
                        return
                    }

                    continuation.resume(returning: rgbaData)
                } catch let error as AIEnhanceError {
                    continuation.resume(throwing: error)
                } catch {
                    continuation.resume(throwing: AIEnhanceError.inferenceError(error.localizedDescription))
                }
            }
        }

        return output
    }

    /// Convert MLMultiArray [C, H, W] to RGBA byte array.
    ///
    /// `nonisolated` because it is called synchronously from the background
    /// dispatch queue above; it touches no mutable actor state (only the
    /// immutable `logger`), so it is safe outside actor isolation.
    /// - Parameter multiArray: Output from model with shape [3, H, W] (RGB channels)
    /// - Returns: RGBA byte array with shape [H * W * 4]
    private nonisolated func multiArrayToRGBA(_ multiArray: MLMultiArray) throws -> [UInt8] {
        let shape = multiArray.shape.map { $0.intValue }

        // Expect shape [3, H, W] for RGB
        guard shape.count == 3, shape[0] == 3 else {
            throw AIEnhanceError.inferenceError(
                "Unexpected output shape: \(shape), expected [3, H, W]"
            )
        }

        let channels = shape[0]
        let height = shape[1]
        let width = shape[2]

        logger.info("Converting MultiArray \(channels)x\(height)x\(width) to RGBA")

        // Output array: RGBA format, alpha pre-filled with 255.
        var rgbaData = [UInt8](repeating: 255, count: width * height * 4)

        // Get pointer to MultiArray data.
        // NOTE(review): assumes Float32 storage — TODO confirm against the
        // converted model's output dtype.
        let dataPointer = multiArray.dataPointer.assumingMemoryBound(to: Float32.self)
        let channelStride = height * width

        // Convert CHW (channel-first) to RGBA (interleaved), scaling the
        // model's [0, 1] output to [0, 255]. Round rather than truncate so
        // values are not biased darker by up to one level.
        for y in 0..<height {
            for x in 0..<width {
                let pixelIndex = y * width + x
                let rgbaIndex = pixelIndex * 4

                // Read RGB values from CHW layout
                let r = dataPointer[0 * channelStride + pixelIndex]
                let g = dataPointer[1 * channelStride + pixelIndex]
                let b = dataPointer[2 * channelStride + pixelIndex]

                // Clamp to [0, 1], scale, and round to nearest byte value.
                rgbaData[rgbaIndex + 0] = UInt8(clamping: Int((max(0, min(1, r)) * 255).rounded()))
                rgbaData[rgbaIndex + 1] = UInt8(clamping: Int((max(0, min(1, g)) * 255).rounded()))
                rgbaData[rgbaIndex + 2] = UInt8(clamping: Int((max(0, min(1, b)) * 255).rounded()))
                rgbaData[rgbaIndex + 3] = 255 // Alpha
            }
        }

        return rgbaData
    }
}
|
||||
240
Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift
Normal file
240
Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift
Normal file
@@ -0,0 +1,240 @@
|
||||
//
|
||||
// WholeImageProcessor.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Processes images for Real-ESRGAN model with fixed 512x512 input.
|
||||
// Handles scaling, padding, and cropping to preserve original aspect ratio.
|
||||
//
|
||||
|
||||
import CoreGraphics
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// Processes images for the Real-ESRGAN model.
/// The model requires a fixed 512x512 input (`RealESRGANProcessor.inputSize`)
/// and outputs 2048x2048; this type handles the scale/pad/crop bookkeeping
/// needed to preserve the original aspect ratio.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Process an image through the AI model.
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback
    /// - Returns: Enhanced image with original aspect ratio preserved
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale (aspect-preserving) and pad to the model's fixed
        // 512x512 input size.
        let (paddedImage, scaleFactor, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer for Core ML.
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference (the dominant cost — progress jumps to 0.8).
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert raw RGBA output to CGImage.
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop away the padding and scale to the target size.
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            scaleFactor: scaleFactor,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Private Helpers

    /// Padding information for later extraction.
    private struct PaddingInfo {
        let paddedX: Int // X offset of original content in padded image
        let paddedY: Int // Y offset (from the BOTTOM, CG coords) of content in padded image
        let scaledWidth: Int // Width of original content after scaling
        let scaledHeight: Int // Height of original content after scaling
    }

    /// Prepare input image: scale to fit the model's 512x512 input while
    /// preserving aspect ratio, then pad (centered on black) to 512x512.
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)

        // Calculate scale to fit within inputSize x inputSize
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )

        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)

        // Guard against degenerate aspect ratios (e.g. 1 x 100000) where the
        // scaled minor dimension truncates to zero pixels.
        guard scaledWidth > 0, scaledHeight > 0 else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Calculate padding to center the image (integer division: when the
        // leftover is odd, the far side gets the extra pixel).
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2

        logger.info("Scaling \(Int(originalWidth))x\(Int(originalHeight)) -> \(scaledWidth)x\(scaledHeight), padding: (\(paddingX), \(paddingY))")

        // Create padded context
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Fill with black (neutral padding color)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))

        // Draw the scaled image centered. CGContext's origin is bottom-left,
        // so paddingY here is measured from the bottom edge.
        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)

        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }

        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )

        return (paddedImage, scale, paddingInfo)
    }

    /// Extract the enhanced content area and scale to final size.
    /// - Note: `scaleFactor` (the pre-inference downscale) is currently unused
    ///   here but kept in the signature for callers.
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        scaleFactor: CGFloat,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor
        let inputSize = RealESRGANProcessor.inputSize

        // Calculate crop region in the output image (modelScale x the padding info).
        // CGImage.cropping(to:) uses a TOP-LEFT origin, while paddedY was
        // measured from the bottom when drawing. When the total padding is odd
        // the top and bottom margins differ by one pixel, so derive the top
        // offset explicitly instead of reusing the bottom offset (the old code
        // could shift the crop by modelScale pixels and include a padding stripe).
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = (inputSize - paddingInfo.paddedY - paddingInfo.scaledHeight) * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale

        logger.info("Cropping output at (\(cropX), \(cropY)) size \(cropWidth)x\(cropHeight)")

        // Crop the content area
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        // Calculate final target size: 4x original, capped so neither
        // dimension exceeds maxDimension, preserving aspect ratio.
        let maxDimension = 4320 // upper bound on either output dimension
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale

        let targetWidth: Int
        let targetHeight: Int

        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            // Both dimensions fit within limit
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            // Scale down to fit within maxDimension while preserving aspect ratio
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }

        // If cropped image is already the right size, return it
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        // Scale to target size
        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }

        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }

        logger.info("Final image size: \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    /// Create CGImage from raw RGBA pixel data (alpha channel ignored:
    /// noneSkipLast).
    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        return image
    }
}
|
||||
|
||||
// Keep the old name as a typealias for compatibility: the type was renamed
// when tiled processing was replaced by whole-image scale/pad processing,
// and existing call sites may still reference TiledImageProcessor.
typealias TiledImageProcessor = WholeImageProcessor
|
||||
Reference in New Issue
Block a user