to-live-photo/Sources/LivePhotoCore/AIEnhancer/RealESRGANProcessor.swift
4bcad4d4b8 fix: security review P0-P2 issue fixes (26 items)
P0 critical fixes:
- Removed the forced exit(0); language setting changes now take effect on the next launch
- Fixed the LivePhotoValidator hasResumed data race by introducing a thread-safe ResumeOnce guard (see the sketch after this list)
- Fixed the addAssetID(toVideo:) continuation leak by adding writer/reader start-state checks
- Fixed the un-internationalized "Skip" button in OnboardingView
- Fixed hardcoded Chinese for "Follow System" in LanguageManager
- Completed .gitignore entries for AI tool directories
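
A minimal sketch of the ResumeOnce idea named above: a lock-guarded wrapper that resumes a CheckedContinuation at most once, even when completion callbacks race. The type name follows the commit message; its shape and members here are illustrative, not the project's actual implementation.

import Foundation

final class ResumeOnce<T, E: Error>: @unchecked Sendable {
    private let lock = NSLock()
    private var continuation: CheckedContinuation<T, E>?

    init(_ continuation: CheckedContinuation<T, E>) {
        self.continuation = continuation
    }

    /// Hands out the continuation exactly once; later calls get nil.
    private func take() -> CheckedContinuation<T, E>? {
        lock.lock()
        defer { lock.unlock() }
        let taken = continuation
        continuation = nil
        return taken
    }

    func resume(returning value: T) {
        take()?.resume(returning: value)
    }

    func resume(throwing error: E) {
        take()?.resume(throwing: error)
    }
}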

P1 architecture and UI fixes:
- Fixed an actor isolation violation in RealESRGANProcessor
- Added continuation lifecycle protection in ODRManager
- Switched TiledImageProcessor to streaming stitching to lower the peak memory footprint (see the sketch after this list)
- Unified hardcoded colors in EditorView under the design system
- Fixed a cancel/navigation race in ProcessingView
- Added an informed-consent prompt to the feedback diagnostics bundle
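
A sketch of the streaming-stitch idea from the TiledImageProcessor fix: rather than collecting every upscaled tile and joining them at the end, each tile's rows are copied into a preallocated full-size canvas as soon as the tile is produced, so at most one tile output is alive at a time. Function and parameter names here are hypothetical.

/// Stitches upscaled RGBA tiles into a full-size canvas one tile at a time.
/// Assumes every tile is tileSize x tileSize and fits inside the canvas.
func stitchStreaming(
    tileOrigins: [(x: Int, y: Int)],            // top-left of each tile in output space
    tileSize: Int,                              // e.g. 2048 for a Real-ESRGAN output tile
    outputWidth: Int,
    outputHeight: Int,
    upscaleTile: (Int) async throws -> [UInt8]  // produces RGBA bytes for tile i
) async throws -> [UInt8] {
    var canvas = [UInt8](repeating: 0, count: outputWidth * outputHeight * 4)
    for (i, origin) in tileOrigins.enumerated() {
        let tile = try await upscaleTile(i)     // only this tile is held in memory
        for row in 0..<tileSize {
            let src = row * tileSize * 4
            let dst = ((origin.y + row) * outputWidth + origin.x) * 4
            canvas.replaceSubrange(dst..<(dst + tileSize * 4),
                                   with: tile[src..<(src + tileSize * 4)])
        }
        try Task.checkCancellation()            // stay responsive to cancellation
    }
    return canvas
}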

P2 code quality and compliance:
- Unified hardcoded spacing and corner radii in EditorView/WallpaperGuideView under design tokens
- Unified PrivacyPolicyView colors under the design system
- Merged duplicate onChange handlers in HomeView
- Switched PHAuthorizationStatus descriptions to English technical terms
- Redacted assetId in Analytics logs (see the sketch after this list)
- Documented localIdentifier storage in the privacy policy
- Removed orphaned subscription translation keys
- Replaced hardcoded absolute paths in scripts with relative paths
- Fixed a SoftSlider type-mismatch compile error in DesignSystem
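
A sketch of the assetId redaction, assuming the fix logs a short stable digest instead of the raw PHAsset localIdentifier; the helper name and the SHA-256 choice are hypothetical.

import CryptoKit
import Foundation

/// Returns a short digest of an asset identifier that is stable enough to
/// correlate events within a session, without exposing the identifier itself.
func redactedAssetID(_ assetID: String) -> String {
    let digest = SHA256.hash(data: Data(assetID.utf8))
    // Keep the first 4 bytes (8 hex chars) of the SHA-256 digest.
    return digest.prefix(4).map { String(format: "%02x", $0) }.joined()
}

// Hypothetical call site:
// Analytics.log("enhance_started", ["asset": redactedAssetID(asset.localIdentifier)])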

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-07 20:04:41 +08:00


//
// RealESRGANProcessor.swift
// LivePhotoCore
//
// Core ML inference logic for the Real-ESRGAN model.
// The model requires a fixed 512x512 input and outputs 2048x2048.
//

import Accelerate
import CoreML
import CoreVideo
import Foundation
import os

/// Real-ESRGAN Core ML model processor.
/// Note: this model has a fixed input size of 512x512.
actor RealESRGANProcessor {
    private var model: MLModel?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "RealESRGANProcessor")

    /// Fixed input size required by the model (512x512)
    static let inputSize: Int = 512
    /// Scale factor (4x for Real-ESRGAN x4plus)
    static let scaleFactor: Int = 4
    /// Output size (inputSize * scaleFactor = 2048)
    static let outputSize: Int = inputSize * scaleFactor // 2048

    init() {}
    /// Load the Core ML model from ODR or the app bundle.
    func loadModel() async throws {
        guard model == nil else {
            logger.debug("Model already loaded")
            return
        }
        logger.info("Loading Real-ESRGAN Core ML model...")

        // 1. Try ODRManager first (supports both ODR download and bundle fallback).
        var modelURL = await ODRManager.shared.getModelURL()

        // 2. If ODRManager returns nil, fall back to a direct bundle lookup.
        if modelURL == nil {
            let modelName = "RealESRGAN_x4plus"
            // Try the main bundle.
            if let url = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") {
                modelURL = url
            } else if let url = Bundle.main.url(forResource: modelName, withExtension: "mlpackage") {
                modelURL = url
            }
            // Try the SPM bundle (development builds).
            #if SWIFT_PACKAGE
            if modelURL == nil {
                if let url = Bundle.module.url(forResource: modelName, withExtension: "mlmodelc") {
                    modelURL = url
                } else if let url = Bundle.module.url(forResource: modelName, withExtension: "mlpackage") {
                    modelURL = url
                }
            }
            #endif
        }

        guard let url = modelURL else {
            logger.error("Model not found. Please download the AI model first.")
            throw AIEnhanceError.modelNotFound
        }
        logger.info("Found model at: \(url.path)")

        // Configure the model for optimal performance.
        let config = MLModelConfiguration()
        config.computeUnits = .all // Use the Neural Engine when available.

        do {
            model = try await MLModel.load(contentsOf: url, configuration: config)
            logger.info("Model loaded successfully")
        } catch {
            logger.error("Failed to load model: \(error.localizedDescription)")
            throw AIEnhanceError.modelLoadFailed(error.localizedDescription)
        }
    }
    /// Unload the model from memory.
    func unloadModel() {
        model = nil
        logger.info("Model unloaded from memory")
    }
    /// Process a 512x512 image through the model.
    /// - Parameter pixelBuffer: Input image as a CVPixelBuffer (must be 512x512, BGRA format)
    /// - Returns: Enhanced image as an RGBA byte array (2048x2048)
    func processImage(_ pixelBuffer: CVPixelBuffer) async throws -> [UInt8] {
        guard let model else {
            throw AIEnhanceError.modelNotFound
        }

        // Verify the input size.
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        guard width == Self.inputSize, height == Self.inputSize else {
            throw AIEnhanceError.inferenceError(
                "Invalid input size \(width)x\(height), expected \(Self.inputSize)x\(Self.inputSize)"
            )
        }

        // Check for cancellation before starting expensive work.
        try Task.checkCancellation()
        logger.info("Running inference on \(width)x\(height) image...")

        // Capture actor-isolated state before entering the non-isolated closure.
        let localModel = model

        // Run inference on a background queue (MLModel prediction is thread-safe).
        let output: [UInt8] = try await withCheckedThrowingContinuation { continuation in
            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    // Create the input feature from the pixel buffer.
                    // Note: MLFeatureValue(pixelBuffer:) does not throw, so no `try` here.
                    let inputFeature = MLFeatureValue(pixelBuffer: pixelBuffer)
                    let inputProvider = try MLDictionaryFeatureProvider(
                        dictionary: ["input": inputFeature]
                    )

                    // Run inference synchronously.
                    let prediction = try localModel.prediction(from: inputProvider)

                    // Extract the output from the model.
                    let rgbaData: [UInt8]
                    if let outputValue = prediction.featureValue(for: "activation_out") {
                        if let multiArray = outputValue.multiArrayValue {
                            rgbaData = try Self.multiArrayToRGBA(multiArray)
                        } else if let outputBuffer = outputValue.imageBufferValue {
                            rgbaData = try ImageFormatConverter.pixelBufferToRGBAData(outputBuffer)
                        } else {
                            continuation.resume(throwing: AIEnhanceError.inferenceError(
                                "Cannot extract data from model output"
                            ))
                            return
                        }
                    } else {
                        continuation.resume(throwing: AIEnhanceError.inferenceError(
                            "Model output 'activation_out' not found"
                        ))
                        return
                    }
                    continuation.resume(returning: rgbaData)
                } catch let error as AIEnhanceError {
                    continuation.resume(throwing: error)
                } catch {
                    continuation.resume(throwing: AIEnhanceError.inferenceError(error.localizedDescription))
                }
            }
        }

        logger.info("Inference completed, output size: \(output.count) bytes")
        return output
    }
    /// Convert an MLMultiArray in [C, H, W] layout to an RGBA byte array.
    /// - Parameter multiArray: Output from the model with shape [3, H, W] (RGB channels)
    /// - Returns: RGBA byte array of length H * W * 4
    private static func multiArrayToRGBA(_ multiArray: MLMultiArray) throws -> [UInt8] {
        let shape = multiArray.shape.map { $0.intValue }
        // Expect shape [3, H, W] for RGB.
        guard shape.count == 3, shape[0] == 3 else {
            throw AIEnhanceError.inferenceError(
                "Unexpected output shape: \(shape), expected [3, H, W]"
            )
        }
        let height = shape[1]
        let width = shape[2]

        // Output array in RGBA format, alpha preset to 255.
        var rgbaData = [UInt8](repeating: 255, count: width * height * 4)

        // Get a typed pointer to the MultiArray's backing data.
        let dataPointer = multiArray.dataPointer.assumingMemoryBound(to: Float32.self)
        let channelStride = height * width

        // Convert CHW (channel-first) to RGBA (interleaved).
        // Model output is assumed to be in [0, 1]; values are clamped before
        // scaling to [0, 255].
        for y in 0..<height {
            for x in 0..<width {
                let pixelIndex = y * width + x
                let rgbaIndex = pixelIndex * 4
                // Read RGB values from the CHW layout.
                let r = dataPointer[0 * channelStride + pixelIndex]
                let g = dataPointer[1 * channelStride + pixelIndex]
                let b = dataPointer[2 * channelStride + pixelIndex]
                // Clamp to [0, 1] and convert to 0-255.
                rgbaData[rgbaIndex + 0] = UInt8(clamping: Int(max(0, min(1, r)) * 255))
                rgbaData[rgbaIndex + 1] = UInt8(clamping: Int(max(0, min(1, g)) * 255))
                rgbaData[rgbaIndex + 2] = UInt8(clamping: Int(max(0, min(1, b)) * 255))
                rgbaData[rgbaIndex + 3] = 255 // Alpha
            }
        }
        return rgbaData
    }
}
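
A minimal usage sketch for the processor above, assuming a 512x512 BGRA CVPixelBuffer named input has already been prepared and the call site is async:

let processor = RealESRGANProcessor()
try await processor.loadModel()                    // ODR or bundle lookup
let rgba = try await processor.processImage(input) // 2048 x 2048 x 4 RGBA bytes
await processor.unloadModel()                      // release the model when done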