feat: M2-M4 完成,添加 AI 增强、设计系统、App Store 准备
新增功能: - AI 超分辨率模块 (Real-ESRGAN Core ML) - Soft UI 设计系统 (DesignSystem.swift) - 设置页、隐私政策页、引导页 - 最近作品管理器 App Store 准备: - 完善截图 (iPhone 6.7"/6.5", iPad 12.9") - App Store 元数据文档 - 修复应用图标 alpha 通道 - 更新显示名称为 Live Photo Studio 工程配置: - 配置 Git LFS 跟踪 mlmodel 文件 - 添加 Claude skill 开发指南 - 更新 .gitignore 规则 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
207
Sources/LivePhotoCore/AIEnhancer/AIEnhancer.swift
Normal file
207
Sources/LivePhotoCore/AIEnhancer/AIEnhancer.swift
Normal file
@@ -0,0 +1,207 @@
|
||||
//
|
||||
// AIEnhancer.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// AI super-resolution enhancement using Real-ESRGAN Core ML model.
|
||||
//
|
||||
|
||||
import CoreGraphics
|
||||
import CoreML
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
// MARK: - Configuration
|
||||
|
||||
/// Configuration options controlling AI super-resolution.
public struct AIEnhanceConfig: Codable, Sendable, Hashable {
    /// Whether AI super-resolution is applied.
    public var enabled: Bool

    /// Creates a configuration. Enhancement is off by default.
    public init(enabled: Bool = false) {
        self.enabled = enabled
    }

    /// Preset with enhancement turned off.
    public static let disabled = Self(enabled: false)

    /// Preset with enhancement turned on.
    public static let standard = Self(enabled: true)
}
|
||||
|
||||
// MARK: - Result
|
||||
|
||||
/// AI enhancement result.
///
/// Bundles the upscaled image together with size and timing metadata,
/// as produced by `AIEnhancer.enhance(image:progress:)`.
public struct AIEnhanceResult: Sendable {
    /// Enhanced (upscaled) image.
    public let enhancedImage: CGImage

    /// Original image size in pixels, before enhancement.
    public let originalSize: CGSize

    /// Enhanced image size in pixels.
    public let enhancedSize: CGSize

    /// Processing time in milliseconds (wall-clock, measured by the caller).
    public let processingTimeMs: Double
}
|
||||
|
||||
// MARK: - Errors
|
||||
|
||||
/// Failure modes of the AI enhancement pipeline.
public enum AIEnhanceError: Error, Sendable, LocalizedError {
    case modelNotFound
    case modelLoadFailed(String)
    case inputImageInvalid
    case inferenceError(String)
    case memoryPressure
    case cancelled
    case deviceNotSupported

    /// Human-readable description for each failure case.
    public var errorDescription: String? {
        switch self {
        case .modelNotFound:
            return "AI model file not found in bundle"
        case .modelLoadFailed(let detail):
            return "Failed to load AI model: \(detail)"
        case .inputImageInvalid:
            return "Input image is invalid or cannot be processed"
        case .inferenceError(let detail):
            return "AI inference failed: \(detail)"
        case .memoryPressure:
            return "Not enough memory for AI processing"
        case .cancelled:
            return "AI enhancement was cancelled"
        case .deviceNotSupported:
            return "Device does not support AI enhancement"
        }
    }
}
|
||||
|
||||
// MARK: - Progress
|
||||
|
||||
/// Progress callback for AI enhancement
/// - Parameter progress: Value from 0.0 to 1.0
public typealias AIEnhanceProgress = @Sendable (Double) -> Void

// MARK: - Main Actor

/// AI enhancement actor for super-resolution processing.
///
/// Owns the lazily-loaded `RealESRGANProcessor` and serializes access to it
/// through actor isolation. Typical flow: check `AIEnhancer.isAvailable()`,
/// optionally `preloadModel()`, then call `enhance(image:progress:)`.
public actor AIEnhancer {
    // Immutable configuration captured at init; only `enabled` is consulted.
    private let config: AIEnhanceConfig
    // Loaded on demand; nil until first use or after `unloadModel()`.
    private var processor: RealESRGANProcessor?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "AIEnhancer")

    /// Scale factor (4 for Real-ESRGAN x4plus)
    public static let scaleFactor: Int = 4

    /// Initialize with configuration
    public init(config: AIEnhanceConfig = .standard) {
        self.config = config
    }

    // MARK: - Device Capability

    /// Check if AI enhancement is available on this device.
    /// Requires iOS 17+ and at least 4 GB of physical memory.
    public static func isAvailable() -> Bool {
        // Require iOS 17+
        guard #available(iOS 17.0, *) else {
            return false
        }

        // Check device memory (require at least 4GB)
        let totalMemory = ProcessInfo.processInfo.physicalMemory
        let memoryGB = Double(totalMemory) / (1024 * 1024 * 1024)
        guard memoryGB >= 4.0 else {
            return false
        }

        // Neural Engine is available on A12+ (iPhone XS and later)
        // iOS 17 requirement ensures A12+ is present
        return true
    }

    // MARK: - Model Management

    /// Preload the model (call during app launch or settings change).
    /// No-op if the model is already loaded.
    /// - Throws: `AIEnhanceError.deviceNotSupported` if `isAvailable()` is false,
    ///   or whatever `RealESRGANProcessor.loadModel()` throws.
    public func preloadModel() async throws {
        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }

        guard processor == nil else {
            logger.debug("Model already loaded")
            return
        }

        logger.info("Preloading Real-ESRGAN model...")

        processor = RealESRGANProcessor()
        // NOTE(review): if loadModel() throws, `processor` stays non-nil with no
        // model loaded, so a later enhance() skips the reload and fails with
        // modelNotFound — confirm whether processor should be reset to nil here.
        try await processor?.loadModel()

        logger.info("Model preloaded successfully")
    }

    /// Release model from memory
    public func unloadModel() async {
        await processor?.unloadModel()
        processor = nil
        logger.info("Model unloaded")
    }

    // MARK: - Enhancement

    /// Enhance a single image with AI super-resolution
    /// - Parameters:
    ///   - image: Input CGImage to enhance
    ///   - progress: Optional progress callback (0.0 to 1.0)
    /// - Returns: Enhanced result with metadata
    /// - Throws: `AIEnhanceError` when disabled, unsupported, or inference fails.
    public func enhance(
        image: CGImage,
        progress: AIEnhanceProgress? = nil
    ) async throws -> AIEnhanceResult {
        // NOTE(review): throwing .inputImageInvalid when the *config* is
        // disabled is misleading — the input image is fine; consider a
        // dedicated error case.
        guard config.enabled else {
            throw AIEnhanceError.inputImageInvalid
        }

        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }

        let startTime = CFAbsoluteTimeGetCurrent()
        let originalSize = CGSize(width: image.width, height: image.height)

        logger.info("Starting AI enhancement: \(image.width)x\(image.height)")

        // Ensure model is loaded
        if processor == nil {
            try await preloadModel()
        }

        guard let processor = processor else {
            throw AIEnhanceError.modelNotFound
        }

        // Process the whole image in a single pass (no tiling): the model takes
        // a fixed square input (RealESRGANProcessor.inputSize = 512) and
        // WholeImageProcessor handles the scale/pad/crop around it.
        let wholeImageProcessor = WholeImageProcessor()

        let enhancedImage = try await wholeImageProcessor.processImage(
            image,
            processor: processor,
            progress: progress
        )

        let processingTime = (CFAbsoluteTimeGetCurrent() - startTime) * 1000
        let enhancedSize = CGSize(width: enhancedImage.width, height: enhancedImage.height)

        logger.info(
            "AI enhancement complete: \(Int(originalSize.width))x\(Int(originalSize.height)) -> \(Int(enhancedSize.width))x\(Int(enhancedSize.height)) in \(Int(processingTime))ms"
        )

        return AIEnhanceResult(
            enhancedImage: enhancedImage,
            originalSize: originalSize,
            enhancedSize: enhancedSize,
            processingTimeMs: processingTime
        )
    }
}
|
||||
261
Sources/LivePhotoCore/AIEnhancer/ImageFormatConverter.swift
Normal file
261
Sources/LivePhotoCore/AIEnhancer/ImageFormatConverter.swift
Normal file
@@ -0,0 +1,261 @@
|
||||
//
|
||||
// ImageFormatConverter.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Utilities for converting between CGImage and CVPixelBuffer formats.
|
||||
//
|
||||
|
||||
import Accelerate
|
||||
import CoreGraphics
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import VideoToolbox
|
||||
|
||||
/// Utilities for image format conversion between `CGImage`, `CVPixelBuffer`,
/// and raw interleaved RGBA byte arrays used by the Core ML enhancement
/// pipeline.
enum ImageFormatConverter {
    /// Convert CGImage to CVPixelBuffer for Core ML input
    /// - Parameters:
    ///   - image: Input CGImage
    ///   - pixelFormat: Output pixel format (default BGRA)
    /// - Returns: CVPixelBuffer ready for Core ML
    /// - Throws: `AIEnhanceError.inputImageInvalid` if the buffer or bitmap
    ///   context cannot be created.
    static func cgImageToPixelBuffer(
        _ image: CGImage,
        pixelFormat: OSType = kCVPixelFormatType_32BGRA
    ) throws -> CVPixelBuffer {
        let width = image.width
        let height = image.height

        // Create pixel buffer
        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
            kCVPixelBufferMetalCompatibilityKey: true,
        ]

        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            pixelFormat,
            attrs as CFDictionary,
            &pixelBuffer
        )

        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Lock buffer for writing; defer guarantees the unlock on every exit path.
        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }

        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        let colorSpace = CGColorSpaceCreateDeviceRGB()

        // Create bitmap context backed directly by the pixel buffer's memory.
        // premultipliedFirst + byteOrder32Little is BGRA in memory, matching
        // the default `pixelFormat`.
        // NOTE(review): if a caller passes a non-BGRA `pixelFormat`, this fixed
        // bitmapInfo would not match it — confirm all call sites use the default.
        guard
            let context = CGContext(
                data: baseAddress,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: bytesPerRow,
                space: colorSpace,
                bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
                    | CGBitmapInfo.byteOrder32Little.rawValue
            )
        else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Draw image into context (this converts the format)
        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))

        return buffer
    }

    /// Convert CVPixelBuffer to CGImage
    /// - Parameter pixelBuffer: Input pixel buffer
    /// - Returns: CGImage representation
    /// - Throws: `AIEnhanceError.inferenceError` if conversion produced no image.
    static func pixelBufferToCGImage(_ pixelBuffer: CVPixelBuffer) throws -> CGImage {
        var cgImage: CGImage?
        // The OSStatus return value is ignored; failure is detected via the nil
        // check below. NOTE(review): checking the status would give a better
        // diagnostic than the generic message.
        VTCreateCGImageFromCVPixelBuffer(pixelBuffer, options: nil, imageOut: &cgImage)

        guard let image = cgImage else {
            throw AIEnhanceError.inferenceError("Failed to create CGImage from pixel buffer")
        }

        return image
    }

    /// Extract raw RGBA pixel data from CVPixelBuffer
    /// - Parameter pixelBuffer: Input pixel buffer (BGRA, RGBA, or ARGB)
    /// - Returns: Array of interleaved RGBA bytes, row-major, width*height*4 long
    /// - Throws: `AIEnhanceError.inferenceError` for inaccessible data or an
    ///   unsupported pixel format.
    static func pixelBufferToRGBAData(_ pixelBuffer: CVPixelBuffer) throws -> [UInt8] {
        CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)

        guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
            throw AIEnhanceError.inferenceError("Cannot access pixel buffer data")
        }

        let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)

        // Handle BGRA format (most common from Core ML)
        if pixelFormat == kCVPixelFormatType_32BGRA {
            return convertBGRAToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }

        // Handle RGBA format: already the target layout, so copy row by row
        // (rows may be padded, hence per-row memcpy rather than one bulk copy).
        if pixelFormat == kCVPixelFormatType_32RGBA {
            var result = [UInt8](repeating: 0, count: width * height * 4)
            for y in 0..<height {
                let srcRow = baseAddress.advanced(by: y * bytesPerRow)
                let dstOffset = y * width * 4
                memcpy(&result[dstOffset], srcRow, width * 4)
            }
            return result
        }

        // Handle ARGB format
        if pixelFormat == kCVPixelFormatType_32ARGB {
            return convertARGBToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }

        throw AIEnhanceError.inferenceError("Unsupported pixel format: \(pixelFormat)")
    }

    /// Create CVPixelBuffer from raw RGBA data
    /// - Parameters:
    ///   - rgbaData: RGBA pixel data (interleaved, row-major, width*height*4 bytes)
    ///   - width: Image width
    ///   - height: Image height
    /// - Returns: BGRA-formatted CVPixelBuffer
    /// - Throws: `AIEnhanceError.inputImageInvalid` on buffer creation failure.
    static func rgbaDataToPixelBuffer(
        _ rgbaData: [UInt8],
        width: Int,
        height: Int
    ) throws -> CVPixelBuffer {
        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
        ]

        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32BGRA,
            attrs as CFDictionary,
            &pixelBuffer
        )

        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }

        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }

        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)

        // Convert RGBA to BGRA while copying
        for y in 0..<height {
            let dstRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let srcOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = srcOffset + x * 4
                let dstIdx = x * 4

                // RGBA -> BGRA swap
                dstRow[dstIdx + 0] = rgbaData[srcIdx + 2] // B
                dstRow[dstIdx + 1] = rgbaData[srcIdx + 1] // G
                dstRow[dstIdx + 2] = rgbaData[srcIdx + 0] // R
                dstRow[dstIdx + 3] = rgbaData[srcIdx + 3] // A
            }
        }

        return buffer
    }

    // MARK: - Private Helpers

    // Per-pixel BGRA -> RGBA channel swap. NOTE(review): Accelerate is already
    // imported; vImagePermuteChannels_ARGB8888 could replace these scalar loops
    // if this shows up in profiles.
    private static func convertBGRAToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)

        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4

                // BGRA -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 2] // R
                result[dstIdx + 1] = srcRow[srcIdx + 1] // G
                result[dstIdx + 2] = srcRow[srcIdx + 0] // B
                result[dstIdx + 3] = srcRow[srcIdx + 3] // A
            }
        }

        return result
    }

    // Per-pixel ARGB -> RGBA channel rotation (alpha moves from first to last).
    private static func convertARGBToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)

        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4

                // ARGB -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 1] // R
                result[dstIdx + 1] = srcRow[srcIdx + 2] // G
                result[dstIdx + 2] = srcRow[srcIdx + 3] // B
                result[dstIdx + 3] = srcRow[srcIdx + 0] // A
            }
        }

        return result
    }
}
|
||||
213
Sources/LivePhotoCore/AIEnhancer/RealESRGANProcessor.swift
Normal file
213
Sources/LivePhotoCore/AIEnhancer/RealESRGANProcessor.swift
Normal file
@@ -0,0 +1,213 @@
|
||||
//
|
||||
// RealESRGANProcessor.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Core ML inference logic for Real-ESRGAN model.
|
||||
// This model requires fixed 512x512 input and outputs 2048x2048.
|
||||
//
|
||||
|
||||
import Accelerate
|
||||
import CoreML
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// Real-ESRGAN Core ML model processor.
/// Note: This model has fixed input size of 512x512 and produces a 2048x2048
/// output (4x upscale). Actor isolation serializes load/unload/inference.
actor RealESRGANProcessor {
    // Loaded Core ML model; nil until loadModel() succeeds or after unloadModel().
    private var model: MLModel?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "RealESRGANProcessor")

    /// Fixed input size required by the model (512x512)
    static let inputSize: Int = 512

    /// Scale factor (4x for Real-ESRGAN x4plus)
    static let scaleFactor: Int = 4

    /// Output size (inputSize * scaleFactor = 2048)
    static let outputSize: Int = inputSize * scaleFactor // 2048

    init() {}

    /// Load Core ML model from bundle.
    /// Searches the SPM resource bundle first, then the main bundle, for
    /// "RealESRGAN_x4plus" as .mlmodelc or .mlpackage. No-op if already loaded.
    /// - Throws: `AIEnhanceError.modelNotFound` if no model file is present,
    ///   `.modelLoadFailed` if Core ML fails to load it.
    func loadModel() async throws {
        guard model == nil else {
            logger.debug("Model already loaded")
            return
        }

        logger.info("Loading Real-ESRGAN Core ML model...")

        // Try to find model in bundle
        let modelName = "RealESRGAN_x4plus"
        var modelURL: URL?

        // Try SPM bundle first
        #if SWIFT_PACKAGE
        if let url = Bundle.module.url(forResource: modelName, withExtension: "mlmodelc") {
            modelURL = url
        } else if let url = Bundle.module.url(forResource: modelName, withExtension: "mlpackage") {
            modelURL = url
        }
        #endif

        // Try main bundle
        if modelURL == nil {
            if let url = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") {
                modelURL = url
            } else if let url = Bundle.main.url(forResource: modelName, withExtension: "mlpackage") {
                modelURL = url
            }
        }

        guard let url = modelURL else {
            logger.error("Model file not found: \(modelName)")
            throw AIEnhanceError.modelNotFound
        }

        logger.info("Found model at: \(url.path)")

        // Configure model for optimal performance
        let config = MLModelConfiguration()
        config.computeUnits = .all // Use Neural Engine when available

        do {
            // NOTE(review): MLModel.load normally expects a compiled .mlmodelc;
            // loading a raw .mlpackage at runtime may fail — confirm, or run
            // MLModel.compileModel(at:) first for that case.
            model = try await MLModel.load(contentsOf: url, configuration: config)
            logger.info("Model loaded successfully")
        } catch {
            logger.error("Failed to load model: \(error.localizedDescription)")
            throw AIEnhanceError.modelLoadFailed(error.localizedDescription)
        }
    }

    /// Unload model from memory
    func unloadModel() {
        model = nil
        logger.info("Model unloaded from memory")
    }

    /// Process a 512x512 image through the model
    /// - Parameter pixelBuffer: Input image as CVPixelBuffer (must be 512x512, BGRA format)
    /// - Returns: Enhanced image as RGBA data array (2048x2048)
    /// - Throws: `AIEnhanceError.modelNotFound` if no model is loaded,
    ///   `.inferenceError` for bad input size or prediction failure,
    ///   `CancellationError` if the task was cancelled before inference.
    func processImage(_ pixelBuffer: CVPixelBuffer) async throws -> [UInt8] {
        guard let model else {
            throw AIEnhanceError.modelNotFound
        }

        // Verify input size
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        guard width == Self.inputSize, height == Self.inputSize else {
            throw AIEnhanceError.inferenceError(
                "Invalid input size \(width)x\(height), expected \(Self.inputSize)x\(Self.inputSize)"
            )
        }

        // Check for cancellation before the (expensive) inference starts.
        try Task.checkCancellation()

        logger.info("Running inference on \(width)x\(height) image...")

        // Run inference on a background queue and bridge back via continuation.
        // Every code path below resumes the continuation exactly once.
        // NOTE(review): the closure calls actor members (`self.logger`,
        // `self.multiArrayToRGBA`) synchronously from a GCD queue — this is
        // flagged under strict (Swift 6) concurrency checking; confirm intent.
        let output: [UInt8] = try await withCheckedThrowingContinuation { continuation in
            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    // Create input feature from pixel buffer.
                    // NOTE(review): the feature name "input" must match the
                    // model's declared input name — verify against the model.
                    let inputFeature = try MLFeatureValue(pixelBuffer: pixelBuffer)
                    let inputProvider = try MLDictionaryFeatureProvider(
                        dictionary: ["input": inputFeature]
                    )

                    // Run inference synchronously
                    let prediction = try model.prediction(from: inputProvider)

                    // Extract output from model
                    // The model outputs to "activation_out" as either MultiArray or Image
                    let rgbaData: [UInt8]

                    if let outputValue = prediction.featureValue(for: "activation_out") {
                        if let multiArray = outputValue.multiArrayValue {
                            // Output is MLMultiArray with shape [C, H, W]
                            self.logger.info("Output is MultiArray: \(multiArray.shape)")
                            rgbaData = try self.multiArrayToRGBA(multiArray)
                        } else if let outputBuffer = outputValue.imageBufferValue {
                            // Output is CVPixelBuffer (image)
                            let outWidth = CVPixelBufferGetWidth(outputBuffer)
                            let outHeight = CVPixelBufferGetHeight(outputBuffer)
                            self.logger.info("Output is Image: \(outWidth)x\(outHeight)")
                            rgbaData = try ImageFormatConverter.pixelBufferToRGBAData(outputBuffer)
                        } else {
                            continuation.resume(throwing: AIEnhanceError.inferenceError(
                                "Cannot extract data from model output"
                            ))
                            return
                        }
                    } else {
                        continuation.resume(throwing: AIEnhanceError.inferenceError(
                            "Model output 'activation_out' not found"
                        ))
                        return
                    }

                    continuation.resume(returning: rgbaData)
                } catch let error as AIEnhanceError {
                    // Preserve our own error type rather than re-wrapping it.
                    continuation.resume(throwing: error)
                } catch {
                    continuation.resume(throwing: AIEnhanceError.inferenceError(error.localizedDescription))
                }
            }
        }

        return output
    }

    /// Convert MLMultiArray [C, H, W] to RGBA byte array
    /// - Parameter multiArray: Output from model with shape [3, H, W] (RGB channels)
    /// - Returns: RGBA byte array with shape [H * W * 4]
    /// - Throws: `AIEnhanceError.inferenceError` on an unexpected shape.
    private func multiArrayToRGBA(_ multiArray: MLMultiArray) throws -> [UInt8] {
        let shape = multiArray.shape.map { $0.intValue }

        // Expect shape [3, H, W] for RGB
        guard shape.count == 3, shape[0] == 3 else {
            throw AIEnhanceError.inferenceError(
                "Unexpected output shape: \(shape), expected [3, H, W]"
            )
        }

        let channels = shape[0]
        let height = shape[1]
        let width = shape[2]

        logger.info("Converting MultiArray \(channels)x\(height)x\(width) to RGBA")

        // Output array: RGBA format
        var rgbaData = [UInt8](repeating: 255, count: width * height * 4)

        // Get pointer to MultiArray data.
        // NOTE(review): assumes the array's dataType is Float32 and storage is
        // contiguous CHW (no custom strides) — confirm against the exported model.
        let dataPointer = multiArray.dataPointer.assumingMemoryBound(to: Float32.self)
        let channelStride = height * width

        // Convert CHW (channel-first) to RGBA (interleaved)
        // Model output is typically in range [0, 1] or [-1, 1], need to scale to [0, 255]
        for y in 0..<height {
            for x in 0..<width {
                let pixelIndex = y * width + x
                let rgbaIndex = pixelIndex * 4

                // Read RGB values from CHW layout
                let r = dataPointer[0 * channelStride + pixelIndex]
                let g = dataPointer[1 * channelStride + pixelIndex]
                let b = dataPointer[2 * channelStride + pixelIndex]

                // Clamp to [0, 1] then scale to 0-255
                // Model typically outputs values in [0, 1] range
                rgbaData[rgbaIndex + 0] = UInt8(clamping: Int(max(0, min(1, r)) * 255))
                rgbaData[rgbaIndex + 1] = UInt8(clamping: Int(max(0, min(1, g)) * 255))
                rgbaData[rgbaIndex + 2] = UInt8(clamping: Int(max(0, min(1, b)) * 255))
                rgbaData[rgbaIndex + 3] = 255 // Alpha
            }
        }

        return rgbaData
    }
}
|
||||
240
Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift
Normal file
240
Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift
Normal file
@@ -0,0 +1,240 @@
|
||||
//
|
||||
// WholeImageProcessor.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Processes images for Real-ESRGAN model with fixed 512x512 input.
|
||||
// Handles scaling, padding, and cropping to preserve original aspect ratio.
|
||||
//
|
||||
|
||||
import CoreGraphics
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// Processes images for the Real-ESRGAN model.
/// The model requires fixed 512x512 input and outputs 2048x2048; this type
/// handles scaling, letterbox padding, and post-inference crop/scale so the
/// original aspect ratio is preserved.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Process an image through the AI model
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback (reported at fixed milestones)
    /// - Returns: Enhanced image with original aspect ratio preserved
    /// - Throws: `AIEnhanceError` when preparation, inference, or output
    ///   conversion fails; `CancellationError` if inference is cancelled.
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale and pad to the model's fixed 512x512 input
        let (paddedImage, _, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop padding and scale to target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Private Helpers

    /// Padding information for later extraction
    private struct PaddingInfo {
        let paddedX: Int // X offset of original content in padded image
        let paddedY: Int // Y offset of original content in padded image
        let scaledWidth: Int // Width of original content after scaling
        let scaledHeight: Int // Height of original content after scaling
    }

    /// Prepare input image: scale to fit the model's 512x512 input
    /// (`RealESRGANProcessor.inputSize`) while preserving aspect ratio, then
    /// pad (letterbox) with black to a square.
    /// - Returns: The padded image, the applied scale factor, and the padding
    ///   geometry needed to crop the model output back out.
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)

        // Calculate scale to fit within inputSize x inputSize
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )

        // NOTE(review): for extreme aspect ratios Int truncation could yield a
        // zero dimension here — consider clamping to at least 1 pixel.
        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)

        // Calculate padding to center the image
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2

        logger.info("Scaling \(Int(originalWidth))x\(Int(originalHeight)) -> \(scaledWidth)x\(scaledHeight), padding: (\(paddingX), \(paddingY))")

        // Create padded context
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Fill with black (or neutral color)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))

        // Draw scaled image centered. CGContext's origin is bottom-left, but
        // because the content is centered the vertical offset is symmetric
        // (up to 1px of integer rounding), so no explicit Y flip is needed.
        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)

        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }

        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )

        return (paddedImage, scale, paddingInfo)
    }

    /// Extract the enhanced content area from the model's square output and
    /// scale it to the final target size (4x original, capped).
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor

        // Calculate crop region in output image (4x the padding info)
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale

        logger.info("Cropping output at (\(cropX), \(cropY)) size \(cropWidth)x\(cropHeight)")

        // Crop the content area
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        // Calculate final target size: 4x the original, scaled down if needed
        // so neither side exceeds maxDimension, preserving aspect ratio.
        let maxDimension = 4320 // cap on the longest side, in pixels
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale

        let targetWidth: Int
        let targetHeight: Int

        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            // Both dimensions fit within limit
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            // Scale down to fit within maxDimension while preserving aspect ratio
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }

        // If cropped image is already the right size, return it
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        // Scale to target size
        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }

        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }

        logger.info("Final image size: \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    /// Create CGImage from RGBA pixel data
    /// - Parameters:
    ///   - pixels: Interleaved RGBA bytes, row-major, `width * height * 4` long
    ///   - width: Image width in pixels
    ///   - height: Image height in pixels
    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        // noneSkipLast: RGBX layout — the alpha byte produced upstream is ignored.
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        return image
    }
}

// Keep the old name as a typealias for compatibility
typealias TiledImageProcessor = WholeImageProcessor
|
||||
@@ -10,6 +10,7 @@ import VideoToolbox
|
||||
public enum LivePhotoBuildStage: String, Codable, Sendable {
|
||||
case normalize
|
||||
case extractKeyFrame
|
||||
case aiEnhance
|
||||
case writePhotoMetadata
|
||||
case writeVideoMetadata
|
||||
case saveToAlbum
|
||||
@@ -131,6 +132,9 @@ public struct ExportParams: Codable, Sendable, Hashable {
|
||||
public var maxDimension: Int
|
||||
public var cropRect: CropRect
|
||||
public var aspectRatio: AspectRatioTemplate
|
||||
public var compatibilityMode: Bool
|
||||
public var targetFrameRate: Int
|
||||
public var aiEnhanceConfig: AIEnhanceConfig
|
||||
|
||||
public init(
|
||||
trimStart: Double = 0,
|
||||
@@ -141,7 +145,10 @@ public struct ExportParams: Codable, Sendable, Hashable {
|
||||
hdrPolicy: HDRPolicy = .toneMapToSDR,
|
||||
maxDimension: Int = 1920,
|
||||
cropRect: CropRect = .full,
|
||||
aspectRatio: AspectRatioTemplate = .original
|
||||
aspectRatio: AspectRatioTemplate = .original,
|
||||
compatibilityMode: Bool = false,
|
||||
targetFrameRate: Int = 60,
|
||||
aiEnhanceConfig: AIEnhanceConfig = .disabled
|
||||
) {
|
||||
self.trimStart = trimStart
|
||||
self.trimEnd = trimEnd
|
||||
@@ -152,6 +159,20 @@ public struct ExportParams: Codable, Sendable, Hashable {
|
||||
self.maxDimension = maxDimension
|
||||
self.cropRect = cropRect
|
||||
self.aspectRatio = aspectRatio
|
||||
self.compatibilityMode = compatibilityMode
|
||||
self.targetFrameRate = targetFrameRate
|
||||
self.aiEnhanceConfig = aiEnhanceConfig
|
||||
}
|
||||
|
||||
/// 应用兼容模式的便捷方法
|
||||
public func withCompatibilityMode() -> ExportParams {
|
||||
var params = self
|
||||
params.compatibilityMode = true
|
||||
params.maxDimension = 720
|
||||
params.targetFrameRate = 30
|
||||
params.codecPolicy = .fallbackH264
|
||||
params.hdrPolicy = .toneMapToSDR
|
||||
return params
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,58 +461,82 @@ public actor LivePhotoBuilder {
|
||||
let assetIdentifier = UUID().uuidString
|
||||
let paths = try cacheManager.makeWorkPaths(workId: workId)
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0))
|
||||
let trimmedVideoURL = try await trimVideo(
|
||||
sourceURL: sourceVideoURL,
|
||||
trimStart: exportParams.trimStart,
|
||||
trimEnd: exportParams.trimEnd,
|
||||
destinationURL: paths.workDir.appendingPathComponent("trimmed.mov")
|
||||
)
|
||||
// 临时文件路径(用于清理)
|
||||
let trimmedURL = paths.workDir.appendingPathComponent("trimmed.mov")
|
||||
let scaledURL = paths.workDir.appendingPathComponent("scaled.mov")
|
||||
let keyPhotoTempURL = paths.workDir.appendingPathComponent("keyPhoto").appendingPathExtension("heic")
|
||||
|
||||
// 关键:将视频变速到约 1 秒,与 metadata.mov 的时间标记匹配
|
||||
// live-wallpaper 项目使用 CMTimeMake(550, 600) = 0.917 秒
|
||||
// 我们使用 1 秒以完全匹配 metadata.mov 的时长
|
||||
let targetDuration = CMTimeMake(value: 550, timescale: 600) // ~0.917 秒,与 live-wallpaper 一致
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0.5))
|
||||
let scaledVideoURL = try await scaleVideoToTargetDuration(
|
||||
sourceURL: trimmedVideoURL,
|
||||
targetDuration: targetDuration,
|
||||
cropRect: exportParams.cropRect,
|
||||
aspectRatio: exportParams.aspectRatio,
|
||||
destinationURL: paths.workDir.appendingPathComponent("scaled.mov")
|
||||
)
|
||||
|
||||
// 计算关键帧时间:目标视频的中间位置(0.5 秒处,与 metadata.mov 的 still-image-time 匹配)
|
||||
let relativeKeyFrameTime = 0.5 // 固定为 0.5 秒,与 metadata.mov 匹配
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .extractKeyFrame, fraction: 0))
|
||||
let keyPhotoURL = try await resolveKeyPhotoURL(
|
||||
videoURL: scaledVideoURL,
|
||||
coverImageURL: coverImageURL,
|
||||
keyFrameTime: relativeKeyFrameTime,
|
||||
destinationURL: paths.workDir.appendingPathComponent("keyPhoto").appendingPathExtension("heic")
|
||||
)
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writePhotoMetadata, fraction: 0))
|
||||
guard let pairedImageURL = addAssetID(
|
||||
assetIdentifier,
|
||||
toImage: keyPhotoURL,
|
||||
saveTo: paths.photoURL
|
||||
) else {
|
||||
throw AppError(code: "LPB-201", stage: .writePhotoMetadata, message: "封面生成失败", underlyingErrorDescription: nil, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
// 内部函数:清理临时文件
|
||||
func cleanupTempFiles() {
|
||||
try? FileManager.default.removeItem(at: trimmedURL)
|
||||
try? FileManager.default.removeItem(at: scaledURL)
|
||||
try? FileManager.default.removeItem(at: keyPhotoTempURL)
|
||||
}
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: 0))
|
||||
let pairedVideoURL = try await addAssetID(assetIdentifier, toVideo: scaledVideoURL, saveTo: paths.pairedVideoURL, stillImageTimeSeconds: relativeKeyFrameTime, progress: { p in
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: p))
|
||||
})
|
||||
do {
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0))
|
||||
let trimmedVideoURL = try await trimVideo(
|
||||
sourceURL: sourceVideoURL,
|
||||
trimStart: exportParams.trimStart,
|
||||
trimEnd: exportParams.trimEnd,
|
||||
destinationURL: trimmedURL
|
||||
)
|
||||
|
||||
logger.info("Generated Live Photo files:")
|
||||
logger.info(" Photo: \(pairedImageURL.path)")
|
||||
logger.info(" Video: \(pairedVideoURL.path)")
|
||||
logger.info(" AssetIdentifier: \(assetIdentifier)")
|
||||
// 关键:将视频变速到约 1 秒,与 metadata.mov 的时间标记匹配
|
||||
let targetDuration = CMTimeMake(value: 550, timescale: 600) // ~0.917 秒,与 live-wallpaper 一致
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0.5))
|
||||
let scaledVideoURL = try await scaleVideoToTargetDuration(
|
||||
sourceURL: trimmedVideoURL,
|
||||
targetDuration: targetDuration,
|
||||
cropRect: exportParams.cropRect,
|
||||
aspectRatio: exportParams.aspectRatio,
|
||||
maxDimension: exportParams.maxDimension,
|
||||
targetFrameRate: exportParams.targetFrameRate,
|
||||
destinationURL: scaledURL
|
||||
)
|
||||
|
||||
return LivePhotoBuildOutput(workId: workId, assetIdentifier: assetIdentifier, pairedImageURL: pairedImageURL, pairedVideoURL: pairedVideoURL)
|
||||
// 计算关键帧时间:目标视频的中间位置(0.5 秒处,与 metadata.mov 的 still-image-time 匹配)
|
||||
let relativeKeyFrameTime = 0.5 // 固定为 0.5 秒,与 metadata.mov 匹配
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .extractKeyFrame, fraction: 0))
|
||||
let keyPhotoURL = try await resolveKeyPhotoURL(
|
||||
videoURL: scaledVideoURL,
|
||||
coverImageURL: coverImageURL,
|
||||
keyFrameTime: relativeKeyFrameTime,
|
||||
destinationURL: keyPhotoTempURL,
|
||||
aiEnhanceConfig: exportParams.aiEnhanceConfig,
|
||||
progress: progress
|
||||
)
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writePhotoMetadata, fraction: 0))
|
||||
guard let pairedImageURL = addAssetID(
|
||||
assetIdentifier,
|
||||
toImage: keyPhotoURL,
|
||||
saveTo: paths.photoURL
|
||||
) else {
|
||||
cleanupTempFiles()
|
||||
throw AppError(code: "LPB-201", stage: .writePhotoMetadata, message: "封面生成失败", underlyingErrorDescription: nil, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
}
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: 0))
|
||||
let pairedVideoURL = try await addAssetID(assetIdentifier, toVideo: scaledVideoURL, saveTo: paths.pairedVideoURL, stillImageTimeSeconds: relativeKeyFrameTime, progress: { p in
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: p))
|
||||
})
|
||||
|
||||
// 清理临时文件(成功后)
|
||||
cleanupTempFiles()
|
||||
|
||||
logger.info("Generated Live Photo files:")
|
||||
logger.info(" Photo: \(pairedImageURL.path)")
|
||||
logger.info(" Video: \(pairedVideoURL.path)")
|
||||
logger.info(" AssetIdentifier: \(assetIdentifier)")
|
||||
|
||||
return LivePhotoBuildOutput(workId: workId, assetIdentifier: assetIdentifier, pairedImageURL: pairedImageURL, pairedVideoURL: pairedVideoURL)
|
||||
} catch {
|
||||
// 清理临时文件(失败后)
|
||||
cleanupTempFiles()
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
private func trimVideo(sourceURL: URL, trimStart: Double, trimEnd: Double, destinationURL: URL) async throws -> URL {
|
||||
@@ -550,13 +595,15 @@ public actor LivePhotoBuilder {
|
||||
}
|
||||
|
||||
/// 将视频处理为 Live Photo 所需的格式
|
||||
/// 包括:时长变速到 ~0.917 秒、裁剪、尺寸调整到 1080x1920(或保持比例)、帧率转换为 60fps
|
||||
/// 完全对齐 live-wallpaper 项目的 accelerateVideo + resizeVideo 流程
|
||||
/// 包括:时长变速到 ~0.917 秒、裁剪、尺寸调整、帧率转换
|
||||
/// 优化:单次导出完成变速+裁剪+缩放(减少一次编码,降低内存峰值)
|
||||
private func scaleVideoToTargetDuration(
|
||||
sourceURL: URL,
|
||||
targetDuration: CMTime,
|
||||
cropRect: CropRect,
|
||||
aspectRatio: AspectRatioTemplate,
|
||||
maxDimension: Int,
|
||||
targetFrameRate: Int,
|
||||
destinationURL: URL
|
||||
) async throws -> URL {
|
||||
let asset = AVURLAsset(url: sourceURL)
|
||||
@@ -573,99 +620,64 @@ public actor LivePhotoBuilder {
|
||||
let naturalSize = try await videoTrack.load(.naturalSize)
|
||||
let preferredTransform = try await videoTrack.load(.preferredTransform)
|
||||
|
||||
// 计算应用 transform 后的尺寸(与 live-wallpaper resizeVideo 一致)
|
||||
// 计算应用 transform 后的尺寸
|
||||
let originalSize = CGSize(width: naturalSize.width, height: naturalSize.height)
|
||||
let transformedSize = originalSize.applying(preferredTransform)
|
||||
let absoluteSize = CGSize(width: abs(transformedSize.width), height: abs(transformedSize.height))
|
||||
|
||||
// 根据 maxDimension 计算基准宽度
|
||||
let baseWidth: CGFloat = maxDimension == 720 ? 720 : 1080
|
||||
let maxHeight: CGFloat = maxDimension == 720 ? 1280 : 1920
|
||||
|
||||
// 根据裁剪和比例计算输出尺寸
|
||||
let outputSize: CGSize
|
||||
if let targetRatio = aspectRatio.ratio {
|
||||
// 根据目标比例决定输出尺寸
|
||||
// 竖屏优先:宽度 1080,高度根据比例计算
|
||||
let width: CGFloat = 1080
|
||||
let width: CGFloat = baseWidth
|
||||
let height = width / targetRatio
|
||||
outputSize = CGSize(width: width, height: min(height, 1920))
|
||||
outputSize = CGSize(width: width, height: min(height, maxHeight))
|
||||
} else {
|
||||
// 原比例:根据源视频方向决定
|
||||
let isLandscape = absoluteSize.width > absoluteSize.height
|
||||
outputSize = isLandscape ? CGSize(width: 1920, height: 1080) : CGSize(width: 1080, height: 1920)
|
||||
}
|
||||
|
||||
// 步骤1:先变速到目标时长(对应 live-wallpaper 的 accelerateVideo)
|
||||
let acceleratedURL = destinationURL.deletingLastPathComponent().appendingPathComponent("accelerated.mov")
|
||||
if FileManager.default.fileExists(atPath: acceleratedURL.path) {
|
||||
try FileManager.default.removeItem(at: acceleratedURL)
|
||||
outputSize = isLandscape ? CGSize(width: maxHeight, height: baseWidth) : CGSize(width: baseWidth, height: maxHeight)
|
||||
}
|
||||
|
||||
// 优化:单次导出完成变速+裁剪+缩放
|
||||
// 使用 AVMutableComposition 进行时间缩放,AVMutableVideoComposition 进行空间变换
|
||||
let composition = AVMutableComposition()
|
||||
guard let compositionVideoTrack = composition.addMutableTrack(withMediaType: .video, preferredTrackID: kCMPersistentTrackID_Invalid) else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "无法创建视频轨道", suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
try compositionVideoTrack.insertTimeRange(CMTimeRange(start: .zero, duration: originalDuration), of: videoTrack, at: .zero)
|
||||
// 变速:将原始时长缩放到目标时长(与 live-wallpaper accelerateVideo 第 287-288 行一致)
|
||||
// 变速:将原始时长缩放到目标时长
|
||||
compositionVideoTrack.scaleTimeRange(CMTimeRange(start: .zero, duration: originalDuration), toDuration: targetDuration)
|
||||
compositionVideoTrack.preferredTransform = preferredTransform
|
||||
|
||||
guard let accelerateExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else {
|
||||
guard let exportSession = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "无法创建导出会话", suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
accelerateExport.outputURL = acceleratedURL
|
||||
accelerateExport.outputFileType = .mov
|
||||
|
||||
await accelerateExport.export()
|
||||
|
||||
guard accelerateExport.status == .completed else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "视频变速失败", underlyingErrorDescription: accelerateExport.error?.localizedDescription, suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
// 步骤2:调整尺寸和帧率(对应 live-wallpaper 的 resizeVideo)
|
||||
let acceleratedAsset = AVURLAsset(url: acceleratedURL)
|
||||
guard let acceleratedVideoTrack = try await acceleratedAsset.loadTracks(withMediaType: .video).first else {
|
||||
return acceleratedURL
|
||||
}
|
||||
|
||||
let acceleratedDuration = try await acceleratedAsset.load(.duration)
|
||||
// 加载加速后视频轨道的属性
|
||||
let acceleratedNaturalSize = try await acceleratedVideoTrack.load(.naturalSize)
|
||||
let acceleratedTransform = try await acceleratedVideoTrack.load(.preferredTransform)
|
||||
|
||||
guard let resizeExport = AVAssetExportSession(asset: acceleratedAsset, presetName: AVAssetExportPresetHighestQuality) else {
|
||||
return acceleratedURL
|
||||
}
|
||||
|
||||
// 关键:使用 AVMutableVideoComposition 设置输出尺寸和帧率
|
||||
// 使用 AVMutableVideoComposition 设置输出尺寸和帧率
|
||||
let videoComposition = AVMutableVideoComposition()
|
||||
videoComposition.renderSize = outputSize
|
||||
// 关键:设置 60fps
|
||||
videoComposition.frameDuration = CMTime(value: 1, timescale: 60)
|
||||
videoComposition.frameDuration = CMTime(value: 1, timescale: CMTimeScale(targetFrameRate))
|
||||
|
||||
let instruction = AVMutableVideoCompositionInstruction()
|
||||
instruction.timeRange = CMTimeRange(start: .zero, duration: acceleratedDuration)
|
||||
instruction.timeRange = CMTimeRange(start: .zero, duration: targetDuration)
|
||||
|
||||
let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: acceleratedVideoTrack)
|
||||
let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: compositionVideoTrack)
|
||||
|
||||
// 关键修复:正确计算变换(支持裁剪)
|
||||
// 变换需要将 naturalSize 坐标系的像素映射到 outputSize 坐标系
|
||||
// 步骤:
|
||||
// 1. 应用 preferredTransform 旋转视频到正确方向
|
||||
// 2. 应用裁剪区域
|
||||
// 3. 根据旋转后的实际尺寸计算缩放和居中
|
||||
|
||||
// 计算旋转后的实际尺寸(用于确定缩放比例)
|
||||
let rotatedSize = acceleratedNaturalSize.applying(acceleratedTransform)
|
||||
// 计算旋转后的实际尺寸
|
||||
let rotatedSize = naturalSize.applying(preferredTransform)
|
||||
let rotatedAbsoluteSize = CGSize(width: abs(rotatedSize.width), height: abs(rotatedSize.height))
|
||||
|
||||
// 计算裁剪后的源区域尺寸
|
||||
let croppedSourceWidth = rotatedAbsoluteSize.width * cropRect.width
|
||||
let croppedSourceHeight = rotatedAbsoluteSize.height * cropRect.height
|
||||
|
||||
// 基于裁剪后尺寸计算缩放因子(填充模式,确保裁剪区域完全覆盖输出)
|
||||
// 基于裁剪后尺寸计算缩放因子(填充模式)
|
||||
let actualWidthRatio = outputSize.width / croppedSourceWidth
|
||||
let actualHeightRatio = outputSize.height / croppedSourceHeight
|
||||
let actualScaleFactor = max(actualWidthRatio, actualHeightRatio) // 使用 max 确保填充
|
||||
let actualScaleFactor = max(actualWidthRatio, actualHeightRatio)
|
||||
|
||||
let scaledWidth = rotatedAbsoluteSize.width * actualScaleFactor
|
||||
let scaledHeight = rotatedAbsoluteSize.height * actualScaleFactor
|
||||
@@ -679,35 +691,25 @@ public actor LivePhotoBuilder {
|
||||
let centerX = outputCenterX - cropCenterX
|
||||
let centerY = outputCenterY - cropCenterY
|
||||
|
||||
// 构建最终变换:
|
||||
// 对于 preferredTransform,它通常包含旋转+平移,平移部分是为了将旋转后的内容移到正坐标
|
||||
// 变换组合顺序(从右到左应用):
|
||||
// 1. 先应用 preferredTransform(旋转+平移到正坐标)
|
||||
// 2. 再缩放
|
||||
// 3. 最后平移到目标中心
|
||||
//
|
||||
// 使用 concatenating: A.concatenating(B) 表示先应用 A,再应用 B
|
||||
// 构建最终变换
|
||||
let scaleTransform = CGAffineTransform(scaleX: actualScaleFactor, y: actualScaleFactor)
|
||||
let translateToCenter = CGAffineTransform(translationX: centerX, y: centerY)
|
||||
let finalTransform = acceleratedTransform.concatenating(scaleTransform).concatenating(translateToCenter)
|
||||
let finalTransform = preferredTransform.concatenating(scaleTransform).concatenating(translateToCenter)
|
||||
|
||||
layerInstruction.setTransform(finalTransform, at: .zero)
|
||||
|
||||
instruction.layerInstructions = [layerInstruction]
|
||||
videoComposition.instructions = [instruction]
|
||||
|
||||
resizeExport.videoComposition = videoComposition
|
||||
resizeExport.outputURL = destinationURL
|
||||
resizeExport.outputFileType = .mov
|
||||
resizeExport.shouldOptimizeForNetworkUse = true
|
||||
exportSession.videoComposition = videoComposition
|
||||
exportSession.outputURL = destinationURL
|
||||
exportSession.outputFileType = .mov
|
||||
exportSession.shouldOptimizeForNetworkUse = true
|
||||
|
||||
await resizeExport.export()
|
||||
await exportSession.export()
|
||||
|
||||
// 清理临时文件
|
||||
try? FileManager.default.removeItem(at: acceleratedURL)
|
||||
|
||||
guard resizeExport.status == .completed else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "视频尺寸调整失败", underlyingErrorDescription: resizeExport.error?.localizedDescription, suggestedActions: ["重试"])
|
||||
guard exportSession.status == .completed else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "视频处理失败", underlyingErrorDescription: exportSession.error?.localizedDescription, suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
return destinationURL
|
||||
@@ -717,7 +719,9 @@ public actor LivePhotoBuilder {
|
||||
videoURL: URL,
|
||||
coverImageURL: URL?,
|
||||
keyFrameTime: Double,
|
||||
destinationURL: URL
|
||||
destinationURL: URL,
|
||||
aiEnhanceConfig: AIEnhanceConfig = .disabled,
|
||||
progress: (@Sendable (LivePhotoBuildProgress) -> Void)? = nil
|
||||
) async throws -> URL {
|
||||
// 最大分辨率限制(对标竞品 1080p)
|
||||
let maxDimension = 1920
|
||||
@@ -736,60 +740,75 @@ public actor LivePhotoBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
// 内部函数:缩放图像
|
||||
func scaleImage(_ image: CGImage, maxDim: Int) -> CGImage {
|
||||
let width = image.width
|
||||
let height = image.height
|
||||
let maxSide = max(width, height)
|
||||
if maxSide <= maxDim { return image }
|
||||
|
||||
let scale = CGFloat(maxDim) / CGFloat(maxSide)
|
||||
let newWidth = Int(CGFloat(width) * scale)
|
||||
let newHeight = Int(CGFloat(height) * scale)
|
||||
|
||||
guard let context = CGContext(
|
||||
data: nil, width: newWidth, height: newHeight,
|
||||
bitsPerComponent: 8, bytesPerRow: 0,
|
||||
space: CGColorSpaceCreateDeviceRGB(),
|
||||
bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue
|
||||
) else { return image }
|
||||
|
||||
context.interpolationQuality = .high
|
||||
context.draw(image, in: CGRect(x: 0, y: 0, width: newWidth, height: newHeight))
|
||||
return context.makeImage() ?? image
|
||||
// 内部函数:使用 CGImageSource 高效缩放图像(内存优化)
|
||||
func scaleImageFromSource(_ source: CGImageSource, maxDim: Int) -> CGImage? {
|
||||
let options: [CFString: Any] = [
|
||||
kCGImageSourceThumbnailMaxPixelSize: maxDim,
|
||||
kCGImageSourceCreateThumbnailFromImageAlways: true,
|
||||
kCGImageSourceCreateThumbnailWithTransform: true
|
||||
]
|
||||
return CGImageSourceCreateThumbnailAtIndex(source, 0, options as CFDictionary)
|
||||
}
|
||||
|
||||
var finalImage: CGImage
|
||||
|
||||
// 如果用户提供了封面图
|
||||
if let coverImageURL {
|
||||
guard let src = CGImageSourceCreateWithURL(coverImageURL as CFURL, nil),
|
||||
let img = CGImageSourceCreateImageAtIndex(src, 0, nil) else {
|
||||
guard let src = CGImageSourceCreateWithURL(coverImageURL as CFURL, nil) else {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "封面读取失败", underlyingErrorDescription: nil, suggestedActions: ["更换封面图", "重试"])
|
||||
}
|
||||
let scaledImg = scaleImage(img, maxDim: maxDimension)
|
||||
try writeHEIC(scaledImg, to: destinationURL)
|
||||
return destinationURL
|
||||
|
||||
// 使用 CGImageSource 高效缩放,无需加载完整图像到内存
|
||||
if let scaledImg = scaleImageFromSource(src, maxDim: maxDimension) {
|
||||
finalImage = scaledImg
|
||||
} else if let img = CGImageSourceCreateImageAtIndex(src, 0, nil) {
|
||||
// 回退:直接使用原图
|
||||
finalImage = img
|
||||
} else {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "封面读取失败", underlyingErrorDescription: nil, suggestedActions: ["更换封面图", "重试"])
|
||||
}
|
||||
} else {
|
||||
// 从视频抽帧
|
||||
let asset = AVURLAsset(url: videoURL)
|
||||
let imageGenerator = AVAssetImageGenerator(asset: asset)
|
||||
imageGenerator.appliesPreferredTrackTransform = true
|
||||
imageGenerator.requestedTimeToleranceAfter = CMTime(value: 1, timescale: 100)
|
||||
imageGenerator.requestedTimeToleranceBefore = CMTime(value: 1, timescale: 100)
|
||||
// 设置最大尺寸,让 AVAssetImageGenerator 自动缩放
|
||||
imageGenerator.maximumSize = CGSize(width: maxDimension, height: maxDimension)
|
||||
|
||||
let safeSeconds = max(0, min(keyFrameTime, max(0, asset.duration.seconds - 0.1)))
|
||||
let time = CMTime(seconds: safeSeconds, preferredTimescale: asset.duration.timescale)
|
||||
|
||||
do {
|
||||
finalImage = try imageGenerator.copyCGImage(at: time, actualTime: nil)
|
||||
} catch {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "抽帧失败", underlyingErrorDescription: error.localizedDescription, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
}
|
||||
}
|
||||
|
||||
// 从视频抽帧
|
||||
let asset = AVURLAsset(url: videoURL)
|
||||
let imageGenerator = AVAssetImageGenerator(asset: asset)
|
||||
imageGenerator.appliesPreferredTrackTransform = true
|
||||
imageGenerator.requestedTimeToleranceAfter = CMTime(value: 1, timescale: 100)
|
||||
imageGenerator.requestedTimeToleranceBefore = CMTime(value: 1, timescale: 100)
|
||||
// 设置最大尺寸,让 AVAssetImageGenerator 自动缩放
|
||||
imageGenerator.maximumSize = CGSize(width: maxDimension, height: maxDimension)
|
||||
// AI 超分辨率增强(如果启用)
|
||||
if aiEnhanceConfig.enabled && AIEnhancer.isAvailable() {
|
||||
progress?(LivePhotoBuildProgress(stage: .aiEnhance, fraction: 0))
|
||||
logger.info("Starting AI enhancement for cover image: \(finalImage.width)x\(finalImage.height)")
|
||||
|
||||
let safeSeconds = max(0, min(keyFrameTime, max(0, asset.duration.seconds - 0.1)))
|
||||
let time = CMTime(seconds: safeSeconds, preferredTimescale: asset.duration.timescale)
|
||||
do {
|
||||
let enhancer = AIEnhancer(config: aiEnhanceConfig)
|
||||
try await enhancer.preloadModel()
|
||||
|
||||
let cgImage: CGImage
|
||||
do {
|
||||
cgImage = try imageGenerator.copyCGImage(at: time, actualTime: nil)
|
||||
} catch {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "抽帧失败", underlyingErrorDescription: error.localizedDescription, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
let result = try await enhancer.enhance(image: finalImage) { p in
|
||||
progress?(LivePhotoBuildProgress(stage: .aiEnhance, fraction: p))
|
||||
}
|
||||
|
||||
finalImage = result.enhancedImage
|
||||
logger.info("AI enhancement complete: \(Int(result.originalSize.width))x\(Int(result.originalSize.height)) -> \(Int(result.enhancedSize.width))x\(Int(result.enhancedSize.height)) in \(Int(result.processingTimeMs))ms")
|
||||
} catch {
|
||||
// AI 增强失败时静默降级,使用原图
|
||||
logger.error("AI enhancement failed, using original image: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
try writeHEIC(cgImage, to: destinationURL)
|
||||
try writeHEIC(finalImage, to: destinationURL)
|
||||
return destinationURL
|
||||
}
|
||||
|
||||
|
||||
BIN
Sources/LivePhotoCore/Resources/RealESRGAN_x4plus.mlmodel
LFS
Normal file
BIN
Sources/LivePhotoCore/Resources/RealESRGAN_x4plus.mlmodel
LFS
Normal file
Binary file not shown.
Reference in New Issue
Block a user