feat: M2-M4 完成,添加 AI 增强、设计系统、App Store 准备
新增功能: - AI 超分辨率模块 (Real-ESRGAN Core ML) - Soft UI 设计系统 (DesignSystem.swift) - 设置页、隐私政策页、引导页 - 最近作品管理器 App Store 准备: - 完善截图 (iPhone 6.7"/6.5", iPad 12.9") - App Store 元数据文档 - 修复应用图标 alpha 通道 - 更新显示名称为 Live Photo Studio 工程配置: - 配置 Git LFS 跟踪 mlmodel 文件 - 添加 Claude skill 开发指南 - 更新 .gitignore 规则 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
207
Sources/LivePhotoCore/AIEnhancer/AIEnhancer.swift
Normal file
207
Sources/LivePhotoCore/AIEnhancer/AIEnhancer.swift
Normal file
@@ -0,0 +1,207 @@
|
||||
//
|
||||
// AIEnhancer.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// AI super-resolution enhancement using Real-ESRGAN Core ML model.
|
||||
//
|
||||
|
||||
import CoreGraphics
|
||||
import CoreML
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
// MARK: - Configuration
|
||||
|
||||
/// Configuration options controlling AI super-resolution.
public struct AIEnhanceConfig: Codable, Sendable, Hashable {
    /// Whether AI super-resolution is applied.
    public var enabled: Bool

    /// Creates a configuration. Enhancement is off by default.
    public init(enabled: Bool = false) {
        self.enabled = enabled
    }

    /// Preset with enhancement turned off.
    public static let disabled = Self(enabled: false)

    /// Preset with enhancement turned on.
    public static let standard = Self(enabled: true)
}
|
||||
|
||||
// MARK: - Result
|
||||
|
||||
/// AI enhancement result.
///
/// Bundles the upscaled image together with size and timing metadata,
/// as produced by `AIEnhancer.enhance(image:progress:)`.
public struct AIEnhanceResult: Sendable {
    /// Enhanced (upscaled) image.
    public let enhancedImage: CGImage

    /// Original image size in pixels, before enhancement.
    public let originalSize: CGSize

    /// Enhanced image size in pixels.
    public let enhancedSize: CGSize

    /// Processing time in milliseconds (wall-clock, measured by the caller).
    public let processingTimeMs: Double
}
|
||||
|
||||
// MARK: - Errors
|
||||
|
||||
/// Failure modes of the AI enhancement pipeline.
public enum AIEnhanceError: Error, Sendable, LocalizedError {
    case modelNotFound
    case modelLoadFailed(String)
    case inputImageInvalid
    case inferenceError(String)
    case memoryPressure
    case cancelled
    case deviceNotSupported

    /// Human-readable description for each failure case.
    public var errorDescription: String? {
        switch self {
        case .modelNotFound:
            return "AI model file not found in bundle"
        case .modelLoadFailed(let detail):
            return "Failed to load AI model: \(detail)"
        case .inputImageInvalid:
            return "Input image is invalid or cannot be processed"
        case .inferenceError(let detail):
            return "AI inference failed: \(detail)"
        case .memoryPressure:
            return "Not enough memory for AI processing"
        case .cancelled:
            return "AI enhancement was cancelled"
        case .deviceNotSupported:
            return "Device does not support AI enhancement"
        }
    }
}
|
||||
|
||||
// MARK: - Progress
|
||||
|
||||
/// Progress callback for AI enhancement
/// - Parameter progress: Value from 0.0 to 1.0
public typealias AIEnhanceProgress = @Sendable (Double) -> Void

// MARK: - Main Actor

/// AI enhancement actor for super-resolution processing.
///
/// Owns the lazily-loaded `RealESRGANProcessor` and serializes access to it
/// through actor isolation. Typical flow: check `AIEnhancer.isAvailable()`,
/// optionally `preloadModel()`, then call `enhance(image:progress:)`.
public actor AIEnhancer {
    // Immutable configuration captured at init; only `enabled` is consulted.
    private let config: AIEnhanceConfig
    // Loaded on demand; nil until first use or after `unloadModel()`.
    private var processor: RealESRGANProcessor?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "AIEnhancer")

    /// Scale factor (4 for Real-ESRGAN x4plus)
    public static let scaleFactor: Int = 4

    /// Initialize with configuration
    public init(config: AIEnhanceConfig = .standard) {
        self.config = config
    }

    // MARK: - Device Capability

    /// Check if AI enhancement is available on this device.
    /// Requires iOS 17+ and at least 4 GB of physical memory.
    public static func isAvailable() -> Bool {
        // Require iOS 17+
        guard #available(iOS 17.0, *) else {
            return false
        }

        // Check device memory (require at least 4GB)
        let totalMemory = ProcessInfo.processInfo.physicalMemory
        let memoryGB = Double(totalMemory) / (1024 * 1024 * 1024)
        guard memoryGB >= 4.0 else {
            return false
        }

        // Neural Engine is available on A12+ (iPhone XS and later)
        // iOS 17 requirement ensures A12+ is present
        return true
    }

    // MARK: - Model Management

    /// Preload the model (call during app launch or settings change).
    /// No-op if the model is already loaded.
    /// - Throws: `AIEnhanceError.deviceNotSupported` if `isAvailable()` is false,
    ///   or whatever `RealESRGANProcessor.loadModel()` throws.
    public func preloadModel() async throws {
        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }

        guard processor == nil else {
            logger.debug("Model already loaded")
            return
        }

        logger.info("Preloading Real-ESRGAN model...")

        processor = RealESRGANProcessor()
        // NOTE(review): if loadModel() throws, `processor` stays non-nil with no
        // model loaded, so a later enhance() skips the reload and fails with
        // modelNotFound — confirm whether processor should be reset to nil here.
        try await processor?.loadModel()

        logger.info("Model preloaded successfully")
    }

    /// Release model from memory
    public func unloadModel() async {
        await processor?.unloadModel()
        processor = nil
        logger.info("Model unloaded")
    }

    // MARK: - Enhancement

    /// Enhance a single image with AI super-resolution
    /// - Parameters:
    ///   - image: Input CGImage to enhance
    ///   - progress: Optional progress callback (0.0 to 1.0)
    /// - Returns: Enhanced result with metadata
    /// - Throws: `AIEnhanceError` when disabled, unsupported, or inference fails.
    public func enhance(
        image: CGImage,
        progress: AIEnhanceProgress? = nil
    ) async throws -> AIEnhanceResult {
        // NOTE(review): throwing .inputImageInvalid when the *config* is
        // disabled is misleading — the input image is fine; consider a
        // dedicated error case.
        guard config.enabled else {
            throw AIEnhanceError.inputImageInvalid
        }

        guard AIEnhancer.isAvailable() else {
            throw AIEnhanceError.deviceNotSupported
        }

        let startTime = CFAbsoluteTimeGetCurrent()
        let originalSize = CGSize(width: image.width, height: image.height)

        logger.info("Starting AI enhancement: \(image.width)x\(image.height)")

        // Ensure model is loaded
        if processor == nil {
            try await preloadModel()
        }

        guard let processor = processor else {
            throw AIEnhanceError.modelNotFound
        }

        // Process the whole image in a single pass (no tiling): the model takes
        // a fixed square input (RealESRGANProcessor.inputSize = 512) and
        // WholeImageProcessor handles the scale/pad/crop around it.
        let wholeImageProcessor = WholeImageProcessor()

        let enhancedImage = try await wholeImageProcessor.processImage(
            image,
            processor: processor,
            progress: progress
        )

        let processingTime = (CFAbsoluteTimeGetCurrent() - startTime) * 1000
        let enhancedSize = CGSize(width: enhancedImage.width, height: enhancedImage.height)

        logger.info(
            "AI enhancement complete: \(Int(originalSize.width))x\(Int(originalSize.height)) -> \(Int(enhancedSize.width))x\(Int(enhancedSize.height)) in \(Int(processingTime))ms"
        )

        return AIEnhanceResult(
            enhancedImage: enhancedImage,
            originalSize: originalSize,
            enhancedSize: enhancedSize,
            processingTimeMs: processingTime
        )
    }
}
|
||||
261
Sources/LivePhotoCore/AIEnhancer/ImageFormatConverter.swift
Normal file
261
Sources/LivePhotoCore/AIEnhancer/ImageFormatConverter.swift
Normal file
@@ -0,0 +1,261 @@
|
||||
//
|
||||
// ImageFormatConverter.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Utilities for converting between CGImage and CVPixelBuffer formats.
|
||||
//
|
||||
|
||||
import Accelerate
|
||||
import CoreGraphics
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import VideoToolbox
|
||||
|
||||
/// Utilities for image format conversion between `CGImage`, `CVPixelBuffer`,
/// and raw interleaved RGBA byte arrays used by the Core ML enhancement
/// pipeline.
enum ImageFormatConverter {
    /// Convert CGImage to CVPixelBuffer for Core ML input
    /// - Parameters:
    ///   - image: Input CGImage
    ///   - pixelFormat: Output pixel format (default BGRA)
    /// - Returns: CVPixelBuffer ready for Core ML
    /// - Throws: `AIEnhanceError.inputImageInvalid` if the buffer or bitmap
    ///   context cannot be created.
    static func cgImageToPixelBuffer(
        _ image: CGImage,
        pixelFormat: OSType = kCVPixelFormatType_32BGRA
    ) throws -> CVPixelBuffer {
        let width = image.width
        let height = image.height

        // Create pixel buffer
        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
            kCVPixelBufferMetalCompatibilityKey: true,
        ]

        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            pixelFormat,
            attrs as CFDictionary,
            &pixelBuffer
        )

        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Lock buffer for writing; defer guarantees the unlock on every exit path.
        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }

        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        let colorSpace = CGColorSpaceCreateDeviceRGB()

        // Create bitmap context backed directly by the pixel buffer's memory.
        // premultipliedFirst + byteOrder32Little is BGRA in memory, matching
        // the default `pixelFormat`.
        // NOTE(review): if a caller passes a non-BGRA `pixelFormat`, this fixed
        // bitmapInfo would not match it — confirm all call sites use the default.
        guard
            let context = CGContext(
                data: baseAddress,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: bytesPerRow,
                space: colorSpace,
                bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
                    | CGBitmapInfo.byteOrder32Little.rawValue
            )
        else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Draw image into context (this converts the format)
        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))

        return buffer
    }

    /// Convert CVPixelBuffer to CGImage
    /// - Parameter pixelBuffer: Input pixel buffer
    /// - Returns: CGImage representation
    /// - Throws: `AIEnhanceError.inferenceError` if conversion produced no image.
    static func pixelBufferToCGImage(_ pixelBuffer: CVPixelBuffer) throws -> CGImage {
        var cgImage: CGImage?
        // The OSStatus return value is ignored; failure is detected via the nil
        // check below. NOTE(review): checking the status would give a better
        // diagnostic than the generic message.
        VTCreateCGImageFromCVPixelBuffer(pixelBuffer, options: nil, imageOut: &cgImage)

        guard let image = cgImage else {
            throw AIEnhanceError.inferenceError("Failed to create CGImage from pixel buffer")
        }

        return image
    }

    /// Extract raw RGBA pixel data from CVPixelBuffer
    /// - Parameter pixelBuffer: Input pixel buffer (BGRA, RGBA, or ARGB)
    /// - Returns: Array of interleaved RGBA bytes, row-major, width*height*4 long
    /// - Throws: `AIEnhanceError.inferenceError` for inaccessible data or an
    ///   unsupported pixel format.
    static func pixelBufferToRGBAData(_ pixelBuffer: CVPixelBuffer) throws -> [UInt8] {
        CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)

        guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
            throw AIEnhanceError.inferenceError("Cannot access pixel buffer data")
        }

        let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)

        // Handle BGRA format (most common from Core ML)
        if pixelFormat == kCVPixelFormatType_32BGRA {
            return convertBGRAToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }

        // Handle RGBA format: already the target layout, so copy row by row
        // (rows may be padded, hence per-row memcpy rather than one bulk copy).
        if pixelFormat == kCVPixelFormatType_32RGBA {
            var result = [UInt8](repeating: 0, count: width * height * 4)
            for y in 0..<height {
                let srcRow = baseAddress.advanced(by: y * bytesPerRow)
                let dstOffset = y * width * 4
                memcpy(&result[dstOffset], srcRow, width * 4)
            }
            return result
        }

        // Handle ARGB format
        if pixelFormat == kCVPixelFormatType_32ARGB {
            return convertARGBToRGBA(
                baseAddress: baseAddress,
                width: width,
                height: height,
                bytesPerRow: bytesPerRow
            )
        }

        throw AIEnhanceError.inferenceError("Unsupported pixel format: \(pixelFormat)")
    }

    /// Create CVPixelBuffer from raw RGBA data
    /// - Parameters:
    ///   - rgbaData: RGBA pixel data (interleaved, row-major, width*height*4 bytes)
    ///   - width: Image width
    ///   - height: Image height
    /// - Returns: BGRA-formatted CVPixelBuffer
    /// - Throws: `AIEnhanceError.inputImageInvalid` on buffer creation failure.
    static func rgbaDataToPixelBuffer(
        _ rgbaData: [UInt8],
        width: Int,
        height: Int
    ) throws -> CVPixelBuffer {
        var pixelBuffer: CVPixelBuffer?
        let attrs: [CFString: Any] = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true,
        ]

        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32BGRA,
            attrs as CFDictionary,
            &pixelBuffer
        )

        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw AIEnhanceError.inputImageInvalid
        }

        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
            throw AIEnhanceError.inputImageInvalid
        }

        let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)

        // Convert RGBA to BGRA while copying
        for y in 0..<height {
            let dstRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let srcOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = srcOffset + x * 4
                let dstIdx = x * 4

                // RGBA -> BGRA swap
                dstRow[dstIdx + 0] = rgbaData[srcIdx + 2] // B
                dstRow[dstIdx + 1] = rgbaData[srcIdx + 1] // G
                dstRow[dstIdx + 2] = rgbaData[srcIdx + 0] // R
                dstRow[dstIdx + 3] = rgbaData[srcIdx + 3] // A
            }
        }

        return buffer
    }

    // MARK: - Private Helpers

    // Per-pixel BGRA -> RGBA channel swap. NOTE(review): Accelerate is already
    // imported; vImagePermuteChannels_ARGB8888 could replace these scalar loops
    // if this shows up in profiles.
    private static func convertBGRAToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)

        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4

                // BGRA -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 2] // R
                result[dstIdx + 1] = srcRow[srcIdx + 1] // G
                result[dstIdx + 2] = srcRow[srcIdx + 0] // B
                result[dstIdx + 3] = srcRow[srcIdx + 3] // A
            }
        }

        return result
    }

    // Per-pixel ARGB -> RGBA channel rotation (alpha moves from first to last).
    private static func convertARGBToRGBA(
        baseAddress: UnsafeMutableRawPointer,
        width: Int,
        height: Int,
        bytesPerRow: Int
    ) -> [UInt8] {
        var result = [UInt8](repeating: 0, count: width * height * 4)

        for y in 0..<height {
            let srcRow = baseAddress.advanced(by: y * bytesPerRow).assumingMemoryBound(to: UInt8.self)
            let dstOffset = y * width * 4

            for x in 0..<width {
                let srcIdx = x * 4
                let dstIdx = dstOffset + x * 4

                // ARGB -> RGBA swap
                result[dstIdx + 0] = srcRow[srcIdx + 1] // R
                result[dstIdx + 1] = srcRow[srcIdx + 2] // G
                result[dstIdx + 2] = srcRow[srcIdx + 3] // B
                result[dstIdx + 3] = srcRow[srcIdx + 0] // A
            }
        }

        return result
    }
}
|
||||
213
Sources/LivePhotoCore/AIEnhancer/RealESRGANProcessor.swift
Normal file
213
Sources/LivePhotoCore/AIEnhancer/RealESRGANProcessor.swift
Normal file
@@ -0,0 +1,213 @@
|
||||
//
|
||||
// RealESRGANProcessor.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Core ML inference logic for Real-ESRGAN model.
|
||||
// This model requires fixed 512x512 input and outputs 2048x2048.
|
||||
//
|
||||
|
||||
import Accelerate
|
||||
import CoreML
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// Real-ESRGAN Core ML model processor.
/// Note: This model has fixed input size of 512x512 and produces a 2048x2048
/// output (4x upscale). Actor isolation serializes load/unload/inference.
actor RealESRGANProcessor {
    // Loaded Core ML model; nil until loadModel() succeeds or after unloadModel().
    private var model: MLModel?
    private let logger = Logger(subsystem: "LivePhotoCore", category: "RealESRGANProcessor")

    /// Fixed input size required by the model (512x512)
    static let inputSize: Int = 512

    /// Scale factor (4x for Real-ESRGAN x4plus)
    static let scaleFactor: Int = 4

    /// Output size (inputSize * scaleFactor = 2048)
    static let outputSize: Int = inputSize * scaleFactor // 2048

    init() {}

    /// Load Core ML model from bundle.
    /// Searches the SPM resource bundle first, then the main bundle, for
    /// "RealESRGAN_x4plus" as .mlmodelc or .mlpackage. No-op if already loaded.
    /// - Throws: `AIEnhanceError.modelNotFound` if no model file is present,
    ///   `.modelLoadFailed` if Core ML fails to load it.
    func loadModel() async throws {
        guard model == nil else {
            logger.debug("Model already loaded")
            return
        }

        logger.info("Loading Real-ESRGAN Core ML model...")

        // Try to find model in bundle
        let modelName = "RealESRGAN_x4plus"
        var modelURL: URL?

        // Try SPM bundle first
        #if SWIFT_PACKAGE
        if let url = Bundle.module.url(forResource: modelName, withExtension: "mlmodelc") {
            modelURL = url
        } else if let url = Bundle.module.url(forResource: modelName, withExtension: "mlpackage") {
            modelURL = url
        }
        #endif

        // Try main bundle
        if modelURL == nil {
            if let url = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") {
                modelURL = url
            } else if let url = Bundle.main.url(forResource: modelName, withExtension: "mlpackage") {
                modelURL = url
            }
        }

        guard let url = modelURL else {
            logger.error("Model file not found: \(modelName)")
            throw AIEnhanceError.modelNotFound
        }

        logger.info("Found model at: \(url.path)")

        // Configure model for optimal performance
        let config = MLModelConfiguration()
        config.computeUnits = .all // Use Neural Engine when available

        do {
            // NOTE(review): MLModel.load normally expects a compiled .mlmodelc;
            // loading a raw .mlpackage at runtime may fail — confirm, or run
            // MLModel.compileModel(at:) first for that case.
            model = try await MLModel.load(contentsOf: url, configuration: config)
            logger.info("Model loaded successfully")
        } catch {
            logger.error("Failed to load model: \(error.localizedDescription)")
            throw AIEnhanceError.modelLoadFailed(error.localizedDescription)
        }
    }

    /// Unload model from memory
    func unloadModel() {
        model = nil
        logger.info("Model unloaded from memory")
    }

    /// Process a 512x512 image through the model
    /// - Parameter pixelBuffer: Input image as CVPixelBuffer (must be 512x512, BGRA format)
    /// - Returns: Enhanced image as RGBA data array (2048x2048)
    /// - Throws: `AIEnhanceError.modelNotFound` if no model is loaded,
    ///   `.inferenceError` for bad input size or prediction failure,
    ///   `CancellationError` if the task was cancelled before inference.
    func processImage(_ pixelBuffer: CVPixelBuffer) async throws -> [UInt8] {
        guard let model else {
            throw AIEnhanceError.modelNotFound
        }

        // Verify input size
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        guard width == Self.inputSize, height == Self.inputSize else {
            throw AIEnhanceError.inferenceError(
                "Invalid input size \(width)x\(height), expected \(Self.inputSize)x\(Self.inputSize)"
            )
        }

        // Check for cancellation before the (expensive) inference starts.
        try Task.checkCancellation()

        logger.info("Running inference on \(width)x\(height) image...")

        // Run inference on a background queue and bridge back via continuation.
        // Every code path below resumes the continuation exactly once.
        // NOTE(review): the closure calls actor members (`self.logger`,
        // `self.multiArrayToRGBA`) synchronously from a GCD queue — this is
        // flagged under strict (Swift 6) concurrency checking; confirm intent.
        let output: [UInt8] = try await withCheckedThrowingContinuation { continuation in
            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    // Create input feature from pixel buffer.
                    // NOTE(review): the feature name "input" must match the
                    // model's declared input name — verify against the model.
                    let inputFeature = try MLFeatureValue(pixelBuffer: pixelBuffer)
                    let inputProvider = try MLDictionaryFeatureProvider(
                        dictionary: ["input": inputFeature]
                    )

                    // Run inference synchronously
                    let prediction = try model.prediction(from: inputProvider)

                    // Extract output from model
                    // The model outputs to "activation_out" as either MultiArray or Image
                    let rgbaData: [UInt8]

                    if let outputValue = prediction.featureValue(for: "activation_out") {
                        if let multiArray = outputValue.multiArrayValue {
                            // Output is MLMultiArray with shape [C, H, W]
                            self.logger.info("Output is MultiArray: \(multiArray.shape)")
                            rgbaData = try self.multiArrayToRGBA(multiArray)
                        } else if let outputBuffer = outputValue.imageBufferValue {
                            // Output is CVPixelBuffer (image)
                            let outWidth = CVPixelBufferGetWidth(outputBuffer)
                            let outHeight = CVPixelBufferGetHeight(outputBuffer)
                            self.logger.info("Output is Image: \(outWidth)x\(outHeight)")
                            rgbaData = try ImageFormatConverter.pixelBufferToRGBAData(outputBuffer)
                        } else {
                            continuation.resume(throwing: AIEnhanceError.inferenceError(
                                "Cannot extract data from model output"
                            ))
                            return
                        }
                    } else {
                        continuation.resume(throwing: AIEnhanceError.inferenceError(
                            "Model output 'activation_out' not found"
                        ))
                        return
                    }

                    continuation.resume(returning: rgbaData)
                } catch let error as AIEnhanceError {
                    // Preserve our own error type rather than re-wrapping it.
                    continuation.resume(throwing: error)
                } catch {
                    continuation.resume(throwing: AIEnhanceError.inferenceError(error.localizedDescription))
                }
            }
        }

        return output
    }

    /// Convert MLMultiArray [C, H, W] to RGBA byte array
    /// - Parameter multiArray: Output from model with shape [3, H, W] (RGB channels)
    /// - Returns: RGBA byte array with shape [H * W * 4]
    /// - Throws: `AIEnhanceError.inferenceError` on an unexpected shape.
    private func multiArrayToRGBA(_ multiArray: MLMultiArray) throws -> [UInt8] {
        let shape = multiArray.shape.map { $0.intValue }

        // Expect shape [3, H, W] for RGB
        guard shape.count == 3, shape[0] == 3 else {
            throw AIEnhanceError.inferenceError(
                "Unexpected output shape: \(shape), expected [3, H, W]"
            )
        }

        let channels = shape[0]
        let height = shape[1]
        let width = shape[2]

        logger.info("Converting MultiArray \(channels)x\(height)x\(width) to RGBA")

        // Output array: RGBA format
        var rgbaData = [UInt8](repeating: 255, count: width * height * 4)

        // Get pointer to MultiArray data.
        // NOTE(review): assumes the array's dataType is Float32 and storage is
        // contiguous CHW (no custom strides) — confirm against the exported model.
        let dataPointer = multiArray.dataPointer.assumingMemoryBound(to: Float32.self)
        let channelStride = height * width

        // Convert CHW (channel-first) to RGBA (interleaved)
        // Model output is typically in range [0, 1] or [-1, 1], need to scale to [0, 255]
        for y in 0..<height {
            for x in 0..<width {
                let pixelIndex = y * width + x
                let rgbaIndex = pixelIndex * 4

                // Read RGB values from CHW layout
                let r = dataPointer[0 * channelStride + pixelIndex]
                let g = dataPointer[1 * channelStride + pixelIndex]
                let b = dataPointer[2 * channelStride + pixelIndex]

                // Clamp to [0, 1] then scale to 0-255
                // Model typically outputs values in [0, 1] range
                rgbaData[rgbaIndex + 0] = UInt8(clamping: Int(max(0, min(1, r)) * 255))
                rgbaData[rgbaIndex + 1] = UInt8(clamping: Int(max(0, min(1, g)) * 255))
                rgbaData[rgbaIndex + 2] = UInt8(clamping: Int(max(0, min(1, b)) * 255))
                rgbaData[rgbaIndex + 3] = 255 // Alpha
            }
        }

        return rgbaData
    }
}
|
||||
240
Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift
Normal file
240
Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift
Normal file
@@ -0,0 +1,240 @@
|
||||
//
|
||||
// WholeImageProcessor.swift
|
||||
// LivePhotoCore
|
||||
//
|
||||
// Processes images for Real-ESRGAN model with fixed 512x512 input.
|
||||
// Handles scaling, padding, and cropping to preserve original aspect ratio.
|
||||
//
|
||||
|
||||
import CoreGraphics
|
||||
import CoreVideo
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// Processes images for the Real-ESRGAN model.
/// The model requires fixed 512x512 input and outputs 2048x2048; this type
/// handles scaling, letterbox padding, and post-inference crop/scale so the
/// original aspect ratio is preserved.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Process an image through the AI model
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback (reported at fixed milestones)
    /// - Returns: Enhanced image with original aspect ratio preserved
    /// - Throws: `AIEnhanceError` when preparation, inference, or output
    ///   conversion fails; `CancellationError` if inference is cancelled.
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale and pad to the model's fixed 512x512 input
        let (paddedImage, _, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop padding and scale to target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    // MARK: - Private Helpers

    /// Padding information for later extraction
    private struct PaddingInfo {
        let paddedX: Int // X offset of original content in padded image
        let paddedY: Int // Y offset of original content in padded image
        let scaledWidth: Int // Width of original content after scaling
        let scaledHeight: Int // Height of original content after scaling
    }

    /// Prepare input image: scale to fit the model's 512x512 input
    /// (`RealESRGANProcessor.inputSize`) while preserving aspect ratio, then
    /// pad (letterbox) with black to a square.
    /// - Returns: The padded image, the applied scale factor, and the padding
    ///   geometry needed to crop the model output back out.
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)

        // Calculate scale to fit within inputSize x inputSize
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )

        // NOTE(review): for extreme aspect ratios Int truncation could yield a
        // zero dimension here — consider clamping to at least 1 pixel.
        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)

        // Calculate padding to center the image
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2

        logger.info("Scaling \(Int(originalWidth))x\(Int(originalHeight)) -> \(scaledWidth)x\(scaledHeight), padding: (\(paddingX), \(paddingY))")

        // Create padded context
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Fill with black (or neutral color)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))

        // Draw scaled image centered. CGContext's origin is bottom-left, but
        // because the content is centered the vertical offset is symmetric
        // (up to 1px of integer rounding), so no explicit Y flip is needed.
        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)

        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }

        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )

        return (paddedImage, scale, paddingInfo)
    }

    /// Extract the enhanced content area from the model's square output and
    /// scale it to the final target size (4x original, capped).
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor

        // Calculate crop region in output image (4x the padding info)
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale

        logger.info("Cropping output at (\(cropX), \(cropY)) size \(cropWidth)x\(cropHeight)")

        // Crop the content area
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        // Calculate final target size: 4x the original, scaled down if needed
        // so neither side exceeds maxDimension, preserving aspect ratio.
        let maxDimension = 4320 // cap on the longest side, in pixels
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale

        let targetWidth: Int
        let targetHeight: Int

        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            // Both dimensions fit within limit
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            // Scale down to fit within maxDimension while preserving aspect ratio
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }

        // If cropped image is already the right size, return it
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        // Scale to target size
        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }

        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }

        logger.info("Final image size: \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }

    /// Create CGImage from RGBA pixel data
    /// - Parameters:
    ///   - pixels: Interleaved RGBA bytes, row-major, `width * height * 4` long
    ///   - width: Image width in pixels
    ///   - height: Image height in pixels
    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        // noneSkipLast: RGBX layout — the alpha byte produced upstream is ignored.
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        return image
    }
}

// Keep the old name as a typealias for compatibility
typealias TiledImageProcessor = WholeImageProcessor
|
||||
@@ -10,6 +10,7 @@ import VideoToolbox
|
||||
public enum LivePhotoBuildStage: String, Codable, Sendable {
|
||||
case normalize
|
||||
case extractKeyFrame
|
||||
case aiEnhance
|
||||
case writePhotoMetadata
|
||||
case writeVideoMetadata
|
||||
case saveToAlbum
|
||||
@@ -131,6 +132,9 @@ public struct ExportParams: Codable, Sendable, Hashable {
|
||||
public var maxDimension: Int
|
||||
public var cropRect: CropRect
|
||||
public var aspectRatio: AspectRatioTemplate
|
||||
public var compatibilityMode: Bool
|
||||
public var targetFrameRate: Int
|
||||
public var aiEnhanceConfig: AIEnhanceConfig
|
||||
|
||||
public init(
|
||||
trimStart: Double = 0,
|
||||
@@ -141,7 +145,10 @@ public struct ExportParams: Codable, Sendable, Hashable {
|
||||
hdrPolicy: HDRPolicy = .toneMapToSDR,
|
||||
maxDimension: Int = 1920,
|
||||
cropRect: CropRect = .full,
|
||||
aspectRatio: AspectRatioTemplate = .original
|
||||
aspectRatio: AspectRatioTemplate = .original,
|
||||
compatibilityMode: Bool = false,
|
||||
targetFrameRate: Int = 60,
|
||||
aiEnhanceConfig: AIEnhanceConfig = .disabled
|
||||
) {
|
||||
self.trimStart = trimStart
|
||||
self.trimEnd = trimEnd
|
||||
@@ -152,6 +159,20 @@ public struct ExportParams: Codable, Sendable, Hashable {
|
||||
self.maxDimension = maxDimension
|
||||
self.cropRect = cropRect
|
||||
self.aspectRatio = aspectRatio
|
||||
self.compatibilityMode = compatibilityMode
|
||||
self.targetFrameRate = targetFrameRate
|
||||
self.aiEnhanceConfig = aiEnhanceConfig
|
||||
}
|
||||
|
||||
/// 应用兼容模式的便捷方法
|
||||
public func withCompatibilityMode() -> ExportParams {
|
||||
var params = self
|
||||
params.compatibilityMode = true
|
||||
params.maxDimension = 720
|
||||
params.targetFrameRate = 30
|
||||
params.codecPolicy = .fallbackH264
|
||||
params.hdrPolicy = .toneMapToSDR
|
||||
return params
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,58 +461,82 @@ public actor LivePhotoBuilder {
|
||||
let assetIdentifier = UUID().uuidString
|
||||
let paths = try cacheManager.makeWorkPaths(workId: workId)
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0))
|
||||
let trimmedVideoURL = try await trimVideo(
|
||||
sourceURL: sourceVideoURL,
|
||||
trimStart: exportParams.trimStart,
|
||||
trimEnd: exportParams.trimEnd,
|
||||
destinationURL: paths.workDir.appendingPathComponent("trimmed.mov")
|
||||
)
|
||||
// 临时文件路径(用于清理)
|
||||
let trimmedURL = paths.workDir.appendingPathComponent("trimmed.mov")
|
||||
let scaledURL = paths.workDir.appendingPathComponent("scaled.mov")
|
||||
let keyPhotoTempURL = paths.workDir.appendingPathComponent("keyPhoto").appendingPathExtension("heic")
|
||||
|
||||
// 关键:将视频变速到约 1 秒,与 metadata.mov 的时间标记匹配
|
||||
// live-wallpaper 项目使用 CMTimeMake(550, 600) = 0.917 秒
|
||||
// 我们使用 1 秒以完全匹配 metadata.mov 的时长
|
||||
let targetDuration = CMTimeMake(value: 550, timescale: 600) // ~0.917 秒,与 live-wallpaper 一致
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0.5))
|
||||
let scaledVideoURL = try await scaleVideoToTargetDuration(
|
||||
sourceURL: trimmedVideoURL,
|
||||
targetDuration: targetDuration,
|
||||
cropRect: exportParams.cropRect,
|
||||
aspectRatio: exportParams.aspectRatio,
|
||||
destinationURL: paths.workDir.appendingPathComponent("scaled.mov")
|
||||
)
|
||||
|
||||
// 计算关键帧时间:目标视频的中间位置(0.5 秒处,与 metadata.mov 的 still-image-time 匹配)
|
||||
let relativeKeyFrameTime = 0.5 // 固定为 0.5 秒,与 metadata.mov 匹配
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .extractKeyFrame, fraction: 0))
|
||||
let keyPhotoURL = try await resolveKeyPhotoURL(
|
||||
videoURL: scaledVideoURL,
|
||||
coverImageURL: coverImageURL,
|
||||
keyFrameTime: relativeKeyFrameTime,
|
||||
destinationURL: paths.workDir.appendingPathComponent("keyPhoto").appendingPathExtension("heic")
|
||||
)
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writePhotoMetadata, fraction: 0))
|
||||
guard let pairedImageURL = addAssetID(
|
||||
assetIdentifier,
|
||||
toImage: keyPhotoURL,
|
||||
saveTo: paths.photoURL
|
||||
) else {
|
||||
throw AppError(code: "LPB-201", stage: .writePhotoMetadata, message: "封面生成失败", underlyingErrorDescription: nil, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
// 内部函数:清理临时文件
|
||||
func cleanupTempFiles() {
|
||||
try? FileManager.default.removeItem(at: trimmedURL)
|
||||
try? FileManager.default.removeItem(at: scaledURL)
|
||||
try? FileManager.default.removeItem(at: keyPhotoTempURL)
|
||||
}
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: 0))
|
||||
let pairedVideoURL = try await addAssetID(assetIdentifier, toVideo: scaledVideoURL, saveTo: paths.pairedVideoURL, stillImageTimeSeconds: relativeKeyFrameTime, progress: { p in
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: p))
|
||||
})
|
||||
do {
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0))
|
||||
let trimmedVideoURL = try await trimVideo(
|
||||
sourceURL: sourceVideoURL,
|
||||
trimStart: exportParams.trimStart,
|
||||
trimEnd: exportParams.trimEnd,
|
||||
destinationURL: trimmedURL
|
||||
)
|
||||
|
||||
logger.info("Generated Live Photo files:")
|
||||
logger.info(" Photo: \(pairedImageURL.path)")
|
||||
logger.info(" Video: \(pairedVideoURL.path)")
|
||||
logger.info(" AssetIdentifier: \(assetIdentifier)")
|
||||
// 关键:将视频变速到约 1 秒,与 metadata.mov 的时间标记匹配
|
||||
let targetDuration = CMTimeMake(value: 550, timescale: 600) // ~0.917 秒,与 live-wallpaper 一致
|
||||
progress?(LivePhotoBuildProgress(stage: .normalize, fraction: 0.5))
|
||||
let scaledVideoURL = try await scaleVideoToTargetDuration(
|
||||
sourceURL: trimmedVideoURL,
|
||||
targetDuration: targetDuration,
|
||||
cropRect: exportParams.cropRect,
|
||||
aspectRatio: exportParams.aspectRatio,
|
||||
maxDimension: exportParams.maxDimension,
|
||||
targetFrameRate: exportParams.targetFrameRate,
|
||||
destinationURL: scaledURL
|
||||
)
|
||||
|
||||
return LivePhotoBuildOutput(workId: workId, assetIdentifier: assetIdentifier, pairedImageURL: pairedImageURL, pairedVideoURL: pairedVideoURL)
|
||||
// 计算关键帧时间:目标视频的中间位置(0.5 秒处,与 metadata.mov 的 still-image-time 匹配)
|
||||
let relativeKeyFrameTime = 0.5 // 固定为 0.5 秒,与 metadata.mov 匹配
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .extractKeyFrame, fraction: 0))
|
||||
let keyPhotoURL = try await resolveKeyPhotoURL(
|
||||
videoURL: scaledVideoURL,
|
||||
coverImageURL: coverImageURL,
|
||||
keyFrameTime: relativeKeyFrameTime,
|
||||
destinationURL: keyPhotoTempURL,
|
||||
aiEnhanceConfig: exportParams.aiEnhanceConfig,
|
||||
progress: progress
|
||||
)
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writePhotoMetadata, fraction: 0))
|
||||
guard let pairedImageURL = addAssetID(
|
||||
assetIdentifier,
|
||||
toImage: keyPhotoURL,
|
||||
saveTo: paths.photoURL
|
||||
) else {
|
||||
cleanupTempFiles()
|
||||
throw AppError(code: "LPB-201", stage: .writePhotoMetadata, message: "封面生成失败", underlyingErrorDescription: nil, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
}
|
||||
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: 0))
|
||||
let pairedVideoURL = try await addAssetID(assetIdentifier, toVideo: scaledVideoURL, saveTo: paths.pairedVideoURL, stillImageTimeSeconds: relativeKeyFrameTime, progress: { p in
|
||||
progress?(LivePhotoBuildProgress(stage: .writeVideoMetadata, fraction: p))
|
||||
})
|
||||
|
||||
// 清理临时文件(成功后)
|
||||
cleanupTempFiles()
|
||||
|
||||
logger.info("Generated Live Photo files:")
|
||||
logger.info(" Photo: \(pairedImageURL.path)")
|
||||
logger.info(" Video: \(pairedVideoURL.path)")
|
||||
logger.info(" AssetIdentifier: \(assetIdentifier)")
|
||||
|
||||
return LivePhotoBuildOutput(workId: workId, assetIdentifier: assetIdentifier, pairedImageURL: pairedImageURL, pairedVideoURL: pairedVideoURL)
|
||||
} catch {
|
||||
// 清理临时文件(失败后)
|
||||
cleanupTempFiles()
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
private func trimVideo(sourceURL: URL, trimStart: Double, trimEnd: Double, destinationURL: URL) async throws -> URL {
|
||||
@@ -550,13 +595,15 @@ public actor LivePhotoBuilder {
|
||||
}
|
||||
|
||||
/// 将视频处理为 Live Photo 所需的格式
|
||||
/// 包括:时长变速到 ~0.917 秒、裁剪、尺寸调整到 1080x1920(或保持比例)、帧率转换为 60fps
|
||||
/// 完全对齐 live-wallpaper 项目的 accelerateVideo + resizeVideo 流程
|
||||
/// 包括:时长变速到 ~0.917 秒、裁剪、尺寸调整、帧率转换
|
||||
/// 优化:单次导出完成变速+裁剪+缩放(减少一次编码,降低内存峰值)
|
||||
private func scaleVideoToTargetDuration(
|
||||
sourceURL: URL,
|
||||
targetDuration: CMTime,
|
||||
cropRect: CropRect,
|
||||
aspectRatio: AspectRatioTemplate,
|
||||
maxDimension: Int,
|
||||
targetFrameRate: Int,
|
||||
destinationURL: URL
|
||||
) async throws -> URL {
|
||||
let asset = AVURLAsset(url: sourceURL)
|
||||
@@ -573,99 +620,64 @@ public actor LivePhotoBuilder {
|
||||
let naturalSize = try await videoTrack.load(.naturalSize)
|
||||
let preferredTransform = try await videoTrack.load(.preferredTransform)
|
||||
|
||||
// 计算应用 transform 后的尺寸(与 live-wallpaper resizeVideo 一致)
|
||||
// 计算应用 transform 后的尺寸
|
||||
let originalSize = CGSize(width: naturalSize.width, height: naturalSize.height)
|
||||
let transformedSize = originalSize.applying(preferredTransform)
|
||||
let absoluteSize = CGSize(width: abs(transformedSize.width), height: abs(transformedSize.height))
|
||||
|
||||
// 根据 maxDimension 计算基准宽度
|
||||
let baseWidth: CGFloat = maxDimension == 720 ? 720 : 1080
|
||||
let maxHeight: CGFloat = maxDimension == 720 ? 1280 : 1920
|
||||
|
||||
// 根据裁剪和比例计算输出尺寸
|
||||
let outputSize: CGSize
|
||||
if let targetRatio = aspectRatio.ratio {
|
||||
// 根据目标比例决定输出尺寸
|
||||
// 竖屏优先:宽度 1080,高度根据比例计算
|
||||
let width: CGFloat = 1080
|
||||
let width: CGFloat = baseWidth
|
||||
let height = width / targetRatio
|
||||
outputSize = CGSize(width: width, height: min(height, 1920))
|
||||
outputSize = CGSize(width: width, height: min(height, maxHeight))
|
||||
} else {
|
||||
// 原比例:根据源视频方向决定
|
||||
let isLandscape = absoluteSize.width > absoluteSize.height
|
||||
outputSize = isLandscape ? CGSize(width: 1920, height: 1080) : CGSize(width: 1080, height: 1920)
|
||||
}
|
||||
|
||||
// 步骤1:先变速到目标时长(对应 live-wallpaper 的 accelerateVideo)
|
||||
let acceleratedURL = destinationURL.deletingLastPathComponent().appendingPathComponent("accelerated.mov")
|
||||
if FileManager.default.fileExists(atPath: acceleratedURL.path) {
|
||||
try FileManager.default.removeItem(at: acceleratedURL)
|
||||
outputSize = isLandscape ? CGSize(width: maxHeight, height: baseWidth) : CGSize(width: baseWidth, height: maxHeight)
|
||||
}
|
||||
|
||||
// 优化:单次导出完成变速+裁剪+缩放
|
||||
// 使用 AVMutableComposition 进行时间缩放,AVMutableVideoComposition 进行空间变换
|
||||
let composition = AVMutableComposition()
|
||||
guard let compositionVideoTrack = composition.addMutableTrack(withMediaType: .video, preferredTrackID: kCMPersistentTrackID_Invalid) else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "无法创建视频轨道", suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
try compositionVideoTrack.insertTimeRange(CMTimeRange(start: .zero, duration: originalDuration), of: videoTrack, at: .zero)
|
||||
// 变速:将原始时长缩放到目标时长(与 live-wallpaper accelerateVideo 第 287-288 行一致)
|
||||
// 变速:将原始时长缩放到目标时长
|
||||
compositionVideoTrack.scaleTimeRange(CMTimeRange(start: .zero, duration: originalDuration), toDuration: targetDuration)
|
||||
compositionVideoTrack.preferredTransform = preferredTransform
|
||||
|
||||
guard let accelerateExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else {
|
||||
guard let exportSession = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "无法创建导出会话", suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
accelerateExport.outputURL = acceleratedURL
|
||||
accelerateExport.outputFileType = .mov
|
||||
|
||||
await accelerateExport.export()
|
||||
|
||||
guard accelerateExport.status == .completed else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "视频变速失败", underlyingErrorDescription: accelerateExport.error?.localizedDescription, suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
// 步骤2:调整尺寸和帧率(对应 live-wallpaper 的 resizeVideo)
|
||||
let acceleratedAsset = AVURLAsset(url: acceleratedURL)
|
||||
guard let acceleratedVideoTrack = try await acceleratedAsset.loadTracks(withMediaType: .video).first else {
|
||||
return acceleratedURL
|
||||
}
|
||||
|
||||
let acceleratedDuration = try await acceleratedAsset.load(.duration)
|
||||
// 加载加速后视频轨道的属性
|
||||
let acceleratedNaturalSize = try await acceleratedVideoTrack.load(.naturalSize)
|
||||
let acceleratedTransform = try await acceleratedVideoTrack.load(.preferredTransform)
|
||||
|
||||
guard let resizeExport = AVAssetExportSession(asset: acceleratedAsset, presetName: AVAssetExportPresetHighestQuality) else {
|
||||
return acceleratedURL
|
||||
}
|
||||
|
||||
// 关键:使用 AVMutableVideoComposition 设置输出尺寸和帧率
|
||||
// 使用 AVMutableVideoComposition 设置输出尺寸和帧率
|
||||
let videoComposition = AVMutableVideoComposition()
|
||||
videoComposition.renderSize = outputSize
|
||||
// 关键:设置 60fps
|
||||
videoComposition.frameDuration = CMTime(value: 1, timescale: 60)
|
||||
videoComposition.frameDuration = CMTime(value: 1, timescale: CMTimeScale(targetFrameRate))
|
||||
|
||||
let instruction = AVMutableVideoCompositionInstruction()
|
||||
instruction.timeRange = CMTimeRange(start: .zero, duration: acceleratedDuration)
|
||||
instruction.timeRange = CMTimeRange(start: .zero, duration: targetDuration)
|
||||
|
||||
let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: acceleratedVideoTrack)
|
||||
let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: compositionVideoTrack)
|
||||
|
||||
// 关键修复:正确计算变换(支持裁剪)
|
||||
// 变换需要将 naturalSize 坐标系的像素映射到 outputSize 坐标系
|
||||
// 步骤:
|
||||
// 1. 应用 preferredTransform 旋转视频到正确方向
|
||||
// 2. 应用裁剪区域
|
||||
// 3. 根据旋转后的实际尺寸计算缩放和居中
|
||||
|
||||
// 计算旋转后的实际尺寸(用于确定缩放比例)
|
||||
let rotatedSize = acceleratedNaturalSize.applying(acceleratedTransform)
|
||||
// 计算旋转后的实际尺寸
|
||||
let rotatedSize = naturalSize.applying(preferredTransform)
|
||||
let rotatedAbsoluteSize = CGSize(width: abs(rotatedSize.width), height: abs(rotatedSize.height))
|
||||
|
||||
// 计算裁剪后的源区域尺寸
|
||||
let croppedSourceWidth = rotatedAbsoluteSize.width * cropRect.width
|
||||
let croppedSourceHeight = rotatedAbsoluteSize.height * cropRect.height
|
||||
|
||||
// 基于裁剪后尺寸计算缩放因子(填充模式,确保裁剪区域完全覆盖输出)
|
||||
// 基于裁剪后尺寸计算缩放因子(填充模式)
|
||||
let actualWidthRatio = outputSize.width / croppedSourceWidth
|
||||
let actualHeightRatio = outputSize.height / croppedSourceHeight
|
||||
let actualScaleFactor = max(actualWidthRatio, actualHeightRatio) // 使用 max 确保填充
|
||||
let actualScaleFactor = max(actualWidthRatio, actualHeightRatio)
|
||||
|
||||
let scaledWidth = rotatedAbsoluteSize.width * actualScaleFactor
|
||||
let scaledHeight = rotatedAbsoluteSize.height * actualScaleFactor
|
||||
@@ -679,35 +691,25 @@ public actor LivePhotoBuilder {
|
||||
let centerX = outputCenterX - cropCenterX
|
||||
let centerY = outputCenterY - cropCenterY
|
||||
|
||||
// 构建最终变换:
|
||||
// 对于 preferredTransform,它通常包含旋转+平移,平移部分是为了将旋转后的内容移到正坐标
|
||||
// 变换组合顺序(从右到左应用):
|
||||
// 1. 先应用 preferredTransform(旋转+平移到正坐标)
|
||||
// 2. 再缩放
|
||||
// 3. 最后平移到目标中心
|
||||
//
|
||||
// 使用 concatenating: A.concatenating(B) 表示先应用 A,再应用 B
|
||||
// 构建最终变换
|
||||
let scaleTransform = CGAffineTransform(scaleX: actualScaleFactor, y: actualScaleFactor)
|
||||
let translateToCenter = CGAffineTransform(translationX: centerX, y: centerY)
|
||||
let finalTransform = acceleratedTransform.concatenating(scaleTransform).concatenating(translateToCenter)
|
||||
let finalTransform = preferredTransform.concatenating(scaleTransform).concatenating(translateToCenter)
|
||||
|
||||
layerInstruction.setTransform(finalTransform, at: .zero)
|
||||
|
||||
instruction.layerInstructions = [layerInstruction]
|
||||
videoComposition.instructions = [instruction]
|
||||
|
||||
resizeExport.videoComposition = videoComposition
|
||||
resizeExport.outputURL = destinationURL
|
||||
resizeExport.outputFileType = .mov
|
||||
resizeExport.shouldOptimizeForNetworkUse = true
|
||||
exportSession.videoComposition = videoComposition
|
||||
exportSession.outputURL = destinationURL
|
||||
exportSession.outputFileType = .mov
|
||||
exportSession.shouldOptimizeForNetworkUse = true
|
||||
|
||||
await resizeExport.export()
|
||||
await exportSession.export()
|
||||
|
||||
// 清理临时文件
|
||||
try? FileManager.default.removeItem(at: acceleratedURL)
|
||||
|
||||
guard resizeExport.status == .completed else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "视频尺寸调整失败", underlyingErrorDescription: resizeExport.error?.localizedDescription, suggestedActions: ["重试"])
|
||||
guard exportSession.status == .completed else {
|
||||
throw AppError(code: "LPB-101", stage: .normalize, message: "视频处理失败", underlyingErrorDescription: exportSession.error?.localizedDescription, suggestedActions: ["重试"])
|
||||
}
|
||||
|
||||
return destinationURL
|
||||
@@ -717,7 +719,9 @@ public actor LivePhotoBuilder {
|
||||
videoURL: URL,
|
||||
coverImageURL: URL?,
|
||||
keyFrameTime: Double,
|
||||
destinationURL: URL
|
||||
destinationURL: URL,
|
||||
aiEnhanceConfig: AIEnhanceConfig = .disabled,
|
||||
progress: (@Sendable (LivePhotoBuildProgress) -> Void)? = nil
|
||||
) async throws -> URL {
|
||||
// 最大分辨率限制(对标竞品 1080p)
|
||||
let maxDimension = 1920
|
||||
@@ -736,60 +740,75 @@ public actor LivePhotoBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
// 内部函数:缩放图像
|
||||
func scaleImage(_ image: CGImage, maxDim: Int) -> CGImage {
|
||||
let width = image.width
|
||||
let height = image.height
|
||||
let maxSide = max(width, height)
|
||||
if maxSide <= maxDim { return image }
|
||||
|
||||
let scale = CGFloat(maxDim) / CGFloat(maxSide)
|
||||
let newWidth = Int(CGFloat(width) * scale)
|
||||
let newHeight = Int(CGFloat(height) * scale)
|
||||
|
||||
guard let context = CGContext(
|
||||
data: nil, width: newWidth, height: newHeight,
|
||||
bitsPerComponent: 8, bytesPerRow: 0,
|
||||
space: CGColorSpaceCreateDeviceRGB(),
|
||||
bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue
|
||||
) else { return image }
|
||||
|
||||
context.interpolationQuality = .high
|
||||
context.draw(image, in: CGRect(x: 0, y: 0, width: newWidth, height: newHeight))
|
||||
return context.makeImage() ?? image
|
||||
// 内部函数:使用 CGImageSource 高效缩放图像(内存优化)
|
||||
func scaleImageFromSource(_ source: CGImageSource, maxDim: Int) -> CGImage? {
|
||||
let options: [CFString: Any] = [
|
||||
kCGImageSourceThumbnailMaxPixelSize: maxDim,
|
||||
kCGImageSourceCreateThumbnailFromImageAlways: true,
|
||||
kCGImageSourceCreateThumbnailWithTransform: true
|
||||
]
|
||||
return CGImageSourceCreateThumbnailAtIndex(source, 0, options as CFDictionary)
|
||||
}
|
||||
|
||||
var finalImage: CGImage
|
||||
|
||||
// 如果用户提供了封面图
|
||||
if let coverImageURL {
|
||||
guard let src = CGImageSourceCreateWithURL(coverImageURL as CFURL, nil),
|
||||
let img = CGImageSourceCreateImageAtIndex(src, 0, nil) else {
|
||||
guard let src = CGImageSourceCreateWithURL(coverImageURL as CFURL, nil) else {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "封面读取失败", underlyingErrorDescription: nil, suggestedActions: ["更换封面图", "重试"])
|
||||
}
|
||||
let scaledImg = scaleImage(img, maxDim: maxDimension)
|
||||
try writeHEIC(scaledImg, to: destinationURL)
|
||||
return destinationURL
|
||||
|
||||
// 使用 CGImageSource 高效缩放,无需加载完整图像到内存
|
||||
if let scaledImg = scaleImageFromSource(src, maxDim: maxDimension) {
|
||||
finalImage = scaledImg
|
||||
} else if let img = CGImageSourceCreateImageAtIndex(src, 0, nil) {
|
||||
// 回退:直接使用原图
|
||||
finalImage = img
|
||||
} else {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "封面读取失败", underlyingErrorDescription: nil, suggestedActions: ["更换封面图", "重试"])
|
||||
}
|
||||
} else {
|
||||
// 从视频抽帧
|
||||
let asset = AVURLAsset(url: videoURL)
|
||||
let imageGenerator = AVAssetImageGenerator(asset: asset)
|
||||
imageGenerator.appliesPreferredTrackTransform = true
|
||||
imageGenerator.requestedTimeToleranceAfter = CMTime(value: 1, timescale: 100)
|
||||
imageGenerator.requestedTimeToleranceBefore = CMTime(value: 1, timescale: 100)
|
||||
// 设置最大尺寸,让 AVAssetImageGenerator 自动缩放
|
||||
imageGenerator.maximumSize = CGSize(width: maxDimension, height: maxDimension)
|
||||
|
||||
let safeSeconds = max(0, min(keyFrameTime, max(0, asset.duration.seconds - 0.1)))
|
||||
let time = CMTime(seconds: safeSeconds, preferredTimescale: asset.duration.timescale)
|
||||
|
||||
do {
|
||||
finalImage = try imageGenerator.copyCGImage(at: time, actualTime: nil)
|
||||
} catch {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "抽帧失败", underlyingErrorDescription: error.localizedDescription, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
}
|
||||
}
|
||||
|
||||
// 从视频抽帧
|
||||
let asset = AVURLAsset(url: videoURL)
|
||||
let imageGenerator = AVAssetImageGenerator(asset: asset)
|
||||
imageGenerator.appliesPreferredTrackTransform = true
|
||||
imageGenerator.requestedTimeToleranceAfter = CMTime(value: 1, timescale: 100)
|
||||
imageGenerator.requestedTimeToleranceBefore = CMTime(value: 1, timescale: 100)
|
||||
// 设置最大尺寸,让 AVAssetImageGenerator 自动缩放
|
||||
imageGenerator.maximumSize = CGSize(width: maxDimension, height: maxDimension)
|
||||
// AI 超分辨率增强(如果启用)
|
||||
if aiEnhanceConfig.enabled && AIEnhancer.isAvailable() {
|
||||
progress?(LivePhotoBuildProgress(stage: .aiEnhance, fraction: 0))
|
||||
logger.info("Starting AI enhancement for cover image: \(finalImage.width)x\(finalImage.height)")
|
||||
|
||||
let safeSeconds = max(0, min(keyFrameTime, max(0, asset.duration.seconds - 0.1)))
|
||||
let time = CMTime(seconds: safeSeconds, preferredTimescale: asset.duration.timescale)
|
||||
do {
|
||||
let enhancer = AIEnhancer(config: aiEnhanceConfig)
|
||||
try await enhancer.preloadModel()
|
||||
|
||||
let cgImage: CGImage
|
||||
do {
|
||||
cgImage = try imageGenerator.copyCGImage(at: time, actualTime: nil)
|
||||
} catch {
|
||||
throw AppError(code: "LPB-201", stage: .extractKeyFrame, message: "抽帧失败", underlyingErrorDescription: error.localizedDescription, suggestedActions: ["缩短时长", "降低分辨率", "重试"])
|
||||
let result = try await enhancer.enhance(image: finalImage) { p in
|
||||
progress?(LivePhotoBuildProgress(stage: .aiEnhance, fraction: p))
|
||||
}
|
||||
|
||||
finalImage = result.enhancedImage
|
||||
logger.info("AI enhancement complete: \(Int(result.originalSize.width))x\(Int(result.originalSize.height)) -> \(Int(result.enhancedSize.width))x\(Int(result.enhancedSize.height)) in \(Int(result.processingTimeMs))ms")
|
||||
} catch {
|
||||
// AI 增强失败时静默降级,使用原图
|
||||
logger.error("AI enhancement failed, using original image: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
try writeHEIC(cgImage, to: destinationURL)
|
||||
try writeHEIC(finalImage, to: destinationURL)
|
||||
return destinationURL
|
||||
}
|
||||
|
||||
|
||||
BIN
Sources/LivePhotoCore/Resources/RealESRGAN_x4plus.mlmodel
LFS
Normal file
BIN
Sources/LivePhotoCore/Resources/RealESRGAN_x4plus.mlmodel
LFS
Normal file
Binary file not shown.
Reference in New Issue
Block a user