feat: complete M2-M4; add AI enhancement, design system, App Store prep

New features:
- AI super-resolution module (Real-ESRGAN Core ML)
- Soft UI design system (DesignSystem.swift)
- Settings, privacy policy, and onboarding screens
- Recent works manager

App Store prep:
- Finalized screenshots (iPhone 6.7"/6.5", iPad 12.9")
- App Store metadata document
- Fixed alpha channel in the app icon
- Updated display name to Live Photo Studio

Project configuration:
- Configured Git LFS to track mlmodel files
- Added Claude skill development guide
- Updated .gitignore rules

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Sources/LivePhotoCore/AIEnhancer/TiledImageProcessor.swift | 240 (new file)
@@ -0,0 +1,240 @@
//
//  TiledImageProcessor.swift
//  LivePhotoCore
//
//  Processes images for the Real-ESRGAN model with fixed 512x512 input.
//  Handles scaling, padding, and cropping to preserve the original aspect ratio.
//

import CoreGraphics
import CoreVideo
import Foundation
import os

/// Processes images for the Real-ESRGAN model.
/// The model requires fixed 512x512 input and outputs 2048x2048.
struct WholeImageProcessor {
    private let logger = Logger(subsystem: "LivePhotoCore", category: "WholeImageProcessor")

    /// Process an image through the AI model.
    /// - Parameters:
    ///   - inputImage: Input CGImage to enhance
    ///   - processor: RealESRGAN processor for inference
    ///   - progress: Optional progress callback
    /// - Returns: Enhanced image with the original aspect ratio preserved
    func processImage(
        _ inputImage: CGImage,
        processor: RealESRGANProcessor,
        progress: AIEnhanceProgress?
    ) async throws -> CGImage {
        let originalWidth = inputImage.width
        let originalHeight = inputImage.height

        logger.info("Processing \(originalWidth)x\(originalHeight) image")
        progress?(0.1)

        // Step 1: Scale and pad to 512x512
        let (paddedImage, scaleFactor, paddingInfo) = try prepareInputImage(inputImage)
        progress?(0.2)

        // Step 2: Convert to CVPixelBuffer
        let pixelBuffer = try ImageFormatConverter.cgImageToPixelBuffer(paddedImage)
        progress?(0.3)

        // Step 3: Run inference
        let outputData = try await processor.processImage(pixelBuffer)
        progress?(0.8)

        // Step 4: Convert output to CGImage
        let outputImage = try createCGImage(
            from: outputData,
            width: RealESRGANProcessor.outputSize,
            height: RealESRGANProcessor.outputSize
        )
        progress?(0.9)

        // Step 5: Crop padding and scale to target size
        let finalImage = try extractAndScaleOutput(
            outputImage,
            originalWidth: originalWidth,
            originalHeight: originalHeight,
            scaleFactor: scaleFactor,
            paddingInfo: paddingInfo
        )
        progress?(1.0)

        logger.info("Enhanced to \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }
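
    // Usage sketch (illustrative, not part of this commit): `esrgan` stands for a
    // configured RealESRGANProcessor and `source` for an input CGImage; it also
    // assumes AIEnhanceProgress is a (Double) -> Void callback, as the
    // progress?(0.x) calls above suggest.
    //
    //     let enhancer = WholeImageProcessor()
    //     let enhanced = try await enhancer.processImage(source, processor: esrgan, progress: { fraction in
    //         print("AI enhance progress: \(Int(fraction * 100))%")
    //     })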

    // MARK: - Private Helpers

    /// Padding information for later extraction
    private struct PaddingInfo {
        let paddedX: Int        // X offset of original content in padded image
        let paddedY: Int        // Y offset of original content in padded image
        let scaledWidth: Int    // Width of original content after scaling
        let scaledHeight: Int   // Height of original content after scaling
    }

    /// Prepare the input image: scale to fit 512x512 while preserving aspect ratio, then pad.
    private func prepareInputImage(_ image: CGImage) throws -> (CGImage, CGFloat, PaddingInfo) {
        let inputSize = RealESRGANProcessor.inputSize
        let originalWidth = CGFloat(image.width)
        let originalHeight = CGFloat(image.height)

        // Calculate scale to fit within inputSize x inputSize
        let scale = min(
            CGFloat(inputSize) / originalWidth,
            CGFloat(inputSize) / originalHeight
        )

        let scaledWidth = Int(originalWidth * scale)
        let scaledHeight = Int(originalHeight * scale)

        // Calculate padding to center the image
        let paddingX = (inputSize - scaledWidth) / 2
        let paddingY = (inputSize - scaledHeight) / 2

        logger.info("Scaling \(Int(originalWidth))x\(Int(originalHeight)) -> \(scaledWidth)x\(scaledHeight), padding: (\(paddingX), \(paddingY))")

        // Create padded context
        let colorSpace = image.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: inputSize,
            height: inputSize,
            bitsPerComponent: 8,
            bytesPerRow: inputSize * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inputImageInvalid
        }

        // Fill with black (or neutral color)
        context.setFillColor(gray: 0.0, alpha: 1.0)
        context.fill(CGRect(x: 0, y: 0, width: inputSize, height: inputSize))

        // Draw the scaled image centered. CGContext's origin is at the bottom-left,
        // but since the padding is symmetric (up to 1px of integer rounding), the
        // centered rect needs no Y flip.
        let drawRect = CGRect(x: paddingX, y: paddingY, width: scaledWidth, height: scaledHeight)
        context.draw(image, in: drawRect)

        guard let paddedImage = context.makeImage() else {
            throw AIEnhanceError.inputImageInvalid
        }

        let paddingInfo = PaddingInfo(
            paddedX: paddingX,
            paddedY: paddingY,
            scaledWidth: scaledWidth,
            scaledHeight: scaledHeight
        )

        return (paddedImage, scale, paddingInfo)
    }
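
    // Worked example of the letterbox math above (assuming inputSize == 512):
    // a 4032x3024 photo scales by min(512/4032, 512/3024) ≈ 0.127 to 512x384,
    // giving paddingX = (512 - 512) / 2 = 0 and paddingY = (512 - 384) / 2 = 64,
    // so the model sees the content letterboxed between two 64px black bars.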

    /// Extract the enhanced content area and scale to the final size.
    private func extractAndScaleOutput(
        _ outputImage: CGImage,
        originalWidth: Int,
        originalHeight: Int,
        scaleFactor: CGFloat,
        paddingInfo: PaddingInfo
    ) throws -> CGImage {
        let modelScale = RealESRGANProcessor.scaleFactor

        // Calculate crop region in output image (4x the padding info)
        let cropX = paddingInfo.paddedX * modelScale
        let cropY = paddingInfo.paddedY * modelScale
        let cropWidth = paddingInfo.scaledWidth * modelScale
        let cropHeight = paddingInfo.scaledHeight * modelScale

        logger.info("Cropping output at (\(cropX), \(cropY)) size \(cropWidth)x\(cropHeight)")

        // Crop the content area
        let cropRect = CGRect(x: cropX, y: cropY, width: cropWidth, height: cropHeight)
        guard let croppedImage = outputImage.cropping(to: cropRect) else {
            throw AIEnhanceError.inferenceError("Failed to crop output image")
        }

        // Calculate final target size (4x original, capped at a reasonable limit
        // while preserving aspect ratio)
        let maxDimension = 4320 // Cap at ~4K
        let idealWidth = originalWidth * modelScale
        let idealHeight = originalHeight * modelScale

        let targetWidth: Int
        let targetHeight: Int

        if idealWidth <= maxDimension && idealHeight <= maxDimension {
            // Both dimensions fit within the limit
            targetWidth = idealWidth
            targetHeight = idealHeight
        } else {
            // Scale down to fit within maxDimension while preserving aspect ratio
            let scale = min(Double(maxDimension) / Double(idealWidth), Double(maxDimension) / Double(idealHeight))
            targetWidth = Int(Double(idealWidth) * scale)
            targetHeight = Int(Double(idealHeight) * scale)
        }

        // If the cropped image is already the right size, return it
        if croppedImage.width == targetWidth && croppedImage.height == targetHeight {
            return croppedImage
        }

        // Scale to the target size
        let colorSpace = croppedImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(
            data: nil,
            width: targetWidth,
            height: targetHeight,
            bitsPerComponent: 8,
            bytesPerRow: targetWidth * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else {
            throw AIEnhanceError.inferenceError("Failed to create output context")
        }

        context.interpolationQuality = .high
        context.draw(croppedImage, in: CGRect(x: 0, y: 0, width: targetWidth, height: targetHeight))

        guard let finalImage = context.makeImage() else {
            throw AIEnhanceError.inferenceError("Failed to create final image")
        }

        logger.info("Final image size: \(finalImage.width)x\(finalImage.height)")
        return finalImage
    }
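
    // Continuing the 4032x3024 example (assuming modelScale == 4): the crop is
    // (0, 64 * 4) = (0, 256) with size 2048x1536, the ideal output is 16128x12096,
    // and the 4320 cap scales it by 4320/16128 ≈ 0.268 down to 4320x3240.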

    /// Create a CGImage from RGBA pixel data.
    private func createCGImage(from pixels: [UInt8], width: Int, height: Int) throws -> CGImage {
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.noneSkipLast.rawValue)

        guard
            let provider = CGDataProvider(data: Data(pixels) as CFData),
            let image = CGImage(
                width: width,
                height: height,
                bitsPerComponent: 8,
                bitsPerPixel: 32,
                bytesPerRow: width * 4,
                space: colorSpace,
                bitmapInfo: bitmapInfo,
                provider: provider,
                decode: nil,
                shouldInterpolate: true,
                intent: .defaultIntent
            )
        else {
            throw AIEnhanceError.inferenceError("Failed to create output image")
        }

        return image
    }
}

// Keep the old name as a typealias for compatibility
typealias TiledImageProcessor = WholeImageProcessor