feat(camera): share jpeg transcoder + default maxWidth

This commit is contained in:
Peter Steinberger
2025-12-14 01:56:49 +00:00
parent e9eb9edc23
commit c3fa1fb736
7 changed files with 223 additions and 129 deletions

View File

@@ -1,7 +1,6 @@
import AVFoundation
import ClawdisKit
import Foundation
import UIKit
actor CameraController {
enum CameraError: LocalizedError, Sendable {
@@ -37,7 +36,9 @@ actor CameraController {
height: Int)
{
let facing = params.facing ?? .front
let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil }
// Default to a reasonable max width to keep bridge payload sizes manageable.
// If you need the full-res photo, explicitly request a larger maxWidth.
let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600
let quality = Self.clampQuality(params.quality)
try await self.ensureAccess(for: .video)
@@ -77,16 +78,16 @@ actor CameraController {
output.capturePhoto(with: settings, delegate: PhotoCaptureDelegate(cont))
}
let (finalData, size) = try Self.reencodeJPEG(
let res = try JPEGTranscoder.transcodeToJPEG(
imageData: rawData,
maxWidth: maxWidth,
maxWidthPx: maxWidth,
quality: quality)
return (
format: "jpg",
base64: finalData.base64EncodedString(),
width: Int(size.width.rounded()),
height: Int(size.height.rounded()))
base64: res.data.base64EncodedString(),
width: res.widthPx,
height: res.heightPx)
}
func clip(params: ClawdisCameraClipParams) async throws -> (
@@ -197,45 +198,6 @@ actor CameraController {
return min(15000, max(250, v))
}
/// Decodes captured image bytes, optionally shrinks the image to a maximum width, and re-encodes it as JPEG.
///
/// - Parameters:
///   - imageData: Encoded image bytes, decoded with `UIImage(data:)`.
///   - maxWidth: Width limit forwarded to `downscale(image:maxWidth:)`. `nil` or a non-positive value keeps
///     the decoded image untouched.
///   - quality: Compression quality handed to `jpegData(compressionQuality:)`.
/// - Returns: The JPEG bytes together with the `size` of the image that was encoded.
/// - Throws: `CameraError.captureFailed` when the data cannot be decoded or the JPEG cannot be produced.
private nonisolated static func reencodeJPEG(
    imageData: Data,
    maxWidth: Int?,
    quality: Double) throws -> (data: Data, size: CGSize)
{
    guard let decoded = UIImage(data: imageData) else {
        throw CameraError.captureFailed("Failed to decode captured image")
    }

    // Start from the decoded image and only swap in a downscaled copy when a positive limit was given.
    var output = decoded
    if let limit = maxWidth, limit > 0 {
        output = Self.downscale(image: decoded, maxWidth: CGFloat(limit))
    }

    guard let encoded = output.jpegData(compressionQuality: quality) else {
        throw CameraError.captureFailed("Failed to encode JPEG")
    }
    return (data: encoded, size: output.size)
}
/// Returns `image` scaled down proportionally so that it is `maxWidth` wide.
///
/// Images that are already at most `maxWidth` wide, or that report a non-positive size, are returned unchanged
/// (this function never upscales).
///
/// - Parameters:
///   - image: The image to shrink. Its `size` is used as the source dimensions.
///     NOTE(review): this treats `size` as pixels, which holds for scale-1 images — confirm for other callers.
///   - maxWidth: The target width. The height is scaled by the same factor and is at least 1.
/// - Returns: The redrawn image, or `image` itself when no downscaling is needed.
private nonisolated static func downscale(image: UIImage, maxWidth: CGFloat) -> UIImage {
    let w = image.size.width
    let h = image.size.height
    guard w > 0, h > 0 else { return image }
    guard w > maxWidth else { return image }

    let scale = maxWidth / w
    let target = CGSize(width: maxWidth, height: max(1, h * scale))

    let format = UIGraphicsImageRendererFormat.default()
    // The default format renders at the main screen's scale (2x/3x). That would make the backing bitmap,
    // and any JPEG encoded from it, several times wider than `maxWidth` while `size` still reports `target`.
    // Render at 1x so one point equals one pixel.
    format.scale = 1
    format.opaque = false
    let renderer = UIGraphicsImageRenderer(size: target, format: format)
    return renderer.image { _ in
        image.draw(in: CGRect(origin: .zero, size: target))
    }
}
private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws {
let asset = AVAsset(url: inputURL)
guard let exporter = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetHighestQuality) else {
@@ -291,6 +253,17 @@ private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegat
}
self.continuation.resume(returning: data)
}
/// Capture-finished callback: when the capture ended with an error and the continuation has not been
/// resumed yet, resumes it by throwing that error.
func photoOutput(
    _ output: AVCapturePhotoOutput,
    didFinishCaptureFor resolvedSettings: AVCaptureResolvedPhotoSettings,
    error: Error?)
{
    // Nothing to do when the capture finished without an error or the continuation was already resumed.
    guard let failure = error, !self.didResume else { return }
    self.didResume = true
    self.continuation.resume(throwing: failure)
}
}
private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate {

View File

@@ -1,10 +1,9 @@
import AVFoundation
import ClawdisIPC
import ClawdisKit
import CoreGraphics
import Foundation
import ImageIO
import OSLog
import UniformTypeIdentifiers
actor CameraCaptureService {
enum CameraError: LocalizedError, Sendable {
@@ -34,8 +33,9 @@ actor CameraCaptureService {
func snap(facing: CameraFacing?, maxWidth: Int?, quality: Double?) async throws -> (data: Data, size: CGSize) {
let facing = facing ?? .front
let maxWidth = maxWidth.flatMap { $0 > 0 ? $0 : nil }
let quality = Self.clampQuality(quality)
let normalized = Self.normalizeSnap(maxWidth: maxWidth, quality: quality)
let maxWidth = normalized.maxWidth
let quality = normalized.quality
try await self.ensureAccess(for: .video)
@@ -74,7 +74,8 @@ actor CameraCaptureService {
output.capturePhoto(with: settings, delegate: PhotoCaptureDelegate(cont))
}
return try Self.reencodeJPEG(imageData: rawData, maxWidth: maxWidth, quality: quality)
let res = try JPEGTranscoder.transcodeToJPEG(imageData: rawData, maxWidthPx: maxWidth, quality: quality)
return (data: res.data, size: CGSize(width: res.widthPx, height: res.heightPx))
}
func clip(
@@ -185,71 +186,19 @@ actor CameraCaptureService {
return min(1.0, max(0.05, q))
}
/// Resolves the effective snapshot parameters from optional caller input.
///
/// - Parameters:
///   - maxWidth: Requested maximum width; `nil` or a non-positive value selects the default of 1600.
///   - quality: Requested JPEG quality, passed through `clampQuality`.
/// - Returns: The width limit and quality to use for the snapshot.
nonisolated static func normalizeSnap(maxWidth: Int?, quality: Double?) -> (maxWidth: Int, quality: Double) {
    // Fall back to a moderate width so downstream payloads stay manageable;
    // callers that want full resolution must ask for a larger maxWidth explicitly.
    var resolvedWidth = 1600
    if let requested = maxWidth, requested > 0 {
        resolvedWidth = requested
    }
    return (maxWidth: resolvedWidth, quality: Self.clampQuality(quality))
}
/// Resolves a clip duration in milliseconds: 3000 when `ms` is `nil`, otherwise `ms` limited to 250...15000.
private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
    guard let requested = ms else { return 3000 }
    return max(250, min(15000, requested))
}
/// Decodes `imageData`, downscales it when it is wider than `maxWidth`, and re-encodes the result as JPEG.
///
/// - Parameters:
///   - imageData: Encoded image bytes; only the image at index 0 is used.
///   - maxWidth: Optional width limit in pixels. `nil` or a non-positive value disables downscaling.
///   - quality: Lossy compression quality passed to the JPEG destination.
/// - Returns: The JPEG bytes and the pixel size of the image that was encoded.
/// - Throws: `CameraError.captureFailed` when decoding, downscaling, or encoding fails.
private nonisolated static func reencodeJPEG(
    imageData: Data,
    maxWidth: Int?,
    quality: Double) throws -> (data: Data, size: CGSize)
{
    guard let src = CGImageSourceCreateWithData(imageData as CFData, nil),
          let img = CGImageSourceCreateImageAtIndex(src, 0, nil)
    else {
        throw CameraError.captureFailed("Failed to decode captured image")
    }

    let finalImage: CGImage
    // A non-positive limit means "no limit". Without the `> 0` check it would reach `downscale`, which
    // cannot create a zero/negative-width context, and surface as a spurious downscale failure.
    if let maxWidth, maxWidth > 0, img.width > maxWidth {
        guard let scaled = self.downscale(image: img, maxWidth: maxWidth) else {
            throw CameraError.captureFailed("Failed to downscale image")
        }
        finalImage = scaled
    } else {
        finalImage = img
    }

    let out = NSMutableData()
    guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else {
        throw CameraError.captureFailed("Failed to create JPEG destination")
    }

    let props = [kCGImageDestinationLossyCompressionQuality: quality] as CFDictionary
    CGImageDestinationAddImage(dest, finalImage, props)
    guard CGImageDestinationFinalize(dest) else {
        throw CameraError.captureFailed("Failed to encode JPEG")
    }

    return (out as Data, CGSize(width: finalImage.width, height: finalImage.height))
}
/// Redraws `image` at `maxWidth` pixels wide, keeping the aspect ratio.
///
/// - Parameters:
///   - image: The source bitmap.
///   - maxWidth: The target width in pixels; the height is scaled by the same ratio, rounded, and at least 1.
/// - Returns: `image` itself when it has a non-positive dimension or is not wider than `maxWidth`;
///   otherwise the redrawn bitmap, or `nil` when the drawing context or its image cannot be created.
private nonisolated static func downscale(image: CGImage, maxWidth: Int) -> CGImage? {
    let sourceWidth = image.width
    let sourceHeight = image.height
    // Degenerate images and images that already fit are passed through untouched (no upscaling).
    guard sourceWidth > 0, sourceHeight > 0, sourceWidth > maxWidth else { return image }

    let ratio = Double(maxWidth) / Double(sourceWidth)
    let scaledHeight = max(1, Int((Double(sourceHeight) * ratio).rounded()))
    let targetRect = CGRect(x: 0, y: 0, width: maxWidth, height: scaledHeight)

    guard let context = CGContext(
        data: nil,
        width: maxWidth,
        height: scaledHeight,
        bitsPerComponent: 8,
        bytesPerRow: 0,
        space: CGColorSpaceCreateDeviceRGB(),
        bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
    else { return nil }

    context.interpolationQuality = .high
    context.draw(image, in: targetRect)
    return context.makeImage()
}
private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws {
let asset = AVAsset(url: inputURL)
guard let export = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetMediumQuality) else {

View File

@@ -27,5 +27,9 @@ let package = Package(
]),
.testTarget(
name: "ClawdisKitTests",
dependencies: ["ClawdisKit"]),
dependencies: ["ClawdisKit"],
swiftSettings: [
.enableUpcomingFeature("StrictConcurrency"),
.enableExperimentalFeature("SwiftTesting"),
]),
])

View File

@@ -0,0 +1,93 @@
import CoreGraphics
import Foundation
import ImageIO
import UniformTypeIdentifiers
/// Failure reasons for JPEG transcoding, each with a human-readable description.
public enum JPEGTranscodeError: LocalizedError, Sendable {
    /// The input bytes could not be decoded as an image.
    case decodeFailed
    /// The image's properties (such as its pixel dimensions) could not be read.
    case propertiesMissing
    /// The JPEG output could not be produced.
    case encodeFailed

    public var errorDescription: String? {
        let message: String
        switch self {
        case .decodeFailed:
            message = "Failed to decode image data"
        case .propertiesMissing:
            message = "Failed to read image properties"
        case .encodeFailed:
            message = "Failed to encode JPEG"
        }
        return message
    }
}
/// Re-encodes image data as JPEG, with optional width-limited downscaling and orientation normalization.
public struct JPEGTranscoder: Sendable {
    /// Limits a JPEG compression quality to the range `0.05...1.0`.
    ///
    /// - Parameter quality: The requested quality.
    /// - Returns: `quality` clamped into `0.05...1.0`.
    public static func clampQuality(_ quality: Double) -> Double {
        min(1.0, max(0.05, quality))
    }

    /// Re-encodes image data to JPEG, optionally downscaling so that the *oriented* pixel width is <= `maxWidthPx`.
    ///
    /// - Important: This normalizes EXIF orientation (the output pixels are rotated if needed; orientation tag is not
    ///   relied on).
    /// - Parameters:
    ///   - imageData: Encoded image bytes; the image at index 0 is used.
    ///   - maxWidthPx: Maximum oriented width in pixels. `nil` or a non-positive value keeps the source size.
    ///     Images that are already narrower are never upscaled.
    ///   - quality: JPEG compression quality; clamped with `clampQuality(_:)` before encoding.
    /// - Returns: The JPEG bytes and the pixel dimensions of the encoded image.
    /// - Throws: `JPEGTranscodeError.decodeFailed` when the data cannot be decoded,
    ///   `JPEGTranscodeError.propertiesMissing` when valid pixel dimensions cannot be read, and
    ///   `JPEGTranscodeError.encodeFailed` when the JPEG cannot be written.
    public static func transcodeToJPEG(
        imageData: Data,
        maxWidthPx: Int?,
        quality: Double) throws -> (data: Data, widthPx: Int, heightPx: Int)
    {
        guard let src = CGImageSourceCreateWithData(imageData as CFData, nil) else {
            throw JPEGTranscodeError.decodeFailed
        }
        guard
            let props = CGImageSourceCopyPropertiesAtIndex(src, 0, nil) as? [CFString: Any],
            let rawWidth = props[kCGImagePropertyPixelWidth] as? NSNumber,
            let rawHeight = props[kCGImagePropertyPixelHeight] as? NSNumber
        else {
            throw JPEGTranscodeError.propertiesMissing
        }

        let pixelWidth = rawWidth.intValue
        let pixelHeight = rawHeight.intValue
        // A missing orientation tag is treated as 1 (no transform).
        let orientation = (props[kCGImagePropertyOrientation] as? NSNumber)?.intValue ?? 1

        guard pixelWidth > 0, pixelHeight > 0 else {
            throw JPEGTranscodeError.propertiesMissing
        }

        // Orientations 5 through 8 involve a quarter turn, so stored width and height swap once applied.
        let rotates90 = orientation == 5 || orientation == 6 || orientation == 7 || orientation == 8
        let orientedWidth = rotates90 ? pixelHeight : pixelWidth
        let orientedHeight = rotates90 ? pixelWidth : pixelHeight

        // The thumbnail API limits the *longest* edge, so the width limit is converted into a long-edge limit.
        let maxDim = max(orientedWidth, orientedHeight)
        let targetMaxPixelSize: Int = {
            guard let maxWidthPx, maxWidthPx > 0 else { return maxDim }
            guard orientedWidth > maxWidthPx else { return maxDim } // never upscale
            // Multiply before dividing so the result is exact whenever it is an integer (e.g. landscape images,
            // where maxDim == orientedWidth yields exactly maxWidthPx). Round *down* so the proportional width
            // implied by this long edge can never exceed maxWidthPx; rounding to nearest could overshoot by a
            // fraction of a pixel and leave the `<= maxWidthPx` guarantee dependent on the decoder's rounding.
            let scaledMaxDim = (Double(maxDim) * Double(maxWidthPx) / Double(orientedWidth)).rounded(.down)
            return max(1, Int(scaledMaxDim))
        }()

        let thumbOpts: [CFString: Any] = [
            kCGImageSourceCreateThumbnailFromImageAlways: true,
            // Applies the orientation transform so the output pixels are already upright.
            kCGImageSourceCreateThumbnailWithTransform: true,
            kCGImageSourceThumbnailMaxPixelSize: targetMaxPixelSize,
            kCGImageSourceShouldCacheImmediately: true,
        ]

        guard let img = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOpts as CFDictionary) else {
            throw JPEGTranscodeError.decodeFailed
        }

        let out = NSMutableData()
        guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else {
            throw JPEGTranscodeError.encodeFailed
        }
        let q = self.clampQuality(quality)
        let encodeProps = [kCGImageDestinationLossyCompressionQuality: q] as CFDictionary
        CGImageDestinationAddImage(dest, img, encodeProps)
        guard CGImageDestinationFinalize(dest) else {
            throw JPEGTranscodeError.encodeFailed
        }

        return (out as Data, img.width, img.height)
    }
}

View File

@@ -1,28 +1,27 @@
import ClawdisKit
import XCTest
import Testing
// Checks how `BonjourEscapes.decode` treats plain text and backslash + three-digit escapes.
final class BonjourEscapesTests: XCTestCase {
// Text without escapes, including the empty string, is expected back unchanged.
func testDecodePassThrough() {
XCTAssertEqual(BonjourEscapes.decode("hello"), "hello")
XCTAssertEqual(BonjourEscapes.decode(""), "")
@Suite struct BonjourEscapesTests {
@Test func decodePassThrough() {
#expect(BonjourEscapes.decode("hello") == "hello")
#expect(BonjourEscapes.decode("") == "")
}
// `\032` is expected to decode to a space.
func testDecodeSpaces() {
XCTAssertEqual(BonjourEscapes.decode("Clawdis\\032Gateway"), "Clawdis Gateway")
@Test func decodeSpaces() {
#expect(BonjourEscapes.decode("Clawdis\\032Gateway") == "Clawdis Gateway")
}
// Several escapes in one string are each decoded: `\038` -> `&`, `\047` -> `/`, `\032` -> space.
func testDecodeMultipleEscapes() {
XCTAssertEqual(
BonjourEscapes.decode("A\\038B\\047C\\032D"),
"A&B/C D")
@Test func decodeMultipleEscapes() {
#expect(BonjourEscapes.decode("A\\038B\\047C\\032D") == "A&B/C D")
}
// A backslash followed by only two digits, or by non-digits, is expected to be left as written.
func testDecodeIgnoresInvalidEscapeSequences() {
XCTAssertEqual(BonjourEscapes.decode("Hello\\03World"), "Hello\\03World")
XCTAssertEqual(BonjourEscapes.decode("Hello\\XYZWorld"), "Hello\\XYZWorld")
@Test func decodeIgnoresInvalidEscapeSequences() {
#expect(BonjourEscapes.decode("Hello\\03World") == "Hello\\03World")
#expect(BonjourEscapes.decode("Hello\\XYZWorld") == "Hello\\XYZWorld")
}
// The digits are read as a decimal scalar value: `\065` is expected to give `A` (scalar 65).
func testDecodeUsesDecimalUnicodeScalarValue() {
XCTAssertEqual(BonjourEscapes.decode("Hello\\065World"), "HelloAWorld")
@Test func decodeUsesDecimalUnicodeScalarValue() {
#expect(BonjourEscapes.decode("Hello\\065World") == "HelloAWorld")
}
}

View File

@@ -0,0 +1,73 @@
import ClawdisKit
import CoreGraphics
import ImageIO
import Testing
import UniformTypeIdentifiers
/// Exercises `JPEGTranscoder.transcodeToJPEG` with synthetic solid-red JPEG inputs.
@Suite struct JPEGTranscoderTests {
    /// Renders a solid red bitmap of the given pixel size and encodes it as a quality-1.0 JPEG,
    /// optionally tagging the encoded image with an orientation value.
    private func makeSolidJPEG(width: Int, height: Int, orientation: Int? = nil) throws -> Data {
        guard
            let canvas = CGContext(
                data: nil,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: 0,
                space: CGColorSpaceCreateDeviceRGB(),
                bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
        else {
            throw NSError(domain: "JPEGTranscoderTests", code: 1)
        }

        canvas.setFillColor(red: 1, green: 0, blue: 0, alpha: 1)
        canvas.fill(CGRect(x: 0, y: 0, width: width, height: height))
        guard let bitmap = canvas.makeImage() else {
            throw NSError(domain: "JPEGTranscoderTests", code: 5)
        }

        let buffer = NSMutableData()
        guard let sink = CGImageDestinationCreateWithData(buffer, UTType.jpeg.identifier as CFString, 1, nil) else {
            throw NSError(domain: "JPEGTranscoderTests", code: 2)
        }

        var options: [CFString: Any] = [kCGImageDestinationLossyCompressionQuality: 1.0]
        if let orientation {
            options[kCGImagePropertyOrientation] = orientation
        }

        CGImageDestinationAddImage(sink, bitmap, options as CFDictionary)
        guard CGImageDestinationFinalize(sink) else {
            throw NSError(domain: "JPEGTranscoderTests", code: 3)
        }
        return buffer as Data
    }

    @Test func downscalesToMaxWidthPx() throws {
        let source = try makeSolidJPEG(width: 2000, height: 1000)
        let result = try JPEGTranscoder.transcodeToJPEG(imageData: source, maxWidthPx: 1600, quality: 0.9)
        #expect(result.widthPx == 1600)
        // The height follows the aspect ratio; allow one pixel of rounding slack.
        #expect(abs(result.heightPx - 800) <= 1)
        #expect(!result.data.isEmpty)
    }

    @Test func doesNotUpscaleWhenSmallerThanMaxWidthPx() throws {
        let source = try makeSolidJPEG(width: 800, height: 600)
        let result = try JPEGTranscoder.transcodeToJPEG(imageData: source, maxWidthPx: 1600, quality: 0.9)
        #expect(result.widthPx == 800)
        #expect(result.heightPx == 600)
    }

    @Test func normalizesOrientationAndUsesOrientedWidthForMaxWidthPx() throws {
        // A 2000x1000 bitmap tagged with orientation 6 (rotated 90°) has an oriented width of 1000,
        // which is already below the 1600 limit, so only the rotation should be applied.
        let source = try makeSolidJPEG(width: 2000, height: 1000, orientation: 6)
        let result = try JPEGTranscoder.transcodeToJPEG(imageData: source, maxWidthPx: 1600, quality: 0.9)
        #expect(result.widthPx == 1000)
        #expect(result.heightPx == 2000)
    }
}

View File

@@ -28,7 +28,7 @@ All camera access is gated behind **user-controlled settings**.
- `camera.snap`
- Params:
- `facing`: `front|back` (default: `front`)
- `maxWidth`: number (optional)
- `maxWidth`: number (optional; default `1600` on the iOS node)
- `quality`: `0..1` (optional; default `0.9`)
- `format`: currently `jpg`
- Response payload:
@@ -112,6 +112,9 @@ clawdis-mac camera clip --duration-ms 3000 # prints MEDIA:<path>
clawdis-mac camera clip --no-audio
```
Notes:
- `clawdis-mac camera snap` defaults to `maxWidth=1600` unless overridden.
## Safety + practical limits
- Camera and microphone access trigger the usual OS permission prompts (and require usage strings in Info.plist).