From c3fa1fb7364ce80f017dc521c5aca2fc2eaddd30 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 14 Dec 2025 01:56:49 +0000 Subject: [PATCH] feat(camera): share jpeg transcoder + default maxWidth --- .../ios/Sources/Camera/CameraController.swift | 65 ++++--------- .../Clawdis/CameraCaptureService.swift | 79 +++------------- apps/shared/ClawdisKit/Package.swift | 6 +- .../Sources/ClawdisKit/JPEGTranscoder.swift | 93 +++++++++++++++++++ .../ClawdisKitTests/BonjourEscapesTests.swift | 31 +++---- .../ClawdisKitTests/JPEGTranscoderTests.swift | 73 +++++++++++++++ docs/camera.md | 5 +- 7 files changed, 223 insertions(+), 129 deletions(-) create mode 100644 apps/shared/ClawdisKit/Sources/ClawdisKit/JPEGTranscoder.swift create mode 100644 apps/shared/ClawdisKit/Tests/ClawdisKitTests/JPEGTranscoderTests.swift diff --git a/apps/ios/Sources/Camera/CameraController.swift b/apps/ios/Sources/Camera/CameraController.swift index 5579307b9..1d082077b 100644 --- a/apps/ios/Sources/Camera/CameraController.swift +++ b/apps/ios/Sources/Camera/CameraController.swift @@ -1,7 +1,6 @@ import AVFoundation import ClawdisKit import Foundation -import UIKit actor CameraController { enum CameraError: LocalizedError, Sendable { @@ -37,7 +36,9 @@ actor CameraController { height: Int) { let facing = params.facing ?? .front - let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } + // Default to a reasonable max width to keep bridge payload sizes manageable. + // If you need the full-res photo, explicitly request a larger maxWidth. + let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600 let quality = Self.clampQuality(params.quality) try await self.ensureAccess(for: .video) @@ -77,16 +78,16 @@ actor CameraController { output.capturePhoto(with: settings, delegate: PhotoCaptureDelegate(cont)) } - let (finalData, size) = try Self.reencodeJPEG( + let res = try JPEGTranscoder.transcodeToJPEG( imageData: rawData, - maxWidth: maxWidth, + maxWidthPx: maxWidth, quality: quality) return ( format: "jpg", - base64: finalData.base64EncodedString(), - width: Int(size.width.rounded()), - height: Int(size.height.rounded())) + base64: res.data.base64EncodedString(), + width: res.widthPx, + height: res.heightPx) } func clip(params: ClawdisCameraClipParams) async throws -> ( @@ -197,45 +198,6 @@ actor CameraController { return min(15000, max(250, v)) } - private nonisolated static func reencodeJPEG( - imageData: Data, - maxWidth: Int?, - quality: Double) throws -> (data: Data, size: CGSize) - { - guard let image = UIImage(data: imageData) else { - throw CameraError.captureFailed("Failed to decode captured image") - } - - let finalImage: UIImage = if let maxWidth, maxWidth > 0 { - Self.downscale(image: image, maxWidth: CGFloat(maxWidth)) - } else { - image - } - - guard let out = finalImage.jpegData(compressionQuality: quality) else { - throw CameraError.captureFailed("Failed to encode JPEG") - } - - return (out, finalImage.size) - } - - private nonisolated static func downscale(image: UIImage, maxWidth: CGFloat) -> UIImage { - let w = image.size.width - let h = image.size.height - guard w > 0, h > 0 else { return image } - guard w > maxWidth else { return image } - - let scale = maxWidth / w - let target = CGSize(width: maxWidth, height: max(1, h * scale)) - - let format = UIGraphicsImageRendererFormat.default() - format.opaque = false - let renderer = UIGraphicsImageRenderer(size: target, format: format) - return renderer.image { _ in - image.draw(in: CGRect(origin: .zero, size: target)) - } - } - private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws { let asset = AVAsset(url: inputURL) guard let exporter = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetHighestQuality) else { @@ -291,6 +253,17 @@ private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegat } self.continuation.resume(returning: data) } + + func photoOutput( + _ output: AVCapturePhotoOutput, + didFinishCaptureFor resolvedSettings: AVCaptureResolvedPhotoSettings, + error: Error?) + { + guard let error else { return } + guard !self.didResume else { return } + self.didResume = true + self.continuation.resume(throwing: error) + } } private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate { diff --git a/apps/macos/Sources/Clawdis/CameraCaptureService.swift b/apps/macos/Sources/Clawdis/CameraCaptureService.swift index 4d45e81ac..a0dfa88f4 100644 --- a/apps/macos/Sources/Clawdis/CameraCaptureService.swift +++ b/apps/macos/Sources/Clawdis/CameraCaptureService.swift @@ -1,10 +1,9 @@ import AVFoundation import ClawdisIPC +import ClawdisKit import CoreGraphics import Foundation -import ImageIO import OSLog -import UniformTypeIdentifiers actor CameraCaptureService { enum CameraError: LocalizedError, Sendable { @@ -34,8 +33,9 @@ actor CameraCaptureService { func snap(facing: CameraFacing?, maxWidth: Int?, quality: Double?) async throws -> (data: Data, size: CGSize) { let facing = facing ?? .front - let maxWidth = maxWidth.flatMap { $0 > 0 ? $0 : nil } - let quality = Self.clampQuality(quality) + let normalized = Self.normalizeSnap(maxWidth: maxWidth, quality: quality) + let maxWidth = normalized.maxWidth + let quality = normalized.quality try await self.ensureAccess(for: .video) @@ -74,7 +74,8 @@ actor CameraCaptureService { output.capturePhoto(with: settings, delegate: PhotoCaptureDelegate(cont)) } - return try Self.reencodeJPEG(imageData: rawData, maxWidth: maxWidth, quality: quality) + let res = try JPEGTranscoder.transcodeToJPEG(imageData: rawData, maxWidthPx: maxWidth, quality: quality) + return (data: res.data, size: CGSize(width: res.widthPx, height: res.heightPx)) } func clip( @@ -185,71 +186,19 @@ actor CameraCaptureService { return min(1.0, max(0.05, q)) } + nonisolated static func normalizeSnap(maxWidth: Int?, quality: Double?) -> (maxWidth: Int, quality: Double) { + // Default to a reasonable max width to keep downstream payload sizes manageable. + // If you need full-res, explicitly request a larger maxWidth. + let maxWidth = maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600 + let quality = Self.clampQuality(quality) + return (maxWidth: maxWidth, quality: quality) + } + private nonisolated static func clampDurationMs(_ ms: Int?) -> Int { let v = ms ?? 3000 return min(15000, max(250, v)) } - private nonisolated static func reencodeJPEG( - imageData: Data, - maxWidth: Int?, - quality: Double) throws -> (data: Data, size: CGSize) - { - guard let src = CGImageSourceCreateWithData(imageData as CFData, nil), - let img = CGImageSourceCreateImageAtIndex(src, 0, nil) - else { - throw CameraError.captureFailed("Failed to decode captured image") - } - - let finalImage: CGImage - if let maxWidth, img.width > maxWidth { - guard let scaled = self.downscale(image: img, maxWidth: maxWidth) else { - throw CameraError.captureFailed("Failed to downscale image") - } - finalImage = scaled - } else { - finalImage = img - } - - let out = NSMutableData() - guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else { - throw CameraError.captureFailed("Failed to create JPEG destination") - } - - let props = [kCGImageDestinationLossyCompressionQuality: quality] as CFDictionary - CGImageDestinationAddImage(dest, finalImage, props) - guard CGImageDestinationFinalize(dest) else { - throw CameraError.captureFailed("Failed to encode JPEG") - } - - return (out as Data, CGSize(width: finalImage.width, height: finalImage.height)) - } - - private nonisolated static func downscale(image: CGImage, maxWidth: Int) -> CGImage? { - guard image.width > 0, image.height > 0 else { return image } - guard image.width > maxWidth else { return image } - - let scale = Double(maxWidth) / Double(image.width) - let targetW = maxWidth - let targetH = max(1, Int((Double(image.height) * scale).rounded())) - - let cs = CGColorSpaceCreateDeviceRGB() - let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue - guard let ctx = CGContext( - data: nil, - width: targetW, - height: targetH, - bitsPerComponent: 8, - bytesPerRow: 0, - space: cs, - bitmapInfo: bitmapInfo) - else { return nil } - - ctx.interpolationQuality = .high - ctx.draw(image, in: CGRect(x: 0, y: 0, width: targetW, height: targetH)) - return ctx.makeImage() - } - private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws { let asset = AVAsset(url: inputURL) guard let export = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetMediumQuality) else { diff --git a/apps/shared/ClawdisKit/Package.swift b/apps/shared/ClawdisKit/Package.swift index 770d991f2..89e3431e6 100644 --- a/apps/shared/ClawdisKit/Package.swift +++ b/apps/shared/ClawdisKit/Package.swift @@ -27,5 +27,9 @@ let package = Package( ]), .testTarget( name: "ClawdisKitTests", - dependencies: ["ClawdisKit"]), + dependencies: ["ClawdisKit"], + swiftSettings: [ + .enableUpcomingFeature("StrictConcurrency"), + .enableExperimentalFeature("SwiftTesting"), + ]), ]) diff --git a/apps/shared/ClawdisKit/Sources/ClawdisKit/JPEGTranscoder.swift b/apps/shared/ClawdisKit/Sources/ClawdisKit/JPEGTranscoder.swift new file mode 100644 index 000000000..39761f131 --- /dev/null +++ b/apps/shared/ClawdisKit/Sources/ClawdisKit/JPEGTranscoder.swift @@ -0,0 +1,93 @@ +import CoreGraphics +import Foundation +import ImageIO +import UniformTypeIdentifiers + +public enum JPEGTranscodeError: LocalizedError, Sendable { + case decodeFailed + case propertiesMissing + case encodeFailed + + public var errorDescription: String? { + switch self { + case .decodeFailed: + "Failed to decode image data" + case .propertiesMissing: + "Failed to read image properties" + case .encodeFailed: + "Failed to encode JPEG" + } + } +} + +public struct JPEGTranscoder: Sendable { + public static func clampQuality(_ quality: Double) -> Double { + min(1.0, max(0.05, quality)) + } + + /// Re-encodes image data to JPEG, optionally downscaling so that the *oriented* pixel width is <= `maxWidthPx`. + /// + /// - Important: This normalizes EXIF orientation (the output pixels are rotated if needed; orientation tag is not + /// relied on). + public static func transcodeToJPEG( + imageData: Data, + maxWidthPx: Int?, + quality: Double) throws -> (data: Data, widthPx: Int, heightPx: Int) + { + guard let src = CGImageSourceCreateWithData(imageData as CFData, nil) else { + throw JPEGTranscodeError.decodeFailed + } + guard + let props = CGImageSourceCopyPropertiesAtIndex(src, 0, nil) as? [CFString: Any], + let rawWidth = props[kCGImagePropertyPixelWidth] as? NSNumber, + let rawHeight = props[kCGImagePropertyPixelHeight] as? NSNumber + else { + throw JPEGTranscodeError.propertiesMissing + } + + let pixelWidth = rawWidth.intValue + let pixelHeight = rawHeight.intValue + let orientation = (props[kCGImagePropertyOrientation] as? NSNumber)?.intValue ?? 1 + + guard pixelWidth > 0, pixelHeight > 0 else { + throw JPEGTranscodeError.propertiesMissing + } + + let rotates90 = orientation == 5 || orientation == 6 || orientation == 7 || orientation == 8 + let orientedWidth = rotates90 ? pixelHeight : pixelWidth + let orientedHeight = rotates90 ? pixelWidth : pixelHeight + + let maxDim = max(orientedWidth, orientedHeight) + let targetMaxPixelSize: Int = { + guard let maxWidthPx, maxWidthPx > 0 else { return maxDim } + guard orientedWidth > maxWidthPx else { return maxDim } // never upscale + + let scale = Double(maxWidthPx) / Double(orientedWidth) + return max(1, Int((Double(maxDim) * scale).rounded(.toNearestOrAwayFromZero))) + }() + + let thumbOpts: [CFString: Any] = [ + kCGImageSourceCreateThumbnailFromImageAlways: true, + kCGImageSourceCreateThumbnailWithTransform: true, + kCGImageSourceThumbnailMaxPixelSize: targetMaxPixelSize, + kCGImageSourceShouldCacheImmediately: true, + ] + + guard let img = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOpts as CFDictionary) else { + throw JPEGTranscodeError.decodeFailed + } + + let out = NSMutableData() + guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else { + throw JPEGTranscodeError.encodeFailed + } + let q = self.clampQuality(quality) + let encodeProps = [kCGImageDestinationLossyCompressionQuality: q] as CFDictionary + CGImageDestinationAddImage(dest, img, encodeProps) + guard CGImageDestinationFinalize(dest) else { + throw JPEGTranscodeError.encodeFailed + } + + return (out as Data, img.width, img.height) + } +} diff --git a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/BonjourEscapesTests.swift b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/BonjourEscapesTests.swift index f71a6dcd4..72be0d35c 100644 --- a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/BonjourEscapesTests.swift +++ b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/BonjourEscapesTests.swift @@ -1,28 +1,27 @@ import ClawdisKit -import XCTest +import Testing -final class BonjourEscapesTests: XCTestCase { - func testDecodePassThrough() { - XCTAssertEqual(BonjourEscapes.decode("hello"), "hello") - XCTAssertEqual(BonjourEscapes.decode(""), "") +@Suite struct BonjourEscapesTests { + @Test func decodePassThrough() { + #expect(BonjourEscapes.decode("hello") == "hello") + #expect(BonjourEscapes.decode("") == "") } - func testDecodeSpaces() { - XCTAssertEqual(BonjourEscapes.decode("Clawdis\\032Gateway"), "Clawdis Gateway") + @Test func decodeSpaces() { + #expect(BonjourEscapes.decode("Clawdis\\032Gateway") == "Clawdis Gateway") } - func testDecodeMultipleEscapes() { - XCTAssertEqual( - BonjourEscapes.decode("A\\038B\\047C\\032D"), - "A&B/C D") + @Test func decodeMultipleEscapes() { + #expect(BonjourEscapes.decode("A\\038B\\047C\\032D") == "A&B/C D") } - func testDecodeIgnoresInvalidEscapeSequences() { - XCTAssertEqual(BonjourEscapes.decode("Hello\\03World"), "Hello\\03World") - XCTAssertEqual(BonjourEscapes.decode("Hello\\XYZWorld"), "Hello\\XYZWorld") + @Test func decodeIgnoresInvalidEscapeSequences() { + #expect(BonjourEscapes.decode("Hello\\03World") == "Hello\\03World") + #expect(BonjourEscapes.decode("Hello\\XYZWorld") == "Hello\\XYZWorld") } - func testDecodeUsesDecimalUnicodeScalarValue() { - XCTAssertEqual(BonjourEscapes.decode("Hello\\065World"), "HelloAWorld") + @Test func decodeUsesDecimalUnicodeScalarValue() { + #expect(BonjourEscapes.decode("Hello\\065World") == "HelloAWorld") } } + diff --git a/apps/shared/ClawdisKit/Tests/ClawdisKitTests/JPEGTranscoderTests.swift b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/JPEGTranscoderTests.swift new file mode 100644 index 000000000..9c8fcfbda --- /dev/null +++ b/apps/shared/ClawdisKit/Tests/ClawdisKitTests/JPEGTranscoderTests.swift @@ -0,0 +1,73 @@ +import ClawdisKit +import CoreGraphics +import ImageIO +import Testing +import UniformTypeIdentifiers + +@Suite struct JPEGTranscoderTests { + private func makeSolidJPEG(width: Int, height: Int, orientation: Int? = nil) throws -> Data { + let cs = CGColorSpaceCreateDeviceRGB() + let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue + guard + let ctx = CGContext( + data: nil, + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: 0, + space: cs, + bitmapInfo: bitmapInfo) + else { + throw NSError(domain: "JPEGTranscoderTests", code: 1) + } + + ctx.setFillColor(red: 1, green: 0, blue: 0, alpha: 1) + ctx.fill(CGRect(x: 0, y: 0, width: width, height: height)) + guard let img = ctx.makeImage() else { + throw NSError(domain: "JPEGTranscoderTests", code: 5) + } + + let out = NSMutableData() + guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else { + throw NSError(domain: "JPEGTranscoderTests", code: 2) + } + + var props: [CFString: Any] = [ + kCGImageDestinationLossyCompressionQuality: 1.0, + ] + if let orientation { + props[kCGImagePropertyOrientation] = orientation + } + + CGImageDestinationAddImage(dest, img, props as CFDictionary) + guard CGImageDestinationFinalize(dest) else { + throw NSError(domain: "JPEGTranscoderTests", code: 3) + } + + return out as Data + } + + @Test func downscalesToMaxWidthPx() throws { + let input = try makeSolidJPEG(width: 2000, height: 1000) + let out = try JPEGTranscoder.transcodeToJPEG(imageData: input, maxWidthPx: 1600, quality: 0.9) + #expect(out.widthPx == 1600) + #expect(abs(out.heightPx - 800) <= 1) + #expect(out.data.count > 0) + } + + @Test func doesNotUpscaleWhenSmallerThanMaxWidthPx() throws { + let input = try makeSolidJPEG(width: 800, height: 600) + let out = try JPEGTranscoder.transcodeToJPEG(imageData: input, maxWidthPx: 1600, quality: 0.9) + #expect(out.widthPx == 800) + #expect(out.heightPx == 600) + } + + @Test func normalizesOrientationAndUsesOrientedWidthForMaxWidthPx() throws { + // Encode a landscape image but mark it rotated 90° (orientation 6). Oriented width becomes 1000. + let input = try makeSolidJPEG(width: 2000, height: 1000, orientation: 6) + let out = try JPEGTranscoder.transcodeToJPEG(imageData: input, maxWidthPx: 1600, quality: 0.9) + #expect(out.widthPx == 1000) + #expect(out.heightPx == 2000) + } +} + diff --git a/docs/camera.md b/docs/camera.md index 23fbab6e5..9157c2b8b 100644 --- a/docs/camera.md +++ b/docs/camera.md @@ -28,7 +28,7 @@ All camera access is gated behind **user-controlled settings**. - `camera.snap` - Params: - `facing`: `front|back` (default: `front`) - - `maxWidth`: number (optional) + - `maxWidth`: number (optional; default `1600` on the iOS node) - `quality`: `0..1` (optional; default `0.9`) - `format`: currently `jpg` - Response payload: @@ -112,6 +112,9 @@ clawdis-mac camera clip --duration-ms 3000 # prints MEDIA: clawdis-mac camera clip --no-audio ``` +Notes: +- `clawdis-mac camera snap` defaults to `maxWidth=1600` unless overridden. + ## Safety + practical limits - Camera and microphone access trigger the usual OS permission prompts (and require usage strings in Info.plist).