fix: cap camera snap payload size

This commit is contained in:
Peter Steinberger
2025-12-29 23:12:20 +01:00
parent a61b7056d5
commit 8f0c8a6561
8 changed files with 267 additions and 29 deletions

View File

@@ -9,6 +9,8 @@
- macOS menu: device list now shows connected nodes only.
- iOS node: fix ReplayKit screen recording crash caused by queue isolation assertions during capture.
- iOS/Android nodes: bridge auto-connect refreshes stale tokens and settings now show richer bridge/device details.
- iOS/Android nodes: status pill now surfaces camera activity instead of overlay toasts.
- iOS/Android nodes: camera snaps recompress to keep base64 payloads under 5 MB.
- CLI: avoid spurious gateway close errors after successful request/response cycles.
- Tests: add Swift Testing coverage for camera errors and Kotest coverage for Android bridge endpoints.

View File

@@ -28,6 +28,7 @@ import kotlinx.coroutines.withContext
import java.io.ByteArrayOutputStream
import java.io.File
import java.util.concurrent.Executor
import kotlin.math.roundToInt
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
@@ -99,14 +100,35 @@ class CameraCaptureManager(private val context: Context) {
decoded
}
val out = ByteArrayOutputStream()
val jpegQuality = (quality * 100.0).toInt().coerceIn(10, 100)
if (!scaled.compress(Bitmap.CompressFormat.JPEG, jpegQuality, out)) {
throw IllegalStateException("UNAVAILABLE: failed to encode JPEG")
}
val base64 = Base64.encodeToString(out.toByteArray(), Base64.NO_WRAP)
val maxPayloadBytes = 5 * 1024 * 1024
val maxEncodedBytes = (maxPayloadBytes / 4) * 3
val result =
JpegSizeLimiter.compressToLimit(
initialWidth = scaled.width,
initialHeight = scaled.height,
startQuality = (quality * 100.0).roundToInt().coerceIn(10, 100),
maxBytes = maxEncodedBytes,
encode = { width, height, q ->
val bitmap =
if (width == scaled.width && height == scaled.height) {
scaled
} else {
scaled.scale(width, height)
}
val out = ByteArrayOutputStream()
if (!bitmap.compress(Bitmap.CompressFormat.JPEG, q, out)) {
if (bitmap !== scaled) bitmap.recycle()
throw IllegalStateException("UNAVAILABLE: failed to encode JPEG")
}
if (bitmap !== scaled) {
bitmap.recycle()
}
out.toByteArray()
},
)
val base64 = Base64.encodeToString(result.bytes, Base64.NO_WRAP)
Payload(
"""{"format":"jpg","base64":"$base64","width":${scaled.width},"height":${scaled.height}}""",
"""{"format":"jpg","base64":"$base64","width":${result.width},"height":${result.height}}""",
)
}

View File

@@ -0,0 +1,61 @@
package com.steipete.clawdis.node.node
import kotlin.math.max
import kotlin.math.min
import kotlin.math.roundToInt
/**
 * Outcome of [JpegSizeLimiter.compressToLimit]: the encoded bytes together with the width,
 * height and quality that were handed to the encoder to produce them.
 *
 * `equals`/`hashCode` are overridden so that [bytes] is compared by content; the
 * implementations generated for a data class would compare the array by reference.
 */
internal data class JpegSizeLimiterResult(
    val bytes: ByteArray,
    val width: Int,
    val height: Int,
    val quality: Int,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is JpegSizeLimiterResult) return false
        return width == other.width &&
            height == other.height &&
            quality == other.quality &&
            bytes.contentEquals(other.bytes)
    }

    override fun hashCode(): Int {
        var result = bytes.contentHashCode()
        result = 31 * result + width
        result = 31 * result + height
        result = 31 * result + quality
        return result
    }
}

/** Searches for an encoder quality / image size combination whose output fits a byte budget. */
internal object JpegSizeLimiter {
    /**
     * Calls [encode] with progressively lower quality and, when that is not enough,
     * progressively smaller dimensions until the returned byte array is at most [maxBytes].
     *
     * The image is first encoded at [initialWidth] x [initialHeight] with [startQuality]
     * (clamped to `minQuality..100`). If that is too large, each scale attempt tries up to
     * [maxQualityAttempts] qualities (multiplying by 0.75 each time, never below [minQuality])
     * and then shrinks both dimensions by [scaleStep], never below [minSize] and never above
     * the current size. The search stops early once neither quality nor size can change, so
     * [encode] is never invoked twice with the same arguments.
     *
     * @param encode produces the encoded bytes for the given width, height and quality.
     * @return the first result whose size is within [maxBytes].
     * @throws IllegalArgumentException if the initial size or [maxBytes] is not positive.
     * @throws IllegalStateException with a `CAMERA_TOO_LARGE` message if no attempted
     *   combination fits.
     */
    fun compressToLimit(
        initialWidth: Int,
        initialHeight: Int,
        startQuality: Int,
        maxBytes: Int,
        minQuality: Int = 20,
        minSize: Int = 256,
        scaleStep: Double = 0.85,
        maxScaleAttempts: Int = 6,
        maxQualityAttempts: Int = 6,
        encode: (width: Int, height: Int, quality: Int) -> ByteArray,
    ): JpegSizeLimiterResult {
        require(initialWidth > 0 && initialHeight > 0) { "Invalid image size" }
        require(maxBytes > 0) { "Invalid maxBytes" }

        var width = initialWidth
        var height = initialHeight
        val clampedStartQuality = startQuality.coerceIn(minQuality, 100)

        var best =
            JpegSizeLimiterResult(
                bytes = encode(width, height, clampedStartQuality),
                width = width,
                height = height,
                quality = clampedStartQuality,
            )
        if (best.bytes.size <= maxBytes) return best

        for (scaleAttempt in 0 until maxScaleAttempts) {
            var quality = clampedStartQuality
            for (qualityAttempt in 0 until maxQualityAttempts) {
                // The very first combination was already encoded above; the attempt still
                // counts, but the encoder is not asked to repeat the same work.
                val alreadyEncoded = scaleAttempt == 0 && qualityAttempt == 0
                if (!alreadyEncoded) {
                    val bytes = encode(width, height, quality)
                    best = JpegSizeLimiterResult(bytes = bytes, width = width, height = height, quality = quality)
                    if (bytes.size <= maxBytes) return best
                }
                // Plain loops are used so that `break` really leaves the loop; inside
                // `repeat { }` a `return@repeat` only skips to the next iteration.
                if (quality <= minQuality) break
                val nextQuality = max(minQuality, (quality * 0.75).roundToInt())
                if (nextQuality == quality) break // rounding stalled; further attempts are identical
                quality = nextQuality
            }

            // Never scale the shorter side below minSize, and never scale up.
            val minScale = (minSize.toDouble() / min(width, height).toDouble()).coerceAtMost(1.0)
            val nextScale = max(scaleStep, minScale)
            val nextWidth = min(width, max(minSize, (width * nextScale).roundToInt()))
            val nextHeight = min(height, max(minSize, (height * nextScale).roundToInt()))
            // Compare after clamping so an image that cannot shrink ends the search here.
            if (nextWidth == width && nextHeight == height) break
            width = nextWidth
            height = nextHeight
        }

        // Every fitting result returned early, so `best` is still over the limit here.
        throw IllegalStateException("CAMERA_TOO_LARGE: ${best.bytes.size} bytes > $maxBytes bytes")
    }
}

View File

@@ -0,0 +1,47 @@
package com.steipete.clawdis.node.node
import org.junit.Assert.assertEquals
import org.junit.Assert.assertTrue
import org.junit.Test
import kotlin.math.min
/** Unit tests for [JpegSizeLimiter.compressToLimit] driven by fake encoders. */
class JpegSizeLimiterTest {
    /** An oversized first encode must end in a result that fits the byte budget. */
    @Test
    fun compressesLargePayloadsUnderLimit() {
        val maxBytes = 5 * 1024 * 1024
        // Fake encoder: output grows with pixel count and quality, capped at twice the budget.
        val fakeEncode = { width: Int, height: Int, quality: Int ->
            val estimated = (width.toLong() * height.toLong() * quality.toLong()) / 100
            ByteArray(min(maxBytes.toLong() * 2, estimated).toInt())
        }

        val limited =
            JpegSizeLimiter.compressToLimit(
                initialWidth = 4000,
                initialHeight = 3000,
                startQuality = 95,
                maxBytes = maxBytes,
                encode = fakeEncode,
            )

        assertTrue(limited.bytes.size <= maxBytes)
        assertTrue(limited.width <= 4000)
        assertTrue(limited.height <= 3000)
        assertTrue(limited.quality <= 95)
    }

    /** A first encode that already fits must come back with the requested size and quality. */
    @Test
    fun keepsSmallPayloadsAsIs() {
        val maxBytes = 5 * 1024 * 1024
        val constantEncode = { _: Int, _: Int, _: Int -> ByteArray(120_000) }

        val untouched =
            JpegSizeLimiter.compressToLimit(
                initialWidth = 800,
                initialHeight = 600,
                startQuality = 90,
                maxBytes = maxBytes,
                encode = constantEncode,
            )

        assertEquals(800, untouched.width)
        assertEquals(600, untouched.height)
        assertEquals(90, untouched.quality)
    }
}

View File

@@ -84,10 +84,14 @@ actor CameraController {
}
withExtendedLifetime(delegate) {}
let maxPayloadBytes = 5 * 1024 * 1024
// Base64 inflates data by 4/3, so cap the raw JPEG bytes at 3/4 of the limit to keep the base64 payload <= 5 MB.
let maxEncodedBytes = (maxPayloadBytes / 4) * 3
let res = try JPEGTranscoder.transcodeToJPEG(
imageData: rawData,
maxWidthPx: maxWidth,
quality: quality)
quality: quality,
maxBytes: maxEncodedBytes)
return (
format: format.rawValue,

View File

@@ -7,6 +7,7 @@ public enum JPEGTranscodeError: LocalizedError, Sendable {
case decodeFailed
case propertiesMissing
case encodeFailed
case sizeLimitExceeded(maxBytes: Int, actualBytes: Int)
public var errorDescription: String? {
switch self {
@@ -16,6 +17,8 @@ public enum JPEGTranscodeError: LocalizedError, Sendable {
"Failed to read image properties"
case .encodeFailed:
"Failed to encode JPEG"
case let .sizeLimitExceeded(maxBytes, actualBytes):
"JPEG exceeds size limit (\(actualBytes) bytes > \(maxBytes) bytes)"
}
}
}
@@ -32,7 +35,8 @@ public struct JPEGTranscoder: Sendable {
public static func transcodeToJPEG(
imageData: Data,
maxWidthPx: Int?,
quality: Double) throws -> (data: Data, widthPx: Int, heightPx: Int)
quality: Double,
maxBytes: Int? = nil) throws -> (data: Data, widthPx: Int, heightPx: Int)
{
guard let src = CGImageSourceCreateWithData(imageData as CFData, nil) else {
throw JPEGTranscodeError.decodeFailed
@@ -58,7 +62,7 @@ public struct JPEGTranscoder: Sendable {
let orientedHeight = rotates90 ? pixelWidth : pixelHeight
let maxDim = max(orientedWidth, orientedHeight)
let targetMaxPixelSize: Int = {
var targetMaxPixelSize: Int = {
guard let maxWidthPx, maxWidthPx > 0 else { return maxDim }
guard orientedWidth > maxWidthPx else { return maxDim } // never upscale
@@ -66,28 +70,66 @@ public struct JPEGTranscoder: Sendable {
return max(1, Int((Double(maxDim) * scale).rounded(.toNearestOrAwayFromZero)))
}()
let thumbOpts: [CFString: Any] = [
kCGImageSourceCreateThumbnailFromImageAlways: true,
kCGImageSourceCreateThumbnailWithTransform: true,
kCGImageSourceThumbnailMaxPixelSize: targetMaxPixelSize,
kCGImageSourceShouldCacheImmediately: true,
]
func encode(maxPixelSize: Int, quality: Double) throws -> (data: Data, widthPx: Int, heightPx: Int) {
let thumbOpts: [CFString: Any] = [
kCGImageSourceCreateThumbnailFromImageAlways: true,
kCGImageSourceCreateThumbnailWithTransform: true,
kCGImageSourceThumbnailMaxPixelSize: maxPixelSize,
kCGImageSourceShouldCacheImmediately: true,
]
guard let img = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOpts as CFDictionary) else {
throw JPEGTranscodeError.decodeFailed
guard let img = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOpts as CFDictionary) else {
throw JPEGTranscodeError.decodeFailed
}
let out = NSMutableData()
guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else {
throw JPEGTranscodeError.encodeFailed
}
let q = self.clampQuality(quality)
let encodeProps = [kCGImageDestinationLossyCompressionQuality: q] as CFDictionary
CGImageDestinationAddImage(dest, img, encodeProps)
guard CGImageDestinationFinalize(dest) else {
throw JPEGTranscodeError.encodeFailed
}
return (out as Data, img.width, img.height)
}
let out = NSMutableData()
guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else {
throw JPEGTranscodeError.encodeFailed
}
let q = self.clampQuality(quality)
let encodeProps = [kCGImageDestinationLossyCompressionQuality: q] as CFDictionary
CGImageDestinationAddImage(dest, img, encodeProps)
guard CGImageDestinationFinalize(dest) else {
throw JPEGTranscodeError.encodeFailed
guard let maxBytes, maxBytes > 0 else {
return try encode(maxPixelSize: targetMaxPixelSize, quality: quality)
}
return (out as Data, img.width, img.height)
let minQuality = max(0.2, self.clampQuality(quality) * 0.35)
let minPixelSize = 256
var best = try encode(maxPixelSize: targetMaxPixelSize, quality: quality)
if best.data.count <= maxBytes {
return best
}
for _ in 0..<6 {
var q = self.clampQuality(quality)
for _ in 0..<6 {
let candidate = try encode(maxPixelSize: targetMaxPixelSize, quality: q)
best = candidate
if candidate.data.count <= maxBytes {
return candidate
}
if q <= minQuality { break }
q = max(minQuality, q * 0.75)
}
let nextPixelSize = max(Int(Double(targetMaxPixelSize) * 0.85), minPixelSize)
if nextPixelSize == targetMaxPixelSize {
break
}
targetMaxPixelSize = nextPixelSize
}
if best.data.count > maxBytes {
throw JPEGTranscodeError.sizeLimitExceeded(maxBytes: maxBytes, actualBytes: best.data.count)
}
return best
}
}

View File

@@ -47,6 +47,52 @@ import UniformTypeIdentifiers
return out as Data
}
/// Builds a JPEG of the given size whose RGBA pixel bytes are all random.
/// Presumably used so the encoded image is large enough to exercise size limits — confirm against callers.
private func makeNoiseJPEG(width: Int, height: Int) throws -> Data {
    let channels = 4
    let totalBytes = width * height * channels
    var pixels = Data(count: totalBytes)

    return try pixels.withUnsafeMutableBytes { buffer -> Data in
        guard let start = buffer.baseAddress?.assumingMemoryBound(to: UInt8.self) else {
            throw NSError(domain: "JPEGTranscoderTests", code: 6)
        }

        // Fill every byte (all four channels of every pixel) with a random value.
        var offset = 0
        while offset < totalBytes {
            start[offset] = UInt8.random(in: 0...255)
            offset += 1
        }

        // Wrap the buffer in a bitmap context so it can be snapshotted as a CGImage.
        guard
            let context = CGContext(
                data: start,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: width * channels,
                space: CGColorSpaceCreateDeviceRGB(),
                bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
        else {
            throw NSError(domain: "JPEGTranscoderTests", code: 7)
        }
        guard let image = context.makeImage() else {
            throw NSError(domain: "JPEGTranscoderTests", code: 8)
        }

        // Encode with the destination's default properties (no explicit quality).
        let jpegBuffer = NSMutableData()
        guard
            let destination = CGImageDestinationCreateWithData(
                jpegBuffer,
                UTType.jpeg.identifier as CFString,
                1,
                nil)
        else {
            throw NSError(domain: "JPEGTranscoderTests", code: 9)
        }
        CGImageDestinationAddImage(destination, image, nil)
        guard CGImageDestinationFinalize(destination) else {
            throw NSError(domain: "JPEGTranscoderTests", code: 10)
        }
        return jpegBuffer as Data
    }
}
@Test func downscalesToMaxWidthPx() throws {
let input = try makeSolidJPEG(width: 2000, height: 1000)
let out = try JPEGTranscoder.transcodeToJPEG(imageData: input, maxWidthPx: 1600, quality: 0.9)
@@ -69,5 +115,14 @@ import UniformTypeIdentifiers
#expect(out.widthPx == 1000)
#expect(out.heightPx == 2000)
}
}
/// Transcoding a 1600x1200 noise image with a byte limit must return data within that limit.
@Test func respectsMaxBytes() throws {
    let byteLimit = 180_000
    let noisyInput = try makeNoiseJPEG(width: 1600, height: 1200)

    let transcoded = try JPEGTranscoder.transcodeToJPEG(
        imageData: noisyInput,
        maxWidthPx: 1600,
        quality: 0.95,
        maxBytes: byteLimit)

    #expect(transcoded.data.count <= byteLimit)
}
}

View File

@@ -35,6 +35,7 @@ All camera access is gated behind **user-controlled settings**.
- `format: "jpg"`
- `base64: "<...>"`
- `width`, `height`
- Payload guard: photos are recompressed to keep the base64 payload under 5 MB.
- `camera.clip`
- Params:
@@ -90,6 +91,10 @@ If permissions are missing, the app will prompt when possible; if denied, `camer
Like `canvas.*`, the Android node only allows `camera.*` commands in the **foreground**. Background invocations return `NODE_BACKGROUND_UNAVAILABLE`.
### Payload guard
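Photos are recompressed (JPEG quality is lowered first, then the image is downscaled) to keep the base64 payload under 5 MB. If a photo still does not fit, the snap fails with a `CAMERA_TOO_LARGE` error.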
## macOS app
### User setting (default off)