fix: align camera payload caps

This commit is contained in:
Peter Steinberger
2025-12-29 23:20:35 +01:00
parent 6e83f95c83
commit 6927b0fb8d
7 changed files with 17 additions and 3 deletions

View File

@@ -2,6 +2,9 @@
## 2.0.0-beta5 — Unreleased ## 2.0.0-beta5 — Unreleased
### Features
- Talk mode: continuous speech conversations (macOS/iOS/Android) with ElevenLabs TTS, reply directives, and optional interrupt-on-speech.
### Fixes ### Fixes
- macOS: Voice Wake now fully tears down the Speech pipeline when disabled (cancel pending restarts, drop stale callbacks) to avoid high CPU in the background. - macOS: Voice Wake now fully tears down the Speech pipeline when disabled (cancel pending restarts, drop stale callbacks) to avoid high CPU in the background.
- iOS/Android nodes: enable scrolling for loaded web pages in the Canvas WebView (default scaffold stays touch-first). - iOS/Android nodes: enable scrolling for loaded web pages in the Canvas WebView (default scaffold stays touch-first).
@@ -10,7 +13,7 @@
- iOS node: fix ReplayKit screen recording crash caused by queue isolation assertions during capture. - iOS node: fix ReplayKit screen recording crash caused by queue isolation assertions during capture.
- iOS/Android nodes: bridge auto-connect refreshes stale tokens and settings now show richer bridge/device details. - iOS/Android nodes: bridge auto-connect refreshes stale tokens and settings now show richer bridge/device details.
- iOS/Android nodes: status pill now surfaces camera activity instead of overlay toasts. - iOS/Android nodes: status pill now surfaces camera activity instead of overlay toasts.
- iOS/Android nodes: camera snaps recompress to keep base64 payloads under 5 MB. - iOS/Android/macOS nodes: camera snaps recompress to keep base64 payloads under 5 MB.
- CLI: avoid spurious gateway close errors after successful request/response cycles. - CLI: avoid spurious gateway close errors after successful request/response cycles.
- Agent runtime: clamp tool-result images to the 5MB Anthropic limit to avoid hard request rejections. - Agent runtime: clamp tool-result images to the 5MB Anthropic limit to avoid hard request rejections.
- Tests: add Swift Testing coverage for camera errors and Kotest coverage for Android bridge endpoints. - Tests: add Swift Testing coverage for camera errors and Kotest coverage for Android bridge endpoints.

View File

@@ -101,6 +101,7 @@ class CameraCaptureManager(private val context: Context) {
} }
val maxPayloadBytes = 5 * 1024 * 1024 val maxPayloadBytes = 5 * 1024 * 1024
// Base64 inflates payloads by ~4/3; cap the JPEG bytes at 3/4 of the limit so the base64 payload is at most 5 MB (API limit).
val maxEncodedBytes = (maxPayloadBytes / 4) * 3 val maxEncodedBytes = (maxPayloadBytes / 4) * 3
val result = val result =
JpegSizeLimiter.compressToLimit( JpegSizeLimiter.compressToLimit(

View File

@@ -67,6 +67,7 @@ fun RootScreen(viewModel: MainViewModel) {
val cameraFlashToken by viewModel.cameraFlashToken.collectAsState() val cameraFlashToken by viewModel.cameraFlashToken.collectAsState()
val activity = val activity =
remember(cameraHud) { remember(cameraHud) {
// Status pill owns transient capture state so it doesn't overlap the connection indicator.
cameraHud?.let { hud -> cameraHud?.let { hud ->
when (hud.kind) { when (hud.kind) {
CameraHudKind.Photo -> CameraHudKind.Photo ->

View File

@@ -85,7 +85,7 @@ actor CameraController {
withExtendedLifetime(delegate) {} withExtendedLifetime(delegate) {}
let maxPayloadBytes = 5 * 1024 * 1024 let maxPayloadBytes = 5 * 1024 * 1024
// Base64 inflates payloads by ~4/3, so cap encoded bytes to keep payload <= 5MB. // Base64 inflates payloads by ~4/3; cap the JPEG bytes at 3/4 of the limit so the base64 payload is at most 5 MB (API limit).
let maxEncodedBytes = (maxPayloadBytes / 4) * 3 let maxEncodedBytes = (maxPayloadBytes / 4) * 3
let res = try JPEGTranscoder.transcodeToJPEG( let res = try JPEGTranscoder.transcodeToJPEG(
imageData: rawData, imageData: rawData,

View File

@@ -173,6 +173,7 @@ private struct CanvasContent: View {
} }
private var statusActivity: StatusPill.Activity? { private var statusActivity: StatusPill.Activity? {
// Status pill owns transient capture state so it doesn't overlap the connection indicator.
guard let cameraHUDText, !cameraHUDText.isEmpty, let cameraHUDKind else { return nil } guard let cameraHUDText, !cameraHUDText.isEmpty, let cameraHUDKind else { return nil }
let systemImage: String let systemImage: String
let tint: Color? let tint: Color?

View File

@@ -79,7 +79,14 @@ actor CameraCaptureService {
} }
withExtendedLifetime(delegate) {} withExtendedLifetime(delegate) {}
let res = try JPEGTranscoder.transcodeToJPEG(imageData: rawData, maxWidthPx: maxWidth, quality: quality) let maxPayloadBytes = 5 * 1024 * 1024
// Base64 inflates payloads by ~4/3; cap the JPEG bytes at 3/4 of the limit so the base64 payload is at most 5 MB (API limit).
let maxEncodedBytes = (maxPayloadBytes / 4) * 3
let res = try JPEGTranscoder.transcodeToJPEG(
imageData: rawData,
maxWidthPx: maxWidth,
quality: quality,
maxBytes: maxEncodedBytes)
return (data: res.data, size: CGSize(width: res.widthPx, height: res.heightPx)) return (data: res.data, size: CGSize(width: res.widthPx, height: res.heightPx))
} }

View File

@@ -121,6 +121,7 @@ clawdis nodes camera clip --node <id> --no-audio
Notes: Notes:
- `clawdis nodes camera snap` defaults to `maxWidth=1600` unless overridden. - `clawdis nodes camera snap` defaults to `maxWidth=1600` unless overridden.
- Photo payloads are recompressed so the base64-encoded payload is at most 5 MB.
## Safety + practical limits ## Safety + practical limits