feat: native image injection for vision-capable models

- Auto-detect and load images referenced in user prompts
- Inject history images at their original message positions
- Fix EXIF orientation: rotate before resizing in resizeToJpeg
- Sandbox security: validate paths, block remote URLs when sandbox enabled
- Prevent duplicate history image injection across turns
- Handle string-based user message content (convert to array)
- Add bounds check for message index in history processing
- Fix regex to properly match relative paths (./ ../)
- Add multi-image support for iMessage attachments
- Pass MAX_IMAGE_BYTES limit to image loading

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Commit 8d74578ceb (parent f7123ec30a), committed by Peter Steinberger.
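Example (illustration, not part of this diff): the flow this commit wires into the embedded runner, using the helper added below. The session and model shapes are simplified stand-ins.

import { detectAndLoadPromptImages } from "./images.js";

async function promptWithNativeImages(
  session: { messages: unknown[]; prompt: (p: string, opts?: { images?: unknown[] }) => Promise<void> },
  model: { input?: string[] },
  prompt: string,
  workspaceDir: string,
): Promise<void> {
  // No-op unless model.input includes "image".
  const result = await detectAndLoadPromptImages({
    prompt,
    workspaceDir,
    model,
    historyMessages: session.messages,
  });
  // Current-turn images ride along with the prompt; history images are
  // injected back into their original message positions (see attempt.ts hunk).
  if (result.images.length > 0) {
    await session.prompt(prompt, { images: result.images });
  } else {
    await session.prompt(prompt);
  }
}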

@@ -41,12 +41,15 @@ export function createClawdbotTools(options?: {
  replyToMode?: "off" | "first" | "all";
  /** Mutable ref to track if a reply was sent (for "first" mode). */
  hasRepliedRef?: { value: boolean };
+ /** If true, the model has native vision capability */
+ modelHasVision?: boolean;
}): AnyAgentTool[] {
  const imageTool = options?.agentDir?.trim()
    ? createImageTool({
        config: options?.config,
        agentDir: options.agentDir,
        sandboxRoot: options?.sandboxRoot,
+       modelHasVision: options?.modelHasVision,
      })
    : null;
  const webSearchTool = createWebSearchTool({

@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
import os from "node:os";

import type { AgentMessage } from "@mariozechner/pi-agent-core";
-import type { AssistantMessage } from "@mariozechner/pi-ai";
+import type { AssistantMessage, ImageContent } from "@mariozechner/pi-ai";
import { streamSimple } from "@mariozechner/pi-ai";
import { createAgentSession, SessionManager, SettingsManager } from "@mariozechner/pi-coding-agent";

@@ -69,7 +69,9 @@ import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js";
import { isTimeoutError } from "../../failover-error.js";
import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js";

+import { MAX_IMAGE_BYTES } from "../../../media/constants.js";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
+import { detectAndLoadPromptImages } from "./images.js";

export async function runEmbeddedAttempt(
  params: EmbeddedRunAttemptParams,
@@ -133,6 +135,9 @@ export async function runEmbeddedAttempt(

  const agentDir = params.agentDir ?? resolveClawdbotAgentDir();

+ // Check if the model supports native image input
+ const modelHasVision = params.model.input?.includes("image") ?? false;

  const toolsRaw = createClawdbotCodingTools({
    exec: {
      ...params.execOverrides,
@@ -153,6 +158,7 @@ export async function runEmbeddedAttempt(
    currentThreadTs: params.currentThreadTs,
    replyToMode: params.replyToMode,
    hasRepliedRef: params.hasRepliedRef,
+   modelHasVision,
  });
  const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider });
  logToolSchemasForGoogle({ tools, provider: params.provider });
@@ -530,7 +536,60 @@ export async function runEmbeddedAttempt(
  }

  try {
-   await abortable(activeSession.prompt(effectivePrompt, { images: params.images }));
    // Detect and load images referenced in the prompt for vision-capable models.
    // This eliminates the need for an explicit "view" tool call by injecting
    // images directly into the prompt when the model supports it.
    // Also scans conversation history to enable follow-up questions about earlier images.
    const imageResult = await detectAndLoadPromptImages({
      prompt: effectivePrompt,
      workspaceDir: effectiveWorkspace,
      model: params.model,
      existingImages: params.images,
      historyMessages: activeSession.messages,
      maxBytes: MAX_IMAGE_BYTES,
      // Enforce sandbox path restrictions when sandbox is enabled
      sandboxRoot: sandbox?.enabled ? sandbox.workspaceDir : undefined,
    });

    // Inject history images into their original message positions.
    // This ensures the model sees images in context (e.g., "compare to the first image").
    if (imageResult.historyImagesByIndex.size > 0) {
      for (const [msgIndex, images] of imageResult.historyImagesByIndex) {
        // Bounds check: ensure index is valid before accessing
        if (msgIndex < 0 || msgIndex >= activeSession.messages.length) continue;
        const msg = activeSession.messages[msgIndex];
        if (msg && msg.role === "user") {
          // Convert string content to array format if needed
          if (typeof msg.content === "string") {
            msg.content = [{ type: "text", text: msg.content }];
          }
          if (Array.isArray(msg.content)) {
            // Check for existing image content to avoid duplicates across turns
            const existingImageData = new Set(
              msg.content
                .filter((c): c is ImageContent =>
                  c != null && typeof c === "object" && c.type === "image" && typeof c.data === "string",
                )
                .map((c) => c.data),
            );
            for (const img of images) {
              // Only add if this image isn't already in the message
              if (!existingImageData.has(img.data)) {
                msg.content.push(img);
              }
            }
          }
        }
      }
    }

    // Only pass images option if there are actually images to pass
    // This avoids potential issues with models that don't expect the images parameter
    if (imageResult.images.length > 0) {
      await abortable(activeSession.prompt(effectivePrompt, { images: imageResult.images }));
    } else {
      await abortable(activeSession.prompt(effectivePrompt));
    }
  } catch (err) {
    promptError = err;
  } finally {
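Example (illustration, not part of this diff): the injection loop above only mutates user messages, and the before/after shape for one history entry looks roughly like this. Values are hypothetical.

// Before: an earlier turn stored with plain string content.
const before = { role: "user", content: "What's in ./shot.png?" };

// After injection: content normalized to an array, loaded image appended.
// Re-running the loop next turn is a no-op because the image's base64
// `data` is already in the dedup set built from the message content.
const after = {
  role: "user",
  content: [
    { type: "text", text: "What's in ./shot.png?" },
    { type: "image", data: "<base64...>", mimeType: "image/png" },
  ],
};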

src/agents/pi-embedded-runner/run/images.test.ts (new file, 218 lines)
@@ -0,0 +1,218 @@
import { describe, expect, it } from "vitest";

import { detectImageReferences, modelSupportsImages } from "./images.js";

describe("detectImageReferences", () => {
  it("detects absolute file paths with common extensions", () => {
    const prompt = "Check this image /path/to/screenshot.png and tell me what you see";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]).toEqual({
      raw: "/path/to/screenshot.png",
      type: "path",
      resolved: "/path/to/screenshot.png",
    });
  });

  it("detects relative paths starting with ./", () => {
    const prompt = "Look at ./images/photo.jpg";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.raw).toBe("./images/photo.jpg");
    expect(refs[0]?.type).toBe("path");
  });

  it("detects relative paths starting with ../", () => {
    const prompt = "The file is at ../screenshots/test.jpeg";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.raw).toBe("../screenshots/test.jpeg");
    expect(refs[0]?.type).toBe("path");
  });

  it("detects home directory paths starting with ~/", () => {
    const prompt = "My photo is at ~/Pictures/vacation.png";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.raw).toBe("~/Pictures/vacation.png");
    expect(refs[0]?.type).toBe("path");
    // Resolved path should expand ~
    expect(refs[0]?.resolved).not.toContain("~");
  });

  it("detects HTTP URLs with image extensions", () => {
    const prompt = "Check this URL: https://mysite.com/images/logo.png";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]).toEqual({
      raw: "https://mysite.com/images/logo.png",
      type: "url",
      resolved: "https://mysite.com/images/logo.png",
    });
  });

  it("detects HTTPS URLs with query parameters", () => {
    const prompt = "Image from https://cdn.mysite.com/img.jpg?size=large&v=2";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.type).toBe("url");
    expect(refs[0]?.raw).toContain("https://cdn.mysite.com/img.jpg");
  });

  it("detects multiple image references in a prompt", () => {
    const prompt = `
Compare these two images:
1. /home/user/photo1.png
2. https://mysite.com/photo2.jpg
`;
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(2);
    expect(refs.some((r) => r.type === "path")).toBe(true);
    expect(refs.some((r) => r.type === "url")).toBe(true);
  });

  it("handles various image extensions", () => {
    const extensions = ["png", "jpg", "jpeg", "gif", "webp", "bmp", "tiff", "heic"];
    for (const ext of extensions) {
      const prompt = `Image: /test/image.${ext}`;
      const refs = detectImageReferences(prompt);
      expect(refs.length).toBeGreaterThanOrEqual(1);
      expect(refs[0]?.raw).toContain(`.${ext}`);
    }
  });

  it("deduplicates repeated image references", () => {
    const prompt = "Look at /path/image.png and also /path/image.png again";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
  });

  it("returns empty array when no images found", () => {
    const prompt = "Just some text without any image references";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(0);
  });

  it("ignores non-image file extensions", () => {
    const prompt = "Check /path/to/document.pdf and /code/file.ts";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(0);
  });

  it("handles paths inside quotes (without spaces)", () => {
    const prompt = 'The file is at "/path/to/image.png"';
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.raw).toBe("/path/to/image.png");
  });

  it("handles paths in parentheses", () => {
    const prompt = "See the image (./screenshot.png) for details";
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.raw).toBe("./screenshot.png");
  });

  it("detects [Image: source: ...] format from messaging systems", () => {
    const prompt = `What does this image show?
[Image: source: /Users/tyleryust/Library/Messages/Attachments/IMG_0043.jpeg]`;
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.raw).toBe("/Users/tyleryust/Library/Messages/Attachments/IMG_0043.jpeg");
    expect(refs[0]?.type).toBe("path");
  });

  it("handles complex message attachment paths", () => {
    const prompt = `[Image: source: /Users/tyleryust/Library/Messages/Attachments/23/03/AA4726EA-DB27-4269-BA56-1436936CC134/5E3E286A-F585-4E5E-9043-5BC2AFAFD81BIMG_0043.jpeg]`;
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.resolved).toContain("IMG_0043.jpeg");
  });

  it("detects multiple images in [media attached: ...] format", () => {
    // Multi-file format uses separate brackets on separate lines
    const prompt = `[media attached: 2 files]
[media attached 1/2: /Users/tyleryust/.clawdbot/media/IMG_6430.jpeg (image/jpeg)]
[media attached 2/2: /Users/tyleryust/.clawdbot/media/IMG_6431.jpeg (image/jpeg)]
what about these images?`;
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(2);
    expect(refs[0]?.resolved).toContain("IMG_6430.jpeg");
    expect(refs[1]?.resolved).toContain("IMG_6431.jpeg");
  });

  it("does not double-count path and url in same bracket", () => {
    // Single file with URL (| separates path from url, not multiple files)
    const prompt = `[media attached: /cache/IMG_6430.jpeg (image/jpeg) | /cache/IMG_6430.jpeg]`;
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.resolved).toContain("IMG_6430.jpeg");
  });

  it("skips example.com URLs as they are documentation examples", () => {
    const prompt = `To send an image: MEDIA:https://example.com/image.jpg
Here is my actual image: /path/to/real.png`;
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.raw).toBe("/path/to/real.png");
  });

  it("handles single file format with URL (no index)", () => {
    const prompt = `[media attached: /cache/photo.jpeg (image/jpeg) | https://example.com/url]
what is this?`;
    const refs = detectImageReferences(prompt);

    expect(refs).toHaveLength(1);
    expect(refs[0]?.resolved).toContain("photo.jpeg");
  });

  it("handles paths with spaces in filename", () => {
    // URL after | is https, not a local path, so only the local path should be detected
    const prompt = `[media attached: /Users/test/.clawdbot/media/ChatGPT Image Apr 21, 2025.png (image/png) | https://example.com/same.png]
what is this?`;
    const refs = detectImageReferences(prompt);

    // Only 1 ref - the local path (example.com URLs are skipped)
    expect(refs).toHaveLength(1);
    expect(refs[0]?.resolved).toContain("ChatGPT Image Apr 21, 2025.png");
  });
});

describe("modelSupportsImages", () => {
  it("returns true when model input includes image", () => {
    const model = { input: ["text", "image"] };
    expect(modelSupportsImages(model)).toBe(true);
  });

  it("returns false when model input does not include image", () => {
    const model = { input: ["text"] };
    expect(modelSupportsImages(model)).toBe(false);
  });

  it("returns false when model input is undefined", () => {
    const model = {};
    expect(modelSupportsImages(model)).toBe(false);
  });

  it("returns false when model input is empty", () => {
    const model = { input: [] };
    expect(modelSupportsImages(model)).toBe(false);
  });
});

src/agents/pi-embedded-runner/run/images.ts (new file, 379 lines)
@@ -0,0 +1,379 @@
import fs from "node:fs/promises";
import path from "node:path";

import type { ImageContent } from "@mariozechner/pi-ai";

import { assertSandboxPath } from "../../sandbox-paths.js";
import { extractTextFromMessage } from "../../../tui/tui-formatters.js";
import { loadWebMedia } from "../../../web/media.js";
import { resolveUserPath } from "../../../utils.js";
import { log } from "../logger.js";

/**
 * Common image file extensions for detection.
 */
const IMAGE_EXTENSIONS = new Set([
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".webp",
  ".bmp",
  ".tiff",
  ".tif",
  ".heic",
  ".heif",
]);

/**
 * Result of detecting an image reference in text.
 */
export interface DetectedImageRef {
  /** The raw matched string from the prompt */
  raw: string;
  /** The type of reference (path or url) */
  type: "path" | "url";
  /** The resolved/normalized path or URL */
  resolved: string;
  /** Index of the message this ref was found in (for history images) */
  messageIndex?: number;
}

/**
 * Checks if a file extension indicates an image file.
 */
function isImageExtension(filePath: string): boolean {
  const ext = path.extname(filePath).toLowerCase();
  return IMAGE_EXTENSIONS.has(ext);
}

/**
 * Detects image references in a user prompt.
 *
 * Patterns detected:
 * - Absolute paths: /path/to/image.png
 * - Relative paths: ./image.png, ../images/photo.jpg
 * - Home paths: ~/Pictures/screenshot.png
 * - HTTP(S) URLs: https://example.com/image.png
 * - Message attachments: [Image: source: /path/to/image.jpg]
 *
 * @param prompt The user prompt text to scan
 * @returns Array of detected image references
 */
export function detectImageReferences(prompt: string): DetectedImageRef[] {
  const refs: DetectedImageRef[] = [];
  const seen = new Set<string>();

  // Helper to add a path ref
  const addPathRef = (raw: string) => {
    const trimmed = raw.trim();
    if (!trimmed || seen.has(trimmed.toLowerCase())) return;
    if (!isImageExtension(trimmed)) return;
    seen.add(trimmed.toLowerCase());
    const resolved = trimmed.startsWith("~") ? resolveUserPath(trimmed) : trimmed;
    refs.push({ raw: trimmed, type: "path", resolved });
  };

  // Pattern for [media attached: path (type) | url] or [media attached N/M: path (type) | url] format
  // Each bracket = ONE file. The | separates path from URL, not multiple files.
  // Multi-file format uses separate brackets on separate lines.
  const mediaAttachedPattern = /\[media attached(?:\s+\d+\/\d+)?:\s*([^\]]+)\]/gi;
  let match: RegExpExecArray | null;
  while ((match = mediaAttachedPattern.exec(prompt)) !== null) {
    const content = match[1];

    // Skip "[media attached: N files]" header lines
    if (/^\d+\s+files?$/i.test(content.trim())) {
      continue;
    }

    // Extract path before the (mime/type) or | delimiter
    // Format is: path (type) | url OR just: path (type)
    // Path may contain spaces (e.g., "ChatGPT Image Apr 21.png")
    // Use non-greedy .+? to stop at first image extension
    const pathMatch = content.match(
      /^\s*(.+?\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\s*(?:\(|$|\|)/i,
    );
    if (pathMatch?.[1]) {
      addPathRef(pathMatch[1].trim());
    }
  }

  // Pattern for [Image: source: /path/...] format from messaging systems
  const messageImagePattern = /\[Image:\s*source:\s*([^\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\]/gi;
  while ((match = messageImagePattern.exec(prompt)) !== null) {
    const raw = match[1]?.trim();
    if (raw) addPathRef(raw);
  }

  // Pattern for HTTP(S) URLs ending in image extensions
  // Skip example.com URLs as they're often just documentation examples
  const urlPattern =
    /https?:\/\/[^\s<>"'`\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif)(?:\?[^\s<>"'`\]]*)?/gi;
  while ((match = urlPattern.exec(prompt)) !== null) {
    const raw = match[0];
    // Skip example.com URLs - they're documentation, not real images
    if (raw.includes("example.com")) continue;
    if (seen.has(raw.toLowerCase())) continue;
    seen.add(raw.toLowerCase());
    refs.push({ raw, type: "url", resolved: raw });
  }

  // Pattern for file paths (absolute, relative, or home)
  // Matches:
  // - /absolute/path/to/file.ext (including paths with special chars like Messages/Attachments)
  // - ./relative/path.ext
  // - ../parent/path.ext
  // - ~/home/path.ext
  const pathPattern = /(?:^|\s|["'`(])((\.\.?\/|[~/])[^\s"'`()[\]]*\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))/gi;
  while ((match = pathPattern.exec(prompt)) !== null) {
    const raw = match[1] || match[0];
    addPathRef(raw);
  }

  return refs;
}

/**
 * Loads an image from a file path or URL and returns it as ImageContent.
 *
 * @param ref The detected image reference
 * @param workspaceDir The current workspace directory for resolving relative paths
 * @param options Optional settings for sandbox and size limits
 * @returns The loaded image content, or null if loading failed
 */
export async function loadImageFromRef(
  ref: DetectedImageRef,
  workspaceDir: string,
  options?: {
    maxBytes?: number;
    /** If set, enforce that file paths are within this sandbox root */
    sandboxRoot?: string;
  },
): Promise<ImageContent | null> {
  try {
    let targetPath = ref.resolved;

    // When sandbox is enabled, block remote URL loading to maintain network boundary
    if (ref.type === "url" && options?.sandboxRoot) {
      log.debug(`Native image: rejecting remote URL in sandboxed session: ${ref.resolved}`);
      return null;
    }

    // For file paths, resolve relative to the appropriate root:
    // - When sandbox is enabled, resolve relative to sandboxRoot for security
    // - Otherwise, resolve relative to workspaceDir
    if (ref.type === "path" && !path.isAbsolute(targetPath)) {
      const resolveRoot = options?.sandboxRoot ?? workspaceDir;
      targetPath = path.resolve(resolveRoot, targetPath);
    }

    // Enforce sandbox restrictions if sandboxRoot is set
    if (ref.type === "path" && options?.sandboxRoot) {
      try {
        const validated = await assertSandboxPath({
          filePath: targetPath,
          cwd: options.sandboxRoot,
          root: options.sandboxRoot,
        });
        targetPath = validated.resolved;
      } catch (err) {
        // Log the actual error for debugging (sandbox violation or other path error)
        log.debug(`Native image: sandbox validation failed for ${ref.resolved}: ${err instanceof Error ? err.message : String(err)}`);
        return null;
      }
    }

    // Check file exists for local paths
    if (ref.type === "path") {
      try {
        await fs.stat(targetPath);
      } catch {
        log.debug(`Native image: file not found: ${targetPath}`);
        return null;
      }
    }

    // loadWebMedia handles both file paths and HTTP(S) URLs
    const media = await loadWebMedia(targetPath, options?.maxBytes);

    if (media.kind !== "image") {
      log.debug(`Native image: not an image file: ${targetPath} (got ${media.kind})`);
      return null;
    }

    // EXIF orientation is already normalized by loadWebMedia -> resizeToJpeg
    const mimeType = media.contentType ?? "image/png";
    const data = media.buffer.toString("base64");

    return { type: "image", data, mimeType };
  } catch (err) {
    // Log the actual error for debugging (size limits, network failures, etc.)
    log.debug(`Native image: failed to load ${ref.resolved}: ${err instanceof Error ? err.message : String(err)}`);
    return null;
  }
}

/**
 * Checks if a model supports image input based on its input capabilities.
 *
 * @param model The model object with input capability array
 * @returns True if the model supports image input
 */
export function modelSupportsImages(model: { input?: string[] }): boolean {
  return model.input?.includes("image") ?? false;
}

/**
 * Extracts image references from conversation history messages.
 * Scans user messages for image paths/URLs that can be loaded.
 * Each ref includes the messageIndex so images can be injected at their original location.
 *
 * Note: Global deduplication is intentional - if the same image appears in multiple
 * messages, we only inject it at the FIRST occurrence. This is sufficient because:
 * 1. The model sees all message content including the image
 * 2. Later references to "the image" or "that picture" will work since it's in context
 * 3. Injecting duplicates would waste tokens and potentially hit size limits
 */
function detectImagesFromHistory(messages: unknown[]): DetectedImageRef[] {
  const allRefs: DetectedImageRef[] = [];
  const seen = new Set<string>();

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (!msg || typeof msg !== "object") continue;
    const message = msg as { role?: string };
    // Only scan user messages for image references
    if (message.role !== "user") continue;

    const text = extractTextFromMessage(msg);
    if (!text) continue;

    const refs = detectImageReferences(text);
    for (const ref of refs) {
      const key = ref.resolved.toLowerCase();
      if (seen.has(key)) continue;
      seen.add(key);
      allRefs.push({ ...ref, messageIndex: i });
    }
  }

  return allRefs;
}

/**
 * Detects and loads images referenced in a prompt for models with vision capability.
 *
 * This function scans the prompt for image references (file paths and URLs),
 * loads them, and returns them as ImageContent array ready to be passed to
 * the model's prompt method.
 *
 * Also scans conversation history for images from previous turns and returns
 * them mapped by message index so they can be injected at their original location.
 *
 * @param params Configuration for image detection and loading
 * @returns Object with loaded images for current prompt and history images by message index
 */
export async function detectAndLoadPromptImages(params: {
  prompt: string;
  workspaceDir: string;
  model: { input?: string[] };
  existingImages?: ImageContent[];
  historyMessages?: unknown[];
  maxBytes?: number;
  /** If set, enforce that file paths are within this sandbox root */
  sandboxRoot?: string;
}): Promise<{
  /** Images for the current prompt (existingImages + detected in current prompt) */
  images: ImageContent[];
  /** Images from history messages, keyed by message index */
  historyImagesByIndex: Map<number, ImageContent[]>;
  detectedRefs: DetectedImageRef[];
  loadedCount: number;
  skippedCount: number;
}> {
  // If model doesn't support images, return empty results
  if (!modelSupportsImages(params.model)) {
    return {
      images: params.existingImages ?? [],
      historyImagesByIndex: new Map(),
      detectedRefs: [],
      loadedCount: 0,
      skippedCount: 0,
    };
  }

  // Detect images from current prompt
  const promptRefs = detectImageReferences(params.prompt);

  // Detect images from conversation history (with message indices)
  const historyRefs = params.historyMessages
    ? detectImagesFromHistory(params.historyMessages)
    : [];

  // Deduplicate: if an image is in the current prompt, don't also load it from history.
  // Current prompt images are passed via the `images` parameter to prompt(), while history
  // images are injected into their original message positions. We don't want the same
  // image loaded and sent twice (wasting tokens and potentially causing confusion).
  const seenPaths = new Set(promptRefs.map((r) => r.resolved.toLowerCase()));
  const uniqueHistoryRefs = historyRefs.filter(
    (r) => !seenPaths.has(r.resolved.toLowerCase()),
  );

  const allRefs = [...promptRefs, ...uniqueHistoryRefs];

  if (allRefs.length === 0) {
    return {
      images: params.existingImages ?? [],
      historyImagesByIndex: new Map(),
      detectedRefs: [],
      loadedCount: 0,
      skippedCount: 0,
    };
  }

  log.debug(
    `Native image: detected ${allRefs.length} image refs (${promptRefs.length} in prompt, ${uniqueHistoryRefs.length} in history)`,
  );

  // Load images for current prompt
  const promptImages: ImageContent[] = [...(params.existingImages ?? [])];
  // Load images for history, grouped by message index
  const historyImagesByIndex = new Map<number, ImageContent[]>();

  let loadedCount = 0;
  let skippedCount = 0;

  for (const ref of allRefs) {
    const image = await loadImageFromRef(ref, params.workspaceDir, {
      maxBytes: params.maxBytes,
      sandboxRoot: params.sandboxRoot,
    });
    if (image) {
      if (ref.messageIndex !== undefined) {
        // History image - add to the appropriate message index
        const existing = historyImagesByIndex.get(ref.messageIndex);
        if (existing) {
          existing.push(image);
        } else {
          historyImagesByIndex.set(ref.messageIndex, [image]);
        }
      } else {
        // Current prompt image
        promptImages.push(image);
      }
      loadedCount++;
      log.debug(`Native image: loaded ${ref.type} ${ref.resolved}`);
    } else {
      skippedCount++;
    }
  }

  return {
    images: promptImages,
    historyImagesByIndex,
    detectedRefs: allRefs,
    loadedCount,
    skippedCount,
  };
}
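Example (illustration, not part of this diff): calling the detection API above. The output shapes follow the unit tests in images.test.ts; the exact expanded home prefix depends on the machine.

const refs = detectImageReferences(
  "Compare ~/Pictures/a.png with https://cdn.mysite.com/b.jpg?v=2",
);
// -> [
//      { raw: "~/Pictures/a.png", type: "path", resolved: "/Users/me/Pictures/a.png" },
//      { raw: "https://cdn.mysite.com/b.jpg?v=2", type: "url",
//        resolved: "https://cdn.mysite.com/b.jpg?v=2" },
//    ]
// (~ is expanded via resolveUserPath; URLs pass through unchanged)

modelSupportsImages({ input: ["text", "image"] }); // true
modelSupportsImages({ input: ["text"] }); // false
modelSupportsImages({}); // false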

@@ -127,6 +127,8 @@ export function createClawdbotCodingTools(options?: {
  replyToMode?: "off" | "first" | "all";
  /** Mutable ref to track if a reply was sent (for "first" mode). */
  hasRepliedRef?: { value: boolean };
+ /** If true, the model has native vision capability */
+ modelHasVision?: boolean;
}): AnyAgentTool[] {
  const execToolName = "exec";
  const sandbox = options?.sandbox?.enabled ? options.sandbox : undefined;
@@ -280,6 +282,7 @@ export function createClawdbotCodingTools(options?: {
      currentThreadTs: options?.currentThreadTs,
      replyToMode: options?.replyToMode,
      hasRepliedRef: options?.hasRepliedRef,
+     modelHasVision: options?.modelHasVision,
    }),
  ];
  const pluginGroups = buildPluginToolGroups({

@@ -102,7 +102,11 @@ describe("image tool implicit imageModel config", () => {
    });
  });

- it("disables image tool when primary model already supports images", async () => {
+ it("keeps image tool available when primary model supports images (for explicit requests)", async () => {
+   // When the primary model supports images, we still keep the tool available
+   // because images are auto-injected into prompts. The tool description is
+   // adjusted via modelHasVision to discourage redundant usage.
    vi.stubEnv("OPENAI_API_KEY", "test-key");
    const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-"));
    const cfg: ClawdbotConfig = {
      agents: {
@@ -119,8 +123,13 @@ describe("image tool implicit imageModel config", () => {
        },
      },
    };
-   expect(resolveImageModelConfigForTool({ cfg, agentDir })).toBeNull();
-   expect(createImageTool({ config: cfg, agentDir })).toBeNull();
+   // Tool should still be available for explicit image analysis requests
+   expect(resolveImageModelConfigForTool({ cfg, agentDir })).toEqual({
+     primary: "openai/gpt-5-mini",
+   });
+   const tool = createImageTool({ config: cfg, agentDir, modelHasVision: true });
+   expect(tool).not.toBeNull();
+   expect(tool?.description).toContain("Only use this tool when the image was NOT already provided");
  });

  it("sandboxes image paths like the read tool", async () => {

@@ -145,11 +145,10 @@ export function resolveImageModelConfigForTool(params: {
  cfg?: ClawdbotConfig;
  agentDir: string;
}): ImageModelConfig | null {
- const primarySupportsImages = resolvePrimaryModelSupportsImages({
-   cfg: params.cfg,
-   agentDir: params.agentDir,
- });
- if (primarySupportsImages === true) return null;
+ // Note: We intentionally do NOT gate based on primarySupportsImages here.
+ // Even when the primary model supports images, we keep the tool available
+ // because images are auto-injected into prompts (see attempt.ts detectAndLoadPromptImages).
+ // The tool description is adjusted via modelHasVision to discourage redundant usage.
  const explicit = coerceImageModelConfig(params.cfg);
  if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
    return explicit;
@@ -368,6 +367,8 @@ export function createImageTool(options?: {
  config?: ClawdbotConfig;
  agentDir?: string;
  sandboxRoot?: string;
+ /** If true, the model has native vision capability and images in the prompt are auto-injected */
+ modelHasVision?: boolean;
}): AnyAgentTool | null {
  const agentDir = options?.agentDir?.trim();
  if (!agentDir) {
@@ -382,11 +383,17 @@ export function createImageTool(options?: {
    agentDir,
  });
  if (!imageModelConfig) return null;

+ // If model has native vision, images in the prompt are auto-injected
+ // so this tool is only needed when image wasn't provided in the prompt
+ const description = options?.modelHasVision
+   ? "Analyze an image with a vision model. Only use this tool when the image was NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
+   : "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.";

  return {
    label: "Image",
    name: "image",
-   description:
-     "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.",
+   description,
    parameters: Type.Object({
      prompt: Type.Optional(Type.String()),
      image: Type.String(),

@@ -307,9 +307,16 @@ export async function monitorIMessageProvider(opts: MonitorIMessageOpts = {}): P
  const mentionRegexes = buildMentionRegexes(cfg, route.agentId);
  const messageText = (message.text ?? "").trim();
  const attachments = includeAttachments ? (message.attachments ?? []) : [];
- const firstAttachment = attachments?.find((entry) => entry?.original_path && !entry?.missing);
+ // Filter to valid attachments with paths
+ const validAttachments = attachments.filter(
+   (entry) => entry?.original_path && !entry?.missing,
+ );
+ const firstAttachment = validAttachments[0];
  const mediaPath = firstAttachment?.original_path ?? undefined;
  const mediaType = firstAttachment?.mime_type ?? undefined;
+ // Build arrays for all attachments (for multi-image support)
+ const mediaPaths = validAttachments.map((a) => a.original_path).filter(Boolean) as string[];
+ const mediaTypes = validAttachments.map((a) => a.mime_type ?? undefined);
  const kind = mediaKindFromMime(mediaType ?? undefined);
  const placeholder = kind ? `<media:${kind}>` : attachments?.length ? "<media:attachment>" : "";
  const bodyText = messageText || placeholder;
@@ -445,6 +452,9 @@ export async function monitorIMessageProvider(opts: MonitorIMessageOpts = {}): P
  MediaPath: mediaPath,
  MediaType: mediaType,
  MediaUrl: mediaPath,
+ MediaPaths: mediaPaths.length > 0 ? mediaPaths : undefined,
+ MediaTypes: mediaTypes.length > 0 ? mediaTypes : undefined,
+ MediaUrls: mediaPaths.length > 0 ? mediaPaths : undefined,
  MediaRemoteHost: remoteHost,
  WasMentioned: effectiveWasMentioned,
  CommandAuthorized: commandAuthorized,
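Example (illustration, not part of this diff): given hypothetical attachment records, the filter/map above yields both the legacy single-media fields and the new arrays.

const attachments = [
  { original_path: "/tmp/a.jpeg", mime_type: "image/jpeg", missing: false },
  { original_path: undefined, mime_type: "image/png", missing: true }, // dropped
  { original_path: "/tmp/b.heic", mime_type: "image/heic", missing: false },
];
// validAttachments -> entries 0 and 2
// mediaPath   -> "/tmp/a.jpeg"   (first valid; kept for single-media compatibility)
// mediaPaths  -> ["/tmp/a.jpeg", "/tmp/b.heic"]
// mediaTypes  -> ["image/jpeg", "image/heic"]
// MediaPaths/MediaUrls are only set on the payload when non-empty.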

@@ -28,6 +28,98 @@ async function loadSharp(): Promise<(buffer: Buffer) => ReturnType<Sharp>> {
  return (buffer) => sharp(buffer, { failOnError: false });
}

/**
 * Reads EXIF orientation from JPEG buffer.
 * Returns orientation value 1-8, or null if not found/not JPEG.
 *
 * EXIF orientation values:
 * 1 = Normal, 2 = Flip H, 3 = Rotate 180, 4 = Flip V,
 * 5 = Rotate 270 CW + Flip H, 6 = Rotate 90 CW, 7 = Rotate 90 CW + Flip H, 8 = Rotate 270 CW
 */
function readJpegExifOrientation(buffer: Buffer): number | null {
  // Check JPEG magic bytes
  if (buffer.length < 2 || buffer[0] !== 0xff || buffer[1] !== 0xd8) {
    return null;
  }

  let offset = 2;
  while (offset < buffer.length - 4) {
    // Look for marker
    if (buffer[offset] !== 0xff) {
      offset++;
      continue;
    }

    const marker = buffer[offset + 1];
    // Skip padding FF bytes
    if (marker === 0xff) {
      offset++;
      continue;
    }

    // APP1 marker (EXIF)
    if (marker === 0xe1) {
      const segmentLength = buffer.readUInt16BE(offset + 2);
      const exifStart = offset + 4;

      // Check for "Exif\0\0" header
      if (
        buffer.length > exifStart + 6 &&
        buffer.toString("ascii", exifStart, exifStart + 4) === "Exif" &&
        buffer[exifStart + 4] === 0 &&
        buffer[exifStart + 5] === 0
      ) {
        const tiffStart = exifStart + 6;
        if (buffer.length < tiffStart + 8) return null;

        // Check byte order (II = little-endian, MM = big-endian)
        const byteOrder = buffer.toString("ascii", tiffStart, tiffStart + 2);
        const isLittleEndian = byteOrder === "II";

        const readU16 = (pos: number) =>
          isLittleEndian ? buffer.readUInt16LE(pos) : buffer.readUInt16BE(pos);
        const readU32 = (pos: number) =>
          isLittleEndian ? buffer.readUInt32LE(pos) : buffer.readUInt32BE(pos);

        // Read IFD0 offset
        const ifd0Offset = readU32(tiffStart + 4);
        const ifd0Start = tiffStart + ifd0Offset;
        if (buffer.length < ifd0Start + 2) return null;

        const numEntries = readU16(ifd0Start);
        for (let i = 0; i < numEntries; i++) {
          const entryOffset = ifd0Start + 2 + i * 12;
          if (buffer.length < entryOffset + 12) break;

          const tag = readU16(entryOffset);
          // Orientation tag = 0x0112
          if (tag === 0x0112) {
            const value = readU16(entryOffset + 8);
            return value >= 1 && value <= 8 ? value : null;
          }
        }
      }
      return null;
    }

    // Skip other segments
    if (marker >= 0xe0 && marker <= 0xef) {
      const segmentLength = buffer.readUInt16BE(offset + 2);
      offset += 2 + segmentLength;
      continue;
    }

    // SOF, SOS, or other marker - stop searching
    if (marker === 0xc0 || marker === 0xda) {
      break;
    }

    offset++;
  }

  return null;
}

async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-img-"));
  try {
@@ -108,6 +200,81 @@ export async function getImageMetadata(buffer: Buffer): Promise<ImageMetadata |
  }
}

/**
 * Applies rotation/flip to image buffer using sips based on EXIF orientation.
 */
async function sipsApplyOrientation(buffer: Buffer, orientation: number): Promise<Buffer> {
  // Map EXIF orientation to sips operations
  // sips -r rotates clockwise, -f flips (horizontal/vertical)
  const ops: string[] = [];
  switch (orientation) {
    case 2: // Flip horizontal
      ops.push("-f", "horizontal");
      break;
    case 3: // Rotate 180
      ops.push("-r", "180");
      break;
    case 4: // Flip vertical
      ops.push("-f", "vertical");
      break;
    case 5: // Rotate 270 CW + flip horizontal
      ops.push("-r", "270", "-f", "horizontal");
      break;
    case 6: // Rotate 90 CW
      ops.push("-r", "90");
      break;
    case 7: // Rotate 90 CW + flip horizontal
      ops.push("-r", "90", "-f", "horizontal");
      break;
    case 8: // Rotate 270 CW
      ops.push("-r", "270");
      break;
    default:
      // Orientation 1 or unknown - no change needed
      return buffer;
  }

  return await withTempDir(async (dir) => {
    const input = path.join(dir, "in.jpg");
    const output = path.join(dir, "out.jpg");
    await fs.writeFile(input, buffer);
    await runExec(
      "/usr/bin/sips",
      [...ops, input, "--out", output],
      { timeoutMs: 20_000, maxBuffer: 1024 * 1024 },
    );
    return await fs.readFile(output);
  });
}

/**
 * Normalizes EXIF orientation in an image buffer.
 * Returns the buffer with correct pixel orientation (rotated if needed).
 * Falls back to original buffer if normalization fails.
 */
export async function normalizeExifOrientation(buffer: Buffer): Promise<Buffer> {
  if (prefersSips()) {
    try {
      const orientation = readJpegExifOrientation(buffer);
      if (!orientation || orientation === 1) {
        return buffer; // No rotation needed
      }
      return await sipsApplyOrientation(buffer, orientation);
    } catch {
      return buffer;
    }
  }

  try {
    const sharp = await loadSharp();
    // .rotate() with no args auto-rotates based on EXIF orientation
    return await sharp(buffer).rotate().toBuffer();
  } catch {
    // Sharp not available or failed - return original buffer
    return buffer;
  }
}

export async function resizeToJpeg(params: {
  buffer: Buffer;
  maxSide: number;
@@ -115,14 +282,17 @@ export async function resizeToJpeg(params: {
  withoutEnlargement?: boolean;
}): Promise<Buffer> {
  if (prefersSips()) {
+   // Normalize EXIF orientation BEFORE resizing (sips resize doesn't auto-rotate)
+   const normalized = await normalizeExifOrientationSips(params.buffer);

    // Avoid enlarging by checking dimensions first (sips has no withoutEnlargement flag).
    if (params.withoutEnlargement !== false) {
-     const meta = await getImageMetadata(params.buffer);
+     const meta = await getImageMetadata(normalized);
      if (meta) {
        const maxDim = Math.max(meta.width, meta.height);
        if (maxDim > 0 && maxDim <= params.maxSide) {
          return await sipsResizeToJpeg({
-           buffer: params.buffer,
+           buffer: normalized,
            maxSide: maxDim,
            quality: params.quality,
          });
@@ -130,14 +300,16 @@ export async function resizeToJpeg(params: {
      }
    }
    return await sipsResizeToJpeg({
-     buffer: params.buffer,
+     buffer: normalized,
      maxSide: params.maxSide,
      quality: params.quality,
    });
  }

  const sharp = await loadSharp();
+ // Use .rotate() BEFORE .resize() to auto-rotate based on EXIF orientation
  return await sharp(params.buffer)
+   .rotate() // Auto-rotate based on EXIF before resizing
    .resize({
      width: params.maxSide,
      height: params.maxSide,
@@ -147,3 +319,19 @@ export async function resizeToJpeg(params: {
    .jpeg({ quality: params.quality, mozjpeg: true })
    .toBuffer();
}

/**
 * Internal sips-only EXIF normalization (no sharp fallback).
 * Used by resizeToJpeg to normalize before sips resize.
 */
async function normalizeExifOrientationSips(buffer: Buffer): Promise<Buffer> {
  try {
    const orientation = readJpegExifOrientation(buffer);
    if (!orientation || orientation === 1) {
      return buffer;
    }
    return await sipsApplyOrientation(buffer, orientation);
  } catch {
    return buffer;
  }
}
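Example (illustration, not part of this diff): a handcrafted little-endian EXIF segment that the parser above reads as orientation 6. This assumes readJpegExifOrientation were exported for testing; in the diff it is module-private.

// Minimal JPEG: SOI + one APP1 (EXIF) segment carrying a single IFD0 entry:
// tag 0x0112 (Orientation), type SHORT, value 6 (rotate 90 CW), "II" byte order.
const tiff = Buffer.concat([
  Buffer.from("II"),                     // byte order: little-endian
  Buffer.from([0x2a, 0x00]),             // TIFF magic 42
  Buffer.from([0x08, 0x00, 0x00, 0x00]), // IFD0 offset = 8
  Buffer.from([0x01, 0x00]),             // 1 directory entry
  Buffer.from([0x12, 0x01]),             // tag 0x0112
  Buffer.from([0x03, 0x00]),             // type SHORT
  Buffer.from([0x01, 0x00, 0x00, 0x00]), // count 1
  Buffer.from([0x06, 0x00, 0x00, 0x00]), // value 6, read via readU16 at entry+8
  Buffer.from([0x00, 0x00, 0x00, 0x00]), // next IFD offset (none)
]);
const payload = Buffer.concat([Buffer.from("Exif\0\0"), tiff]);
const app1Length = payload.length + 2;   // length field counts itself
const jpeg = Buffer.concat([
  Buffer.from([0xff, 0xd8]),                                     // SOI
  Buffer.from([0xff, 0xe1, app1Length >> 8, app1Length & 0xff]), // APP1 + length
  payload,
]);
// readJpegExifOrientation(jpeg) === 6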