fix: stage sandbox media for inbound attachments

This commit is contained in:
Peter Steinberger
2026-01-05 06:18:11 +01:00
parent a7d33c06f9
commit 995f5959af
19 changed files with 326 additions and 83 deletions

View File

@@ -1,6 +1,6 @@
import fs from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { basename, join } from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
vi.mock("../agents/pi-embedded.js", () => ({
@@ -14,6 +14,8 @@ vi.mock("../agents/pi-embedded.js", () => ({
}));
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { ensureSandboxWorkspaceForSession } from "../agents/sandbox.js";
import { resolveSessionKey } from "../config/sessions.js";
import { getReplyFromConfig } from "./reply.js";
import { HEARTBEAT_TOKEN } from "./tokens.js";
@@ -712,6 +714,81 @@ describe("trigger handling", () => {
expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
});
});
// Verifies that an inbound attachment stored on the host is copied into the
// session's sandbox workspace and that the prompt handed to the agent refers
// to the staged, workspace-relative path rather than the host path.
it("stages inbound media into the sandbox workspace", async () => {
await withTempHome(async (home) => {
// Arrange: create a fake inbound media file under the temporary home.
const inboundDir = join(home, ".clawdbot", "media", "inbound");
await fs.mkdir(inboundDir, { recursive: true });
const mediaPath = join(inboundDir, "photo.jpg");
await fs.writeFile(mediaPath, "test");
// Stub the embedded agent so the run resolves immediately with a fixed reply.
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
payloads: [{ text: "ok" }],
meta: {
durationMs: 1,
agentMeta: { sessionId: "s", provider: "p", model: "m" },
},
});
// Sandbox is configured in "non-main" mode with its root inside the temp home.
// NOTE(review): presumably the group session below counts as non-main and is
// therefore sandboxed — confirm against the sandbox module.
const cfg = {
agent: {
model: "anthropic/claude-opus-4-5",
workspace: join(home, "clawd"),
sandbox: {
mode: "non-main" as const,
workspaceRoot: join(home, "sandboxes"),
},
},
whatsapp: {
allowFrom: ["*"],
},
session: {
store: join(home, "sessions.json"),
},
};
// Inbound group message whose MediaPath and MediaUrl both hold the absolute
// host path of the file created above.
const ctx = {
Body: "hi",
From: "group:whatsapp:demo",
To: "+2000",
ChatType: "group" as const,
Surface: "whatsapp" as const,
MediaPath: mediaPath,
MediaType: "image/jpeg",
MediaUrl: mediaPath,
};
// Act.
const res = await getReplyFromConfig(ctx, {}, cfg);
// The reply may be a single payload or an array; take the first text either way.
const text = Array.isArray(res) ? res[0]?.text : res?.text;
expect(text).toBe("ok");
expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
// Assert: the prompt passed to the agent mentions the staged relative path
// and no longer contains the absolute host path.
const prompt =
vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]?.prompt ?? "";
const stagedPath = `media/inbound/${basename(mediaPath)}`;
expect(prompt).toContain(stagedPath);
expect(prompt).not.toContain(mediaPath);
// Re-derive the session key and sandbox for this context so the staged file
// can be located on disk. cfg.session defines neither scope nor mainKey, so
// the "per-sender" fallback and an undefined main key are what get passed.
const sessionKey = resolveSessionKey(
cfg.session?.scope ?? "per-sender",
ctx,
cfg.session?.mainKey,
);
const sandbox = await ensureSandboxWorkspaceForSession({
config: cfg,
sessionKey,
workspaceDir: cfg.agent.workspace,
});
expect(sandbox).not.toBeNull();
const stagedFullPath = join(
sandbox!.workspaceDir,
"media",
"inbound",
basename(mediaPath),
);
// fs.stat resolving (rather than rejecting) shows the copy exists in the
// sandbox workspace.
await expect(fs.stat(stagedFullPath)).resolves.toBeTruthy();
});
});
});
describe("group intro prompts", () => {

View File

@@ -1,4 +1,7 @@
import crypto from "node:crypto";
import fs from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { resolveModelRefFromString } from "../agents/model-selection.js";
import {
@@ -7,6 +10,7 @@ import {
isEmbeddedPiRunStreaming,
resolveEmbeddedSessionLane,
} from "../agents/pi-embedded.js";
import { ensureSandboxWorkspaceForSession } from "../agents/sandbox.js";
import {
DEFAULT_AGENT_WORKSPACE_DIR,
ensureAgentWorkspace,
@@ -49,7 +53,7 @@ import {
prependSystemEvents,
} from "./reply/session-updates.js";
import { createTypingController } from "./reply/typing.js";
import type { MsgContext } from "./templating.js";
import type { MsgContext, TemplateContext } from "./templating.js";
import {
type ElevatedLevel,
normalizeThinkLevel,
@@ -478,6 +482,15 @@ export async function getReplyFromConfig(
typing.cleanup();
return commandResult.reply;
}
await stageSandboxMedia({
ctx,
sessionCtx,
cfg,
sessionKey,
workspaceDir,
});
const isFirstTurnInSession = isNewSession || !systemSent;
const isGroupChat = sessionCtx.ChatType === "group";
const wasMentioned = ctx.WasMentioned === true;
@@ -681,3 +694,65 @@ export async function getReplyFromConfig(
shouldInjectGroupIntro,
});
}
/**
 * Stages an inbound media attachment inside the session's sandbox workspace.
 *
 * When `ctx.MediaPath` names an absolute file (either a plain path or a
 * `file://` URL) and `ensureSandboxWorkspaceForSession` yields a sandbox for
 * the session, the file is copied to `<sandbox workspace>/media/inbound/<name>`
 * and `MediaPath` on both `ctx` and `sessionCtx` is rewritten to the
 * POSIX-style relative path `media/inbound/<name>`. `MediaUrl` is rewritten
 * the same way, but only when it referred to the same file as `MediaPath`.
 *
 * The function is a no-op when there is no media path, no session key, no
 * sandbox, an unparsable `file://` URL, or a non-absolute path. Failures while
 * creating the directory or copying are logged via `logVerbose` and leave any
 * context fields that were not yet rewritten unchanged.
 *
 * @param params.ctx - Inbound message context; `MediaPath`/`MediaUrl` may be mutated.
 * @param params.sessionCtx - Template context; receives the same rewritten values.
 * @param params.cfg - Configuration forwarded to the sandbox lookup.
 * @param params.sessionKey - Session identifier; staging is skipped when absent.
 * @param params.workspaceDir - Agent workspace directory forwarded to the sandbox lookup.
 */
async function stageSandboxMedia(params: {
  ctx: MsgContext;
  sessionCtx: TemplateContext;
  cfg: ClawdbotConfig;
  sessionKey?: string;
  workspaceDir: string;
}) {
  const { ctx, sessionCtx, cfg, sessionKey, workspaceDir } = params;

  // Turns a `file://` URL into a filesystem path; any other string passes
  // through untouched. Yields null when the URL cannot be converted.
  const toFsPath = (value: string): string | null => {
    if (!value.startsWith("file://")) return value;
    try {
      return fileURLToPath(value);
    } catch {
      return null;
    }
  };

  const trimmedMediaPath = ctx.MediaPath?.trim();
  if (!trimmedMediaPath || !sessionKey) return;

  // The sandbox lookup happens before the path is validated, so it runs for
  // every message that carries a media path and a session key.
  const sandbox = await ensureSandboxWorkspaceForSession({
    config: cfg,
    sessionKey,
    workspaceDir,
  });
  if (!sandbox) return;

  const sourcePath = toFsPath(trimmedMediaPath);
  if (sourcePath === null || !path.isAbsolute(sourcePath)) return;

  // Snapshot the untouched values; they are compared against below after
  // ctx.MediaPath has already been overwritten.
  const priorMediaPath = ctx.MediaPath;
  const priorMediaUrl = ctx.MediaUrl;

  try {
    const fileName = path.basename(sourcePath);
    if (!fileName) return;

    const inboundDir = path.join(sandbox.workspaceDir, "media", "inbound");
    await fs.mkdir(inboundDir, { recursive: true });
    await fs.copyFile(sourcePath, path.join(inboundDir, fileName));

    // Always forward slashes: this path is exposed to the agent, not the host OS.
    const stagedRelPath = path.posix.join("media", "inbound", fileName);
    ctx.MediaPath = stagedRelPath;
    sessionCtx.MediaPath = stagedRelPath;

    if (!priorMediaUrl) return;

    // An unparsable file:// URL is compared verbatim rather than aborting.
    const urlAsPath = toFsPath(priorMediaUrl) ?? priorMediaUrl;
    const urlRefersToSource =
      urlAsPath === priorMediaPath || urlAsPath === sourcePath;
    if (urlRefersToSource) {
      ctx.MediaUrl = stagedRelPath;
      sessionCtx.MediaUrl = stagedRelPath;
    }
  } catch (err) {
    logVerbose(`Failed to stage inbound media for sandbox: ${String(err)}`);
  }
}