From 995f5959af1685e4fb1cbc7e42a4dcde179c54c6 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 5 Jan 2026 06:18:11 +0100 Subject: [PATCH] fix: stage sandbox media for inbound attachments --- CHANGELOG.md | 1 + src/agents/clawdbot-tools.sessions.test.ts | 24 ++++--- src/agents/sandbox.ts | 30 ++++++++ src/auto-reply/reply.triggers.test.ts | 79 +++++++++++++++++++++- src/auto-reply/reply.ts | 77 ++++++++++++++++++++- src/browser/profiles-service.test.ts | 12 ++-- src/browser/server.test.ts | 34 ++++++---- src/commands/health.snapshot.test.ts | 10 ++- src/commands/send.test.ts | 10 ++- src/commands/sessions.test.ts | 14 ++-- src/commands/status.test.ts | 10 ++- src/gateway/call.test.ts | 12 ++-- src/imessage/monitor.test.ts | 10 ++- src/imessage/send.test.ts | 10 ++- src/telegram/bot.media.test.ts | 10 ++- src/telegram/bot.test.ts | 10 ++- src/web/inbound.media.test.ts | 28 ++++---- src/web/monitor-inbox.test.ts | 10 ++- src/web/test-helpers.ts | 18 +++-- 19 files changed, 326 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7837582f..1bce0604c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - macOS: local gateway now connects via tailnet IP when bind mode is `tailnet`/`auto`. - macOS: Settings now use a sidebar layout to avoid toolbar overflow in Connections. - macOS: drop deprecated `afterMs` from agent wait params to match gateway schema. +- Sandbox: copy inbound media into sandbox workspaces so agent tools can read attachments. ### Maintenance - Deps: bump pi-* stack, Slack SDK, discord-api-types, file-type, zod, and Biome. diff --git a/src/agents/clawdbot-tools.sessions.test.ts b/src/agents/clawdbot-tools.sessions.test.ts index 5a04986a9..10b6b5b34 100644 --- a/src/agents/clawdbot-tools.sessions.test.ts +++ b/src/agents/clawdbot-tools.sessions.test.ts @@ -5,16 +5,20 @@ vi.mock("../gateway/call.js", () => ({ callGateway: (opts: unknown) => callGatewayMock(opts), })); -vi.mock("../config/config.js", () => ({ - loadConfig: () => ({ - session: { - mainKey: "main", - scope: "per-sender", - agentToAgent: { maxPingPongTurns: 2 }, - }, - }), - resolveGatewayPort: () => 18789, -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => ({ + session: { + mainKey: "main", + scope: "per-sender", + agentToAgent: { maxPingPongTurns: 2 }, + }, + }), + resolveGatewayPort: () => 18789, + }; +}); import { createClawdbotTools } from "./clawdbot-tools.js"; diff --git a/src/agents/sandbox.ts b/src/agents/sandbox.ts index 0eb6a8471..6166f3349 100644 --- a/src/agents/sandbox.ts +++ b/src/agents/sandbox.ts @@ -99,6 +99,11 @@ export type SandboxContext = { browser?: SandboxBrowserContext; }; +export type SandboxWorkspaceInfo = { + workspaceDir: string; + containerWorkdir: string; +}; + const DEFAULT_SANDBOX_WORKSPACE_ROOT = path.join( os.homedir(), ".clawdbot", @@ -866,3 +871,28 @@ export async function resolveSandboxContext(params: { browser: browser ?? undefined, }; } + +export async function ensureSandboxWorkspaceForSession(params: { + config?: ClawdbotConfig; + sessionKey?: string; + workspaceDir?: string; +}): Promise { + const rawSessionKey = params.sessionKey?.trim(); + if (!rawSessionKey) return null; + const cfg = defaultSandboxConfig(params.config); + const mainKey = params.config?.session?.mainKey?.trim() || "main"; + if (!shouldSandboxSession(cfg, rawSessionKey, mainKey)) return null; + + const workspaceRoot = resolveUserPath(cfg.workspaceRoot); + const workspaceDir = cfg.perSession + ? resolveSandboxWorkspaceDir(workspaceRoot, rawSessionKey) + : workspaceRoot; + const seedWorkspace = + params.workspaceDir?.trim() || DEFAULT_AGENT_WORKSPACE_DIR; + await ensureSandboxWorkspace(workspaceDir, seedWorkspace); + + return { + workspaceDir, + containerWorkdir: cfg.docker.workdir, + }; +} diff --git a/src/auto-reply/reply.triggers.test.ts b/src/auto-reply/reply.triggers.test.ts index 5d235deb8..3f68b62e4 100644 --- a/src/auto-reply/reply.triggers.test.ts +++ b/src/auto-reply/reply.triggers.test.ts @@ -1,6 +1,6 @@ import fs from "node:fs/promises"; import { tmpdir } from "node:os"; -import { join } from "node:path"; +import { basename, join } from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; vi.mock("../agents/pi-embedded.js", () => ({ @@ -14,6 +14,8 @@ vi.mock("../agents/pi-embedded.js", () => ({ })); import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; +import { ensureSandboxWorkspaceForSession } from "../agents/sandbox.js"; +import { resolveSessionKey } from "../config/sessions.js"; import { getReplyFromConfig } from "./reply.js"; import { HEARTBEAT_TOKEN } from "./tokens.js"; @@ -712,6 +714,81 @@ describe("trigger handling", () => { expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); }); }); + + it("stages inbound media into the sandbox workspace", async () => { + await withTempHome(async (home) => { + const inboundDir = join(home, ".clawdbot", "media", "inbound"); + await fs.mkdir(inboundDir, { recursive: true }); + const mediaPath = join(inboundDir, "photo.jpg"); + await fs.writeFile(mediaPath, "test"); + + vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ + payloads: [{ text: "ok" }], + meta: { + durationMs: 1, + agentMeta: { sessionId: "s", provider: "p", model: "m" }, + }, + }); + + const cfg = { + agent: { + model: "anthropic/claude-opus-4-5", + workspace: join(home, "clawd"), + sandbox: { + mode: "non-main" as const, + workspaceRoot: join(home, "sandboxes"), + }, + }, + whatsapp: { + allowFrom: ["*"], + }, + session: { + store: join(home, "sessions.json"), + }, + }; + + const ctx = { + Body: "hi", + From: "group:whatsapp:demo", + To: "+2000", + ChatType: "group" as const, + Surface: "whatsapp" as const, + MediaPath: mediaPath, + MediaType: "image/jpeg", + MediaUrl: mediaPath, + }; + + const res = await getReplyFromConfig(ctx, {}, cfg); + const text = Array.isArray(res) ? res[0]?.text : res?.text; + expect(text).toBe("ok"); + expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); + + const prompt = + vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]?.prompt ?? ""; + const stagedPath = `media/inbound/${basename(mediaPath)}`; + expect(prompt).toContain(stagedPath); + expect(prompt).not.toContain(mediaPath); + + const sessionKey = resolveSessionKey( + cfg.session?.scope ?? "per-sender", + ctx, + cfg.session?.mainKey, + ); + const sandbox = await ensureSandboxWorkspaceForSession({ + config: cfg, + sessionKey, + workspaceDir: cfg.agent.workspace, + }); + expect(sandbox).not.toBeNull(); + const stagedFullPath = join( + sandbox!.workspaceDir, + "media", + "inbound", + basename(mediaPath), + ); + await expect(fs.stat(stagedFullPath)).resolves.toBeTruthy(); + }); + }); }); describe("group intro prompts", () => { diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 0580088ff..a1b083302 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -1,4 +1,7 @@ import crypto from "node:crypto"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; import { resolveModelRefFromString } from "../agents/model-selection.js"; import { @@ -7,6 +10,7 @@ import { isEmbeddedPiRunStreaming, resolveEmbeddedSessionLane, } from "../agents/pi-embedded.js"; +import { ensureSandboxWorkspaceForSession } from "../agents/sandbox.js"; import { DEFAULT_AGENT_WORKSPACE_DIR, ensureAgentWorkspace, @@ -49,7 +53,7 @@ import { prependSystemEvents, } from "./reply/session-updates.js"; import { createTypingController } from "./reply/typing.js"; -import type { MsgContext } from "./templating.js"; +import type { MsgContext, TemplateContext } from "./templating.js"; import { type ElevatedLevel, normalizeThinkLevel, @@ -478,6 +482,15 @@ export async function getReplyFromConfig( typing.cleanup(); return commandResult.reply; } + + await stageSandboxMedia({ + ctx, + sessionCtx, + cfg, + sessionKey, + workspaceDir, + }); + const isFirstTurnInSession = isNewSession || !systemSent; const isGroupChat = sessionCtx.ChatType === "group"; const wasMentioned = ctx.WasMentioned === true; @@ -681,3 +694,65 @@ export async function getReplyFromConfig( shouldInjectGroupIntro, }); } + +async function stageSandboxMedia(params: { + ctx: MsgContext; + sessionCtx: TemplateContext; + cfg: ClawdbotConfig; + sessionKey?: string; + workspaceDir: string; +}) { + const { ctx, sessionCtx, cfg, sessionKey, workspaceDir } = params; + const rawPath = ctx.MediaPath?.trim(); + if (!rawPath || !sessionKey) return; + + const sandbox = await ensureSandboxWorkspaceForSession({ + config: cfg, + sessionKey, + workspaceDir, + }); + if (!sandbox) return; + + let source = rawPath; + if (source.startsWith("file://")) { + try { + source = fileURLToPath(source); + } catch { + return; + } + } + if (!path.isAbsolute(source)) return; + + const originalMediaPath = ctx.MediaPath; + const originalMediaUrl = ctx.MediaUrl; + + try { + const fileName = path.basename(source); + if (!fileName) return; + const destDir = path.join(sandbox.workspaceDir, "media", "inbound"); + await fs.mkdir(destDir, { recursive: true }); + const dest = path.join(destDir, fileName); + await fs.copyFile(source, dest); + + const relative = path.posix.join("media", "inbound", fileName); + ctx.MediaPath = relative; + sessionCtx.MediaPath = relative; + + if (originalMediaUrl) { + let normalizedUrl = originalMediaUrl; + if (normalizedUrl.startsWith("file://")) { + try { + normalizedUrl = fileURLToPath(normalizedUrl); + } catch { + normalizedUrl = originalMediaUrl; + } + } + if (normalizedUrl === originalMediaPath || normalizedUrl === source) { + ctx.MediaUrl = relative; + sessionCtx.MediaUrl = relative; + } + } + } catch (err) { + logVerbose(`Failed to stage inbound media for sandbox: ${String(err)}`); + } +} diff --git a/src/browser/profiles-service.test.ts b/src/browser/profiles-service.test.ts index b5d8f3a87..4fa80a8cb 100644 --- a/src/browser/profiles-service.test.ts +++ b/src/browser/profiles-service.test.ts @@ -10,10 +10,14 @@ import type { BrowserServerState, } from "./server-context.js"; -vi.mock("../config/config.js", () => ({ - loadConfig: vi.fn(), - writeConfigFile: vi.fn(async () => {}), -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: vi.fn(), + writeConfigFile: vi.fn(async () => {}), + }; +}); vi.mock("./trash.js", () => ({ movePathToTrash: vi.fn(async (targetPath: string) => targetPath), diff --git a/src/browser/server.test.ts b/src/browser/server.test.ts index 26b01acc2..d1318771b 100644 --- a/src/browser/server.test.ts +++ b/src/browser/server.test.ts @@ -64,22 +64,26 @@ function makeProc(pid = 123) { const proc = makeProc(); -vi.mock("../config/config.js", () => ({ - loadConfig: () => ({ - browser: { - enabled: true, - controlUrl: `http://127.0.0.1:${testPort}`, - color: "#FF4500", - attachOnly: cfgAttachOnly, - headless: true, - defaultProfile: "clawd", - profiles: { - clawd: { cdpPort: testPort + 1, color: "#FF4500" }, +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => ({ + browser: { + enabled: true, + controlUrl: `http://127.0.0.1:${testPort}`, + color: "#FF4500", + attachOnly: cfgAttachOnly, + headless: true, + defaultProfile: "clawd", + profiles: { + clawd: { cdpPort: testPort + 1, color: "#FF4500" }, + }, }, - }, - }), - writeConfigFile: vi.fn(async () => {}), -})); + }), + writeConfigFile: vi.fn(async () => {}), + }; +}); const launchCalls = vi.hoisted(() => [] as Array<{ port: number }>); vi.mock("./chrome.js", () => ({ diff --git a/src/commands/health.snapshot.test.ts b/src/commands/health.snapshot.test.ts index 1a2b01a39..9d7d4dae6 100644 --- a/src/commands/health.snapshot.test.ts +++ b/src/commands/health.snapshot.test.ts @@ -10,9 +10,13 @@ import { getHealthSnapshot } from "./health.js"; let testConfig: Record = {}; let testStore: Record = {}; -vi.mock("../config/config.js", () => ({ - loadConfig: () => testConfig, -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => testConfig, + }; +}); vi.mock("../config/sessions.js", () => ({ resolveStorePath: () => "/tmp/sessions.json", diff --git a/src/commands/send.test.ts b/src/commands/send.test.ts index 5fd946a4b..03ced5bf2 100644 --- a/src/commands/send.test.ts +++ b/src/commands/send.test.ts @@ -5,9 +5,13 @@ import type { RuntimeEnv } from "../runtime.js"; import { sendCommand } from "./send.js"; let testConfig: Record = {}; -vi.mock("../config/config.js", () => ({ - loadConfig: () => testConfig, -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => testConfig, + }; +}); const callGatewayMock = vi.fn(); vi.mock("../gateway/call.js", () => ({ diff --git a/src/commands/sessions.test.ts b/src/commands/sessions.test.ts index 1ee0f8e68..f6ef8c626 100644 --- a/src/commands/sessions.test.ts +++ b/src/commands/sessions.test.ts @@ -7,11 +7,15 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; // Disable colors for deterministic snapshots. process.env.FORCE_COLOR = "0"; -vi.mock("../config/config.js", () => ({ - loadConfig: () => ({ - agent: { model: "pi:opus", contextTokens: 32000 }, - }), -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => ({ + agent: { model: "pi:opus", contextTokens: 32000 }, + }), + }; +}); import { sessionsCommand } from "./sessions.js"; diff --git a/src/commands/status.test.ts b/src/commands/status.test.ts index 83b784a36..7b80ef758 100644 --- a/src/commands/status.test.ts +++ b/src/commands/status.test.ts @@ -31,9 +31,13 @@ vi.mock("../web/session.js", () => ({ readWebSelfId: mocks.readWebSelfId, logWebSelfId: mocks.logWebSelfId, })); -vi.mock("../config/config.js", () => ({ - loadConfig: () => ({ session: {} }), -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => ({ session: {} }), + }; +}); import { statusCommand } from "./status.js"; diff --git a/src/gateway/call.test.ts b/src/gateway/call.test.ts index 640acf548..e01260a9a 100644 --- a/src/gateway/call.test.ts +++ b/src/gateway/call.test.ts @@ -9,10 +9,14 @@ let lastClientOptions: { onHelloOk?: () => void | Promise; } | null = null; -vi.mock("../config/config.js", () => ({ - loadConfig, - resolveGatewayPort, -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig, + resolveGatewayPort, + }; +}); vi.mock("../infra/tailnet.js", () => ({ pickPrimaryTailnetIPv4, diff --git a/src/imessage/monitor.test.ts b/src/imessage/monitor.test.ts index 433fbc239..070afc4ca 100644 --- a/src/imessage/monitor.test.ts +++ b/src/imessage/monitor.test.ts @@ -14,9 +14,13 @@ let notificationHandler: | undefined; let closeResolve: (() => void) | undefined; -vi.mock("../config/config.js", () => ({ - loadConfig: () => config, -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => config, + }; +}); vi.mock("../auto-reply/reply.js", () => ({ getReplyFromConfig: (...args: unknown[]) => replyMock(...args), diff --git a/src/imessage/send.test.ts b/src/imessage/send.test.ts index 2984345b0..3fa040ff5 100644 --- a/src/imessage/send.test.ts +++ b/src/imessage/send.test.ts @@ -5,9 +5,13 @@ import { sendMessageIMessage } from "./send.js"; const requestMock = vi.fn(); const stopMock = vi.fn(); -vi.mock("../config/config.js", () => ({ - loadConfig: () => ({}), -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => ({}), + }; +}); vi.mock("./client.js", () => ({ createIMessageRpcClient: vi.fn().mockResolvedValue({ diff --git a/src/telegram/bot.media.test.ts b/src/telegram/bot.media.test.ts index cf474e08e..0157d0831 100644 --- a/src/telegram/bot.media.test.ts +++ b/src/telegram/bot.media.test.ts @@ -31,9 +31,13 @@ vi.mock("@grammyjs/transformer-throttler", () => ({ apiThrottler: () => throttlerSpy(), })); -vi.mock("../config/config.js", () => ({ - loadConfig: () => ({}), -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => ({}), + }; +}); vi.mock("../auto-reply/reply.js", () => { const replySpy = vi.fn(async (_ctx, opts) => { diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts index b113c892b..364c90188 100644 --- a/src/telegram/bot.test.ts +++ b/src/telegram/bot.test.ts @@ -5,9 +5,13 @@ import { createTelegramBot } from "./bot.js"; const { loadConfig } = vi.hoisted(() => ({ loadConfig: vi.fn(() => ({})), })); -vi.mock("../config/config.js", () => ({ - loadConfig, -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig, + }; +}); const useSpy = vi.fn(); const onSpy = vi.fn(); diff --git a/src/web/inbound.media.test.ts b/src/web/inbound.media.test.ts index 031bb114d..a1cfb9a6b 100644 --- a/src/web/inbound.media.test.ts +++ b/src/web/inbound.media.test.ts @@ -5,18 +5,22 @@ import path from "node:path"; import { afterAll, beforeAll, describe, expect, it, vi } from "vitest"; -vi.mock("../config/config.js", () => ({ - loadConfig: vi.fn().mockReturnValue({ - whatsapp: { - allowFrom: ["*"], // Allow all in tests - }, - messages: { - messagePrefix: undefined, - responsePrefix: undefined, - timestampPrefix: false, - }, - }), -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: vi.fn().mockReturnValue({ + whatsapp: { + allowFrom: ["*"], // Allow all in tests + }, + messages: { + messagePrefix: undefined, + responsePrefix: undefined, + timestampPrefix: false, + }, + }), + }; +}); const HOME = path.join( os.tmpdir(), diff --git a/src/web/monitor-inbox.test.ts b/src/web/monitor-inbox.test.ts index 9a47fa78a..ab8aa4525 100644 --- a/src/web/monitor-inbox.test.ts +++ b/src/web/monitor-inbox.test.ts @@ -20,9 +20,13 @@ const mockLoadConfig = vi.fn().mockReturnValue({ }, }); -vi.mock("../config/config.js", () => ({ - loadConfig: () => mockLoadConfig(), -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => mockLoadConfig(), + }; +}); vi.mock("./session.js", () => { const { EventEmitter } = require("node:events"); diff --git a/src/web/test-helpers.ts b/src/web/test-helpers.ts index ed01b0f8e..968d7649c 100644 --- a/src/web/test-helpers.ts +++ b/src/web/test-helpers.ts @@ -31,13 +31,17 @@ export function resetLoadConfigMock() { (globalThis as Record)[CONFIG_KEY] = () => DEFAULT_CONFIG; } -vi.mock("../config/config.js", () => ({ - loadConfig: () => { - const getter = (globalThis as Record)[CONFIG_KEY]; - if (typeof getter === "function") return getter(); - return DEFAULT_CONFIG; - }, -})); +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: () => { + const getter = (globalThis as Record)[CONFIG_KEY]; + if (typeof getter === "function") return getter(); + return DEFAULT_CONFIG; + }, + }; +}); vi.mock("../media/store.js", () => ({ saveMediaBuffer: vi