diff --git a/docs/images.md b/docs/images.md new file mode 100644 index 000000000..4ebe3722b --- /dev/null +++ b/docs/images.md @@ -0,0 +1,74 @@ +# Image Support Specification (draft) — 2025-11-25 + +This document defines how `warelay` should handle sending and replying with images across both providers. It is intentionally implementation-ready and keeps the UX consistent with existing CLI patterns and Tailscale Funnel usage. + +## Goals +- Allow sending an image with an optional caption via `warelay send` for both providers. +- Allow auto-replies (Twilio webhook, Twilio poller, Web inbox) to return an image (optionally with text) when configured. +- Keep the “one command at a time” queue intact; media fetch/serve must not block other replies longer than necessary. +- Avoid introducing new external services: reuse the existing Tailscale Funnel port to host media for Twilio. + +## CLI & Config Surface +- `warelay send --media [--message ] [--provider twilio|web]` + - `--media` optional; `--message` remains required for now (caption can be empty string to send only media). + - `--dry-run` prints the resolved payload including hosted URL (twilio) or file path (web). + - `--json` emits `{ provider, to, sid/messageId, mediaUrl, caption }`. +- Config auto-reply (`~/.warelay/warelay.json`): + - Add `inbound.reply.mediaUrl?: string` (templated like `reply.text`). + - Return shape from `getReplyFromConfig` becomes `{ text?: string; mediaUrl?: string }`. + - Both `text` and `mediaUrl` optional; at least one must be present to send a reply. + +## Provider Behavior +### Web (Baileys) +- Input: local file path **or** HTTP(S) URL. +- Flow: load into Buffer (max 5 MB), send via `sock.sendMessage(jid, { image: buffer, caption })`. +- Caption uses `--message` or `reply.text`; if caption is empty, send media-only. +- Logging: non-verbose shows `↩️`/`✅` with caption; verbose includes `(media, B, ms fetch)`. + +### Twilio +- Twilio API requires a public HTTPS `MediaUrl`; it will not accept local paths. +- Hosting strategy: reuse the webhook/Funnel port. + - When `--media` is a local path, copy to temp dir (`~/.warelay/media/`), serve at `/media/` on the existing Express app started for webhook/up, or spin up a short-lived server on demand for `send`. + - `MediaUrl` = `https://.ts.net/media/`. + - Files auto-removed after TTL (default 2 minutes) or after first successful fetch (best-effort). + - Enforce size limit 5 MB; reject early with clear error. +- If `--media` is already an HTTPS URL, pass through unchanged. +- Fallback: if Funnel is not enabled (or host unknown) and a local path is provided, fail with guidance to run `warelay up` (or pass a URL instead). + +## Hosting/Server Details +- Extend `startWebhook` Express app: + - Static media route `/media/:id` reading from temp dir. + - 404/410 if expired or missing. + - Optional `?delete=1` to self-delete after fetch (used by Twilio fetch hook if we detect first hit). +- Temp storage: `~/.warelay/media`; cleaned on startup (remove files older than 15 minutes) and during TTL eviction. +- Security: no directory listing; only UUID file names; CORS open (Twilio fetch); content-type derived from `mime-types` lookup by extension or `content-type` header on download, else `application/octet-stream`. + +## Auto-Reply Pipeline +- `getReplyFromConfig` returns `{ text?, mediaUrl? }`. +- Webhook / Twilio poller: + - If `mediaUrl` present, include `mediaUrl` in Twilio message payload; caption = `text` (may be empty). + - If only `text`, behave as today. +- Web inbox: + - If `mediaUrl` present, fetch/resolve same as send (local path or URL), send via Baileys with caption. + +## Inbound Media to Commands (Claude etc.) +- For completeness: when inbound Twilio/Web messages include media, download to temp file, expose templating variables: + - `{{MediaUrl}}` original URL (Twilio) or pseudo-URL (web). + - `{{MediaPath}}` local temp path written before running the command. +- Size guard: only download if ≤5 MB; else skip and log. + +## Errors & Messaging +- Local path with twilio + Funnel disabled → error: “Twilio media needs a public URL; start `warelay up`/`warelay webhook` with Funnel or pass an https:// URL.” +- File too large (>5 MB) → “Media exceeds 5 MB limit; resize or host elsewhere.” +- Download failure for web provider → “Failed to load media from ; skipping send.” + +## Tests to Add +- Twilio: dry-run shows hosted URL; send payload includes `mediaUrl`; rejects when Funnel host missing. +- Web: local path sends image (mock Baileys buffer assertion). +- Config: zod allows `mediaUrl`, returns combined object; command auto-reply handles `text+media`, `media-only`. +- Media server: serves file, enforces TTL, returns 404 after cleanup. + +## Open Decisions (confirm before coding) +- TTL for temp media (proposal: 2 minutes, cleanup at start + interval). +- One-file-per-send vs. batching: default to one-file-per-send; multi-attach not supported. +- Should `warelay send --provider twilio --media` implicitly start the media server (even if webhook not running), or require `warelay up/webhook` already active? (Proposal: auto-start lightweight server on demand, auto-stop after media is fetched or TTL.) diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 6c099f92c..f5a19fb71 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -79,12 +79,17 @@ function summarizeClaudeMetadata(payload: unknown): string | undefined { return parts.length ? parts.join(", ") : undefined; } +export type ReplyPayload = { + text?: string; + mediaUrl?: string; +}; + export async function getReplyFromConfig( ctx: MsgContext, opts?: GetReplyOptions, configOverride?: WarelayConfig, commandRunner: typeof runCommandWithTimeout = runCommandWithTimeout, -): Promise { +): Promise { // Choose reply from config: static text or external command stdout. const cfg = configOverride ?? loadConfig(); const reply = cfg.inbound?.reply; @@ -186,7 +191,7 @@ export async function getReplyFromConfig( if (reply.mode === "text" && reply.text) { await onReplyStart(); logVerbose("Using text auto-reply from config"); - return applyTemplate(reply.text, templatingCtx); + return { text: applyTemplate(reply.text, templatingCtx), mediaUrl: reply.mediaUrl }; } if (reply.mode === "command" && reply.command?.length) { @@ -306,7 +311,7 @@ export async function getReplyFromConfig( ); return undefined; } - return trimmed || undefined; + return trimmed ? { text: trimmed, mediaUrl: reply.mediaUrl } : undefined; } catch (err) { const elapsed = Date.now() - started; const anyErr = err as { killed?: boolean; signal?: string }; @@ -356,14 +361,14 @@ export async function autoReplyIfConfigured( MessageSid: message.sid, }; - const replyText = await getReplyFromConfig( + const replyResult = await getReplyFromConfig( ctx, { onReplyStart: () => sendTypingIndicator(client, runtime, message.sid), }, configOverride, ); - if (!replyText) return; + if (!replyResult || (!replyResult.text && !replyResult.mediaUrl)) return; const replyFrom = message.to; const replyTo = message.from; @@ -376,19 +381,26 @@ export async function autoReplyIfConfigured( return; } - logVerbose( - `Auto-replying via Twilio: from ${replyFrom} to ${replyTo}, body length ${replyText.length}`, - ); + if (replyResult.text) { + logVerbose( + `Auto-replying via Twilio: from ${replyFrom} to ${replyTo}, body length ${replyResult.text.length}`, + ); + } else { + logVerbose(`Auto-replying via Twilio: from ${replyFrom} to ${replyTo} (media)`); + } try { await client.messages.create({ from: replyFrom, to: replyTo, - body: replyText, + body: replyResult.text ?? "", + ...(replyResult.mediaUrl ? { mediaUrl: [replyResult.mediaUrl] } : {}), }); if (isVerbose()) { console.log( - info(`↩️ Auto-replied to ${replyTo} (sid ${message.sid ?? "no-sid"})`), + info( + `↩️ Auto-replied to ${replyTo} (sid ${message.sid ?? "no-sid"}${replyResult.mediaUrl ? ", media" : ""})`, + ), ); } } catch (err) { diff --git a/src/cli/deps.ts b/src/cli/deps.ts index b4c911ea4..bc18f4f9e 100644 --- a/src/cli/deps.ts +++ b/src/cli/deps.ts @@ -15,6 +15,7 @@ import { startWebhook } from "../webhook/server.js"; import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; import { info } from "../globals.js"; import { autoReplyIfConfigured } from "../auto-reply/reply.js"; +import { ensureMediaHosted } from "../media/host.js"; export type CliDeps = { sendMessage: typeof sendMessage; @@ -35,6 +36,10 @@ export type CliDeps = { updateWebhook: typeof updateWebhook; handlePortError: typeof handlePortError; monitorWebProvider: typeof monitorWebProvider; + resolveTwilioMediaUrl: ( + source: string, + opts: { serveMedia: boolean; runtime: RuntimeEnv }, + ) => Promise; }; export async function monitorTwilio( @@ -79,6 +84,14 @@ export function createDefaultDeps(): CliDeps { updateWebhook, handlePortError, monitorWebProvider, + resolveTwilioMediaUrl: async (source, { serveMedia, runtime }) => { + if (/^https?:\/\//i.test(source)) return source; + const hosted = await ensureMediaHosted(source, { + startServer: serveMedia, + runtime, + }); + return hosted.url; + }, }; } diff --git a/src/cli/program.ts b/src/cli/program.ts index 72d91e03a..233f1ce03 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -56,6 +56,8 @@ export function buildProgram() { "Recipient number in E.164 (e.g. +15551234567)", ) .requiredOption("-m, --message ", "Message body") + .option("--media ", "Attach image (<=5MB). Web: path or URL. Twilio: https URL or local path hosted via webhook/funnel.") + .option("--serve-media", "For Twilio: start a temporary media server if webhook is not running", false) .option("-w, --wait ", "Wait for delivery status (0 to skip)", "20") .option("-p, --poll ", "Polling interval while waiting", "2") .option("--provider ", "Provider: twilio | web", "twilio") diff --git a/src/commands/send.ts b/src/commands/send.ts index 37959fdcb..e1ad32e4d 100644 --- a/src/commands/send.ts +++ b/src/commands/send.ts @@ -12,6 +12,8 @@ export async function sendCommand( provider: Provider; json?: boolean; dryRun?: boolean; + media?: string; + serveMedia?: boolean; }, deps: CliDeps, runtime: RuntimeEnv, @@ -30,20 +32,30 @@ export async function sendCommand( if (opts.provider === "web") { if (opts.dryRun) { runtime.log( - `[dry-run] would send via web -> ${opts.to}: ${opts.message}`, + `[dry-run] would send via web -> ${opts.to}: ${opts.message}${opts.media ? ` (media ${opts.media})` : ""}`, ); return; } if (waitSeconds !== 0) { runtime.log(info("Wait/poll are Twilio-only; ignored for provider=web.")); } - const res = await deps.sendMessageWeb(opts.to, opts.message, { - verbose: false, - }); + const res = await deps.sendMessageWeb( + opts.to, + opts.message, + { + verbose: false, + mediaUrl: opts.media, + }, + ); if (opts.json) { runtime.log( JSON.stringify( - { provider: "web", to: opts.to, messageId: res.messageId }, + { + provider: "web", + to: opts.to, + messageId: res.messageId, + mediaUrl: opts.media ?? null, + }, null, 2, ), @@ -54,16 +66,34 @@ export async function sendCommand( if (opts.dryRun) { runtime.log( - `[dry-run] would send via twilio -> ${opts.to}: ${opts.message}`, + `[dry-run] would send via twilio -> ${opts.to}: ${opts.message}${opts.media ? ` (media ${opts.media})` : ""}`, ); return; } - const result = await deps.sendMessage(opts.to, opts.message, runtime); + let mediaUrl: string | undefined = undefined; + if (opts.media) { + mediaUrl = await deps.resolveTwilioMediaUrl(opts.media, { + serveMedia: Boolean(opts.serveMedia), + runtime, + }); + } + + const result = await deps.sendMessage( + opts.to, + opts.message, + { mediaUrl }, + runtime, + ); if (opts.json) { runtime.log( JSON.stringify( - { provider: "twilio", to: opts.to, sid: result?.sid ?? null }, + { + provider: "twilio", + to: opts.to, + sid: result?.sid ?? null, + mediaUrl: mediaUrl ?? null, + }, null, 2, ), diff --git a/src/config/config.ts b/src/config/config.ts index 1b4b6e50a..e89c376b9 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -29,6 +29,7 @@ export type WarelayConfig = { template?: string; // prepend template string when building command/prompt timeoutSeconds?: number; // optional command timeout; defaults to 600s bodyPrefix?: string; // optional string prepended to Body before templating + mediaUrl?: string; // optional media attachment (path or URL) session?: SessionConfig; claudeOutputFormat?: ClaudeOutputFormat; // when command starts with `claude`, force an output format }; @@ -45,6 +46,7 @@ const ReplySchema = z template: z.string().optional(), timeoutSeconds: z.number().int().positive().optional(), bodyPrefix: z.string().optional(), + mediaUrl: z.string().optional(), session: z .object({ scope: z.union([z.literal("per-sender"), z.literal("global")]).optional(), diff --git a/src/index.core.test.ts b/src/index.core.test.ts index dd024e23a..05a55f22b 100644 --- a/src/index.core.test.ts +++ b/src/index.core.test.ts @@ -86,9 +86,9 @@ describe("config and templating", () => { { onReplyStart }, cfg, ); - expect(result).toBe("Hello whatsapp:+1555 [pfx] hi"); - expect(onReplyStart).toHaveBeenCalled(); - }); + expect(result?.text).toBe("Hello whatsapp:+1555 [pfx] hi"); + expect(onReplyStart).toHaveBeenCalled(); + }); it("getReplyFromConfig runs command and manages session store", async () => { const tmpStore = path.join(os.tmpdir(), `warelay-store-${Date.now()}.json`); @@ -123,7 +123,7 @@ describe("config and templating", () => { cfg, runSpy, ); - expect(first).toBe("cmd output"); + expect(first?.text).toBe("cmd output"); const argvFirst = runSpy.mock.calls[0][0]; expect(argvFirst).toEqual([ "echo", @@ -139,7 +139,7 @@ describe("config and templating", () => { cfg, runSpy, ); - expect(second).toBe("cmd output"); + expect(second?.text).toBe("cmd output"); const argvSecond = runSpy.mock.calls[1][0]; expect(argvSecond[2]).toBe("--resume"); }); @@ -200,15 +200,15 @@ describe("config and templating", () => { }, }; - const result = await index.getReplyFromConfig( - { Body: "hi", From: "+1", To: "+2" }, - undefined, - cfg, - runSpy, - ); + const result = await index.getReplyFromConfig( + { Body: "hi", From: "+1", To: "+2" }, + undefined, + cfg, + runSpy, + ); - expect(result).toBe("hello world"); - }); + expect(result?.text).toBe("hello world"); + }); it("parses Claude JSON output even without explicit claudeOutputFormat when using claude bin", async () => { const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({ @@ -228,15 +228,15 @@ describe("config and templating", () => { }, }; - const result = await index.getReplyFromConfig( - { Body: "hi", From: "+1", To: "+2" }, - undefined, - cfg, - runSpy, - ); + const result = await index.getReplyFromConfig( + { Body: "hi", From: "+1", To: "+2" }, + undefined, + cfg, + runSpy, + ); - expect(result).toBe("Sure! What's up?"); - }); + expect(result?.text).toBe("Sure! What's up?"); + }); it("serializes command auto-replies via the queue", async () => { let active = 0; @@ -376,11 +376,11 @@ describe("twilio interactions", () => { }); describe("webhook and messaging", () => { - it("startWebhook responds and auto-replies", async () => { - const client = twilioFactory._createClient(); - client.messages.create.mockResolvedValue({}); - twilioFactory.mockReturnValue(client); - vi.spyOn(index, "getReplyFromConfig").mockResolvedValue("Auto"); + it("startWebhook responds and auto-replies", async () => { + const client = twilioFactory._createClient(); + client.messages.create.mockResolvedValue({}); + twilioFactory.mockReturnValue(client); + vi.spyOn(index, "getReplyFromConfig").mockResolvedValue({ text: "Auto" }); const server = await index.startWebhook(0, "/hook", undefined, false); const address = server.address() as net.AddressInfo; @@ -667,27 +667,29 @@ describe("monitoring", () => { expect(runtime.exit).toHaveBeenCalledWith(1); }); - it("monitorWebProvider triggers replies and stops when asked", async () => { - const replySpy = vi.fn(); - const listenerFactory = vi.fn( - async ( - opts: Parameters[1] extends undefined - ? never - : NonNullable[1]>, - ) => { - await opts.onMessage({ - body: "hello", - from: "+1", - to: "+2", - id: "id1", - sendComposing: vi.fn(), - reply: replySpy, - }); - return { close: vi.fn() }; - }, - ); - const resolver = vi.fn().mockResolvedValue("auto"); - await index.monitorWebProvider(false, listenerFactory, false, resolver); - expect(replySpy).toHaveBeenCalledWith("auto"); + it("monitorWebProvider triggers replies and stops when asked", async () => { + const replySpy = vi.fn(); + const sendMediaSpy = vi.fn(); + const listenerFactory = vi.fn( + async ( + opts: Parameters[1] extends undefined + ? never + : NonNullable[1]>, + ) => { + await opts.onMessage({ + body: "hello", + from: "+1", + to: "+2", + id: "id1", + sendComposing: vi.fn(), + reply: replySpy, + sendMedia: sendMediaSpy, + }); + return { close: vi.fn() }; + }, + ); + const resolver = vi.fn().mockResolvedValue({ text: "auto" }); + await index.monitorWebProvider(false, listenerFactory, false, resolver); + expect(replySpy).toHaveBeenCalledWith("auto"); + }); }); -}); diff --git a/src/media/host.ts b/src/media/host.ts new file mode 100644 index 000000000..dfc161d64 --- /dev/null +++ b/src/media/host.ts @@ -0,0 +1,68 @@ +import { once } from "node:events"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { danger, warn } from "../globals.js"; +import { logInfo, logWarn } from "../logger.js"; +import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; +import { ensurePortAvailable, PortInUseError } from "../infra/ports.js"; +import { getTailnetHostname } from "../infra/tailscale.js"; +import { saveMediaSource } from "./store.js"; +import { startMediaServer } from "./server.js"; + +const DEFAULT_PORT = 42873; +const TTL_MS = 2 * 60 * 1000; + +let mediaServer: import("http").Server | null = null; + +export type HostedMedia = { + url: string; + id: string; + size: number; +}; + +export async function ensureMediaHosted( + source: string, + opts: { + port?: number; + startServer?: boolean; + runtime?: RuntimeEnv; + } = {}, +): Promise { + const port = opts.port ?? DEFAULT_PORT; + const runtime = opts.runtime ?? defaultRuntime; + + const saved = await saveMediaSource(source); + const hostname = await getTailnetHostname(); + + // Decide whether we must start a media server. + const needsServerStart = await isPortFree(port); + if (needsServerStart && !opts.startServer) { + await fs.rm(saved.path).catch(() => {}); + throw new Error( + "Media hosting requires the webhook/Funnel server. Start `warelay webhook`/`warelay up` or re-run with --serve-media.", + ); + } + if (needsServerStart && opts.startServer) { + if (!mediaServer) { + mediaServer = await startMediaServer(port, TTL_MS, runtime); + logInfo( + `📡 Started temporary media host on http://localhost:${port}/media/:id (TTL ${TTL_MS / 1000}s)`, + runtime, + ); + mediaServer.unref?.(); + } + } + + const url = `https://${hostname}/media/${saved.id}`; + return { url, id: saved.id, size: saved.size }; +} + +async function isPortFree(port: number) { + try { + await ensurePortAvailable(port); + return true; + } catch (err) { + if (err instanceof PortInUseError) return false; + throw err; + } +} diff --git a/src/media/server.ts b/src/media/server.ts new file mode 100644 index 000000000..4af87a8d5 --- /dev/null +++ b/src/media/server.ts @@ -0,0 +1,62 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import express, { type Express } from "express"; +import type { Server } from "http"; +import { danger } from "../globals.js"; +import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; +import { cleanOldMedia, getMediaDir } from "./store.js"; + +const DEFAULT_TTL_MS = 2 * 60 * 1000; + +export function attachMediaRoutes( + app: Express, + ttlMs = DEFAULT_TTL_MS, + runtime: RuntimeEnv = defaultRuntime, +) { + const mediaDir = getMediaDir(); + + app.get("/media/:id", async (req, res) => { + const id = req.params.id; + const file = path.join(mediaDir, id); + try { + const stat = await fs.stat(file); + if (Date.now() - stat.mtimeMs > ttlMs) { + await fs.rm(file).catch(() => {}); + res.status(410).send("expired"); + return; + } + res.sendFile(file); + // best-effort single-use cleanup after response ends + res.on("finish", () => { + setTimeout(() => { + fs.rm(file).catch(() => {}); + }, 500); + }); + } catch { + res.status(404).send("not found"); + } + }); + + // periodic cleanup + setInterval(() => { + void cleanOldMedia(ttlMs); + }, ttlMs).unref(); + +} + +export async function startMediaServer( + port: number, + ttlMs = DEFAULT_TTL_MS, + runtime: RuntimeEnv = defaultRuntime, +): Promise { + const app = express(); + attachMediaRoutes(app, ttlMs, runtime); + return await new Promise((resolve, reject) => { + const server = app.listen(port); + server.once("listening", () => resolve(server)); + server.once("error", (err) => { + runtime.error(danger(`Media server failed: ${String(err)}`)); + reject(err); + }); + }); +} diff --git a/src/media/store.ts b/src/media/store.ts new file mode 100644 index 000000000..71a7ed50f --- /dev/null +++ b/src/media/store.ts @@ -0,0 +1,93 @@ +import crypto from "node:crypto"; +import fs from "node:fs/promises"; +import path from "node:path"; +import os from "node:os"; +import { pipeline } from "node:stream/promises"; +import { createWriteStream } from "node:fs"; +import { request } from "node:https"; + +const MEDIA_DIR = path.join(os.homedir(), ".warelay", "media"); +const MAX_BYTES = 5 * 1024 * 1024; // 5MB +const DEFAULT_TTL_MS = 2 * 60 * 1000; // 2 minutes + +export function getMediaDir() { + return MEDIA_DIR; +} + +export async function ensureMediaDir() { + await fs.mkdir(MEDIA_DIR, { recursive: true }); + return MEDIA_DIR; +} + +export async function cleanOldMedia(ttlMs = DEFAULT_TTL_MS) { + await ensureMediaDir(); + const entries = await fs.readdir(MEDIA_DIR).catch(() => []); + const now = Date.now(); + await Promise.all( + entries.map(async (file) => { + const full = path.join(MEDIA_DIR, file); + const stat = await fs.stat(full).catch(() => null); + if (!stat) return; + if (now - stat.mtimeMs > ttlMs) { + await fs.rm(full).catch(() => {}); + } + }), + ); +} + +function looksLikeUrl(src: string) { + return /^https?:\/\//i.test(src); +} + +async function downloadToFile(url: string, dest: string) { + await new Promise((resolve, reject) => { + const req = request(url, (res) => { + if (!res.statusCode || res.statusCode >= 400) { + reject(new Error(`HTTP ${res.statusCode ?? "?"} downloading media`)); + return; + } + let total = 0; + const out = createWriteStream(dest); + res.on("data", (chunk) => { + total += chunk.length; + if (total > MAX_BYTES) { + req.destroy(new Error("Media exceeds 5MB limit")); + } + }); + pipeline(res, out).then(() => resolve()).catch(reject); + }); + req.on("error", reject); + req.end(); + }); +} + +export type SavedMedia = { + id: string; + path: string; + size: number; + contentType?: string; +}; + +export async function saveMediaSource( + source: string, +): Promise { + await ensureMediaDir(); + await cleanOldMedia(); + const id = crypto.randomUUID(); + const dest = path.join(MEDIA_DIR, id); + if (looksLikeUrl(source)) { + await downloadToFile(source, dest); + const stat = await fs.stat(dest); + return { id, path: dest, size: stat.size }; + } + // local path + const stat = await fs.stat(source); + if (!stat.isFile()) { + throw new Error("Media path is not a file"); + } + if (stat.size > MAX_BYTES) { + throw new Error("Media exceeds 5MB limit"); + } + await fs.copyFile(source, dest); + return { id, path: dest, size: stat.size }; +} diff --git a/src/provider-web.ts b/src/provider-web.ts index e41285e28..acbe589e9 100644 --- a/src/provider-web.ts +++ b/src/provider-web.ts @@ -9,6 +9,7 @@ import { makeCacheableSignalKeyStore, makeWASocket, useMultiFileAuthState, + type AnyMessageContent, } from "@whiskeysockets/baileys"; import pino from "pino"; import qrcode from "qrcode-terminal"; @@ -101,7 +102,7 @@ export async function waitForWaConnection( export async function sendMessageWeb( to: string, body: string, - options: { verbose: boolean }, + options: { verbose: boolean; mediaUrl?: string }, ): Promise<{ messageId: string; toJid: string }> { const sock = await createWaSocket(false, options.verbose); try { @@ -112,9 +113,20 @@ export async function sendMessageWeb( } catch (err) { logVerbose(`Presence update skipped: ${String(err)}`); } - const result = await sock.sendMessage(jid, { text: body }); + let payload: AnyMessageContent = { text: body }; + if (options.mediaUrl) { + const media = await loadWebMedia(options.mediaUrl); + payload = { + image: media.buffer, + caption: body || undefined, + mimetype: media.contentType, + }; + } + const result = await sock.sendMessage(jid, payload); const messageId = result?.key?.id ?? "unknown"; - logInfo(`✅ Sent via web session. Message ID: ${messageId} -> ${jid}`); + logInfo( + `✅ Sent via web session. Message ID: ${messageId} -> ${jid}${options.mediaUrl ? " (media)" : ""}`, + ); return { messageId, toJid: jid }; } finally { try { @@ -209,6 +221,7 @@ export type WebInboundMessage = { timestamp?: number; sendComposing: () => Promise; reply: (text: string) => Promise; + sendMedia: (payload: { image: Buffer; caption?: string; mimetype?: string }) => Promise; }; export async function monitorWebInbox(options: { @@ -249,6 +262,13 @@ export async function monitorWebInbox(options: { const reply = async (text: string) => { await sock.sendMessage(chatJid, { text }); }; + const sendMedia = async (payload: { + image: Buffer; + caption?: string; + mimetype?: string; + }) => { + await sock.sendMessage(chatJid, payload); + }; const timestamp = msg.messageTimestamp ? Number(msg.messageTimestamp) * 1000 : undefined; @@ -262,6 +282,7 @@ export async function monitorWebInbox(options: { timestamp, sendComposing, reply, + sendMedia, }); } catch (err) { console.error( @@ -299,7 +320,7 @@ export async function monitorWebProvider( console.log(`\n[${ts}] ${msg.from} -> ${msg.to}: ${msg.body}`); const replyStarted = Date.now(); - const replyText = await replyResolver( + const replyResult = await replyResolver( { Body: msg.body, From: msg.from, @@ -310,18 +331,31 @@ export async function monitorWebProvider( onReplyStart: msg.sendComposing, }, ); - if (!replyText) return; + if (!replyResult || (!replyResult.text && !replyResult.mediaUrl)) return; try { - await msg.reply(replyText); + if (replyResult.mediaUrl) { + const media = await loadWebMedia(replyResult.mediaUrl); + await msg.sendMedia({ + image: media.buffer, + caption: replyResult.text || undefined, + mimetype: media.contentType, + }); + } else { + await msg.reply(replyResult.text ?? ""); + } const durationMs = Date.now() - replyStarted; if (isVerbose()) { console.log( success( - `↩️ Auto-replied to ${msg.from} (web, ${replyText.length} chars, ${durationMs}ms)`, + `↩️ Auto-replied to ${msg.from} (web, ${replyResult.text?.length ?? 0} chars${replyResult.mediaUrl ? ", media" : ""}, ${durationMs}ms)`, ), ); } else { - console.log(success(`↩️ ${replyText}`)); + console.log( + success( + `↩️ ${replyResult.text ?? ""}${replyResult.mediaUrl ? " (media)" : ""}`, + ), + ); } } catch (err) { console.error( @@ -403,6 +437,27 @@ function extractText(message: proto.IMessage | undefined): string | undefined { return undefined; } +async function loadWebMedia( + mediaUrl: string, +): Promise<{ buffer: Buffer; contentType?: string }> { + if (/^https?:\/\//i.test(mediaUrl)) { + const res = await fetch(mediaUrl); + if (!res.ok || !res.body) { + throw new Error(`Failed to fetch media: HTTP ${res.status}`); + } + const array = Buffer.from(await res.arrayBuffer()); + if (array.length > 5 * 1024 * 1024) { + throw new Error("Media exceeds 5MB limit"); + } + return { buffer: array, contentType: res.headers.get("content-type") ?? undefined }; + } + const data = await fs.readFile(mediaUrl); + if (data.length > 5 * 1024 * 1024) { + throw new Error("Media exceeds 5MB limit"); + } + return { buffer: data }; +} + function getStatusCode(err: unknown) { return ( (err as { output?: { statusCode?: number } })?.output?.statusCode ?? diff --git a/src/twilio/send.ts b/src/twilio/send.ts index 3ff7df823..b174f1fc7 100644 --- a/src/twilio/send.ts +++ b/src/twilio/send.ts @@ -13,6 +13,7 @@ const failureTerminalStatuses = new Set(["failed", "undelivered", "canceled"]); export async function sendMessage( to: string, body: string, + opts?: { mediaUrl?: string }, runtime: RuntimeEnv = defaultRuntime, ) { const env = readEnv(runtime); @@ -25,6 +26,7 @@ export async function sendMessage( from, to: toNumber, body, + mediaUrl: opts?.mediaUrl ? [opts.mediaUrl] : undefined, }); logInfo(`✅ Request accepted. Message SID: ${message.sid} -> ${toNumber}`, runtime); diff --git a/src/twilio/webhook.ts b/src/twilio/webhook.ts index e0d3d7fa6..9230e0f0b 100644 --- a/src/twilio/webhook.ts +++ b/src/twilio/webhook.ts @@ -7,10 +7,11 @@ import { success, logVerbose, danger } from "../globals.js"; import { readEnv } from "../env.js"; import { createClient } from "./client.js"; import { normalizePath } from "../utils.js"; -import { getReplyFromConfig } from "../auto-reply/reply.js"; +import { getReplyFromConfig, type ReplyPayload } from "../auto-reply/reply.js"; import { sendTypingIndicator } from "./typing.js"; import { logTwilioSendError } from "./utils.js"; import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; +import { attachMediaRoutes } from "../media/server.js"; /** Start the inbound webhook HTTP server and wire optional auto-replies. */ export async function startWebhook( @@ -24,6 +25,7 @@ export async function startWebhook( const env = readEnv(runtime); const app = express(); + attachMediaRoutes(app, undefined, runtime); // Twilio sends application/x-www-form-urlencoded payloads. app.use(bodyParser.urlencoded({ extended: false })); app.use((req, _res, next) => { @@ -38,9 +40,10 @@ export async function startWebhook( if (verbose) runtime.log(chalk.gray(`Body: ${Body ?? ""}`)); const client = createClient(env); - let replyText = autoReply; - if (!replyText) { - replyText = await getReplyFromConfig( + let replyResult: ReplyPayload | undefined = + autoReply !== undefined ? { text: autoReply } : undefined; + if (!replyResult) { + replyResult = await getReplyFromConfig( { Body, From, To, MessageSid }, { onReplyStart: () => sendTypingIndicator(client, runtime, MessageSid), @@ -48,10 +51,20 @@ export async function startWebhook( ); } - if (replyText) { + if (replyResult && (replyResult.text || replyResult.mediaUrl)) { try { - await client.messages.create({ from: To, to: From, body: replyText }); - if (verbose) runtime.log(success(`↩️ Auto-replied to ${From}`)); + await client.messages.create({ + from: To, + to: From, + body: replyResult.text ?? "", + ...(replyResult.mediaUrl ? { mediaUrl: [replyResult.mediaUrl] } : {}), + }); + if (verbose) + runtime.log( + success( + `↩️ Auto-replied to ${From}${replyResult.mediaUrl ? " (media)" : ""}`, + ), + ); } catch (err) { logTwilioSendError(err, From ?? undefined, runtime); }