From e17c038d1802ad62c861572cbb3f69c0d04ce2d4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 3 Jan 2026 23:56:36 +0000 Subject: [PATCH] fix: add gif playback for WhatsApp sends --- CHANGELOG.md | 1 + docs/images.md | 1 + docs/whatsapp.md | 5 ++++- src/cli/gateway-cli.ts | 2 ++ src/cli/program.ts | 5 +++++ src/commands/send.ts | 2 ++ src/gateway/protocol/schema.ts | 1 + src/gateway/server-methods.ts | 39 ++++++++++++++++++++++++++++++++++ src/web/active-listener.ts | 5 +++++ src/web/inbound.ts | 4 ++++ src/web/monitor-inbox.test.ts | 20 +++++++++++++++++ src/web/outbound.test.ts | 21 ++++++++++++++++++ src/web/outbound.ts | 20 ++++++++++++++--- 13 files changed, 122 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05d08c49b..fd10a6a49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ ### Fixes - CI: fix lint ordering after merge cleanup (#156) — thanks @steipete. - CI: consolidate checks to avoid redundant installs (#144) — thanks @thewilloftheshadow. +- WhatsApp: support `gifPlayback` for MP4 GIF sends via CLI/gateway. - Auto-reply: drop final payloads when block streaming to avoid duplicate Discord sends. - Bash tool: default auto-background delay to 10s. - Telegram: chunk block-stream replies to avoid “message is too long” errors (#124) — thanks @mukhtharcm. diff --git a/docs/images.md b/docs/images.md index d4e4b159d..0469c762b 100644 --- a/docs/images.md +++ b/docs/images.md @@ -24,6 +24,7 @@ CLAWDIS is now **web-only** (Baileys). This document captures the current media - **Images:** resize & recompress to JPEG (max side 2048px) targeting `agent.mediaMaxMb` (default 5 MB), capped at 6 MB. - **Audio/Voice/Video:** pass-through up to 16 MB; audio is sent as a voice note (`ptt: true`). - **Documents:** anything else, up to 100 MB, with filename preserved when available. +- WhatsApp GIF-style playback: send an MP4 with `gifPlayback: true` (CLI: `--gif-playback`) so mobile clients loop inline. - MIME detection prefers magic bytes, then headers, then file extension. - Caption comes from `--message` or `reply.text`; empty caption is allowed. - Logging: non-verbose shows `↩️`/`✅`; verbose includes size and source path/URL. diff --git a/docs/whatsapp.md b/docs/whatsapp.md index c4c09ab2f..4469cc1f9 100644 --- a/docs/whatsapp.md +++ b/docs/whatsapp.md @@ -92,6 +92,9 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number - Audio sent as PTT; `audio/ogg` => `audio/ogg; codecs=opus`. - Caption only on first media item. - Media fetch supports HTTP(S) and local paths. + - Animated GIFs: WhatsApp expects MP4 with `gifPlayback: true` for inline looping. + - CLI: `clawdis send --media --gif-playback` + - Gateway: `send` params include `gifPlayback: true` ## Media limits + optimization - Default cap: 5 MB (per media item). @@ -123,7 +126,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number - `agent.heartbeat.model` (optional override) - `agent.heartbeat.target` - `agent.heartbeat.to` -- `session.*` (scope, idle, store, mainKey) +- `session.*` (scope, idle, store; `mainKey` is ignored) - `web.enabled` (disable provider startup when false) - `web.heartbeatSeconds` - `web.reconnect.*` diff --git a/src/cli/gateway-cli.ts b/src/cli/gateway-cli.ts index d052cbb54..1e2e657ef 100644 --- a/src/cli/gateway-cli.ts +++ b/src/cli/gateway-cli.ts @@ -578,6 +578,7 @@ export function registerGatewayCli(program: Command) { .requiredOption("--to ", "Destination (E.164 or jid)") .requiredOption("--message ", "Message text") .option("--media-url ", "Optional media URL") + .option("--gif-playback", "Treat video media as GIF playback", false) .option("--idempotency-key ", "Idempotency key") .action(async (opts) => { try { @@ -586,6 +587,7 @@ export function registerGatewayCli(program: Command) { to: opts.to, message: opts.message, mediaUrl: opts.mediaUrl, + gifPlayback: opts.gifPlayback, idempotencyKey, }); defaultRuntime.log(JSON.stringify(result, null, 2)); diff --git a/src/cli/program.ts b/src/cli/program.ts index 8e28c8f22..bd44236bb 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -311,6 +311,11 @@ export function buildProgram() { "--media ", "Attach media (image/audio/video/document). Accepts local paths or URLs.", ) + .option( + "--gif-playback", + "Treat video media as GIF playback (WhatsApp only).", + false, + ) .option( "--provider ", "Delivery provider: whatsapp|telegram|discord|signal|imessage (default: whatsapp)", diff --git a/src/commands/send.ts b/src/commands/send.ts index fa431063e..f8ec0e4a1 100644 --- a/src/commands/send.ts +++ b/src/commands/send.ts @@ -13,6 +13,7 @@ export async function sendCommand( json?: boolean; dryRun?: boolean; media?: string; + gifPlayback?: boolean; }, deps: CliDeps, runtime: RuntimeEnv, @@ -144,6 +145,7 @@ export async function sendCommand( to: opts.to, message: opts.message, mediaUrl: opts.media, + gifPlayback: opts.gifPlayback, provider, idempotencyKey: randomIdempotencyKey(), }, diff --git a/src/gateway/protocol/schema.ts b/src/gateway/protocol/schema.ts index 78511df55..e37e7ce72 100644 --- a/src/gateway/protocol/schema.ts +++ b/src/gateway/protocol/schema.ts @@ -191,6 +191,7 @@ export const SendParamsSchema = Type.Object( to: NonEmptyString, message: NonEmptyString, mediaUrl: Type.Optional(Type.String()), + gifPlayback: Type.Optional(Type.Boolean()), provider: Type.Optional(Type.String()), idempotencyKey: NonEmptyString, }, diff --git a/src/gateway/server-methods.ts b/src/gateway/server-methods.ts index bf0b9bbf9..f432ce440 100644 --- a/src/gateway/server-methods.ts +++ b/src/gateway/server-methods.ts @@ -3,6 +3,12 @@ import fs from "node:fs"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js"; import type { ModelCatalogEntry } from "../agents/model-catalog.js"; +import { + abortEmbeddedPiRun, + isEmbeddedPiRunActive, + resolveEmbeddedSessionLane, + waitForEmbeddedPiRunEnd, +} from "../agents/pi-embedded.js"; import { buildAllowedModelSet, buildModelAliasIndex, @@ -35,6 +41,7 @@ import { import { buildConfigSchema } from "../config/schema.js"; import { loadSessionStore, + resolveMainSessionKey, resolveStorePath, type SessionEntry, saveSessionStore, @@ -75,6 +82,7 @@ import { } from "../infra/voicewake.js"; import { webAuthExists } from "../providers/web/index.js"; import { defaultRuntime } from "../runtime.js"; +import { clearCommandLane } from "../process/command-queue.js"; import { normalizeSendPolicy, resolveSendPolicy, @@ -1823,12 +1831,41 @@ export async function handleGatewayRequest( break; } + const mainKey = resolveMainSessionKey(loadConfig()); + if (key === mainKey) { + respond( + false, + undefined, + errorShape( + ErrorCodes.INVALID_REQUEST, + `Cannot delete the main session (${mainKey}).`, + ), + ); + break; + } + const deleteTranscript = typeof p.deleteTranscript === "boolean" ? p.deleteTranscript : true; const { storePath, store, entry } = loadSessionEntry(key); const sessionId = entry?.sessionId; const existed = Boolean(store[key]); + clearCommandLane(resolveEmbeddedSessionLane(key)); + if (sessionId && isEmbeddedPiRunActive(sessionId)) { + abortEmbeddedPiRun(sessionId); + const ended = await waitForEmbeddedPiRunEnd(sessionId, 15_000); + if (!ended) { + respond( + false, + undefined, + errorShape( + ErrorCodes.UNAVAILABLE, + `Session ${key} is still active; try again in a moment.`, + ), + ); + break; + } + } if (existed) delete store[key]; await saveSessionStore(storePath, store); @@ -2602,6 +2639,7 @@ export async function handleGatewayRequest( to: string; message: string; mediaUrl?: string; + gifPlayback?: boolean; provider?: string; idempotencyKey: string; }; @@ -2702,6 +2740,7 @@ export async function handleGatewayRequest( const result = await sendMessageWhatsApp(to, message, { mediaUrl: params.mediaUrl, verbose: shouldLogVerbose(), + gifPlayback: params.gifPlayback, }); const payload = { runId: idem, diff --git a/src/web/active-listener.ts b/src/web/active-listener.ts index 4425575db..bdcac6b85 100644 --- a/src/web/active-listener.ts +++ b/src/web/active-listener.ts @@ -1,9 +1,14 @@ +export type ActiveWebSendOptions = { + gifPlayback?: boolean; +}; + export type ActiveWebListener = { sendMessage: ( to: string, text: string, mediaBuffer?: Buffer, mediaType?: string, + options?: ActiveWebSendOptions, ) => Promise<{ messageId: string }>; sendComposingTo: (to: string) => Promise; close?: () => Promise; diff --git a/src/web/inbound.ts b/src/web/inbound.ts index b7f0eb0d2..c4e653c41 100644 --- a/src/web/inbound.ts +++ b/src/web/inbound.ts @@ -22,6 +22,7 @@ import { normalizeE164, toWhatsappJid, } from "../utils.js"; +import type { ActiveWebSendOptions } from "./active-listener.js"; import { createWaSocket, getStatusCode, @@ -380,6 +381,7 @@ export async function monitorWebInbox(options: { text: string, mediaBuffer?: Buffer, mediaType?: string, + options?: ActiveWebSendOptions, ): Promise<{ messageId: string }> => { const jid = toWhatsappJid(to); let payload: AnyMessageContent; @@ -397,10 +399,12 @@ export async function monitorWebInbox(options: { mimetype: mediaType, }; } else if (mediaType.startsWith("video/")) { + const gifPlayback = options?.gifPlayback; payload = { video: mediaBuffer, caption: text || undefined, mimetype: mediaType, + ...(gifPlayback ? { gifPlayback: true } : {}), }; } else { payload = { diff --git a/src/web/monitor-inbox.test.ts b/src/web/monitor-inbox.test.ts index e26ec3035..d8f3886ea 100644 --- a/src/web/monitor-inbox.test.ts +++ b/src/web/monitor-inbox.test.ts @@ -282,6 +282,26 @@ describe("web monitor inbox", () => { await listener.close(); }); + it("sets gifPlayback on outbound video payloads when requested", async () => { + const onMessage = vi.fn(); + const listener = await monitorWebInbox({ verbose: false, onMessage }); + const sock = await createWaSocket(); + const buf = Buffer.from("gifvid"); + + await listener.sendMessage("+1555", "gif", buf, "video/mp4", { + gifPlayback: true, + }); + + expect(sock.sendMessage).toHaveBeenCalledWith("1555@s.whatsapp.net", { + video: buf, + caption: "gif", + mimetype: "video/mp4", + gifPlayback: true, + }); + + await listener.close(); + }); + it("resolves onClose when the socket closes", async () => { const listener = await monitorWebInbox({ verbose: false, diff --git a/src/web/outbound.test.ts b/src/web/outbound.test.ts index d9bfd9b40..d36a51f66 100644 --- a/src/web/outbound.test.ts +++ b/src/web/outbound.test.ts @@ -78,6 +78,27 @@ describe("web outbound", () => { ); }); + it("marks gif playback for video when requested", async () => { + const buf = Buffer.from("gifvid"); + loadWebMediaMock.mockResolvedValueOnce({ + buffer: buf, + contentType: "video/mp4", + kind: "video", + }); + await sendMessageWhatsApp("+1555", "gif", { + verbose: false, + mediaUrl: "/tmp/anim.mp4", + gifPlayback: true, + }); + expect(sendMessage).toHaveBeenLastCalledWith( + "+1555", + "gif", + buf, + "video/mp4", + { gifPlayback: true }, + ); + }); + it("maps image with caption", async () => { const buf = Buffer.from("img"); loadWebMediaMock.mockResolvedValueOnce({ diff --git a/src/web/outbound.ts b/src/web/outbound.ts index 961bb9b8d..9008a5538 100644 --- a/src/web/outbound.ts +++ b/src/web/outbound.ts @@ -2,7 +2,10 @@ import { randomUUID } from "node:crypto"; import { createSubsystemLogger, getChildLogger } from "../logging.js"; import { toWhatsappJid } from "../utils.js"; -import { getActiveWebListener } from "./active-listener.js"; +import { + type ActiveWebSendOptions, + getActiveWebListener, +} from "./active-listener.js"; import { loadWebMedia } from "./media.js"; const outboundLog = createSubsystemLogger("gateway/providers/whatsapp").child( @@ -12,7 +15,7 @@ const outboundLog = createSubsystemLogger("gateway/providers/whatsapp").child( export async function sendMessageWhatsApp( to: string, body: string, - options: { verbose: boolean; mediaUrl?: string }, + options: { verbose: boolean; mediaUrl?: string; gifPlayback?: boolean }, ): Promise<{ messageId: string; toJid: string }> { let text = body; const correlationId = randomUUID(); @@ -60,7 +63,18 @@ export async function sendMessageWhatsApp( ); if (!active) throw new Error("Active web listener missing"); await active.sendComposingTo(to); - const result = await active.sendMessage(to, text, mediaBuffer, mediaType); + const sendOptions: ActiveWebSendOptions | undefined = options.gifPlayback + ? { gifPlayback: true } + : undefined; + const result = sendOptions + ? await active.sendMessage( + to, + text, + mediaBuffer, + mediaType, + sendOptions, + ) + : await active.sendMessage(to, text, mediaBuffer, mediaType); const messageId = (result as { messageId?: string })?.messageId ?? "unknown"; const durationMs = Date.now() - startedAt;