fix: add gif playback for WhatsApp sends

This commit is contained in:
Peter Steinberger
2026-01-03 23:56:36 +00:00
parent e1dd764504
commit e17c038d18
13 changed files with 122 additions and 4 deletions

View File

@@ -23,6 +23,7 @@
### Fixes
- CI: fix lint ordering after merge cleanup (#156) — thanks @steipete.
- CI: consolidate checks to avoid redundant installs (#144) — thanks @thewilloftheshadow.
- WhatsApp: support `gifPlayback` for MP4 GIF sends via CLI/gateway.
- Auto-reply: drop final payloads when block streaming to avoid duplicate Discord sends.
- Bash tool: default auto-background delay to 10s.
- Telegram: chunk block-stream replies to avoid “message is too long” errors (#124) — thanks @mukhtharcm.

View File

@@ -24,6 +24,7 @@ CLAWDIS is now **web-only** (Baileys). This document captures the current media
- **Images:** resize & recompress to JPEG (max side 2048px) targeting `agent.mediaMaxMb` (default 5MB), capped at 6MB.
- **Audio/Voice/Video:** pass-through up to 16MB; audio is sent as a voice note (`ptt: true`).
- **Documents:** anything else, up to 100MB, with filename preserved when available.
- WhatsApp GIF-style playback: send an MP4 with `gifPlayback: true` (CLI: `--gif-playback`) so mobile clients loop it inline.
- MIME detection prefers magic bytes, then headers, then file extension.
- Caption comes from `--message` or `reply.text`; empty caption is allowed.
- Logging: non-verbose shows `↩️`/`✅`; verbose includes size and source path/URL.

View File

@@ -92,6 +92,9 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
- Audio sent as PTT; `audio/ogg` => `audio/ogg; codecs=opus`.
- Caption only on first media item.
- Media fetch supports HTTP(S) and local paths.
- Animated GIFs: WhatsApp expects MP4 with `gifPlayback: true` for inline looping.
- CLI: `clawdis send --media <mp4> --gif-playback`
- Gateway: `send` params include `gifPlayback: true`
## Media limits + optimization
- Default cap: 5 MB (per media item).
@@ -123,7 +126,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
- `agent.heartbeat.model` (optional override)
- `agent.heartbeat.target`
- `agent.heartbeat.to`
- `session.*` (scope, idle, store, mainKey)
- `session.*` (scope, idle, store; `mainKey` is ignored)
- `web.enabled` (disable provider startup when false)
- `web.heartbeatSeconds`
- `web.reconnect.*`

View File

@@ -578,6 +578,7 @@ export function registerGatewayCli(program: Command) {
.requiredOption("--to <jidOrPhone>", "Destination (E.164 or jid)")
.requiredOption("--message <text>", "Message text")
.option("--media-url <url>", "Optional media URL")
.option("--gif-playback", "Treat video media as GIF playback", false)
.option("--idempotency-key <key>", "Idempotency key")
.action(async (opts) => {
try {
@@ -586,6 +587,7 @@ export function registerGatewayCli(program: Command) {
to: opts.to,
message: opts.message,
mediaUrl: opts.mediaUrl,
gifPlayback: opts.gifPlayback,
idempotencyKey,
});
defaultRuntime.log(JSON.stringify(result, null, 2));

View File

@@ -311,6 +311,11 @@ export function buildProgram() {
"--media <path-or-url>",
"Attach media (image/audio/video/document). Accepts local paths or URLs.",
)
.option(
"--gif-playback",
"Treat video media as GIF playback (WhatsApp only).",
false,
)
.option(
"--provider <provider>",
"Delivery provider: whatsapp|telegram|discord|signal|imessage (default: whatsapp)",

View File

@@ -13,6 +13,7 @@ export async function sendCommand(
json?: boolean;
dryRun?: boolean;
media?: string;
gifPlayback?: boolean;
},
deps: CliDeps,
runtime: RuntimeEnv,
@@ -144,6 +145,7 @@ export async function sendCommand(
to: opts.to,
message: opts.message,
mediaUrl: opts.media,
gifPlayback: opts.gifPlayback,
provider,
idempotencyKey: randomIdempotencyKey(),
},

View File

@@ -191,6 +191,7 @@ export const SendParamsSchema = Type.Object(
to: NonEmptyString,
message: NonEmptyString,
mediaUrl: Type.Optional(Type.String()),
gifPlayback: Type.Optional(Type.Boolean()),
provider: Type.Optional(Type.String()),
idempotencyKey: NonEmptyString,
},

View File

@@ -3,6 +3,12 @@ import fs from "node:fs";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
import type { ModelCatalogEntry } from "../agents/model-catalog.js";
import {
abortEmbeddedPiRun,
isEmbeddedPiRunActive,
resolveEmbeddedSessionLane,
waitForEmbeddedPiRunEnd,
} from "../agents/pi-embedded.js";
import {
buildAllowedModelSet,
buildModelAliasIndex,
@@ -35,6 +41,7 @@ import {
import { buildConfigSchema } from "../config/schema.js";
import {
loadSessionStore,
resolveMainSessionKey,
resolveStorePath,
type SessionEntry,
saveSessionStore,
@@ -75,6 +82,7 @@ import {
} from "../infra/voicewake.js";
import { webAuthExists } from "../providers/web/index.js";
import { defaultRuntime } from "../runtime.js";
import { clearCommandLane } from "../process/command-queue.js";
import {
normalizeSendPolicy,
resolveSendPolicy,
@@ -1823,12 +1831,41 @@ export async function handleGatewayRequest(
break;
}
const mainKey = resolveMainSessionKey(loadConfig());
if (key === mainKey) {
respond(
false,
undefined,
errorShape(
ErrorCodes.INVALID_REQUEST,
`Cannot delete the main session (${mainKey}).`,
),
);
break;
}
const deleteTranscript =
typeof p.deleteTranscript === "boolean" ? p.deleteTranscript : true;
const { storePath, store, entry } = loadSessionEntry(key);
const sessionId = entry?.sessionId;
const existed = Boolean(store[key]);
clearCommandLane(resolveEmbeddedSessionLane(key));
if (sessionId && isEmbeddedPiRunActive(sessionId)) {
abortEmbeddedPiRun(sessionId);
const ended = await waitForEmbeddedPiRunEnd(sessionId, 15_000);
if (!ended) {
respond(
false,
undefined,
errorShape(
ErrorCodes.UNAVAILABLE,
`Session ${key} is still active; try again in a moment.`,
),
);
break;
}
}
if (existed) delete store[key];
await saveSessionStore(storePath, store);
@@ -2602,6 +2639,7 @@ export async function handleGatewayRequest(
to: string;
message: string;
mediaUrl?: string;
gifPlayback?: boolean;
provider?: string;
idempotencyKey: string;
};
@@ -2702,6 +2740,7 @@ export async function handleGatewayRequest(
const result = await sendMessageWhatsApp(to, message, {
mediaUrl: params.mediaUrl,
verbose: shouldLogVerbose(),
gifPlayback: params.gifPlayback,
});
const payload = {
runId: idem,

View File

@@ -1,9 +1,14 @@
/**
 * Optional per-message settings passed as the trailing `options` argument of
 * `ActiveWebListener.sendMessage`.
 */
export type ActiveWebSendOptions = {
/**
 * Request GIF-style playback for video media. The web inbox listener adds
 * `gifPlayback: true` to the outbound video payload when this is truthy.
 */
gifPlayback?: boolean;
};
export type ActiveWebListener = {
sendMessage: (
to: string,
text: string,
mediaBuffer?: Buffer,
mediaType?: string,
options?: ActiveWebSendOptions,
) => Promise<{ messageId: string }>;
sendComposingTo: (to: string) => Promise<void>;
close?: () => Promise<void>;

View File

@@ -22,6 +22,7 @@ import {
normalizeE164,
toWhatsappJid,
} from "../utils.js";
import type { ActiveWebSendOptions } from "./active-listener.js";
import {
createWaSocket,
getStatusCode,
@@ -380,6 +381,7 @@ export async function monitorWebInbox(options: {
text: string,
mediaBuffer?: Buffer,
mediaType?: string,
options?: ActiveWebSendOptions,
): Promise<{ messageId: string }> => {
const jid = toWhatsappJid(to);
let payload: AnyMessageContent;
@@ -397,10 +399,12 @@ export async function monitorWebInbox(options: {
mimetype: mediaType,
};
} else if (mediaType.startsWith("video/")) {
const gifPlayback = options?.gifPlayback;
payload = {
video: mediaBuffer,
caption: text || undefined,
mimetype: mediaType,
...(gifPlayback ? { gifPlayback: true } : {}),
};
} else {
payload = {

View File

@@ -282,6 +282,26 @@ describe("web monitor inbox", () => {
await listener.close();
});
it("sets gifPlayback on outbound video payloads when requested", async () => {
  // Start the inbox monitor first, then obtain the socket whose sendMessage is asserted on.
  const messageHandler = vi.fn();
  const inbox = await monitorWebInbox({
    verbose: false,
    onMessage: messageHandler,
  });
  const socket = await createWaSocket();
  const videoBytes = Buffer.from("gifvid");
  const sendOptions = { gifPlayback: true };

  await inbox.sendMessage("+1555", "gif", videoBytes, "video/mp4", sendOptions);

  // The video payload must carry the flag alongside the caption and mimetype.
  const expectedPayload = {
    video: videoBytes,
    caption: "gif",
    mimetype: "video/mp4",
    gifPlayback: true,
  };
  expect(socket.sendMessage).toHaveBeenCalledWith(
    "1555@s.whatsapp.net",
    expectedPayload,
  );

  await inbox.close();
});
it("resolves onClose when the socket closes", async () => {
const listener = await monitorWebInbox({
verbose: false,

View File

@@ -78,6 +78,27 @@ describe("web outbound", () => {
);
});
it("marks gif playback for video when requested", async () => {
  const videoBytes = Buffer.from("gifvid");
  const mimeType = "video/mp4";

  // The next media load resolves to an in-memory MP4 classified as video.
  loadWebMediaMock.mockResolvedValueOnce({
    buffer: videoBytes,
    contentType: mimeType,
    kind: "video",
  });

  const sendOptions = {
    verbose: false,
    mediaUrl: "/tmp/anim.mp4",
    gifPlayback: true,
  };
  await sendMessageWhatsApp("+1555", "gif", sendOptions);

  // Only the gifPlayback flag is forwarded to the listener as the fifth argument.
  expect(sendMessage).toHaveBeenLastCalledWith(
    "+1555",
    "gif",
    videoBytes,
    mimeType,
    { gifPlayback: true },
  );
});
it("maps image with caption", async () => {
const buf = Buffer.from("img");
loadWebMediaMock.mockResolvedValueOnce({

View File

@@ -2,7 +2,10 @@ import { randomUUID } from "node:crypto";
import { createSubsystemLogger, getChildLogger } from "../logging.js";
import { toWhatsappJid } from "../utils.js";
import { getActiveWebListener } from "./active-listener.js";
import {
type ActiveWebSendOptions,
getActiveWebListener,
} from "./active-listener.js";
import { loadWebMedia } from "./media.js";
const outboundLog = createSubsystemLogger("gateway/providers/whatsapp").child(
@@ -12,7 +15,7 @@ const outboundLog = createSubsystemLogger("gateway/providers/whatsapp").child(
export async function sendMessageWhatsApp(
to: string,
body: string,
options: { verbose: boolean; mediaUrl?: string },
options: { verbose: boolean; mediaUrl?: string; gifPlayback?: boolean },
): Promise<{ messageId: string; toJid: string }> {
let text = body;
const correlationId = randomUUID();
@@ -60,7 +63,18 @@ export async function sendMessageWhatsApp(
);
if (!active) throw new Error("Active web listener missing");
await active.sendComposingTo(to);
const result = await active.sendMessage(to, text, mediaBuffer, mediaType);
const sendOptions: ActiveWebSendOptions | undefined = options.gifPlayback
? { gifPlayback: true }
: undefined;
const result = sendOptions
? await active.sendMessage(
to,
text,
mediaBuffer,
mediaType,
sendOptions,
)
: await active.sendMessage(to, text, mediaBuffer, mediaType);
const messageId =
(result as { messageId?: string })?.messageId ?? "unknown";
const durationMs = Date.now() - startedAt;