fix: add gif playback for WhatsApp sends
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
### Fixes
|
||||
- CI: fix lint ordering after merge cleanup (#156) — thanks @steipete.
|
||||
- CI: consolidate checks to avoid redundant installs (#144) — thanks @thewilloftheshadow.
|
||||
- WhatsApp: support `gifPlayback` for MP4 GIF sends via CLI/gateway.
|
||||
- Auto-reply: drop final payloads when block streaming to avoid duplicate Discord sends.
|
||||
- Bash tool: default auto-background delay to 10s.
|
||||
- Telegram: chunk block-stream replies to avoid “message is too long” errors (#124) — thanks @mukhtharcm.
|
||||
|
||||
@@ -24,6 +24,7 @@ CLAWDIS is now **web-only** (Baileys). This document captures the current media
|
||||
- **Images:** resize & recompress to JPEG (max side 2048px) targeting `agent.mediaMaxMb` (default 5 MB), capped at 6 MB.
|
||||
- **Audio/Voice/Video:** pass-through up to 16 MB; audio is sent as a voice note (`ptt: true`).
|
||||
- **Documents:** anything else, up to 100 MB, with filename preserved when available.
|
||||
- WhatsApp GIF-style playback: send an MP4 with `gifPlayback: true` (CLI: `--gif-playback`) so mobile clients loop it inline.
|
||||
- MIME detection prefers magic bytes, then headers, then file extension.
|
||||
- Caption comes from `--message` or `reply.text`; empty caption is allowed.
|
||||
- Logging: non-verbose shows `↩️`/`✅`; verbose includes size and source path/URL.
|
||||
|
||||
@@ -92,6 +92,9 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
|
||||
- Audio sent as PTT; `audio/ogg` => `audio/ogg; codecs=opus`.
|
||||
- Caption only on first media item.
|
||||
- Media fetch supports HTTP(S) and local paths.
|
||||
- Animated GIFs: WhatsApp expects an MP4 sent with `gifPlayback: true` for inline looping.
|
||||
- CLI: `clawdis send --media <mp4> --gif-playback`
|
||||
- Gateway: `send` params include `gifPlayback: true`
|
||||
|
||||
## Media limits + optimization
|
||||
- Default cap: 5 MB (per media item).
|
||||
@@ -123,7 +126,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
|
||||
- `agent.heartbeat.model` (optional override)
|
||||
- `agent.heartbeat.target`
|
||||
- `agent.heartbeat.to`
|
||||
- `session.*` (scope, idle, store, mainKey)
|
||||
- `session.*` (scope, idle, store; `mainKey` is ignored)
|
||||
- `web.enabled` (disable provider startup when false)
|
||||
- `web.heartbeatSeconds`
|
||||
- `web.reconnect.*`
|
||||
|
||||
@@ -578,6 +578,7 @@ export function registerGatewayCli(program: Command) {
|
||||
.requiredOption("--to <jidOrPhone>", "Destination (E.164 or jid)")
|
||||
.requiredOption("--message <text>", "Message text")
|
||||
.option("--media-url <url>", "Optional media URL")
|
||||
.option("--gif-playback", "Treat video media as GIF playback", false)
|
||||
.option("--idempotency-key <key>", "Idempotency key")
|
||||
.action(async (opts) => {
|
||||
try {
|
||||
@@ -586,6 +587,7 @@ export function registerGatewayCli(program: Command) {
|
||||
to: opts.to,
|
||||
message: opts.message,
|
||||
mediaUrl: opts.mediaUrl,
|
||||
gifPlayback: opts.gifPlayback,
|
||||
idempotencyKey,
|
||||
});
|
||||
defaultRuntime.log(JSON.stringify(result, null, 2));
|
||||
|
||||
@@ -311,6 +311,11 @@ export function buildProgram() {
|
||||
"--media <path-or-url>",
|
||||
"Attach media (image/audio/video/document). Accepts local paths or URLs.",
|
||||
)
|
||||
.option(
|
||||
"--gif-playback",
|
||||
"Treat video media as GIF playback (WhatsApp only).",
|
||||
false,
|
||||
)
|
||||
.option(
|
||||
"--provider <provider>",
|
||||
"Delivery provider: whatsapp|telegram|discord|signal|imessage (default: whatsapp)",
|
||||
|
||||
@@ -13,6 +13,7 @@ export async function sendCommand(
|
||||
json?: boolean;
|
||||
dryRun?: boolean;
|
||||
media?: string;
|
||||
gifPlayback?: boolean;
|
||||
},
|
||||
deps: CliDeps,
|
||||
runtime: RuntimeEnv,
|
||||
@@ -144,6 +145,7 @@ export async function sendCommand(
|
||||
to: opts.to,
|
||||
message: opts.message,
|
||||
mediaUrl: opts.media,
|
||||
gifPlayback: opts.gifPlayback,
|
||||
provider,
|
||||
idempotencyKey: randomIdempotencyKey(),
|
||||
},
|
||||
|
||||
@@ -191,6 +191,7 @@ export const SendParamsSchema = Type.Object(
|
||||
to: NonEmptyString,
|
||||
message: NonEmptyString,
|
||||
mediaUrl: Type.Optional(Type.String()),
|
||||
gifPlayback: Type.Optional(Type.Boolean()),
|
||||
provider: Type.Optional(Type.String()),
|
||||
idempotencyKey: NonEmptyString,
|
||||
},
|
||||
|
||||
@@ -3,6 +3,12 @@ import fs from "node:fs";
|
||||
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
|
||||
import type { ModelCatalogEntry } from "../agents/model-catalog.js";
|
||||
import {
|
||||
abortEmbeddedPiRun,
|
||||
isEmbeddedPiRunActive,
|
||||
resolveEmbeddedSessionLane,
|
||||
waitForEmbeddedPiRunEnd,
|
||||
} from "../agents/pi-embedded.js";
|
||||
import {
|
||||
buildAllowedModelSet,
|
||||
buildModelAliasIndex,
|
||||
@@ -35,6 +41,7 @@ import {
|
||||
import { buildConfigSchema } from "../config/schema.js";
|
||||
import {
|
||||
loadSessionStore,
|
||||
resolveMainSessionKey,
|
||||
resolveStorePath,
|
||||
type SessionEntry,
|
||||
saveSessionStore,
|
||||
@@ -75,6 +82,7 @@ import {
|
||||
} from "../infra/voicewake.js";
|
||||
import { webAuthExists } from "../providers/web/index.js";
|
||||
import { defaultRuntime } from "../runtime.js";
|
||||
import { clearCommandLane } from "../process/command-queue.js";
|
||||
import {
|
||||
normalizeSendPolicy,
|
||||
resolveSendPolicy,
|
||||
@@ -1823,12 +1831,41 @@ export async function handleGatewayRequest(
|
||||
break;
|
||||
}
|
||||
|
||||
const mainKey = resolveMainSessionKey(loadConfig());
|
||||
if (key === mainKey) {
|
||||
respond(
|
||||
false,
|
||||
undefined,
|
||||
errorShape(
|
||||
ErrorCodes.INVALID_REQUEST,
|
||||
`Cannot delete the main session (${mainKey}).`,
|
||||
),
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
const deleteTranscript =
|
||||
typeof p.deleteTranscript === "boolean" ? p.deleteTranscript : true;
|
||||
|
||||
const { storePath, store, entry } = loadSessionEntry(key);
|
||||
const sessionId = entry?.sessionId;
|
||||
const existed = Boolean(store[key]);
|
||||
clearCommandLane(resolveEmbeddedSessionLane(key));
|
||||
if (sessionId && isEmbeddedPiRunActive(sessionId)) {
|
||||
abortEmbeddedPiRun(sessionId);
|
||||
const ended = await waitForEmbeddedPiRunEnd(sessionId, 15_000);
|
||||
if (!ended) {
|
||||
respond(
|
||||
false,
|
||||
undefined,
|
||||
errorShape(
|
||||
ErrorCodes.UNAVAILABLE,
|
||||
`Session ${key} is still active; try again in a moment.`,
|
||||
),
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (existed) delete store[key];
|
||||
await saveSessionStore(storePath, store);
|
||||
|
||||
@@ -2602,6 +2639,7 @@ export async function handleGatewayRequest(
|
||||
to: string;
|
||||
message: string;
|
||||
mediaUrl?: string;
|
||||
gifPlayback?: boolean;
|
||||
provider?: string;
|
||||
idempotencyKey: string;
|
||||
};
|
||||
@@ -2702,6 +2740,7 @@ export async function handleGatewayRequest(
|
||||
const result = await sendMessageWhatsApp(to, message, {
|
||||
mediaUrl: params.mediaUrl,
|
||||
verbose: shouldLogVerbose(),
|
||||
gifPlayback: params.gifPlayback,
|
||||
});
|
||||
const payload = {
|
||||
runId: idem,
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
export type ActiveWebSendOptions = {
|
||||
gifPlayback?: boolean;
|
||||
};
|
||||
|
||||
export type ActiveWebListener = {
|
||||
sendMessage: (
|
||||
to: string,
|
||||
text: string,
|
||||
mediaBuffer?: Buffer,
|
||||
mediaType?: string,
|
||||
options?: ActiveWebSendOptions,
|
||||
) => Promise<{ messageId: string }>;
|
||||
sendComposingTo: (to: string) => Promise<void>;
|
||||
close?: () => Promise<void>;
|
||||
|
||||
@@ -22,6 +22,7 @@ import {
|
||||
normalizeE164,
|
||||
toWhatsappJid,
|
||||
} from "../utils.js";
|
||||
import type { ActiveWebSendOptions } from "./active-listener.js";
|
||||
import {
|
||||
createWaSocket,
|
||||
getStatusCode,
|
||||
@@ -380,6 +381,7 @@ export async function monitorWebInbox(options: {
|
||||
text: string,
|
||||
mediaBuffer?: Buffer,
|
||||
mediaType?: string,
|
||||
options?: ActiveWebSendOptions,
|
||||
): Promise<{ messageId: string }> => {
|
||||
const jid = toWhatsappJid(to);
|
||||
let payload: AnyMessageContent;
|
||||
@@ -397,10 +399,12 @@ export async function monitorWebInbox(options: {
|
||||
mimetype: mediaType,
|
||||
};
|
||||
} else if (mediaType.startsWith("video/")) {
|
||||
const gifPlayback = options?.gifPlayback;
|
||||
payload = {
|
||||
video: mediaBuffer,
|
||||
caption: text || undefined,
|
||||
mimetype: mediaType,
|
||||
...(gifPlayback ? { gifPlayback: true } : {}),
|
||||
};
|
||||
} else {
|
||||
payload = {
|
||||
|
||||
@@ -282,6 +282,26 @@ describe("web monitor inbox", () => {
|
||||
await listener.close();
|
||||
});
|
||||
|
||||
it("sets gifPlayback on outbound video payloads when requested", async () => {
|
||||
const onMessage = vi.fn();
|
||||
const listener = await monitorWebInbox({ verbose: false, onMessage });
|
||||
const sock = await createWaSocket();
|
||||
const buf = Buffer.from("gifvid");
|
||||
|
||||
await listener.sendMessage("+1555", "gif", buf, "video/mp4", {
|
||||
gifPlayback: true,
|
||||
});
|
||||
|
||||
expect(sock.sendMessage).toHaveBeenCalledWith("1555@s.whatsapp.net", {
|
||||
video: buf,
|
||||
caption: "gif",
|
||||
mimetype: "video/mp4",
|
||||
gifPlayback: true,
|
||||
});
|
||||
|
||||
await listener.close();
|
||||
});
|
||||
|
||||
it("resolves onClose when the socket closes", async () => {
|
||||
const listener = await monitorWebInbox({
|
||||
verbose: false,
|
||||
|
||||
@@ -78,6 +78,27 @@ describe("web outbound", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("marks gif playback for video when requested", async () => {
|
||||
const buf = Buffer.from("gifvid");
|
||||
loadWebMediaMock.mockResolvedValueOnce({
|
||||
buffer: buf,
|
||||
contentType: "video/mp4",
|
||||
kind: "video",
|
||||
});
|
||||
await sendMessageWhatsApp("+1555", "gif", {
|
||||
verbose: false,
|
||||
mediaUrl: "/tmp/anim.mp4",
|
||||
gifPlayback: true,
|
||||
});
|
||||
expect(sendMessage).toHaveBeenLastCalledWith(
|
||||
"+1555",
|
||||
"gif",
|
||||
buf,
|
||||
"video/mp4",
|
||||
{ gifPlayback: true },
|
||||
);
|
||||
});
|
||||
|
||||
it("maps image with caption", async () => {
|
||||
const buf = Buffer.from("img");
|
||||
loadWebMediaMock.mockResolvedValueOnce({
|
||||
|
||||
@@ -2,7 +2,10 @@ import { randomUUID } from "node:crypto";
|
||||
|
||||
import { createSubsystemLogger, getChildLogger } from "../logging.js";
|
||||
import { toWhatsappJid } from "../utils.js";
|
||||
import { getActiveWebListener } from "./active-listener.js";
|
||||
import {
|
||||
type ActiveWebSendOptions,
|
||||
getActiveWebListener,
|
||||
} from "./active-listener.js";
|
||||
import { loadWebMedia } from "./media.js";
|
||||
|
||||
const outboundLog = createSubsystemLogger("gateway/providers/whatsapp").child(
|
||||
@@ -12,7 +15,7 @@ const outboundLog = createSubsystemLogger("gateway/providers/whatsapp").child(
|
||||
export async function sendMessageWhatsApp(
|
||||
to: string,
|
||||
body: string,
|
||||
options: { verbose: boolean; mediaUrl?: string },
|
||||
options: { verbose: boolean; mediaUrl?: string; gifPlayback?: boolean },
|
||||
): Promise<{ messageId: string; toJid: string }> {
|
||||
let text = body;
|
||||
const correlationId = randomUUID();
|
||||
@@ -60,7 +63,18 @@ export async function sendMessageWhatsApp(
|
||||
);
|
||||
if (!active) throw new Error("Active web listener missing");
|
||||
await active.sendComposingTo(to);
|
||||
const result = await active.sendMessage(to, text, mediaBuffer, mediaType);
|
||||
const sendOptions: ActiveWebSendOptions | undefined = options.gifPlayback
|
||||
? { gifPlayback: true }
|
||||
: undefined;
|
||||
const result = sendOptions
|
||||
? await active.sendMessage(
|
||||
to,
|
||||
text,
|
||||
mediaBuffer,
|
||||
mediaType,
|
||||
sendOptions,
|
||||
)
|
||||
: await active.sendMessage(to, text, mediaBuffer, mediaType);
|
||||
const messageId =
|
||||
(result as { messageId?: string })?.messageId ?? "unknown";
|
||||
const durationMs = Date.now() - startedAt;
|
||||
|
||||
Reference in New Issue
Block a user