refactor: consolidate reply/media helpers
This commit is contained in:
@@ -22,6 +22,7 @@ import {
|
|||||||
emitAgentEvent,
|
emitAgentEvent,
|
||||||
registerAgentRunContext,
|
registerAgentRunContext,
|
||||||
} from "../../infra/agent-events.js";
|
} from "../../infra/agent-events.js";
|
||||||
|
import { isAudioFileName } from "../../media/mime.js";
|
||||||
import { defaultRuntime } from "../../runtime.js";
|
import { defaultRuntime } from "../../runtime.js";
|
||||||
import {
|
import {
|
||||||
estimateUsageCost,
|
estimateUsageCost,
|
||||||
@@ -34,8 +35,11 @@ import type { OriginatingChannelType, TemplateContext } from "../templating.js";
|
|||||||
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
|
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
|
||||||
import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
|
import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
|
||||||
import type { GetReplyOptions, ReplyPayload } from "../types.js";
|
import type { GetReplyOptions, ReplyPayload } from "../types.js";
|
||||||
import { extractAudioTag } from "./audio-tags.js";
|
import { parseAudioTag } from "./audio-tags.js";
|
||||||
import { createBlockReplyPipeline } from "./block-reply-pipeline.js";
|
import {
|
||||||
|
createAudioAsVoiceBuffer,
|
||||||
|
createBlockReplyPipeline,
|
||||||
|
} from "./block-reply-pipeline.js";
|
||||||
import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
|
import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
|
||||||
import { createFollowupRunner } from "./followup-runner.js";
|
import { createFollowupRunner } from "./followup-runner.js";
|
||||||
import {
|
import {
|
||||||
@@ -261,13 +265,12 @@ export async function runReplyAgent(params: {
|
|||||||
const blockReplyTimeoutMs =
|
const blockReplyTimeoutMs =
|
||||||
opts?.blockReplyTimeoutMs ?? BLOCK_REPLY_SEND_TIMEOUT_MS;
|
opts?.blockReplyTimeoutMs ?? BLOCK_REPLY_SEND_TIMEOUT_MS;
|
||||||
|
|
||||||
// Buffer audio blocks to apply [[audio_as_voice]] tag that may come later
|
|
||||||
const bufferedAudioBlocks: ReplyPayload[] = [];
|
|
||||||
let seenAudioAsVoice = false;
|
|
||||||
|
|
||||||
const AUDIO_EXTENSIONS = /\.(opus|mp3|m4a|wav|ogg|aac|flac)$/i;
|
|
||||||
const hasAudioMedia = (urls?: string[]): boolean =>
|
const hasAudioMedia = (urls?: string[]): boolean =>
|
||||||
Boolean(urls?.some((u) => AUDIO_EXTENSIONS.test(u)));
|
Boolean(urls?.some((u) => isAudioFileName(u)));
|
||||||
|
const isAudioPayload = (payload: ReplyPayload) =>
|
||||||
|
hasAudioMedia(
|
||||||
|
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : undefined),
|
||||||
|
);
|
||||||
const replyToChannel =
|
const replyToChannel =
|
||||||
sessionCtx.OriginatingChannel ??
|
sessionCtx.OriginatingChannel ??
|
||||||
((sessionCtx.Surface ?? sessionCtx.Provider)?.toLowerCase() as
|
((sessionCtx.Surface ?? sessionCtx.Provider)?.toLowerCase() as
|
||||||
@@ -297,6 +300,7 @@ export async function runReplyAgent(params: {
|
|||||||
onBlockReply: opts.onBlockReply,
|
onBlockReply: opts.onBlockReply,
|
||||||
timeoutMs: blockReplyTimeoutMs,
|
timeoutMs: blockReplyTimeoutMs,
|
||||||
coalescing: blockReplyCoalescing,
|
coalescing: blockReplyCoalescing,
|
||||||
|
buffer: createAudioAsVoiceBuffer({ isAudioPayload }),
|
||||||
})
|
})
|
||||||
: null;
|
: null;
|
||||||
|
|
||||||
@@ -546,8 +550,8 @@ export async function runReplyAgent(params: {
|
|||||||
!payload.audioAsVoice
|
!payload.audioAsVoice
|
||||||
)
|
)
|
||||||
return;
|
return;
|
||||||
const audioTagResult = extractAudioTag(taggedPayload.text);
|
const audioTagResult = parseAudioTag(taggedPayload.text);
|
||||||
const cleaned = audioTagResult.cleaned || undefined;
|
const cleaned = audioTagResult.text || undefined;
|
||||||
const hasMedia =
|
const hasMedia =
|
||||||
Boolean(taggedPayload.mediaUrl) ||
|
Boolean(taggedPayload.mediaUrl) ||
|
||||||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
|
(taggedPayload.mediaUrls?.length ?? 0) > 0;
|
||||||
@@ -559,11 +563,6 @@ export async function runReplyAgent(params: {
|
|||||||
)
|
)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Track if we've seen [[audio_as_voice]] from payload or text extraction
|
|
||||||
if (payload.audioAsVoice || audioTagResult.audioAsVoice) {
|
|
||||||
seenAudioAsVoice = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const blockPayload: ReplyPayload = applyReplyToMode({
|
const blockPayload: ReplyPayload = applyReplyToMode({
|
||||||
...taggedPayload,
|
...taggedPayload,
|
||||||
text: cleaned,
|
text: cleaned,
|
||||||
@@ -579,13 +578,6 @@ export async function runReplyAgent(params: {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Buffer audio blocks to apply [[audio_as_voice]] that may come later
|
|
||||||
const isAudioBlock = hasAudioMedia(taggedPayload.mediaUrls);
|
|
||||||
if (isAudioBlock) {
|
|
||||||
bufferedAudioBlocks.push(blockPayload);
|
|
||||||
return; // Don't send immediately - wait for potential [[audio_as_voice]] tag
|
|
||||||
}
|
|
||||||
|
|
||||||
blockReplyPipeline?.enqueue(blockPayload);
|
blockReplyPipeline?.enqueue(blockPayload);
|
||||||
}
|
}
|
||||||
: undefined,
|
: undefined,
|
||||||
@@ -701,16 +693,6 @@ export async function runReplyAgent(params: {
|
|||||||
|
|
||||||
const payloadArray = runResult.payloads ?? [];
|
const payloadArray = runResult.payloads ?? [];
|
||||||
|
|
||||||
if (bufferedAudioBlocks.length > 0 && blockReplyPipeline) {
|
|
||||||
for (const audioPayload of bufferedAudioBlocks) {
|
|
||||||
const finalPayload = seenAudioAsVoice
|
|
||||||
? { ...audioPayload, audioAsVoice: true }
|
|
||||||
: audioPayload;
|
|
||||||
blockReplyPipeline.enqueue(finalPayload);
|
|
||||||
}
|
|
||||||
bufferedAudioBlocks.length = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (blockReplyPipeline) {
|
if (blockReplyPipeline) {
|
||||||
await blockReplyPipeline.flush({ force: true });
|
await blockReplyPipeline.flush({ force: true });
|
||||||
blockReplyPipeline.stop();
|
blockReplyPipeline.stop();
|
||||||
@@ -753,10 +735,10 @@ export async function runReplyAgent(params: {
|
|||||||
currentMessageId: sessionCtx.MessageSid,
|
currentMessageId: sessionCtx.MessageSid,
|
||||||
})
|
})
|
||||||
.map((payload) => {
|
.map((payload) => {
|
||||||
const audioTagResult = extractAudioTag(payload.text);
|
const audioTagResult = parseAudioTag(payload.text);
|
||||||
return {
|
return {
|
||||||
...payload,
|
...payload,
|
||||||
text: audioTagResult.cleaned ? audioTagResult.cleaned : undefined,
|
text: audioTagResult.text ? audioTagResult.text : undefined,
|
||||||
audioAsVoice: audioTagResult.audioAsVoice,
|
audioAsVoice: audioTagResult.audioAsVoice,
|
||||||
};
|
};
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,25 +1,25 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
import { extractAudioTag } from "./audio-tags.js";
|
import { parseAudioTag } from "./audio-tags.js";
|
||||||
|
|
||||||
describe("extractAudioTag", () => {
|
describe("parseAudioTag", () => {
|
||||||
it("detects audio_as_voice and strips the tag", () => {
|
it("detects audio_as_voice and strips the tag", () => {
|
||||||
const result = extractAudioTag("Hello [[audio_as_voice]] world");
|
const result = parseAudioTag("Hello [[audio_as_voice]] world");
|
||||||
expect(result.audioAsVoice).toBe(true);
|
expect(result.audioAsVoice).toBe(true);
|
||||||
expect(result.hasTag).toBe(true);
|
expect(result.hadTag).toBe(true);
|
||||||
expect(result.cleaned).toBe("Hello world");
|
expect(result.text).toBe("Hello world");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns empty output for missing text", () => {
|
it("returns empty output for missing text", () => {
|
||||||
const result = extractAudioTag(undefined);
|
const result = parseAudioTag(undefined);
|
||||||
expect(result.audioAsVoice).toBe(false);
|
expect(result.audioAsVoice).toBe(false);
|
||||||
expect(result.hasTag).toBe(false);
|
expect(result.hadTag).toBe(false);
|
||||||
expect(result.cleaned).toBe("");
|
expect(result.text).toBe("");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("removes tag-only messages", () => {
|
it("removes tag-only messages", () => {
|
||||||
const result = extractAudioTag("[[audio_as_voice]]");
|
const result = parseAudioTag("[[audio_as_voice]]");
|
||||||
expect(result.audioAsVoice).toBe(true);
|
expect(result.audioAsVoice).toBe(true);
|
||||||
expect(result.cleaned).toBe("");
|
expect(result.text).toBe("");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,31 +1 @@
|
|||||||
/**
|
export { parseAudioTag } from "../../media/audio-tags.js";
|
||||||
* Extract audio mode tag from text.
|
|
||||||
* Supports [[audio_as_voice]] to send audio as voice bubble instead of file.
|
|
||||||
* Default is file (preserves backward compatibility).
|
|
||||||
*/
|
|
||||||
export function extractAudioTag(text?: string): {
|
|
||||||
cleaned: string;
|
|
||||||
audioAsVoice: boolean;
|
|
||||||
hasTag: boolean;
|
|
||||||
} {
|
|
||||||
if (!text) return { cleaned: "", audioAsVoice: false, hasTag: false };
|
|
||||||
let cleaned = text;
|
|
||||||
let audioAsVoice = false; // default: audio file (backward compatible)
|
|
||||||
let hasTag = false;
|
|
||||||
|
|
||||||
// [[audio_as_voice]] -> send as voice bubble (opt-in)
|
|
||||||
const voiceMatch = cleaned.match(/\[\[audio_as_voice\]\]/i);
|
|
||||||
if (voiceMatch) {
|
|
||||||
cleaned = cleaned.replace(/\[\[audio_as_voice\]\]/gi, " ");
|
|
||||||
audioAsVoice = true;
|
|
||||||
hasTag = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up whitespace
|
|
||||||
cleaned = cleaned
|
|
||||||
.replace(/[ \t]+/g, " ")
|
|
||||||
.replace(/[ \t]*\n[ \t]*/g, "\n")
|
|
||||||
.trim();
|
|
||||||
|
|
||||||
return { cleaned, audioAsVoice, hasTag };
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -13,6 +13,28 @@ export type BlockReplyPipeline = {
|
|||||||
hasSentPayload: (payload: ReplyPayload) => boolean;
|
hasSentPayload: (payload: ReplyPayload) => boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Optional hooks that let a block reply pipeline hold selected payloads back
 * until the pipeline is flushed.
 */
export type BlockReplyBuffer = {
  /** Returns true when the payload should be held back instead of sent now. */
  shouldBuffer: (payload: ReplyPayload) => boolean;
  /** Observes a payload handed to the pipeline, before the buffering decision. */
  onEnqueue?: (payload: ReplyPayload) => void;
  /** Maps a held-back payload to the payload that is actually sent on flush. */
  finalize?: (payload: ReplyPayload) => ReplyPayload;
};

/**
 * Build a buffer that holds audio payloads so an `audioAsVoice` flag seen on
 * any payload (before or after the audio block) can still be applied to them.
 *
 * @param params.isAudioPayload Predicate deciding which payloads are held back.
 * @returns Buffer hooks: once any observed payload carries `audioAsVoice`,
 *   every payload passed to `finalize` is returned with `audioAsVoice: true`.
 */
export function createAudioAsVoiceBuffer(params: {
  isAudioPayload: (payload: ReplyPayload) => boolean;
}): BlockReplyBuffer {
  // Sticky flag: set by the first payload that asks for voice delivery.
  let voiceRequested = false;

  const onEnqueue = (payload: ReplyPayload): void => {
    if (!payload.audioAsVoice) return;
    voiceRequested = true;
  };

  const shouldBuffer = (payload: ReplyPayload): boolean =>
    params.isAudioPayload(payload);

  const finalize = (payload: ReplyPayload): ReplyPayload => {
    if (!voiceRequested) return payload;
    return { ...payload, audioAsVoice: true };
  };

  return { onEnqueue, shouldBuffer, finalize };
}
|
||||||
|
|
||||||
export function createBlockReplyPayloadKey(payload: ReplyPayload): string {
|
export function createBlockReplyPayloadKey(payload: ReplyPayload): string {
|
||||||
const text = payload.text?.trim() ?? "";
|
const text = payload.text?.trim() ?? "";
|
||||||
const mediaList = payload.mediaUrls?.length
|
const mediaList = payload.mediaUrls?.length
|
||||||
@@ -51,12 +73,15 @@ export function createBlockReplyPipeline(params: {
|
|||||||
) => Promise<void> | void;
|
) => Promise<void> | void;
|
||||||
timeoutMs: number;
|
timeoutMs: number;
|
||||||
coalescing?: BlockStreamingCoalescing;
|
coalescing?: BlockStreamingCoalescing;
|
||||||
|
buffer?: BlockReplyBuffer;
|
||||||
}): BlockReplyPipeline {
|
}): BlockReplyPipeline {
|
||||||
const { onBlockReply, timeoutMs, coalescing } = params;
|
const { onBlockReply, timeoutMs, coalescing, buffer } = params;
|
||||||
const sentKeys = new Set<string>();
|
const sentKeys = new Set<string>();
|
||||||
const pendingKeys = new Set<string>();
|
const pendingKeys = new Set<string>();
|
||||||
const seenKeys = new Set<string>();
|
const seenKeys = new Set<string>();
|
||||||
const bufferedKeys = new Set<string>();
|
const bufferedKeys = new Set<string>();
|
||||||
|
const bufferedPayloadKeys = new Set<string>();
|
||||||
|
const bufferedPayloads: ReplyPayload[] = [];
|
||||||
let sendChain: Promise<void> = Promise.resolve();
|
let sendChain: Promise<void> = Promise.resolve();
|
||||||
let aborted = false;
|
let aborted = false;
|
||||||
let didStream = false;
|
let didStream = false;
|
||||||
@@ -124,8 +149,37 @@ export function createBlockReplyPipeline(params: {
|
|||||||
})
|
})
|
||||||
: null;
|
: null;
|
||||||
|
|
||||||
|
// Offer a payload to the optional buffer. Returns true when the buffer has
// claimed it (the caller must not send it), false when it should go through
// the normal send path. A claimed payload whose key is already tracked in
// any of the key sets is dropped as a duplicate rather than stored again.
const bufferPayload = (payload: ReplyPayload) => {
  // The buffer observes every payload, including ones it will not hold back.
  buffer?.onEnqueue?.(payload);
  if (!buffer?.shouldBuffer(payload)) return false;

  const key = createBlockReplyPayloadKey(payload);
  const alreadyTracked = [
    seenKeys,
    sentKeys,
    pendingKeys,
    bufferedPayloadKeys,
  ].some((keys) => keys.has(key));

  if (!alreadyTracked) {
    seenKeys.add(key);
    bufferedPayloadKeys.add(key);
    bufferedPayloads.push(payload);
  }
  return true;
};
|
||||||
|
|
||||||
|
// Send every held-back payload, letting the buffer rewrite each one via
// `finalize` first, then reset the buffer bookkeeping.
const flushBuffered = () => {
  if (bufferedPayloads.length === 0) return;

  for (const held of bufferedPayloads) {
    const finalized = buffer?.finalize?.(held);
    // NOTE(review): second argument presumably bypasses the seen-key check,
    // since the key was already recorded when the payload was buffered — confirm.
    sendPayload(finalized ?? held, true);
  }

  bufferedPayloadKeys.clear();
  bufferedPayloads.length = 0;
};
|
||||||
|
|
||||||
const enqueue = (payload: ReplyPayload) => {
|
const enqueue = (payload: ReplyPayload) => {
|
||||||
if (aborted) return;
|
if (aborted) return;
|
||||||
|
if (bufferPayload(payload)) return;
|
||||||
const hasMedia =
|
const hasMedia =
|
||||||
Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
|
Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
|
||||||
if (hasMedia) {
|
if (hasMedia) {
|
||||||
@@ -151,6 +205,7 @@ export function createBlockReplyPipeline(params: {
|
|||||||
|
|
||||||
const flush = async (options?: { force?: boolean }) => {
|
const flush = async (options?: { force?: boolean }) => {
|
||||||
await coalescer?.flush(options);
|
await coalescer?.flush(options);
|
||||||
|
flushBuffered();
|
||||||
await sendChain;
|
await sendChain;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -162,7 +217,8 @@ export function createBlockReplyPipeline(params: {
|
|||||||
enqueue,
|
enqueue,
|
||||||
flush,
|
flush,
|
||||||
stop,
|
stop,
|
||||||
hasBuffered: () => Boolean(coalescer?.hasBuffered()),
|
hasBuffered: () =>
|
||||||
|
Boolean(coalescer?.hasBuffered() || bufferedPayloads.length > 0),
|
||||||
didStream: () => didStream,
|
didStream: () => didStream,
|
||||||
isAborted: () => aborted,
|
isAborted: () => aborted,
|
||||||
hasSentPayload: (payload) => {
|
hasSentPayload: (payload) => {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import { tryFastAbortFromMessage } from "./abort.js";
|
|||||||
import type { ReplyDispatcher, ReplyDispatchKind } from "./reply-dispatcher.js";
|
import type { ReplyDispatcher, ReplyDispatchKind } from "./reply-dispatcher.js";
|
||||||
import { isRoutableChannel, routeReply } from "./route-reply.js";
|
import { isRoutableChannel, routeReply } from "./route-reply.js";
|
||||||
|
|
||||||
type DispatchFromConfigResult = {
|
export type DispatchFromConfigResult = {
|
||||||
queuedFinal: boolean;
|
queuedFinal: boolean;
|
||||||
counts: Record<ReplyDispatchKind, number>;
|
counts: Record<ReplyDispatchKind, number>;
|
||||||
};
|
};
|
||||||
|
|||||||
34
src/auto-reply/reply/provider-dispatcher.ts
Normal file
34
src/auto-reply/reply/provider-dispatcher.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import type { ClawdbotConfig } from "../../config/config.js";
|
||||||
|
import type { MsgContext } from "../templating.js";
|
||||||
|
import type { GetReplyOptions } from "../types.js";
|
||||||
|
import type { DispatchFromConfigResult } from "./dispatch-from-config.js";
|
||||||
|
import { dispatchReplyFromConfig } from "./dispatch-from-config.js";
|
||||||
|
import {
|
||||||
|
createReplyDispatcherWithTyping,
|
||||||
|
type ReplyDispatcherWithTypingOptions,
|
||||||
|
} from "./reply-dispatcher.js";
|
||||||
|
|
||||||
|
/**
 * Create a typing-aware reply dispatcher and run `dispatchReplyFromConfig`
 * through it.
 *
 * @param params.ctx Message context forwarded to the dispatch call.
 * @param params.cfg Configuration forwarded to the dispatch call.
 * @param params.dispatcherOptions Options for `createReplyDispatcherWithTyping`.
 * @param params.replyOptions Caller reply options; the options produced by the
 *   dispatcher are spread after these and therefore win on conflicting keys.
 * @param params.replyResolver Optional resolver forwarded unchanged.
 * @returns The result of `dispatchReplyFromConfig`.
 */
export async function dispatchReplyWithBufferedBlockDispatcher(params: {
  ctx: MsgContext;
  cfg: ClawdbotConfig;
  dispatcherOptions: ReplyDispatcherWithTypingOptions;
  replyOptions?: Omit<GetReplyOptions, "onToolResult" | "onBlockReply">;
  replyResolver?: typeof import("../reply.js").getReplyFromConfig;
}): Promise<DispatchFromConfigResult> {
  const { dispatcher, replyOptions, markDispatchIdle } =
    createReplyDispatcherWithTyping(params.dispatcherOptions);

  try {
    return await dispatchReplyFromConfig({
      ctx: params.ctx,
      cfg: params.cfg,
      dispatcher,
      replyResolver: params.replyResolver,
      replyOptions: {
        ...params.replyOptions,
        ...replyOptions,
      },
    });
  } finally {
    // Always signal that dispatch is over, including when it rejects;
    // otherwise the dispatcher is never told the run has finished.
    markDispatchIdle();
  }
}
|
||||||
@@ -22,7 +22,7 @@ export type ReplyDispatcherOptions = {
|
|||||||
onError?: ReplyDispatchErrorHandler;
|
onError?: ReplyDispatchErrorHandler;
|
||||||
};
|
};
|
||||||
|
|
||||||
type ReplyDispatcherWithTypingOptions = Omit<
|
export type ReplyDispatcherWithTypingOptions = Omit<
|
||||||
ReplyDispatcherOptions,
|
ReplyDispatcherOptions,
|
||||||
"onIdle"
|
"onIdle"
|
||||||
> & {
|
> & {
|
||||||
|
|||||||
31
src/media/audio-tags.ts
Normal file
31
src/media/audio-tags.ts
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
/**
 * Detect and strip the `[[audio_as_voice]]` tag from reply text.
 *
 * The tag opts audio into being sent as a voice bubble; without it audio is
 * sent as a file (the backward-compatible default).
 *
 * @param text Raw reply text; missing or empty text yields an empty result.
 * @returns `text` with every tag occurrence removed and whitespace normalized
 *   (runs of spaces/tabs collapsed, whitespace around newlines dropped, ends
 *   trimmed), plus `audioAsVoice` / `hadTag`, both true when a tag was found.
 */
export function parseAudioTag(text?: string): {
  text: string;
  audioAsVoice: boolean;
  hadTag: boolean;
} {
  if (!text) return { text: "", audioAsVoice: false, hadTag: false };

  // Case-insensitive presence check; the tag is the only opt-in signal.
  const tagFound = /\[\[audio_as_voice\]\]/i.test(text);

  // Replace with a space so words on either side of the tag stay separated.
  const withoutTag = tagFound
    ? text.replace(/\[\[audio_as_voice\]\]/gi, " ")
    : text;

  const normalized = withoutTag
    .replace(/[ \t]+/g, " ")
    .replace(/[ \t]*\n[ \t]*/g, "\n")
    .trim();

  return { text: normalized, audioAsVoice: tagFound, hadTag: tagFound };
}
|
||||||
125
src/media/fetch.ts
Normal file
125
src/media/fetch.ts
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import { detectMime, extensionForMime } from "./mime.js";
|
||||||
|
|
||||||
|
/** Result of downloading a remote media file with `fetchRemoteMedia`. */
type FetchMediaResult = {
|
||||||
|
// Raw bytes of the response body.
buffer: Buffer;
|
||||||
|
// MIME type reported by `detectMime`, when one could be determined.
contentType?: string;
|
||||||
|
// File name taken from Content-Disposition, the URL path, or the path hint.
fileName?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Options accepted by `fetchRemoteMedia`. */
type FetchMediaOptions = {
|
||||||
|
// URL to download.
url: string;
|
||||||
|
// Fetch implementation to use instead of `globalThis.fetch`.
fetchImpl?: typeof fetch;
|
||||||
|
// Fallback path used for naming and MIME detection when the response gives none.
filePathHint?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Remove at most one leading and one trailing quote character (`"` or `'`).
 * The two ends are handled independently, so unbalanced quotes are stripped too.
 */
function stripQuotes(value: string): string {
  const edgeQuote = /^["']|["']$/g;
  return value.replace(edgeQuote, "");
}
|
||||||
|
|
||||||
|
/**
 * Extract a bare file name from a `Content-Disposition` header value.
 *
 * The extended `filename*=` parameter takes precedence; its value is
 * percent-decoded after dropping the `charset'lang'` prefix. Otherwise the
 * plain `filename=` parameter is used. Directory components are always
 * removed with `path.basename`.
 *
 * @param header Raw header value, or null/undefined when the header is absent.
 * @returns The file name, or undefined when the header carries no usable one.
 */
function parseContentDispositionFileName(
  header?: string | null,
): string | undefined {
  if (!header) return undefined;

  const starMatch = /filename\*\s*=\s*([^;]+)/i.exec(header);
  if (starMatch?.[1]) {
    const cleaned = stripQuotes(starMatch[1].trim());
    // Drop everything up to the first `''` (charset and language); keep the
    // whole value when there is no such prefix.
    const encoded = cleaned.split("''").slice(1).join("''") || cleaned;
    try {
      return path.basename(decodeURIComponent(encoded));
    } catch {
      // Malformed percent-encoding: fall back to the undecoded value.
      return path.basename(encoded);
    }
  }

  // A quoted value may legally contain `;`, so match the quoted form first
  // and only fall back to "everything up to the next `;`" for bare tokens.
  const match = /filename\s*=\s*(?:"((?:\\.|[^"\\])*)"|([^;]+))/i.exec(header);
  if (!match) return undefined;
  const raw = match[1] ?? stripQuotes((match[2] ?? "").trim());

  // An empty name (e.g. `filename=""`) is reported as "no name".
  return path.basename(raw) || undefined;
}
|
||||||
|
|
||||||
|
/**
 * Read a response body as text and reduce it to a short one-line snippet for
 * use in error messages.
 *
 * @param res Response whose body is consumed.
 * @param maxChars Maximum snippet length before an ellipsis is appended.
 * @returns The whitespace-collapsed body (truncated with `…` when longer than
 *   `maxChars`), or undefined when the body is empty, blank, or unreadable.
 */
async function readErrorBodySnippet(
  res: Response,
  maxChars = 200,
): Promise<string | undefined> {
  try {
    const raw = await res.text();
    const oneLine = raw ? raw.replace(/\s+/g, " ").trim() : "";
    if (!oneLine) return undefined;
    return oneLine.length > maxChars
      ? `${oneLine.slice(0, maxChars)}…`
      : oneLine;
  } catch {
    // Best-effort only: a failed body read must not mask the original error.
    return undefined;
  }
}
|
||||||
|
|
||||||
|
/**
 * Download a remote media file and work out its MIME type and file name.
 *
 * File name preference: Content-Disposition header, then the last segment of
 * the URL path, then the basename of `filePathHint`. When the chosen name has
 * no extension and a MIME type was detected, the matching extension from
 * `extensionForMime` is appended.
 *
 * @param options.url URL to download.
 * @param options.fetchImpl Fetch implementation; defaults to `globalThis.fetch`.
 * @param options.filePathHint Fallback path for naming and MIME detection.
 * @returns The body bytes plus the detected content type and file name.
 * @throws Error when no fetch implementation exists, the request fails, or
 *   the response status is not OK (the message includes status, redirect
 *   target, and a body snippet when available).
 */
export async function fetchRemoteMedia(
  options: FetchMediaOptions,
): Promise<FetchMediaResult> {
  const { url, fetchImpl, filePathHint } = options;

  const fetcher = fetchImpl ?? globalThis.fetch;
  if (!fetcher) {
    throw new Error("fetch is not available");
  }

  let res: Response;
  try {
    res = await fetcher(url);
  } catch (failure) {
    throw new Error(`Failed to fetch media from ${url}: ${String(failure)}`);
  }

  if (!res.ok) {
    const statusText = res.statusText ? ` ${res.statusText}` : "";
    // Only mention the final URL when the request was actually redirected.
    const redirected =
      res.url && res.url !== url ? ` (redirected to ${res.url})` : "";

    let detail: string;
    if (res.body) {
      detail = `HTTP ${res.status}${statusText}`;
      const snippet = await readErrorBodySnippet(res);
      if (snippet) detail += `; body: ${snippet}`;
    } else {
      detail = `HTTP ${res.status}${statusText}; empty response body`;
    }

    throw new Error(
      `Failed to fetch media from ${url}${redirected}: ${detail}`,
    );
  }

  const buffer = Buffer.from(await res.arrayBuffer());

  // Last path segment of the URL; undefined when the URL cannot be parsed
  // or its path has no basename.
  const fileNameFromUrl = ((): string | undefined => {
    try {
      return path.basename(new URL(url).pathname) || undefined;
    } catch {
      return undefined;
    }
  })();

  const headerFileName = parseContentDispositionFileName(
    res.headers.get("content-disposition"),
  );
  const hintFileName = filePathHint ? path.basename(filePathHint) : undefined;
  const baseName = headerFileName || fileNameFromUrl || hintFileName;

  // A header-supplied name is only trusted for MIME detection when it
  // carries an extension; otherwise fall back to the hint or the URL.
  const headerNameHasExt = Boolean(
    headerFileName && path.extname(headerFileName),
  );
  const filePathForMime = headerNameHasExt
    ? headerFileName
    : (filePathHint ?? url);

  const contentType = await detectMime({
    buffer,
    headerMime: res.headers.get("content-type"),
    filePath: filePathForMime,
  });

  let fileName = baseName;
  if (fileName && !path.extname(fileName) && contentType) {
    const ext = extensionForMime(contentType);
    if (ext) fileName = `${fileName}${ext}`;
  }

  return {
    buffer,
    contentType: contentType ?? undefined,
    fileName,
  };
}
|
||||||
@@ -36,6 +36,17 @@ const MIME_BY_EXT: Record<string, string> = Object.fromEntries(
|
|||||||
Object.entries(EXT_BY_MIME).map(([mime, ext]) => [ext, mime]),
|
Object.entries(EXT_BY_MIME).map(([mime, ext]) => [ext, mime]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// File extensions (lowercase, with leading dot) that are treated as audio.
const AUDIO_FILE_EXTENSIONS = new Set<string>(
  [".aac", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav"],
);
|
||||||
|
|
||||||
function normalizeHeaderMime(mime?: string | null): string | undefined {
|
function normalizeHeaderMime(mime?: string | null): string | undefined {
|
||||||
if (!mime) return undefined;
|
if (!mime) return undefined;
|
||||||
const cleaned = mime.split(";")[0]?.trim().toLowerCase();
|
const cleaned = mime.split(";")[0]?.trim().toLowerCase();
|
||||||
@@ -52,7 +63,7 @@ async function sniffMime(buffer?: Buffer): Promise<string | undefined> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function extFromPath(filePath?: string): string | undefined {
|
export function getFileExtension(filePath?: string | null): string | undefined {
|
||||||
if (!filePath) return undefined;
|
if (!filePath) return undefined;
|
||||||
try {
|
try {
|
||||||
if (/^https?:\/\//i.test(filePath)) {
|
if (/^https?:\/\//i.test(filePath)) {
|
||||||
@@ -66,6 +77,12 @@ function extFromPath(filePath?: string): string | undefined {
|
|||||||
return ext || undefined;
|
return ext || undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Report whether a file name or URL has one of the known audio extensions.
 *
 * @param fileName File name, path, or URL; null/undefined is never audio.
 * @returns True when the extension from `getFileExtension` is listed in
 *   `AUDIO_FILE_EXTENSIONS`.
 */
export function isAudioFileName(fileName?: string | null): boolean {
  // NOTE(review): assumes getFileExtension yields a lowercase, dot-prefixed
  // extension matching the set entries — confirm against its implementation.
  const extension = getFileExtension(fileName);
  return extension ? AUDIO_FILE_EXTENSIONS.has(extension) : false;
}
|
||||||
|
|
||||||
export function detectMime(opts: {
|
export function detectMime(opts: {
|
||||||
buffer?: Buffer;
|
buffer?: Buffer;
|
||||||
headerMime?: string | null;
|
headerMime?: string | null;
|
||||||
@@ -85,7 +102,7 @@ async function detectMimeImpl(opts: {
|
|||||||
headerMime?: string | null;
|
headerMime?: string | null;
|
||||||
filePath?: string;
|
filePath?: string;
|
||||||
}): Promise<string | undefined> {
|
}): Promise<string | undefined> {
|
||||||
const ext = extFromPath(opts.filePath);
|
const ext = getFileExtension(opts.filePath);
|
||||||
const extMime = ext ? MIME_BY_EXT[ext] : undefined;
|
const extMime = ext ? MIME_BY_EXT[ext] : undefined;
|
||||||
|
|
||||||
const headerMime = normalizeHeaderMime(opts.headerMime);
|
const headerMime = normalizeHeaderMime(opts.headerMime);
|
||||||
@@ -112,9 +129,7 @@ export function isGifMedia(opts: {
|
|||||||
fileName?: string | null;
|
fileName?: string | null;
|
||||||
}): boolean {
|
}): boolean {
|
||||||
if (opts.contentType?.toLowerCase() === "image/gif") return true;
|
if (opts.contentType?.toLowerCase() === "image/gif") return true;
|
||||||
const ext = opts.fileName
|
const ext = getFileExtension(opts.fileName);
|
||||||
? path.extname(opts.fileName).toLowerCase()
|
|
||||||
: undefined;
|
|
||||||
return ext === ".gif";
|
return ext === ".gif";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
// Shared helpers for parsing MEDIA tokens from command/stdout text.
|
// Shared helpers for parsing MEDIA tokens from command/stdout text.
|
||||||
|
|
||||||
import { parseFenceSpans } from "../markdown/fences.js";
|
import { parseFenceSpans } from "../markdown/fences.js";
|
||||||
|
import { parseAudioTag } from "./audio-tags.js";
|
||||||
|
|
||||||
// Allow optional wrapping backticks and punctuation after the token; capture the core token.
|
// Allow optional wrapping backticks and punctuation after the token; capture the core token.
|
||||||
export const MEDIA_TOKEN_RE = /\bMEDIA:\s*`?([^\n]+)`?/gi;
|
export const MEDIA_TOKEN_RE = /\bMEDIA:\s*`?([^\n]+)`?/gi;
|
||||||
@@ -32,10 +33,6 @@ function isInsideFence(
|
|||||||
return fenceSpans.some((span) => offset >= span.start && offset < span.end);
|
return fenceSpans.some((span) => offset >= span.start && offset < span.end);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regex to detect [[audio_as_voice]] tag
|
|
||||||
const AUDIO_AS_VOICE_RE = /\[\[audio_as_voice\]\]/gi;
|
|
||||||
const AUDIO_AS_VOICE_TEST_RE = /\[\[audio_as_voice\]\]/i;
|
|
||||||
|
|
||||||
export function splitMediaFromOutput(raw: string): {
|
export function splitMediaFromOutput(raw: string): {
|
||||||
text: string;
|
text: string;
|
||||||
mediaUrls?: string[];
|
mediaUrls?: string[];
|
||||||
@@ -124,13 +121,10 @@ export function splitMediaFromOutput(raw: string): {
|
|||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
// Detect and strip [[audio_as_voice]] tag
|
// Detect and strip [[audio_as_voice]] tag
|
||||||
const hasAudioAsVoice = AUDIO_AS_VOICE_TEST_RE.test(cleanedText);
|
const audioTagResult = parseAudioTag(cleanedText);
|
||||||
if (hasAudioAsVoice) {
|
const hasAudioAsVoice = audioTagResult.audioAsVoice;
|
||||||
cleanedText = cleanedText
|
if (audioTagResult.hadTag) {
|
||||||
.replace(AUDIO_AS_VOICE_RE, "")
|
cleanedText = audioTagResult.text.replace(/\n{2,}/g, "\n").trim();
|
||||||
.replace(/[ \t]+/g, " ")
|
|
||||||
.replace(/\n{2,}/g, "\n")
|
|
||||||
.trim();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (media.length === 0) {
|
if (media.length === 0) {
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
// @ts-nocheck
|
// @ts-nocheck
|
||||||
import { Buffer } from "node:buffer";
|
|
||||||
|
|
||||||
import { sequentialize } from "@grammyjs/runner";
|
import { sequentialize } from "@grammyjs/runner";
|
||||||
import { apiThrottler } from "@grammyjs/transformer-throttler";
|
import { apiThrottler } from "@grammyjs/transformer-throttler";
|
||||||
import type { ApiClientOptions, Message } from "grammy";
|
import type { ApiClientOptions, Message } from "grammy";
|
||||||
@@ -22,12 +20,11 @@ import {
|
|||||||
} from "../auto-reply/commands-registry.js";
|
} from "../auto-reply/commands-registry.js";
|
||||||
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
|
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
|
||||||
import { resolveBlockStreamingChunking } from "../auto-reply/reply/block-streaming.js";
|
import { resolveBlockStreamingChunking } from "../auto-reply/reply/block-streaming.js";
|
||||||
import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js";
|
|
||||||
import {
|
import {
|
||||||
buildMentionRegexes,
|
buildMentionRegexes,
|
||||||
matchesMentionPatterns,
|
matchesMentionPatterns,
|
||||||
} from "../auto-reply/reply/mentions.js";
|
} from "../auto-reply/reply/mentions.js";
|
||||||
import { createReplyDispatcherWithTyping } from "../auto-reply/reply/reply-dispatcher.js";
|
import { dispatchReplyWithBufferedBlockDispatcher } from "../auto-reply/reply/provider-dispatcher.js";
|
||||||
import { getReplyFromConfig } from "../auto-reply/reply.js";
|
import { getReplyFromConfig } from "../auto-reply/reply.js";
|
||||||
import type { ReplyPayload } from "../auto-reply/types.js";
|
import type { ReplyPayload } from "../auto-reply/types.js";
|
||||||
import type { ClawdbotConfig, ReplyToMode } from "../config/config.js";
|
import type { ClawdbotConfig, ReplyToMode } from "../config/config.js";
|
||||||
@@ -46,7 +43,8 @@ import { formatErrorMessage } from "../infra/errors.js";
|
|||||||
import { recordProviderActivity } from "../infra/provider-activity.js";
|
import { recordProviderActivity } from "../infra/provider-activity.js";
|
||||||
import { getChildLogger } from "../logging.js";
|
import { getChildLogger } from "../logging.js";
|
||||||
import { mediaKindFromMime } from "../media/constants.js";
|
import { mediaKindFromMime } from "../media/constants.js";
|
||||||
import { detectMime, isGifMedia } from "../media/mime.js";
|
import { fetchRemoteMedia } from "../media/fetch.js";
|
||||||
|
import { isGifMedia } from "../media/mime.js";
|
||||||
import { saveMediaBuffer } from "../media/store.js";
|
import { saveMediaBuffer } from "../media/store.js";
|
||||||
import {
|
import {
|
||||||
formatLocationText,
|
formatLocationText,
|
||||||
@@ -64,7 +62,7 @@ import {
|
|||||||
readTelegramAllowFromStore,
|
readTelegramAllowFromStore,
|
||||||
upsertTelegramPairingRequest,
|
upsertTelegramPairingRequest,
|
||||||
} from "./pairing-store.js";
|
} from "./pairing-store.js";
|
||||||
import { resolveTelegramVoiceDecision } from "./voice.js";
|
import { resolveTelegramVoiceSend } from "./voice.js";
|
||||||
|
|
||||||
const PARSE_ERR_RE =
|
const PARSE_ERR_RE =
|
||||||
/can't parse entities|parse entities|find end of the entity/i;
|
/can't parse entities|parse entities|find end of the entity/i;
|
||||||
@@ -805,8 +803,16 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
await draftStream.flush();
|
await draftStream.flush();
|
||||||
};
|
};
|
||||||
|
|
||||||
const { dispatcher, replyOptions, markDispatchIdle } =
|
const disableBlockStreaming =
|
||||||
createReplyDispatcherWithTyping({
|
Boolean(draftStream) ||
|
||||||
|
(typeof telegramCfg.blockStreaming === "boolean"
|
||||||
|
? !telegramCfg.blockStreaming
|
||||||
|
: undefined);
|
||||||
|
|
||||||
|
const { queuedFinal } = await dispatchReplyWithBufferedBlockDispatcher({
|
||||||
|
ctx: ctxPayload,
|
||||||
|
cfg,
|
||||||
|
dispatcherOptions: {
|
||||||
responsePrefix: resolveEffectiveMessagesConfig(cfg, route.agentId)
|
responsePrefix: resolveEffectiveMessagesConfig(cfg, route.agentId)
|
||||||
.responsePrefix,
|
.responsePrefix,
|
||||||
deliver: async (payload, info) => {
|
deliver: async (payload, info) => {
|
||||||
@@ -831,20 +837,8 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
);
|
);
|
||||||
},
|
},
|
||||||
onReplyStart: sendTyping,
|
onReplyStart: sendTyping,
|
||||||
});
|
},
|
||||||
|
|
||||||
const disableBlockStreaming =
|
|
||||||
Boolean(draftStream) ||
|
|
||||||
(typeof telegramCfg.blockStreaming === "boolean"
|
|
||||||
? !telegramCfg.blockStreaming
|
|
||||||
: undefined);
|
|
||||||
|
|
||||||
const { queuedFinal } = await dispatchReplyFromConfig({
|
|
||||||
ctx: ctxPayload,
|
|
||||||
cfg,
|
|
||||||
dispatcher,
|
|
||||||
replyOptions: {
|
replyOptions: {
|
||||||
...replyOptions,
|
|
||||||
skillFilter,
|
skillFilter,
|
||||||
onPartialReply: draftStream
|
onPartialReply: draftStream
|
||||||
? (payload) => updateDraftFromPartial(payload.text)
|
? (payload) => updateDraftFromPartial(payload.text)
|
||||||
@@ -857,7 +851,6 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
disableBlockStreaming,
|
disableBlockStreaming,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
markDispatchIdle();
|
|
||||||
draftStream?.stop();
|
draftStream?.stop();
|
||||||
if (!queuedFinal) return;
|
if (!queuedFinal) return;
|
||||||
if (
|
if (
|
||||||
@@ -1409,16 +1402,12 @@ async function deliverReplies(params: {
|
|||||||
...mediaParams,
|
...mediaParams,
|
||||||
});
|
});
|
||||||
} else if (kind === "audio") {
|
} else if (kind === "audio") {
|
||||||
const { useVoice, reason } = resolveTelegramVoiceDecision({
|
const { useVoice } = resolveTelegramVoiceSend({
|
||||||
wantsVoice: reply.audioAsVoice === true, // default false (backward compatible)
|
wantsVoice: reply.audioAsVoice === true, // default false (backward compatible)
|
||||||
contentType: media.contentType,
|
contentType: media.contentType,
|
||||||
fileName,
|
fileName,
|
||||||
|
logFallback: logVerbose,
|
||||||
});
|
});
|
||||||
if (reason) {
|
|
||||||
logVerbose(
|
|
||||||
`Telegram voice requested but ${reason}; sending as audio file instead.`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if (useVoice) {
|
if (useVoice) {
|
||||||
// Voice message - displays as round playable bubble (opt-in via [[audio_as_voice]])
|
// Voice message - displays as round playable bubble (opt-in via [[audio_as_voice]])
|
||||||
await bot.api.sendVoice(chatId, file, {
|
await bot.api.sendVoice(chatId, file, {
|
||||||
@@ -1571,19 +1560,17 @@ async function resolveMedia(
|
|||||||
throw new Error("fetch is not available; set telegram.proxy in config");
|
throw new Error("fetch is not available; set telegram.proxy in config");
|
||||||
}
|
}
|
||||||
const url = `https://api.telegram.org/file/bot${token}/${file.file_path}`;
|
const url = `https://api.telegram.org/file/bot${token}/${file.file_path}`;
|
||||||
const res = await fetchImpl(url);
|
const fetched = await fetchRemoteMedia({
|
||||||
if (!res.ok) {
|
url,
|
||||||
throw new Error(
|
fetchImpl,
|
||||||
`Failed to download telegram file: HTTP ${res.status} ${res.statusText}`,
|
filePathHint: file.file_path,
|
||||||
);
|
|
||||||
}
|
|
||||||
const data = Buffer.from(await res.arrayBuffer());
|
|
||||||
const mime = await detectMime({
|
|
||||||
buffer: data,
|
|
||||||
headerMime: res.headers.get("content-type"),
|
|
||||||
filePath: file.file_path,
|
|
||||||
});
|
});
|
||||||
const saved = await saveMediaBuffer(data, mime, "inbound", maxBytes);
|
const saved = await saveMediaBuffer(
|
||||||
|
fetched.buffer,
|
||||||
|
fetched.contentType,
|
||||||
|
"inbound",
|
||||||
|
maxBytes,
|
||||||
|
);
|
||||||
let placeholder = "<media:document>";
|
let placeholder = "<media:document>";
|
||||||
if (msg.photo) placeholder = "<media:image>";
|
if (msg.photo) placeholder = "<media:image>";
|
||||||
else if (msg.video) placeholder = "<media:video>";
|
else if (msg.video) placeholder = "<media:video>";
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import {
|
|||||||
parseTelegramTarget,
|
parseTelegramTarget,
|
||||||
stripTelegramInternalPrefixes,
|
stripTelegramInternalPrefixes,
|
||||||
} from "./targets.js";
|
} from "./targets.js";
|
||||||
import { resolveTelegramVoiceDecision } from "./voice.js";
|
import { resolveTelegramVoiceSend } from "./voice.js";
|
||||||
|
|
||||||
type TelegramSendOpts = {
|
type TelegramSendOpts = {
|
||||||
token?: string;
|
token?: string;
|
||||||
@@ -239,16 +239,12 @@ export async function sendMessageTelegram(
|
|||||||
throw wrapChatNotFound(err);
|
throw wrapChatNotFound(err);
|
||||||
});
|
});
|
||||||
} else if (kind === "audio") {
|
} else if (kind === "audio") {
|
||||||
const { useVoice, reason } = resolveTelegramVoiceDecision({
|
const { useVoice } = resolveTelegramVoiceSend({
|
||||||
wantsVoice: opts.asVoice === true, // default false (backward compatible)
|
wantsVoice: opts.asVoice === true, // default false (backward compatible)
|
||||||
contentType: media.contentType,
|
contentType: media.contentType,
|
||||||
fileName,
|
fileName,
|
||||||
|
logFallback: logVerbose,
|
||||||
});
|
});
|
||||||
if (reason) {
|
|
||||||
logVerbose(
|
|
||||||
`Telegram voice requested but ${reason}; sending as audio file instead.`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if (useVoice) {
|
if (useVoice) {
|
||||||
result = await request(
|
result = await request(
|
||||||
() => api.sendVoice(chatId, file, mediaParams),
|
() => api.sendVoice(chatId, file, mediaParams),
|
||||||
|
|||||||
43
src/telegram/voice.test.ts
Normal file
43
src/telegram/voice.test.ts
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
import { describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
|
import { resolveTelegramVoiceSend } from "./voice.js";
|
||||||
|
|
||||||
|
describe("resolveTelegramVoiceSend", () => {
|
||||||
|
it("skips voice when wantsVoice is false", () => {
|
||||||
|
const logFallback = vi.fn();
|
||||||
|
const result = resolveTelegramVoiceSend({
|
||||||
|
wantsVoice: false,
|
||||||
|
contentType: "audio/ogg",
|
||||||
|
fileName: "voice.ogg",
|
||||||
|
logFallback,
|
||||||
|
});
|
||||||
|
expect(result.useVoice).toBe(false);
|
||||||
|
expect(logFallback).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("logs fallback for incompatible media", () => {
|
||||||
|
const logFallback = vi.fn();
|
||||||
|
const result = resolveTelegramVoiceSend({
|
||||||
|
wantsVoice: true,
|
||||||
|
contentType: "audio/mpeg",
|
||||||
|
fileName: "track.mp3",
|
||||||
|
logFallback,
|
||||||
|
});
|
||||||
|
expect(result.useVoice).toBe(false);
|
||||||
|
expect(logFallback).toHaveBeenCalledWith(
|
||||||
|
"Telegram voice requested but media is audio/mpeg (track.mp3); sending as audio file instead.",
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("keeps voice when compatible", () => {
|
||||||
|
const logFallback = vi.fn();
|
||||||
|
const result = resolveTelegramVoiceSend({
|
||||||
|
wantsVoice: true,
|
||||||
|
contentType: "audio/ogg",
|
||||||
|
fileName: "voice.ogg",
|
||||||
|
logFallback,
|
||||||
|
});
|
||||||
|
expect(result.useVoice).toBe(true);
|
||||||
|
expect(logFallback).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import path from "node:path";
|
import { getFileExtension } from "../media/mime.js";
|
||||||
|
|
||||||
export function isTelegramVoiceCompatible(opts: {
|
export function isTelegramVoiceCompatible(opts: {
|
||||||
contentType?: string | null;
|
contentType?: string | null;
|
||||||
@@ -10,7 +10,8 @@ export function isTelegramVoiceCompatible(opts: {
|
|||||||
}
|
}
|
||||||
const fileName = opts.fileName?.trim();
|
const fileName = opts.fileName?.trim();
|
||||||
if (!fileName) return false;
|
if (!fileName) return false;
|
||||||
const ext = path.extname(fileName).toLowerCase();
|
const ext = getFileExtension(fileName);
|
||||||
|
if (!ext) return false;
|
||||||
return ext === ".ogg" || ext === ".opus" || ext === ".oga";
|
return ext === ".ogg" || ext === ".opus" || ext === ".oga";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -28,3 +29,18 @@ export function resolveTelegramVoiceDecision(opts: {
|
|||||||
reason: `media is ${contentType} (${fileName})`,
|
reason: `media is ${contentType} (${fileName})`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function resolveTelegramVoiceSend(opts: {
|
||||||
|
wantsVoice: boolean;
|
||||||
|
contentType?: string | null;
|
||||||
|
fileName?: string | null;
|
||||||
|
logFallback?: (message: string) => void;
|
||||||
|
}): { useVoice: boolean } {
|
||||||
|
const decision = resolveTelegramVoiceDecision(opts);
|
||||||
|
if (decision.reason && opts.logFallback) {
|
||||||
|
opts.logFallback(
|
||||||
|
`Telegram voice requested but ${decision.reason}; sending as audio file instead.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return { useVoice: decision.useVoice };
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,12 +17,11 @@ import {
|
|||||||
resolveHeartbeatPrompt,
|
resolveHeartbeatPrompt,
|
||||||
stripHeartbeatToken,
|
stripHeartbeatToken,
|
||||||
} from "../auto-reply/heartbeat.js";
|
} from "../auto-reply/heartbeat.js";
|
||||||
import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js";
|
|
||||||
import {
|
import {
|
||||||
buildMentionRegexes,
|
buildMentionRegexes,
|
||||||
normalizeMentionText,
|
normalizeMentionText,
|
||||||
} from "../auto-reply/reply/mentions.js";
|
} from "../auto-reply/reply/mentions.js";
|
||||||
import { createReplyDispatcherWithTyping } from "../auto-reply/reply/reply-dispatcher.js";
|
import { dispatchReplyWithBufferedBlockDispatcher } from "../auto-reply/reply/provider-dispatcher.js";
|
||||||
import { getReplyFromConfig } from "../auto-reply/reply.js";
|
import { getReplyFromConfig } from "../auto-reply/reply.js";
|
||||||
import { HEARTBEAT_TOKEN, SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
|
import { HEARTBEAT_TOKEN, SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
|
||||||
import type { ReplyPayload } from "../auto-reply/types.js";
|
import type { ReplyPayload } from "../auto-reply/types.js";
|
||||||
@@ -1219,8 +1218,39 @@ export async function monitorWebProvider(
|
|||||||
cfg,
|
cfg,
|
||||||
route.agentId,
|
route.agentId,
|
||||||
).responsePrefix;
|
).responsePrefix;
|
||||||
const { dispatcher, replyOptions, markDispatchIdle } =
|
const { queuedFinal } = await dispatchReplyWithBufferedBlockDispatcher({
|
||||||
createReplyDispatcherWithTyping({
|
ctx: {
|
||||||
|
Body: combinedBody,
|
||||||
|
From: msg.from,
|
||||||
|
To: msg.to,
|
||||||
|
SessionKey: route.sessionKey,
|
||||||
|
AccountId: route.accountId,
|
||||||
|
MessageSid: msg.id,
|
||||||
|
ReplyToId: msg.replyToId,
|
||||||
|
ReplyToBody: msg.replyToBody,
|
||||||
|
ReplyToSender: msg.replyToSender,
|
||||||
|
MediaPath: msg.mediaPath,
|
||||||
|
MediaUrl: msg.mediaUrl,
|
||||||
|
MediaType: msg.mediaType,
|
||||||
|
ChatType: msg.chatType,
|
||||||
|
GroupSubject: msg.groupSubject,
|
||||||
|
GroupMembers: formatGroupMembers(
|
||||||
|
msg.groupParticipants,
|
||||||
|
groupMemberNames.get(groupHistoryKey),
|
||||||
|
msg.senderE164,
|
||||||
|
),
|
||||||
|
SenderName: msg.senderName,
|
||||||
|
SenderE164: msg.senderE164,
|
||||||
|
WasMentioned: msg.wasMentioned,
|
||||||
|
...(msg.location ? toLocationContext(msg.location) : {}),
|
||||||
|
Provider: "whatsapp",
|
||||||
|
Surface: "whatsapp",
|
||||||
|
OriginatingChannel: "whatsapp",
|
||||||
|
OriginatingTo: msg.from,
|
||||||
|
},
|
||||||
|
cfg,
|
||||||
|
replyResolver,
|
||||||
|
dispatcherOptions: {
|
||||||
responsePrefix,
|
responsePrefix,
|
||||||
onHeartbeatStrip: () => {
|
onHeartbeatStrip: () => {
|
||||||
if (!didLogHeartbeatStrip) {
|
if (!didLogHeartbeatStrip) {
|
||||||
@@ -1283,50 +1313,14 @@ export async function monitorWebProvider(
|
|||||||
);
|
);
|
||||||
},
|
},
|
||||||
onReplyStart: msg.sendComposing,
|
onReplyStart: msg.sendComposing,
|
||||||
});
|
|
||||||
|
|
||||||
const { queuedFinal } = await dispatchReplyFromConfig({
|
|
||||||
ctx: {
|
|
||||||
Body: combinedBody,
|
|
||||||
From: msg.from,
|
|
||||||
To: msg.to,
|
|
||||||
SessionKey: route.sessionKey,
|
|
||||||
AccountId: route.accountId,
|
|
||||||
MessageSid: msg.id,
|
|
||||||
ReplyToId: msg.replyToId,
|
|
||||||
ReplyToBody: msg.replyToBody,
|
|
||||||
ReplyToSender: msg.replyToSender,
|
|
||||||
MediaPath: msg.mediaPath,
|
|
||||||
MediaUrl: msg.mediaUrl,
|
|
||||||
MediaType: msg.mediaType,
|
|
||||||
ChatType: msg.chatType,
|
|
||||||
GroupSubject: msg.groupSubject,
|
|
||||||
GroupMembers: formatGroupMembers(
|
|
||||||
msg.groupParticipants,
|
|
||||||
groupMemberNames.get(groupHistoryKey),
|
|
||||||
msg.senderE164,
|
|
||||||
),
|
|
||||||
SenderName: msg.senderName,
|
|
||||||
SenderE164: msg.senderE164,
|
|
||||||
WasMentioned: msg.wasMentioned,
|
|
||||||
...(msg.location ? toLocationContext(msg.location) : {}),
|
|
||||||
Provider: "whatsapp",
|
|
||||||
Surface: "whatsapp",
|
|
||||||
OriginatingChannel: "whatsapp",
|
|
||||||
OriginatingTo: msg.from,
|
|
||||||
},
|
},
|
||||||
cfg,
|
|
||||||
dispatcher,
|
|
||||||
replyResolver,
|
|
||||||
replyOptions: {
|
replyOptions: {
|
||||||
...replyOptions,
|
|
||||||
disableBlockStreaming:
|
disableBlockStreaming:
|
||||||
typeof cfg.whatsapp?.blockStreaming === "boolean"
|
typeof cfg.whatsapp?.blockStreaming === "boolean"
|
||||||
? !cfg.whatsapp.blockStreaming
|
? !cfg.whatsapp.blockStreaming
|
||||||
: undefined,
|
: undefined,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
markDispatchIdle();
|
|
||||||
if (!queuedFinal) {
|
if (!queuedFinal) {
|
||||||
if (shouldClearGroupHistory && didSendReply) {
|
if (shouldClearGroupHistory && didSendReply) {
|
||||||
groupHistories.set(groupHistoryKey, []);
|
groupHistories.set(groupHistoryKey, []);
|
||||||
|
|||||||
103
src/web/media.ts
103
src/web/media.ts
@@ -7,6 +7,7 @@ import {
|
|||||||
maxBytesForKind,
|
maxBytesForKind,
|
||||||
mediaKindFromMime,
|
mediaKindFromMime,
|
||||||
} from "../media/constants.js";
|
} from "../media/constants.js";
|
||||||
|
import { fetchRemoteMedia } from "../media/fetch.js";
|
||||||
import { resizeToJpeg } from "../media/image-ops.js";
|
import { resizeToJpeg } from "../media/image-ops.js";
|
||||||
import { detectMime, extensionForMime } from "../media/mime.js";
|
import { detectMime, extensionForMime } from "../media/mime.js";
|
||||||
|
|
||||||
@@ -22,45 +23,6 @@ type WebMediaOptions = {
|
|||||||
optimizeImages?: boolean;
|
optimizeImages?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
function stripQuotes(value: string): string {
|
|
||||||
return value.replace(/^["']|["']$/g, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseContentDispositionFileName(
|
|
||||||
header?: string | null,
|
|
||||||
): string | undefined {
|
|
||||||
if (!header) return undefined;
|
|
||||||
const starMatch = /filename\*\s*=\s*([^;]+)/i.exec(header);
|
|
||||||
if (starMatch?.[1]) {
|
|
||||||
const cleaned = stripQuotes(starMatch[1].trim());
|
|
||||||
const encoded = cleaned.split("''").slice(1).join("''") || cleaned;
|
|
||||||
try {
|
|
||||||
return path.basename(decodeURIComponent(encoded));
|
|
||||||
} catch {
|
|
||||||
return path.basename(encoded);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const match = /filename\s*=\s*([^;]+)/i.exec(header);
|
|
||||||
if (match?.[1]) return path.basename(stripQuotes(match[1].trim()));
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function readErrorBodySnippet(
|
|
||||||
res: Response,
|
|
||||||
maxChars = 200,
|
|
||||||
): Promise<string | undefined> {
|
|
||||||
try {
|
|
||||||
const text = await res.text();
|
|
||||||
if (!text) return undefined;
|
|
||||||
const collapsed = text.replace(/\s+/g, " ").trim();
|
|
||||||
if (!collapsed) return undefined;
|
|
||||||
if (collapsed.length <= maxChars) return collapsed;
|
|
||||||
return `${collapsed.slice(0, maxChars)}…`;
|
|
||||||
} catch {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function loadWebMediaInternal(
|
async function loadWebMediaInternal(
|
||||||
mediaUrl: string,
|
mediaUrl: string,
|
||||||
options: WebMediaOptions = {},
|
options: WebMediaOptions = {},
|
||||||
@@ -93,53 +55,8 @@ async function loadWebMediaInternal(
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (/^https?:\/\//i.test(mediaUrl)) {
|
if (/^https?:\/\//i.test(mediaUrl)) {
|
||||||
let fileNameFromUrl: string | undefined;
|
const fetched = await fetchRemoteMedia({ url: mediaUrl });
|
||||||
try {
|
const { buffer, contentType, fileName } = fetched;
|
||||||
const url = new URL(mediaUrl);
|
|
||||||
const base = path.basename(url.pathname);
|
|
||||||
fileNameFromUrl = base || undefined;
|
|
||||||
} catch {
|
|
||||||
// ignore parse errors; leave undefined
|
|
||||||
}
|
|
||||||
let res: Response;
|
|
||||||
try {
|
|
||||||
res = await fetch(mediaUrl);
|
|
||||||
} catch (err) {
|
|
||||||
throw new Error(`Failed to fetch media from ${mediaUrl}: ${String(err)}`);
|
|
||||||
}
|
|
||||||
if (!res.ok || !res.body) {
|
|
||||||
const statusText = res.statusText ? ` ${res.statusText}` : "";
|
|
||||||
const redirected =
|
|
||||||
res.url && res.url !== mediaUrl ? ` (redirected to ${res.url})` : "";
|
|
||||||
let detail = `HTTP ${res.status}${statusText}`;
|
|
||||||
if (!res.body) {
|
|
||||||
detail = `HTTP ${res.status}${statusText}; empty response body`;
|
|
||||||
} else if (!res.ok) {
|
|
||||||
const snippet = await readErrorBodySnippet(res);
|
|
||||||
if (snippet) detail += `; body: ${snippet}`;
|
|
||||||
}
|
|
||||||
throw new Error(
|
|
||||||
`Failed to fetch media from ${mediaUrl}${redirected}: ${detail}`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
const array = Buffer.from(await res.arrayBuffer());
|
|
||||||
const headerFileName = parseContentDispositionFileName(
|
|
||||||
res.headers.get("content-disposition"),
|
|
||||||
);
|
|
||||||
let fileName = headerFileName || fileNameFromUrl || undefined;
|
|
||||||
const filePathForMime =
|
|
||||||
headerFileName && path.extname(headerFileName)
|
|
||||||
? headerFileName
|
|
||||||
: mediaUrl;
|
|
||||||
const contentType = await detectMime({
|
|
||||||
buffer: array,
|
|
||||||
headerMime: res.headers.get("content-type"),
|
|
||||||
filePath: filePathForMime,
|
|
||||||
});
|
|
||||||
if (fileName && !path.extname(fileName) && contentType) {
|
|
||||||
const ext = extensionForMime(contentType);
|
|
||||||
if (ext) fileName = `${fileName}${ext}`;
|
|
||||||
}
|
|
||||||
const kind = mediaKindFromMime(contentType);
|
const kind = mediaKindFromMime(contentType);
|
||||||
const cap = Math.min(
|
const cap = Math.min(
|
||||||
maxBytes ?? maxBytesForKind(kind),
|
maxBytes ?? maxBytesForKind(kind),
|
||||||
@@ -148,28 +65,28 @@ async function loadWebMediaInternal(
|
|||||||
if (kind === "image") {
|
if (kind === "image") {
|
||||||
// Skip optimization for GIFs to preserve animation.
|
// Skip optimization for GIFs to preserve animation.
|
||||||
if (contentType === "image/gif" || !optimizeImages) {
|
if (contentType === "image/gif" || !optimizeImages) {
|
||||||
if (array.length > cap) {
|
if (buffer.length > cap) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`${
|
`${
|
||||||
contentType === "image/gif" ? "GIF" : "Media"
|
contentType === "image/gif" ? "GIF" : "Media"
|
||||||
} exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
|
} exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
|
||||||
array.length / (1024 * 1024)
|
buffer.length / (1024 * 1024)
|
||||||
).toFixed(2)}MB)`,
|
).toFixed(2)}MB)`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return { buffer: array, contentType, kind, fileName };
|
return { buffer, contentType, kind, fileName };
|
||||||
}
|
}
|
||||||
return { ...(await optimizeAndClampImage(array, cap)), fileName };
|
return { ...(await optimizeAndClampImage(buffer, cap)), fileName };
|
||||||
}
|
}
|
||||||
if (array.length > cap) {
|
if (buffer.length > cap) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Media exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
|
`Media exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
|
||||||
array.length / (1024 * 1024)
|
buffer.length / (1024 * 1024)
|
||||||
).toFixed(2)}MB)`,
|
).toFixed(2)}MB)`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
buffer: array,
|
buffer,
|
||||||
contentType: contentType ?? undefined,
|
contentType: contentType ?? undefined,
|
||||||
kind,
|
kind,
|
||||||
fileName,
|
fileName,
|
||||||
|
|||||||
Reference in New Issue
Block a user