feat: move TTS into core (#1559) (thanks @Glucksberg)
This commit is contained in:
@@ -16,6 +16,7 @@ import {
|
||||
import { handleAllowlistCommand } from "./commands-allowlist.js";
|
||||
import { handleSubagentsCommand } from "./commands-subagents.js";
|
||||
import { handleModelsCommand } from "./commands-models.js";
|
||||
import { handleTtsCommands } from "./commands-tts.js";
|
||||
import {
|
||||
handleAbortTrigger,
|
||||
handleActivationCommand,
|
||||
@@ -39,6 +40,7 @@ const HANDLERS: CommandHandler[] = [
|
||||
handleSendPolicyCommand,
|
||||
handleUsageCommand,
|
||||
handleRestartCommand,
|
||||
handleTtsCommands,
|
||||
handleHelpCommand,
|
||||
handleCommandsListCommand,
|
||||
handleStatusCommand,
|
||||
|
||||
214
src/auto-reply/reply/commands-tts.ts
Normal file
214
src/auto-reply/reply/commands-tts.ts
Normal file
@@ -0,0 +1,214 @@
|
||||
import { logVerbose } from "../../globals.js";
|
||||
import type { ReplyPayload } from "../types.js";
|
||||
import type { CommandHandler } from "./commands-types.js";
|
||||
import {
|
||||
getLastTtsAttempt,
|
||||
getTtsMaxLength,
|
||||
getTtsProvider,
|
||||
isSummarizationEnabled,
|
||||
isTtsEnabled,
|
||||
resolveTtsApiKey,
|
||||
resolveTtsConfig,
|
||||
resolveTtsPrefsPath,
|
||||
setLastTtsAttempt,
|
||||
setSummarizationEnabled,
|
||||
setTtsEnabled,
|
||||
setTtsMaxLength,
|
||||
setTtsProvider,
|
||||
textToSpeech,
|
||||
} from "../../tts/tts.js";
|
||||
|
||||
/**
 * Extracts the argument text that follows a slash command.
 *
 * @param normalized - The full command body to inspect.
 * @param command - The command token to match, e.g. `/audio`.
 * @returns `""` when `normalized` is exactly `command`; the trimmed remainder
 *   when `normalized` begins with `command` followed by a space; `null` when
 *   `normalized` is some other text (including a longer token that merely
 *   shares the prefix, such as `/audiobook` for `/audio`).
 */
function parseCommandArg(normalized: string, command: string): string | null {
  const isBareCommand = normalized === command;
  if (isBareCommand) {
    return "";
  }

  // Require the separating space so a longer command name is not mistaken for this one.
  const prefixWithSpace = command + " ";
  if (!normalized.startsWith(prefixWithSpace)) {
    return null;
  }

  const remainder = normalized.substring(command.length);
  return remainder.trim();
}
|
||||
|
||||
/**
 * Handles the text-to-speech chat commands:
 * `/tts_on`, `/tts_off`, `/audio <text>`, `/tts_provider [openai|elevenlabs]`,
 * `/tts_limit [100-10000]`, `/tts_summary [on|off]` and `/tts_status`.
 *
 * @param params - Command context. Reads `params.command` (normalized body,
 *   sender authorization/id, channel) and `params.cfg`.
 * @param allowTextCommands - When false, the handler declines immediately.
 * @returns `null` when text commands are disallowed or the message is not one
 *   of the commands above; otherwise an object with `shouldContinue: false`,
 *   carrying a `reply` unless the sender is unauthorized.
 */
export const handleTtsCommands: CommandHandler = async (params, allowTextCommands) => {
  if (!allowTextCommands) return null;

  const normalized = params.command.commandBodyNormalized;
  // Prefix gate: anything that is not `/tts_*` or `/audio[ ...]` is not ours.
  if (
    !normalized.startsWith("/tts_") &&
    normalized !== "/audio" &&
    !normalized.startsWith("/audio ")
  ) {
    return null;
  }

  // Unauthorized senders get no reply; the command is swallowed.
  if (!params.command.isAuthorizedSender) {
    logVerbose(
      `Ignoring TTS command from unauthorized sender: ${params.command.senderId || "<unknown>"}`,
    );
    return { shouldContinue: false };
  }

  const config = resolveTtsConfig(params.cfg);
  const prefsPath = resolveTtsPrefsPath(config);

  if (normalized === "/tts_on") {
    setTtsEnabled(prefsPath, true);
    return { shouldContinue: false, reply: { text: "🔊 TTS enabled." } };
  }

  if (normalized === "/tts_off") {
    setTtsEnabled(prefsPath, false);
    return { shouldContinue: false, reply: { text: "🔇 TTS disabled." } };
  }

  // /audio <text>: synthesize the given text and reply with the audio file.
  const audioArg = parseCommandArg(normalized, "/audio");
  if (audioArg !== null) {
    if (!audioArg.trim()) {
      return { shouldContinue: false, reply: { text: "⚙️ Usage: /audio <text>" } };
    }

    const start = Date.now();
    const result = await textToSpeech({
      text: audioArg,
      cfg: params.cfg,
      channel: params.command.channel,
      prefsPath,
    });

    if (result.success && result.audioPath) {
      // Record the attempt so /tts_status can report it.
      setLastTtsAttempt({
        timestamp: Date.now(),
        success: true,
        textLength: audioArg.length,
        summarized: false,
        provider: result.provider,
        latencyMs: result.latencyMs,
      });
      const payload: ReplyPayload = {
        mediaUrl: result.audioPath,
        audioAsVoice: result.voiceCompatible === true,
      };
      return { shouldContinue: false, reply: payload };
    }

    // Failure: latency is measured locally rather than taken from the result.
    setLastTtsAttempt({
      timestamp: Date.now(),
      success: false,
      textLength: audioArg.length,
      summarized: false,
      error: result.error,
      latencyMs: Date.now() - start,
    });
    return {
      shouldContinue: false,
      reply: { text: `❌ Error generating audio: ${result.error ?? "unknown error"}` },
    };
  }

  // /tts_provider [openai|elevenlabs]: show or change the primary provider.
  const providerArg = parseCommandArg(normalized, "/tts_provider");
  if (providerArg !== null) {
    const currentProvider = getTtsProvider(config, prefsPath);
    if (!providerArg.trim()) {
      const fallback = currentProvider === "openai" ? "elevenlabs" : "openai";
      const hasOpenAI = Boolean(resolveTtsApiKey(config, "openai"));
      const hasElevenLabs = Boolean(resolveTtsApiKey(config, "elevenlabs"));
      return {
        shouldContinue: false,
        reply: {
          text:
            `🎙️ TTS provider\n` +
            `Primary: ${currentProvider}\n` +
            `Fallback: ${fallback}\n` +
            `OpenAI key: ${hasOpenAI ? "✅" : "❌"}\n` +
            `ElevenLabs key: ${hasElevenLabs ? "✅" : "❌"}\n` +
            `Usage: /tts_provider openai | elevenlabs`,
        },
      };
    }

    const requested = providerArg.trim().toLowerCase();
    if (requested !== "openai" && requested !== "elevenlabs") {
      return {
        shouldContinue: false,
        reply: { text: "⚙️ Usage: /tts_provider openai | elevenlabs" },
      };
    }

    setTtsProvider(prefsPath, requested);
    const fallback = requested === "openai" ? "elevenlabs" : "openai";
    return {
      shouldContinue: false,
      reply: { text: `✅ TTS provider set to ${requested} (fallback: ${fallback}).` },
    };
  }

  // /tts_limit [100-10000]: show or change the maximum text length.
  const limitArg = parseCommandArg(normalized, "/tts_limit");
  if (limitArg !== null) {
    const rawLimit = limitArg.trim();
    if (!rawLimit) {
      const currentLimit = getTtsMaxLength(prefsPath);
      return {
        shouldContinue: false,
        reply: { text: `📏 TTS limit: ${currentLimit} characters.` },
      };
    }
    // parseInt ignores trailing garbage ("500abc" -> 500, "1e3" -> 1), so only
    // accept an argument made entirely of decimal digits.
    const next = /^\d+$/.test(rawLimit) ? Number.parseInt(rawLimit, 10) : Number.NaN;
    if (!Number.isFinite(next) || next < 100 || next > 10_000) {
      return {
        shouldContinue: false,
        reply: { text: "⚙️ Usage: /tts_limit <100-10000>" },
      };
    }
    setTtsMaxLength(prefsPath, next);
    return {
      shouldContinue: false,
      reply: { text: `✅ TTS limit set to ${next} characters.` },
    };
  }

  // /tts_summary [on|off]: show or change the auto-summary preference.
  const summaryArg = parseCommandArg(normalized, "/tts_summary");
  if (summaryArg !== null) {
    if (!summaryArg.trim()) {
      const enabled = isSummarizationEnabled(prefsPath);
      return {
        shouldContinue: false,
        reply: { text: `📝 TTS auto-summary: ${enabled ? "on" : "off"}.` },
      };
    }
    const requested = summaryArg.trim().toLowerCase();
    if (requested !== "on" && requested !== "off") {
      return { shouldContinue: false, reply: { text: "⚙️ Usage: /tts_summary on|off" } };
    }
    setSummarizationEnabled(prefsPath, requested === "on");
    return {
      shouldContinue: false,
      reply: {
        text: requested === "on" ? "✅ TTS auto-summary enabled." : "❌ TTS auto-summary disabled.",
      },
    };
  }

  // /tts_status: current settings plus the most recent recorded attempt, if any.
  if (normalized === "/tts_status") {
    const enabled = isTtsEnabled(config, prefsPath);
    const provider = getTtsProvider(config, prefsPath);
    const hasKey = Boolean(resolveTtsApiKey(config, provider));
    const maxLength = getTtsMaxLength(prefsPath);
    const summarize = isSummarizationEnabled(prefsPath);
    const last = getLastTtsAttempt();
    const lines = [
      "📊 TTS status",
      `State: ${enabled ? "✅ enabled" : "❌ disabled"}`,
      `Provider: ${provider} (${hasKey ? "✅ key" : "❌ no key"})`,
      `Text limit: ${maxLength} chars`,
      `Auto-summary: ${summarize ? "on" : "off"}`,
    ];
    if (last) {
      const timeAgo = Math.round((Date.now() - last.timestamp) / 1000);
      lines.push("");
      lines.push(`Last attempt (${timeAgo}s ago): ${last.success ? "✅" : "❌"}`);
      lines.push(`Text: ${last.textLength} chars${last.summarized ? " (summarized)" : ""}`);
      if (last.success) {
        lines.push(`Provider: ${last.provider ?? "unknown"}`);
        lines.push(`Latency: ${last.latencyMs ?? 0}ms`);
      } else if (last.error) {
        lines.push(`Error: ${last.error}`);
      }
    }
    return { shouldContinue: false, reply: { text: lines.join("\n") } };
  }

  // A `/tts_*` message that matches none of the commands above is left unhandled.
  return null;
};
|
||||
@@ -13,6 +13,7 @@ import { formatAbortReplyText, tryFastAbortFromMessage } from "./abort.js";
|
||||
import { shouldSkipDuplicateInbound } from "./inbound-dedupe.js";
|
||||
import type { ReplyDispatcher, ReplyDispatchKind } from "./reply-dispatcher.js";
|
||||
import { isRoutableChannel, routeReply } from "./route-reply.js";
|
||||
import { maybeApplyTtsToPayload } from "../../tts/tts.js";
|
||||
|
||||
export type DispatchFromConfigResult = {
|
||||
queuedFinal: boolean;
|
||||
@@ -91,6 +92,7 @@ export async function dispatchReplyFromConfig(params: {
|
||||
const currentSurface = (ctx.Surface ?? ctx.Provider)?.toLowerCase();
|
||||
const shouldRouteToOriginating =
|
||||
isRoutableChannel(originatingChannel) && originatingTo && originatingChannel !== currentSurface;
|
||||
const ttsChannel = shouldRouteToOriginating ? originatingChannel : currentSurface;
|
||||
|
||||
/**
|
||||
* Helper to send a payload via route-reply (async).
|
||||
@@ -164,22 +166,36 @@ export async function dispatchReplyFromConfig(params: {
|
||||
{
|
||||
...params.replyOptions,
|
||||
onToolResult: (payload: ReplyPayload) => {
|
||||
if (shouldRouteToOriginating) {
|
||||
// Fire-and-forget for streaming tool results when routing.
|
||||
void sendPayloadAsync(payload);
|
||||
} else {
|
||||
// Synchronous dispatch to preserve callback timing.
|
||||
dispatcher.sendToolResult(payload);
|
||||
}
|
||||
const run = async () => {
|
||||
const ttsPayload = await maybeApplyTtsToPayload({
|
||||
payload,
|
||||
cfg,
|
||||
channel: ttsChannel,
|
||||
kind: "tool",
|
||||
});
|
||||
if (shouldRouteToOriginating) {
|
||||
await sendPayloadAsync(ttsPayload);
|
||||
} else {
|
||||
dispatcher.sendToolResult(ttsPayload);
|
||||
}
|
||||
};
|
||||
return run();
|
||||
},
|
||||
onBlockReply: (payload: ReplyPayload, context) => {
|
||||
if (shouldRouteToOriginating) {
|
||||
// Await routed sends so upstream can enforce ordering/timeouts.
|
||||
return sendPayloadAsync(payload, context?.abortSignal);
|
||||
} else {
|
||||
// Synchronous dispatch to preserve callback timing.
|
||||
dispatcher.sendBlockReply(payload);
|
||||
}
|
||||
const run = async () => {
|
||||
const ttsPayload = await maybeApplyTtsToPayload({
|
||||
payload,
|
||||
cfg,
|
||||
channel: ttsChannel,
|
||||
kind: "block",
|
||||
});
|
||||
if (shouldRouteToOriginating) {
|
||||
await sendPayloadAsync(ttsPayload, context?.abortSignal);
|
||||
} else {
|
||||
dispatcher.sendBlockReply(ttsPayload);
|
||||
}
|
||||
};
|
||||
return run();
|
||||
},
|
||||
},
|
||||
cfg,
|
||||
@@ -190,10 +206,16 @@ export async function dispatchReplyFromConfig(params: {
|
||||
let queuedFinal = false;
|
||||
let routedFinalCount = 0;
|
||||
for (const reply of replies) {
|
||||
const ttsReply = await maybeApplyTtsToPayload({
|
||||
payload: reply,
|
||||
cfg,
|
||||
channel: ttsChannel,
|
||||
kind: "final",
|
||||
});
|
||||
if (shouldRouteToOriginating && originatingChannel && originatingTo) {
|
||||
// Route final reply to originating channel.
|
||||
const result = await routeReply({
|
||||
payload: reply,
|
||||
payload: ttsReply,
|
||||
channel: originatingChannel,
|
||||
to: originatingTo,
|
||||
sessionKey: ctx.SessionKey,
|
||||
@@ -209,7 +231,7 @@ export async function dispatchReplyFromConfig(params: {
|
||||
queuedFinal = result.ok || queuedFinal;
|
||||
if (result.ok) routedFinalCount += 1;
|
||||
} else {
|
||||
queuedFinal = dispatcher.sendFinalReply(reply) || queuedFinal;
|
||||
queuedFinal = dispatcher.sendFinalReply(ttsReply) || queuedFinal;
|
||||
}
|
||||
}
|
||||
await dispatcher.waitForIdle();
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
import { resolveSessionAgentId } from "../../agents/agent-scope.js";
|
||||
import { resolveEffectiveMessagesConfig } from "../../agents/identity.js";
|
||||
import { normalizeChannelId } from "../../channels/plugins/index.js";
|
||||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||
import type { ClawdbotConfig } from "../../config/config.js";
|
||||
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
|
||||
import type { OriginatingChannelType } from "../templating.js";
|
||||
@@ -81,48 +80,6 @@ export async function routeReply(params: RouteReplyParams): Promise<RouteReplyRe
|
||||
: [];
|
||||
const replyToId = normalized.replyToId;
|
||||
|
||||
// Run message_sending hook (allows plugins to modify or cancel)
|
||||
const hookRunner = getGlobalHookRunner();
|
||||
const normalizedChannel = normalizeChannelId(channel);
|
||||
if (hookRunner && text.trim() && normalizedChannel) {
|
||||
try {
|
||||
const hookResult = await hookRunner.runMessageSending(
|
||||
{
|
||||
to,
|
||||
content: text,
|
||||
metadata: { channel, accountId, threadId },
|
||||
},
|
||||
{
|
||||
channelId: normalizedChannel,
|
||||
accountId: accountId ?? undefined,
|
||||
conversationId: to,
|
||||
},
|
||||
);
|
||||
|
||||
// Check if hook wants to cancel the message
|
||||
if (hookResult?.cancel) {
|
||||
return { ok: true }; // Silently cancel
|
||||
}
|
||||
|
||||
// Check if hook modified the content
|
||||
if (hookResult?.content !== undefined) {
|
||||
// Check if the modified content contains MEDIA: directive
|
||||
const mediaMatch = hookResult.content.match(/^MEDIA:(.+)$/m);
|
||||
if (mediaMatch) {
|
||||
// Extract media path and add to mediaUrls
|
||||
const mediaPath = mediaMatch[1].trim();
|
||||
mediaUrls = [mediaPath];
|
||||
// Remove MEDIA: directive from text (send audio only)
|
||||
text = hookResult.content.replace(/^MEDIA:.+$/m, "").trim();
|
||||
} else {
|
||||
text = hookResult.content;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Hook errors shouldn't block message sending
|
||||
}
|
||||
}
|
||||
|
||||
// Skip empty replies.
|
||||
if (!text.trim() && mediaUrls.length === 0) {
|
||||
return { ok: true };
|
||||
|
||||
Reference in New Issue
Block a user