feat: move TTS into core (#1559) (thanks @Glucksberg)

This commit is contained in:
Peter Steinberger
2026-01-24 07:57:46 +00:00
parent aef88cd9f1
commit d9a467fe3b
26 changed files with 1522 additions and 1649 deletions

View File

@@ -16,6 +16,7 @@ import {
import { handleAllowlistCommand } from "./commands-allowlist.js";
import { handleSubagentsCommand } from "./commands-subagents.js";
import { handleModelsCommand } from "./commands-models.js";
import { handleTtsCommands } from "./commands-tts.js";
import {
handleAbortTrigger,
handleActivationCommand,
@@ -39,6 +40,7 @@ const HANDLERS: CommandHandler[] = [
handleSendPolicyCommand,
handleUsageCommand,
handleRestartCommand,
handleTtsCommands,
handleHelpCommand,
handleCommandsListCommand,
handleStatusCommand,

View File

@@ -0,0 +1,214 @@
import { logVerbose } from "../../globals.js";
import type { ReplyPayload } from "../types.js";
import type { CommandHandler } from "./commands-types.js";
import {
getLastTtsAttempt,
getTtsMaxLength,
getTtsProvider,
isSummarizationEnabled,
isTtsEnabled,
resolveTtsApiKey,
resolveTtsConfig,
resolveTtsPrefsPath,
setLastTtsAttempt,
setSummarizationEnabled,
setTtsEnabled,
setTtsMaxLength,
setTtsProvider,
textToSpeech,
} from "../../tts/tts.js";
function parseCommandArg(normalized: string, command: string): string | null {
if (normalized === command) return "";
if (normalized.startsWith(`${command} `)) return normalized.slice(command.length).trim();
return null;
}
export const handleTtsCommands: CommandHandler = async (params, allowTextCommands) => {
if (!allowTextCommands) return null;
const normalized = params.command.commandBodyNormalized;
if (
!normalized.startsWith("/tts_") &&
normalized !== "/audio" &&
!normalized.startsWith("/audio ")
) {
return null;
}
if (!params.command.isAuthorizedSender) {
logVerbose(
`Ignoring TTS command from unauthorized sender: ${params.command.senderId || "<unknown>"}`,
);
return { shouldContinue: false };
}
const config = resolveTtsConfig(params.cfg);
const prefsPath = resolveTtsPrefsPath(config);
if (normalized === "/tts_on") {
setTtsEnabled(prefsPath, true);
return { shouldContinue: false, reply: { text: "🔊 TTS enabled." } };
}
if (normalized === "/tts_off") {
setTtsEnabled(prefsPath, false);
return { shouldContinue: false, reply: { text: "🔇 TTS disabled." } };
}
const audioArg = parseCommandArg(normalized, "/audio");
if (audioArg !== null) {
if (!audioArg.trim()) {
return { shouldContinue: false, reply: { text: "⚙️ Usage: /audio <text>" } };
}
const start = Date.now();
const result = await textToSpeech({
text: audioArg,
cfg: params.cfg,
channel: params.command.channel,
prefsPath,
});
if (result.success && result.audioPath) {
setLastTtsAttempt({
timestamp: Date.now(),
success: true,
textLength: audioArg.length,
summarized: false,
provider: result.provider,
latencyMs: result.latencyMs,
});
const payload: ReplyPayload = {
mediaUrl: result.audioPath,
audioAsVoice: result.voiceCompatible === true,
};
return { shouldContinue: false, reply: payload };
}
setLastTtsAttempt({
timestamp: Date.now(),
success: false,
textLength: audioArg.length,
summarized: false,
error: result.error,
latencyMs: Date.now() - start,
});
return {
shouldContinue: false,
reply: { text: `❌ Error generating audio: ${result.error ?? "unknown error"}` },
};
}
const providerArg = parseCommandArg(normalized, "/tts_provider");
if (providerArg !== null) {
const currentProvider = getTtsProvider(config, prefsPath);
if (!providerArg.trim()) {
const fallback = currentProvider === "openai" ? "elevenlabs" : "openai";
const hasOpenAI = Boolean(resolveTtsApiKey(config, "openai"));
const hasElevenLabs = Boolean(resolveTtsApiKey(config, "elevenlabs"));
return {
shouldContinue: false,
reply: {
text:
`🎙️ TTS provider\n` +
`Primary: ${currentProvider}\n` +
`Fallback: ${fallback}\n` +
`OpenAI key: ${hasOpenAI ? "✅" : "❌"}\n` +
`ElevenLabs key: ${hasElevenLabs ? "✅" : "❌"}\n` +
`Usage: /tts_provider openai | elevenlabs`,
},
};
}
const requested = providerArg.trim().toLowerCase();
if (requested !== "openai" && requested !== "elevenlabs") {
return {
shouldContinue: false,
reply: { text: "⚙️ Usage: /tts_provider openai | elevenlabs" },
};
}
setTtsProvider(prefsPath, requested);
const fallback = requested === "openai" ? "elevenlabs" : "openai";
return {
shouldContinue: false,
reply: { text: `✅ TTS provider set to ${requested} (fallback: ${fallback}).` },
};
}
const limitArg = parseCommandArg(normalized, "/tts_limit");
if (limitArg !== null) {
if (!limitArg.trim()) {
const currentLimit = getTtsMaxLength(prefsPath);
return {
shouldContinue: false,
reply: { text: `📏 TTS limit: ${currentLimit} characters.` },
};
}
const next = Number.parseInt(limitArg.trim(), 10);
if (!Number.isFinite(next) || next < 100 || next > 10_000) {
return {
shouldContinue: false,
reply: { text: "⚙️ Usage: /tts_limit <100-10000>" },
};
}
setTtsMaxLength(prefsPath, next);
return {
shouldContinue: false,
reply: { text: `✅ TTS limit set to ${next} characters.` },
};
}
const summaryArg = parseCommandArg(normalized, "/tts_summary");
if (summaryArg !== null) {
if (!summaryArg.trim()) {
const enabled = isSummarizationEnabled(prefsPath);
return {
shouldContinue: false,
reply: { text: `📝 TTS auto-summary: ${enabled ? "on" : "off"}.` },
};
}
const requested = summaryArg.trim().toLowerCase();
if (requested !== "on" && requested !== "off") {
return { shouldContinue: false, reply: { text: "⚙️ Usage: /tts_summary on|off" } };
}
setSummarizationEnabled(prefsPath, requested === "on");
return {
shouldContinue: false,
reply: {
text: requested === "on" ? "✅ TTS auto-summary enabled." : "❌ TTS auto-summary disabled.",
},
};
}
if (normalized === "/tts_status") {
const enabled = isTtsEnabled(config, prefsPath);
const provider = getTtsProvider(config, prefsPath);
const hasKey = Boolean(resolveTtsApiKey(config, provider));
const maxLength = getTtsMaxLength(prefsPath);
const summarize = isSummarizationEnabled(prefsPath);
const last = getLastTtsAttempt();
const lines = [
"📊 TTS status",
`State: ${enabled ? "✅ enabled" : "❌ disabled"}`,
`Provider: ${provider} (${hasKey ? "✅ key" : "❌ no key"})`,
`Text limit: ${maxLength} chars`,
`Auto-summary: ${summarize ? "on" : "off"}`,
];
if (last) {
const timeAgo = Math.round((Date.now() - last.timestamp) / 1000);
lines.push("");
lines.push(`Last attempt (${timeAgo}s ago): ${last.success ? "✅" : "❌"}`);
lines.push(`Text: ${last.textLength} chars${last.summarized ? " (summarized)" : ""}`);
if (last.success) {
lines.push(`Provider: ${last.provider ?? "unknown"}`);
lines.push(`Latency: ${last.latencyMs ?? 0}ms`);
} else if (last.error) {
lines.push(`Error: ${last.error}`);
}
}
return { shouldContinue: false, reply: { text: lines.join("\n") } };
}
return null;
};

View File

@@ -13,6 +13,7 @@ import { formatAbortReplyText, tryFastAbortFromMessage } from "./abort.js";
import { shouldSkipDuplicateInbound } from "./inbound-dedupe.js";
import type { ReplyDispatcher, ReplyDispatchKind } from "./reply-dispatcher.js";
import { isRoutableChannel, routeReply } from "./route-reply.js";
import { maybeApplyTtsToPayload } from "../../tts/tts.js";
export type DispatchFromConfigResult = {
queuedFinal: boolean;
@@ -91,6 +92,7 @@ export async function dispatchReplyFromConfig(params: {
const currentSurface = (ctx.Surface ?? ctx.Provider)?.toLowerCase();
const shouldRouteToOriginating =
isRoutableChannel(originatingChannel) && originatingTo && originatingChannel !== currentSurface;
const ttsChannel = shouldRouteToOriginating ? originatingChannel : currentSurface;
/**
* Helper to send a payload via route-reply (async).
@@ -164,22 +166,36 @@ export async function dispatchReplyFromConfig(params: {
{
...params.replyOptions,
onToolResult: (payload: ReplyPayload) => {
if (shouldRouteToOriginating) {
// Fire-and-forget for streaming tool results when routing.
void sendPayloadAsync(payload);
} else {
// Synchronous dispatch to preserve callback timing.
dispatcher.sendToolResult(payload);
}
const run = async () => {
const ttsPayload = await maybeApplyTtsToPayload({
payload,
cfg,
channel: ttsChannel,
kind: "tool",
});
if (shouldRouteToOriginating) {
await sendPayloadAsync(ttsPayload);
} else {
dispatcher.sendToolResult(ttsPayload);
}
};
return run();
},
onBlockReply: (payload: ReplyPayload, context) => {
if (shouldRouteToOriginating) {
// Await routed sends so upstream can enforce ordering/timeouts.
return sendPayloadAsync(payload, context?.abortSignal);
} else {
// Synchronous dispatch to preserve callback timing.
dispatcher.sendBlockReply(payload);
}
const run = async () => {
const ttsPayload = await maybeApplyTtsToPayload({
payload,
cfg,
channel: ttsChannel,
kind: "block",
});
if (shouldRouteToOriginating) {
await sendPayloadAsync(ttsPayload, context?.abortSignal);
} else {
dispatcher.sendBlockReply(ttsPayload);
}
};
return run();
},
},
cfg,
@@ -190,10 +206,16 @@ export async function dispatchReplyFromConfig(params: {
let queuedFinal = false;
let routedFinalCount = 0;
for (const reply of replies) {
const ttsReply = await maybeApplyTtsToPayload({
payload: reply,
cfg,
channel: ttsChannel,
kind: "final",
});
if (shouldRouteToOriginating && originatingChannel && originatingTo) {
// Route final reply to originating channel.
const result = await routeReply({
payload: reply,
payload: ttsReply,
channel: originatingChannel,
to: originatingTo,
sessionKey: ctx.SessionKey,
@@ -209,7 +231,7 @@ export async function dispatchReplyFromConfig(params: {
queuedFinal = result.ok || queuedFinal;
if (result.ok) routedFinalCount += 1;
} else {
queuedFinal = dispatcher.sendFinalReply(reply) || queuedFinal;
queuedFinal = dispatcher.sendFinalReply(ttsReply) || queuedFinal;
}
}
await dispatcher.waitForIdle();

View File

@@ -10,7 +10,6 @@
import { resolveSessionAgentId } from "../../agents/agent-scope.js";
import { resolveEffectiveMessagesConfig } from "../../agents/identity.js";
import { normalizeChannelId } from "../../channels/plugins/index.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import type { ClawdbotConfig } from "../../config/config.js";
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
import type { OriginatingChannelType } from "../templating.js";
@@ -81,48 +80,6 @@ export async function routeReply(params: RouteReplyParams): Promise<RouteReplyRe
: [];
const replyToId = normalized.replyToId;
// Run message_sending hook (allows plugins to modify or cancel)
const hookRunner = getGlobalHookRunner();
const normalizedChannel = normalizeChannelId(channel);
if (hookRunner && text.trim() && normalizedChannel) {
try {
const hookResult = await hookRunner.runMessageSending(
{
to,
content: text,
metadata: { channel, accountId, threadId },
},
{
channelId: normalizedChannel,
accountId: accountId ?? undefined,
conversationId: to,
},
);
// Check if hook wants to cancel the message
if (hookResult?.cancel) {
return { ok: true }; // Silently cancel
}
// Check if hook modified the content
if (hookResult?.content !== undefined) {
// Check if the modified content contains MEDIA: directive
const mediaMatch = hookResult.content.match(/^MEDIA:(.+)$/m);
if (mediaMatch) {
// Extract media path and add to mediaUrls
const mediaPath = mediaMatch[1].trim();
mediaUrls = [mediaPath];
// Remove MEDIA: directive from text (send audio only)
text = hookResult.content.replace(/^MEDIA:.+$/m, "").trim();
} else {
text = hookResult.content;
}
}
} catch {
// Hook errors shouldn't block message sending
}
}
// Skip empty replies.
if (!text.trim() && mediaUrls.length === 0) {
return { ok: true };