feat: move TTS into core (#1559) (thanks @Glucksberg)
This commit is contained in:
@@ -17,6 +17,7 @@ import { createSessionsListTool } from "./tools/sessions-list-tool.js";
|
||||
import { createSessionsSendTool } from "./tools/sessions-send-tool.js";
|
||||
import { createSessionsSpawnTool } from "./tools/sessions-spawn-tool.js";
|
||||
import { createWebFetchTool, createWebSearchTool } from "./tools/web-tools.js";
|
||||
import { createTtsTool } from "./tools/tts-tool.js";
|
||||
|
||||
export function createClawdbotTools(options?: {
|
||||
browserControlUrl?: string;
|
||||
@@ -96,6 +97,10 @@ export function createClawdbotTools(options?: {
|
||||
replyToMode: options?.replyToMode,
|
||||
hasRepliedRef: options?.hasRepliedRef,
|
||||
}),
|
||||
createTtsTool({
|
||||
agentChannel: options?.agentChannel,
|
||||
config: options?.config,
|
||||
}),
|
||||
createGatewayTool({
|
||||
agentSessionKey: options?.agentSessionKey,
|
||||
config: options?.config,
|
||||
|
||||
60
src/agents/tools/tts-tool.ts
Normal file
60
src/agents/tools/tts-tool.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import type { ClawdbotConfig } from "../../config/config.js";
|
||||
import type { GatewayMessageChannel } from "../../utils/message-channel.js";
|
||||
import { textToSpeech } from "../../tts/tts.js";
|
||||
import type { AnyAgentTool } from "./common.js";
|
||||
import { readStringParam } from "./common.js";
|
||||
|
||||
const TtsToolSchema = Type.Object({
|
||||
text: Type.String({ description: "Text to convert to speech." }),
|
||||
channel: Type.Optional(
|
||||
Type.String({ description: "Optional channel id to pick output format (e.g. telegram)." }),
|
||||
),
|
||||
});
|
||||
|
||||
export function createTtsTool(opts?: {
|
||||
config?: ClawdbotConfig;
|
||||
agentChannel?: GatewayMessageChannel;
|
||||
}): AnyAgentTool {
|
||||
return {
|
||||
label: "TTS",
|
||||
name: "tts",
|
||||
description:
|
||||
"Convert text to speech and return a MEDIA: path. Use when the user requests audio or TTS is enabled. Copy the MEDIA line exactly.",
|
||||
parameters: TtsToolSchema,
|
||||
execute: async (_toolCallId, args) => {
|
||||
const params = args as Record<string, unknown>;
|
||||
const text = readStringParam(params, "text", { required: true });
|
||||
const channel = readStringParam(params, "channel");
|
||||
const cfg = opts?.config ?? loadConfig();
|
||||
const result = await textToSpeech({
|
||||
text,
|
||||
cfg,
|
||||
channel: channel ?? opts?.agentChannel,
|
||||
});
|
||||
|
||||
if (result.success && result.audioPath) {
|
||||
const lines: string[] = [];
|
||||
// Tag Telegram Opus output as a voice bubble instead of a file attachment.
|
||||
if (result.voiceCompatible) lines.push("[[audio_as_voice]]");
|
||||
lines.push(`MEDIA:${result.audioPath}`);
|
||||
return {
|
||||
content: [{ type: "text", text: lines.join("\n") }],
|
||||
details: { audioPath: result.audioPath, provider: result.provider },
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: result.error ?? "TTS conversion failed",
|
||||
},
|
||||
],
|
||||
details: { error: result.error },
|
||||
};
|
||||
},
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user