From 5428c97685bdae1c2a388fd54ce352a2611b30a3 Mon Sep 17 00:00:00 2001 From: Glucksberg Date: Fri, 23 Jan 2026 00:38:43 +0000 Subject: [PATCH] feat(extensions): add telegram-tts extension for voice responses Add a new extension that provides automatic text-to-speech for chat responses using ElevenLabs API. Features: - `speak` tool for converting text to voice messages - RPC methods: tts.status, tts.enable, tts.disable, tts.convert - User preferences file for persistent TTS state - Configurable voice ID, model, and max text length Co-Authored-By: Claude Opus 4.5 --- extensions/telegram-tts/README.md | 122 ++++++++++ extensions/telegram-tts/clawdbot.plugin.json | 81 +++++++ extensions/telegram-tts/index.ts | 224 +++++++++++++++++++ extensions/telegram-tts/package.json | 7 + 4 files changed, 434 insertions(+) create mode 100644 extensions/telegram-tts/README.md create mode 100644 extensions/telegram-tts/clawdbot.plugin.json create mode 100644 extensions/telegram-tts/index.ts create mode 100644 extensions/telegram-tts/package.json diff --git a/extensions/telegram-tts/README.md b/extensions/telegram-tts/README.md new file mode 100644 index 000000000..d0adaab20 --- /dev/null +++ b/extensions/telegram-tts/README.md @@ -0,0 +1,122 @@ +# Telegram TTS Extension + +Automatic text-to-speech for chat responses using ElevenLabs. + +## Features + +- **`speak` Tool**: Converts text to speech and sends as voice message +- **RPC Methods**: Control TTS via Gateway (`tts.status`, `tts.enable`, `tts.disable`, `tts.convert`) +- **User Preferences**: Persistent TTS state via JSON file +- **Multi-channel**: Works with Telegram and other channels + +## Requirements + +- ElevenLabs API key +- `sag` CLI tool (ElevenLabs TTS wrapper) + +## Installation + +The extension is bundled with Clawdbot. Enable it in your config: + +```json +{ + "plugins": { + "entries": { + "telegram-tts": { + "enabled": true, + "elevenlabs": { + "apiKey": "your-api-key" + } + } + } + } +} +``` + +Or set the API key via environment variable: + +```bash +export ELEVENLABS_API_KEY=your-api-key +``` + +## Configuration + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | boolean | `false` | Enable the plugin | +| `provider` | string | `"elevenlabs"` | TTS provider | +| `elevenlabs.apiKey` | string | - | ElevenLabs API key | +| `elevenlabs.voiceId` | string | `"pMsXgVXv3BLzUgSXRplE"` | Voice ID | +| `elevenlabs.modelId` | string | `"eleven_multilingual_v2"` | Model ID | +| `prefsPath` | string | `~/clawd/.user-preferences.json` | User preferences file | +| `maxTextLength` | number | `4000` | Max characters for TTS | + +## Usage + +### Agent Tool + +The agent can use the `speak` tool to send voice messages: + +``` +User: Send me a voice message saying hello +Agent: [calls speak({ text: "Hello! How can I help you today?" })] +``` + +### RPC Methods + +```bash +# Check TTS status +clawdbot gateway call tts.status + +# Enable/disable TTS +clawdbot gateway call tts.enable +clawdbot gateway call tts.disable + +# Convert text to audio +clawdbot gateway call tts.convert '{"text": "Hello world"}' +``` + +### Telegram Commands + +Add custom commands to toggle TTS mode: + +```json +{ + "channels": { + "telegram": { + "customCommands": [ + {"command": "tts_on", "description": "Enable voice responses"}, + {"command": "tts_off", "description": "Disable voice responses"} + ] + } + } +} +``` + +Then add handling instructions to your agent workspace (CLAUDE.md or TOOLS.md). + +## Dependencies + +This extension requires the `sag` CLI tool. On Linux, you can create a Python wrapper: + +```python +#!/usr/bin/env python3 +# ~/.local/bin/sag +from elevenlabs.client import ElevenLabs +import sys, os, tempfile + +client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"]) +audio = client.text_to_speech.convert( + voice_id=os.environ.get("ELEVENLABS_VOICE_ID", "pMsXgVXv3BLzUgSXRplE"), + model_id="eleven_multilingual_v2", + text=sys.argv[1] +) +with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f: + for chunk in audio: + f.write(chunk) + print(f.name) +``` + +## License + +MIT diff --git a/extensions/telegram-tts/clawdbot.plugin.json b/extensions/telegram-tts/clawdbot.plugin.json new file mode 100644 index 000000000..515bec2a7 --- /dev/null +++ b/extensions/telegram-tts/clawdbot.plugin.json @@ -0,0 +1,81 @@ +{ + "id": "telegram-tts", + "uiHints": { + "enabled": { + "label": "Enable TTS", + "help": "Automatically convert text responses to voice messages" + }, + "provider": { + "label": "TTS Provider" + }, + "elevenlabs.apiKey": { + "label": "ElevenLabs API Key", + "sensitive": true + }, + "elevenlabs.voiceId": { + "label": "ElevenLabs Voice ID", + "help": "Default: pMsXgVXv3BLzUgSXRplE (Borislav)" + }, + "elevenlabs.modelId": { + "label": "ElevenLabs Model ID", + "help": "Default: eleven_multilingual_v2" + }, + "prefsPath": { + "label": "User Preferences File", + "help": "Path to JSON file storing TTS state", + "advanced": true + }, + "maxTextLength": { + "label": "Max Text Length", + "help": "Maximum characters to convert to speech", + "advanced": true + } + }, + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false + }, + "provider": { + "type": "string", + "enum": ["elevenlabs", "openai"], + "default": "elevenlabs" + }, + "elevenlabs": { + "type": "object", + "additionalProperties": false, + "properties": { + "apiKey": { + "type": "string" + }, + "voiceId": { + "type": "string", + "default": "pMsXgVXv3BLzUgSXRplE" + }, + "modelId": { + "type": "string", + "default": "eleven_multilingual_v2" + } + } + }, + "prefsPath": { + "type": "string" + }, + "maxTextLength": { + "type": "integer", + "minimum": 1, + "default": 4000 + }, + "channels": { + "type": "array", + "items": { + "type": "string" + }, + "default": ["telegram"] + } + } + } +} diff --git a/extensions/telegram-tts/index.ts b/extensions/telegram-tts/index.ts new file mode 100644 index 000000000..c3d812c36 --- /dev/null +++ b/extensions/telegram-tts/index.ts @@ -0,0 +1,224 @@ +/** + * telegram-tts - Automatic TTS for chat responses + * + * This plugin provides a `speak` tool that converts text to speech using + * ElevenLabs API and sends the response as a voice message. + * + * When TTS mode is enabled (via user preferences or config), the agent + * is instructed to use the speak tool for all responses. + */ + +import { execSync } from "child_process"; +import { existsSync, readFileSync, writeFileSync } from "fs"; +import { join } from "path"; +import type { PluginApi, PluginConfig } from "clawdbot"; + +const PLUGIN_ID = "telegram-tts"; + +interface TelegramTtsConfig { + enabled?: boolean; + provider?: "elevenlabs" | "openai"; + elevenlabs?: { + apiKey?: string; + voiceId?: string; + modelId?: string; + }; + prefsPath?: string; + maxTextLength?: number; + channels?: string[]; +} + +interface UserPreferences { + tts?: { + enabled?: boolean; + }; +} + +/** + * Load environment variables from .clawdbot/.env + */ +function loadEnv(): Record { + const envPath = join(process.env.HOME || "/home/dev", ".clawdbot", ".env"); + const env: Record = { ...process.env } as Record; + + if (existsSync(envPath)) { + const content = readFileSync(envPath, "utf8"); + for (const line of content.split("\n")) { + const trimmed = line.trim(); + if (trimmed && !trimmed.startsWith("#")) { + const [key, ...valueParts] = trimmed.split("="); + if (key && valueParts.length > 0) { + let value = valueParts.join("="); + // Remove quotes if present + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + value = value.slice(1, -1); + } + env[key.trim()] = value; + } + } + } + } + return env; +} + +/** + * Check if TTS is enabled in user preferences + */ +function isTtsEnabled(prefsPath: string): boolean { + try { + if (!existsSync(prefsPath)) return false; + const prefs: UserPreferences = JSON.parse(readFileSync(prefsPath, "utf8")); + return prefs?.tts?.enabled === true; + } catch { + return false; + } +} + +/** + * Set TTS enabled state in user preferences + */ +function setTtsEnabled(prefsPath: string, enabled: boolean): void { + let prefs: UserPreferences = {}; + try { + if (existsSync(prefsPath)) { + prefs = JSON.parse(readFileSync(prefsPath, "utf8")); + } + } catch { + // ignore + } + prefs.tts = { enabled }; + writeFileSync(prefsPath, JSON.stringify(prefs, null, 2)); +} + +/** + * Convert text to audio using sag CLI (ElevenLabs wrapper) + */ +function textToAudio(text: string): string | null { + try { + const escapedText = text.replace(/'/g, "'\\''"); + const env = loadEnv(); + + const result = execSync(`sag '${escapedText}'`, { + encoding: "utf8", + timeout: 60000, + env, + }).trim(); + + if (result && existsSync(result)) { + return result; + } + return null; + } catch (err) { + console.error(`[${PLUGIN_ID}] TTS error:`, (err as Error).message); + return null; + } +} + +/** + * Plugin registration + */ +export default function register(api: PluginApi) { + const log = api.logger; + const config = (api.pluginConfig || {}) as TelegramTtsConfig; + const prefsPath = + config.prefsPath || + process.env.CLAWDBOT_TTS_PREFS || + join(process.env.HOME || "/home/dev", "clawd", ".user-preferences.json"); + + log.info(`[${PLUGIN_ID}] Registering plugin...`); + log.info(`[${PLUGIN_ID}] Preferences path: ${prefsPath}`); + + // Register the 'speak' tool for TTS + api.registerTool({ + name: "speak", + description: + "Convert text to speech and send as voice message. Use this tool when TTS mode is enabled or when the user requests an audio response.", + parameters: { + type: "object", + properties: { + text: { + type: "string", + description: "The text to convert to speech and send as voice message", + }, + }, + required: ["text"], + }, + execute: async (_id: string, params: { text: string }) => { + const { text } = params; + log.info(`[${PLUGIN_ID}] speak() called, text length: ${text?.length || 0}`); + + if (!text) { + return { content: [{ type: "text", text: "Error: No text provided" }] }; + } + + const maxLen = config.maxTextLength || 4000; + if (text.length > maxLen) { + return { + content: [ + { + type: "text", + text: `Error: Text too long (${text.length} chars, max ${maxLen})`, + }, + ], + }; + } + + const audioPath = textToAudio(text); + + if (audioPath) { + log.info(`[${PLUGIN_ID}] Audio generated: ${audioPath}`); + return { + content: [{ type: "text", text: `Voice message generated successfully.` }], + media: audioPath, + asVoice: true, + }; + } + + log.error(`[${PLUGIN_ID}] TTS conversion failed`); + return { + content: [{ type: "text", text: `TTS conversion failed. Original: ${text}` }], + }; + }, + }); + + // Register Gateway RPC methods + api.registerGatewayMethod("tts.status", async () => ({ + enabled: isTtsEnabled(prefsPath), + prefsPath, + pluginId: PLUGIN_ID, + config: { + provider: config.provider || "elevenlabs", + maxTextLength: config.maxTextLength || 4000, + }, + })); + + api.registerGatewayMethod("tts.enable", async () => { + setTtsEnabled(prefsPath, true); + return { ok: true, enabled: true }; + }); + + api.registerGatewayMethod("tts.disable", async () => { + setTtsEnabled(prefsPath, false); + return { ok: true, enabled: false }; + }); + + api.registerGatewayMethod("tts.convert", async (params: { text: string }) => { + if (!params.text) return { ok: false, error: "No text provided" }; + const audioPath = textToAudio(params.text); + return audioPath ? { ok: true, audioPath } : { ok: false, error: "Conversion failed" }; + }); + + log.info( + `[${PLUGIN_ID}] Plugin ready. TTS is currently ${isTtsEnabled(prefsPath) ? "ENABLED" : "disabled"}` + ); +} + +export const meta = { + id: PLUGIN_ID, + name: "Telegram TTS", + description: "Automatic text-to-speech for chat responses using ElevenLabs", + version: "0.1.0", +}; diff --git a/extensions/telegram-tts/package.json b/extensions/telegram-tts/package.json new file mode 100644 index 000000000..d1248f111 --- /dev/null +++ b/extensions/telegram-tts/package.json @@ -0,0 +1,7 @@ +{ + "name": "@clawdbot/telegram-tts", + "version": "0.1.0", + "private": true, + "description": "Automatic text-to-speech for chat responses using ElevenLabs", + "main": "index.ts" +}