feat(telegram-tts): add latency logging, status tracking, and unit tests
- Add latency metrics to summarizeText and textToSpeech functions
- Add /tts_status command showing config and last attempt result
- Add /tts_summary command for feature flag control
- Fix atomic write to clean up temp file on rename failure
- Add timer.unref() to prevent blocking process shutdown
- Add unit tests for validation functions (13 tests)
- Update README with new commands and features

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
4b24753be7
commit
104d977d12
@@ -4,15 +4,18 @@ Automatic text-to-speech for chat responses using ElevenLabs or OpenAI.
|
||||
|
||||
## Features
|
||||
|
||||
- **Auto-TTS**: Automatically converts all text responses to voice when enabled
|
||||
- **`speak` Tool**: Converts text to speech and sends as voice message
|
||||
- **RPC Methods**: Control TTS via Gateway (`tts.status`, `tts.enable`, `tts.disable`, `tts.convert`, `tts.providers`)
|
||||
- **User Preferences**: Persistent TTS state via JSON file
|
||||
- **Multi-provider**: ElevenLabs and OpenAI TTS support
|
||||
- **User Commands**: `/tts_on`, `/tts_off`, `/tts_provider`, `/tts_limit`, `/tts_summary`, `/tts_status`
|
||||
- **Auto-Summarization**: Long texts are automatically summarized before TTS conversion
|
||||
- **Multi-provider**: ElevenLabs and OpenAI TTS with automatic fallback
|
||||
- **Self-contained**: No external CLI dependencies - calls APIs directly
|
||||
|
||||
## Requirements
|
||||
|
||||
- ElevenLabs API key OR OpenAI API key
|
||||
- **For TTS**: ElevenLabs API key OR OpenAI API key
|
||||
- **For Auto-Summarization**: OpenAI API key (uses gpt-4o-mini to summarize long texts)
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -70,19 +73,20 @@ export OPENAI_API_KEY=your-api-key
|
||||
| Option | Type | Default | Description |
|
||||
|--------|------|---------|-------------|
|
||||
| `enabled` | boolean | `false` | Enable the plugin |
|
||||
| `provider` | string | `"elevenlabs"` | TTS provider (`elevenlabs` or `openai`) |
|
||||
| `provider` | string | `"openai"` | TTS provider (`elevenlabs` or `openai`) |
|
||||
| `elevenlabs.apiKey` | string | - | ElevenLabs API key |
|
||||
| `elevenlabs.voiceId` | string | `"pMsXgVXv3BLzUgSXRplE"` | ElevenLabs Voice ID |
|
||||
| `elevenlabs.modelId` | string | `"eleven_multilingual_v2"` | ElevenLabs Model ID |
|
||||
| `openai.apiKey` | string | - | OpenAI API key |
|
||||
| `openai.model` | string | `"tts-1"` | OpenAI model (`tts-1` or `tts-1-hd`) |
|
||||
| `openai.model` | string | `"gpt-4o-mini-tts"` | OpenAI model (`gpt-4o-mini-tts`, `tts-1`, or `tts-1-hd`) |
|
||||
| `openai.voice` | string | `"alloy"` | OpenAI voice |
|
||||
| `prefsPath` | string | `~/clawd/.user-preferences.json` | User preferences file |
|
||||
| `maxTextLength` | number | `4000` | Max characters for TTS |
|
||||
| `timeoutMs` | number | `30000` | API request timeout in milliseconds |
|
||||
|
||||
### OpenAI Voices
|
||||
|
||||
Available voices: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`
|
||||
Available voices: `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -114,23 +118,28 @@ clawdbot gateway call tts.providers
|
||||
|
||||
### Telegram Commands
|
||||
|
||||
Add custom commands to toggle TTS mode:
|
||||
The plugin registers the following commands automatically:
|
||||
|
||||
```json
|
||||
{
|
||||
"channels": {
|
||||
"telegram": {
|
||||
"customCommands": [
|
||||
{"command": "tts_on", "description": "Enable voice responses"},
|
||||
{"command": "tts_off", "description": "Disable voice responses"},
|
||||
{"command": "audio", "description": "Send response as voice message"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/tts_on` | Enable auto-TTS for all responses |
|
||||
| `/tts_off` | Disable auto-TTS |
|
||||
| `/tts_provider [openai\|elevenlabs]` | Switch TTS provider (with fallback) |
|
||||
| `/tts_limit [chars]` | Set max text length before summarization (default: 1500) |
|
||||
| `/tts_summary [on\|off]` | Enable/disable auto-summarization for long texts |
|
||||
| `/tts_status` | Show TTS status, config, and last attempt result |
|
||||
|
||||
Then add handling instructions to your agent workspace (CLAUDE.md or TOOLS.md).
|
||||
## Auto-Summarization
|
||||
|
||||
When enabled (default), texts exceeding the configured limit are automatically summarized using OpenAI's gpt-4o-mini before TTS conversion. This ensures long responses can still be converted to audio.
|
||||
|
||||
**Requirements**: OpenAI API key must be configured for summarization to work, even if using ElevenLabs for TTS.
|
||||
|
||||
**Behavior**:
|
||||
- Texts under the limit are converted directly
|
||||
- Texts over the limit are summarized first, then converted
|
||||
- If summarization is disabled (`/tts_summary off`), long texts are skipped (no audio)
|
||||
- After summarization, a hard limit is applied to prevent oversized TTS requests
|
||||
|
||||
## License
|
||||
|
||||
|
||||
101
extensions/telegram-tts/index.test.ts
Normal file
101
extensions/telegram-tts/index.test.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
 * Unit tests for the telegram-tts extension.
 *
 * Covers the plugin metadata and the pure validation helpers that the
 * extension exposes through its `_test` export.
 */

import { describe, expect, it } from "vitest";
import { _test, meta } from "./index.js";

const { isValidVoiceId, isValidOpenAIVoice, isValidOpenAIModel, OPENAI_TTS_MODELS } = _test;

describe("telegram-tts", () => {
  describe("meta", () => {
    it("should have correct plugin metadata", () => {
      expect(meta.id).toBe("telegram-tts");
      expect(meta.name).toBe("Telegram TTS");
      expect(meta.version).toMatch(/^\d+\.\d+\.\d+$/);
    });
  });

  describe("isValidVoiceId", () => {
    it("should accept valid ElevenLabs voice IDs", () => {
      // Real ElevenLabs voice ID format (20 alphanumeric chars)
      expect(isValidVoiceId("pMsXgVXv3BLzUgSXRplE")).toBe(true);
      expect(isValidVoiceId("21m00Tcm4TlvDq8ikWAM")).toBe(true);
      expect(isValidVoiceId("EXAVITQu4vr4xnSDxMaL")).toBe(true);
    });

    it("should accept voice IDs of varying valid lengths", () => {
      expect(isValidVoiceId("a1b2c3d4e5")).toBe(true); // 10 chars (min)
      expect(isValidVoiceId("a".repeat(40))).toBe(true); // 40 chars (max)
    });

    it("should reject too short voice IDs", () => {
      expect(isValidVoiceId("")).toBe(false);
      expect(isValidVoiceId("abc")).toBe(false);
      expect(isValidVoiceId("123456789")).toBe(false); // 9 chars
    });

    it("should reject too long voice IDs", () => {
      expect(isValidVoiceId("a".repeat(41))).toBe(false);
      expect(isValidVoiceId("a".repeat(100))).toBe(false);
    });

    it("should reject voice IDs with invalid characters", () => {
      expect(isValidVoiceId("pMsXgVXv3BLz-gSXRplE")).toBe(false); // hyphen
      expect(isValidVoiceId("pMsXgVXv3BLz_gSXRplE")).toBe(false); // underscore
      expect(isValidVoiceId("pMsXgVXv3BLz gSXRplE")).toBe(false); // space
      expect(isValidVoiceId("../../../etc/passwd")).toBe(false); // path traversal
      expect(isValidVoiceId("voice?param=value")).toBe(false); // query string
    });
  });

  describe("isValidOpenAIVoice", () => {
    it("should accept all valid OpenAI voices", () => {
      const validVoices = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"];
      for (const voice of validVoices) {
        expect(isValidOpenAIVoice(voice)).toBe(true);
      }
    });

    it("should reject invalid voice names", () => {
      expect(isValidOpenAIVoice("invalid")).toBe(false);
      expect(isValidOpenAIVoice("")).toBe(false);
      expect(isValidOpenAIVoice("ALLOY")).toBe(false); // case sensitive
      expect(isValidOpenAIVoice("alloy ")).toBe(false); // trailing space
      expect(isValidOpenAIVoice(" alloy")).toBe(false); // leading space
    });
  });

  describe("isValidOpenAIModel", () => {
    it("should accept standard OpenAI TTS models", () => {
      expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
      expect(isValidOpenAIModel("tts-1")).toBe(true);
      expect(isValidOpenAIModel("tts-1-hd")).toBe(true);
    });

    it("should accept gpt-4o-mini-tts variants", () => {
      expect(isValidOpenAIModel("gpt-4o-mini-tts-2025-12-15")).toBe(true);
      expect(isValidOpenAIModel("gpt-4o-mini-tts-preview")).toBe(true);
    });

    it("should reject invalid model names", () => {
      expect(isValidOpenAIModel("invalid")).toBe(false);
      expect(isValidOpenAIModel("")).toBe(false);
      expect(isValidOpenAIModel("tts-2")).toBe(false);
      expect(isValidOpenAIModel("gpt-4")).toBe(false);
    });
  });

  describe("OPENAI_TTS_MODELS", () => {
    it("should contain the expected models", () => {
      expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
      expect(OPENAI_TTS_MODELS).toContain("tts-1");
      expect(OPENAI_TTS_MODELS).toContain("tts-1-hd");
    });

    it("should be a non-empty array", () => {
      expect(Array.isArray(OPENAI_TTS_MODELS)).toBe(true);
      expect(OPENAI_TTS_MODELS.length).toBeGreaterThan(0);
    });
  });
});
|
||||
@@ -13,7 +13,7 @@
|
||||
* via Telegram customCommands and handled by the agent workspace.
|
||||
*/
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync, mkdtempSync, rmSync } from "fs";
|
||||
import { existsSync, readFileSync, writeFileSync, mkdtempSync, rmSync, renameSync, unlinkSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { tmpdir } from "os";
|
||||
import type { PluginApi } from "clawdbot";
|
||||
@@ -49,17 +49,35 @@ interface UserPreferences {
|
||||
enabled?: boolean;
|
||||
provider?: "openai" | "elevenlabs";
|
||||
maxLength?: number; // Max chars before summarizing (default 1500)
|
||||
summarize?: boolean; // Enable auto-summarization (default true)
|
||||
};
|
||||
}
|
||||
|
||||
/** Default number of characters a message may have before it is summarized for TTS. */
const DEFAULT_TTS_MAX_LENGTH = 1500;

/** Auto-summarization of long texts is on unless the preferences file says otherwise. */
const DEFAULT_TTS_SUMMARIZE = true;
|
||||
|
||||
/** Outcome of a single text-to-speech conversion. */
interface TtsResult {
  /** Whether audio was produced. */
  success: boolean;
  /** Path of the generated audio file; set on success. */
  audioPath?: string;
  /** Failure description; callers read it when `success` is false. */
  error?: string;
  /** Milliseconds spent on the provider attempt that produced the audio. */
  latencyMs?: number;
  /** Name of the provider that produced the audio. */
  provider?: string;
}
|
||||
|
||||
/** Diagnostic record of one auto-TTS attempt, shown by the `/tts_status` command. */
interface TtsStatusEntry {
  /** `Date.now()` value captured when the attempt finished. */
  timestamp: number;
  /** Whether the attempt produced audio. */
  success: boolean;
  /** Length of the original message content, in characters. */
  textLength: number;
  /** True when the text sent to the provider differed from the original content. */
  summarized: boolean;
  /** Provider that produced the audio; only recorded for successful attempts. */
  provider?: string;
  /** Provider latency in milliseconds; only recorded for successful attempts. */
  latencyMs?: number;
  /** Failure description; only recorded for failed attempts. */
  error?: string;
}
|
||||
|
||||
// Most recent TTS attempt, kept for diagnostics.
// This is a single module-level value shared by everyone (not per-user) and it
// lives in memory only, so it is empty until the first attempt after startup.
let lastTtsAttempt: TtsStatusEntry | undefined;
|
||||
|
||||
// =============================================================================
|
||||
// Validation
|
||||
// =============================================================================
|
||||
@@ -118,7 +136,27 @@ function isTtsEnabled(prefsPath: string): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
function setTtsEnabled(prefsPath: string, enabled: boolean): void {
|
||||
/**
 * Writes `content` to `filePath` using the temp-file + rename pattern.
 *
 * The data is first written to a uniquely named temp file in the same
 * directory and then renamed over the target, so the target is swapped in one
 * rename step rather than being rewritten in place.
 *
 * Note: this only protects against torn/partial files. It does not take a
 * lock, so two concurrent read-modify-write cycles can still overwrite each
 * other's changes (the last rename wins).
 *
 * @param filePath - Destination file; replaced if it already exists.
 * @param content - Full text to store in the file.
 * @throws Re-throws the original error if the temp write or the rename fails.
 */
function atomicWriteFileSync(filePath: string, content: string): void {
  const tmpPath = `${filePath}.tmp.${Date.now()}.${Math.random().toString(36).slice(2)}`;
  try {
    // The write is inside the try as well: a failed or partial write (for
    // example a full disk) must not leave the temp file behind either.
    writeFileSync(tmpPath, content);
    renameSync(tmpPath, filePath);
  } catch (err) {
    // Best-effort removal of the temp file before reporting the failure.
    try {
      unlinkSync(tmpPath);
    } catch {
      // Ignore cleanup errors (the temp file may never have been created).
    }
    throw err;
  }
}
|
||||
|
||||
function updatePrefs(prefsPath: string, update: (prefs: UserPreferences) => void): void {
|
||||
let prefs: UserPreferences = {};
|
||||
try {
|
||||
if (existsSync(prefsPath)) {
|
||||
@@ -127,8 +165,14 @@ function setTtsEnabled(prefsPath: string, enabled: boolean): void {
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
prefs.tts = { ...prefs.tts, enabled };
|
||||
writeFileSync(prefsPath, JSON.stringify(prefs, null, 2));
|
||||
update(prefs);
|
||||
atomicWriteFileSync(prefsPath, JSON.stringify(prefs, null, 2));
|
||||
}
|
||||
|
||||
/**
 * Persists the auto-TTS on/off flag in the preferences file.
 *
 * Other keys already stored under `tts` are kept; only `enabled` is replaced.
 *
 * @param prefsPath - Path of the user preferences JSON file.
 * @param enabled - New value for `tts.enabled`.
 */
function setTtsEnabled(prefsPath: string, enabled: boolean): void {
  updatePrefs(prefsPath, (prefs) => {
    prefs.tts = { ...prefs.tts, enabled };
  });
}
|
||||
|
||||
function getTtsProvider(prefsPath: string): "openai" | "elevenlabs" | undefined {
|
||||
@@ -142,16 +186,9 @@ function getTtsProvider(prefsPath: string): "openai" | "elevenlabs" | undefined
|
||||
}
|
||||
|
||||
/**
 * Persists the preferred TTS provider in the preferences file.
 *
 * Other keys already stored under `tts` are kept; only `provider` is replaced.
 * The read/modify/write cycle is delegated to `updatePrefs` instead of being
 * repeated inline here.
 *
 * @param prefsPath - Path of the user preferences JSON file.
 * @param provider - Provider to store as `tts.provider`.
 */
function setTtsProvider(prefsPath: string, provider: "openai" | "elevenlabs"): void {
  updatePrefs(prefsPath, (prefs) => {
    prefs.tts = { ...prefs.tts, provider };
  });
}
|
||||
|
||||
function getTtsMaxLength(prefsPath: string): number {
|
||||
@@ -165,33 +202,50 @@ function getTtsMaxLength(prefsPath: string): number {
|
||||
}
|
||||
|
||||
/**
 * Persists the maximum text length (in characters) in the preferences file.
 *
 * Other keys already stored under `tts` are kept; only `maxLength` is replaced.
 *
 * @param prefsPath - Path of the user preferences JSON file.
 * @param maxLength - New value for `tts.maxLength`.
 */
function setTtsMaxLength(prefsPath: string, maxLength: number): void {
  updatePrefs(prefsPath, (prefs) => {
    prefs.tts = { ...prefs.tts, maxLength };
  });
}

/**
 * Reports whether long texts should be summarized before TTS conversion.
 *
 * Falls back to `DEFAULT_TTS_SUMMARIZE` when the preferences file is missing,
 * cannot be read or parsed, or does not hold a boolean `tts.summarize` value.
 *
 * @param prefsPath - Path of the user preferences JSON file.
 * @returns The stored flag, or the default when no usable value exists.
 */
function isSummarizationEnabled(prefsPath: string): boolean {
  try {
    if (!existsSync(prefsPath)) return DEFAULT_TTS_SUMMARIZE;
    const prefs: UserPreferences = JSON.parse(readFileSync(prefsPath, "utf8"));
    // The file is hand-editable JSON, so only trust an actual boolean here.
    const stored: unknown = prefs?.tts?.summarize;
    return typeof stored === "boolean" ? stored : DEFAULT_TTS_SUMMARIZE;
  } catch {
    return DEFAULT_TTS_SUMMARIZE;
  }
}
|
||||
|
||||
/**
 * Persists the auto-summarization on/off flag in the preferences file.
 *
 * Other keys already stored under `tts` are kept; only `summarize` is replaced.
 *
 * @param prefsPath - Path of the user preferences JSON file.
 * @param enabled - New value for `tts.summarize`.
 */
function setSummarizationEnabled(prefsPath: string, enabled: boolean): void {
  updatePrefs(prefsPath, (prefs) => {
    prefs.tts = { ...prefs.tts, summarize: enabled };
  });
}
|
||||
|
||||
// =============================================================================
|
||||
// Text Summarization (for long texts)
|
||||
// =============================================================================
|
||||
|
||||
/** Result of summarizing a long text, together with basic metrics. */
interface SummarizeResult {
  /** The summarized text. */
  summary: string;
  /** Milliseconds elapsed between the start of the call and receiving the summary. */
  latencyMs: number;
  /** Length of the original text, in characters. */
  inputLength: number;
  /** Length of `summary`, in characters. */
  outputLength: number;
}
|
||||
|
||||
async function summarizeText(
|
||||
text: string,
|
||||
targetLength: number,
|
||||
apiKey: string,
|
||||
timeoutMs: number = 30000
|
||||
): Promise<string> {
|
||||
): Promise<SummarizeResult> {
|
||||
// Validate targetLength
|
||||
if (targetLength < 100 || targetLength > 10000) {
|
||||
throw new Error(`Invalid targetLength: ${targetLength}`);
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
@@ -233,7 +287,13 @@ async function summarizeText(
|
||||
throw new Error("No summary returned");
|
||||
}
|
||||
|
||||
return summary;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
return {
|
||||
summary,
|
||||
latencyMs,
|
||||
inputLength: text.length,
|
||||
outputLength: summary.length,
|
||||
};
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
@@ -262,13 +322,14 @@ function getApiKey(config: TtsConfig, provider: string): string | undefined {
|
||||
* This ensures the file is consumed before deletion.
|
||||
*/
|
||||
function scheduleCleanup(tempDir: string, delayMs: number = TEMP_FILE_CLEANUP_DELAY_MS): void {
  const timer = setTimeout(() => {
    try {
      rmSync(tempDir, { recursive: true, force: true });
    } catch {
      // Ignore cleanup errors
    }
  }, delayMs);
  // Allow the process to exit without waiting for cleanup. If it exits before
  // the delay elapses, the callback never runs and tempDir is left on disk.
  timer.unref();
}
|
||||
|
||||
// =============================================================================
|
||||
@@ -401,6 +462,7 @@ async function textToSpeech(text: string, config: TtsConfig, prefsPath?: string)
|
||||
continue;
|
||||
}
|
||||
|
||||
const providerStartTime = Date.now();
|
||||
try {
|
||||
let audioBuffer: Buffer;
|
||||
|
||||
@@ -425,6 +487,8 @@ async function textToSpeech(text: string, config: TtsConfig, prefsPath?: string)
|
||||
continue;
|
||||
}
|
||||
|
||||
const latencyMs = Date.now() - providerStartTime;
|
||||
|
||||
// Save to temp file
|
||||
const tempDir = mkdtempSync(join(tmpdir(), "tts-"));
|
||||
const audioPath = join(tempDir, `voice-${Date.now()}.mp3`);
|
||||
@@ -433,7 +497,7 @@ async function textToSpeech(text: string, config: TtsConfig, prefsPath?: string)
|
||||
// Schedule cleanup after delay (file should be consumed by then)
|
||||
scheduleCleanup(tempDir);
|
||||
|
||||
return { success: true, audioPath };
|
||||
return { success: true, audioPath, latencyMs, provider };
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
if (error.name === "AbortError") {
|
||||
@@ -735,6 +799,84 @@ Do NOT add extra text around the MEDIA directive.`,
|
||||
},
|
||||
});
|
||||
|
||||
// /tts_summary [on|off] - Enable/disable auto-summarization
// With no argument the handler reports the current setting; "on"/"off"
// persist the new setting; anything else returns a usage hint.
api.registerCommand({
  name: "tts_summary",
  description: "Enable or disable auto-summarization for long texts",
  acceptsArgs: true,
  handler: (ctx) => {
    const arg = ctx.args?.trim().toLowerCase();

    if (!arg) {
      // Show current status. The preferences are only read in this branch,
      // because the other branches never use them.
      const currentEnabled = isSummarizationEnabled(prefsPath);
      const maxLength = getTtsMaxLength(prefsPath);
      return {
        text: `📝 **Auto-Resumo TTS**\n\n` +
          `Status: ${currentEnabled ? "✅ Ativado" : "❌ Desativado"}\n` +
          `Limite: ${maxLength} caracteres\n\n` +
          `Quando ativado, textos maiores que ${maxLength} chars são resumidos com gpt-4o-mini antes de virar áudio.\n\n` +
          `Uso: /tts_summary on ou /tts_summary off`,
      };
    }

    if (arg !== "on" && arg !== "off") {
      return { text: "❌ Use: /tts_summary on ou /tts_summary off" };
    }

    const newEnabled = arg === "on";
    setSummarizationEnabled(prefsPath, newEnabled);
    log.info(`[${PLUGIN_ID}] Summarization ${newEnabled ? "enabled" : "disabled"} via /tts_summary command`);
    return {
      text: newEnabled
        ? `✅ Auto-resumo **ativado**!\n\nTextos longos serão resumidos antes de virar áudio.`
        : `❌ Auto-resumo **desativado**!\n\nTextos longos serão ignorados (sem áudio).`,
    };
  },
});
|
||||
|
||||
// /tts_status - Show TTS status and last attempt result
// Reports the persisted preferences, whether the active provider has an API
// key, and the most recent TTS attempt recorded in this process (if any).
api.registerCommand({
  name: "tts_status",
  description: "Show TTS status, configuration, and last attempt result",
  acceptsArgs: false,
  handler: () => {
    const enabled = isTtsEnabled(prefsPath);
    const userProvider = getTtsProvider(prefsPath);
    // The user's stored preference wins over the plugin config; "openai" is the last resort.
    const activeProvider = userProvider || config.provider || "openai";
    const maxLength = getTtsMaxLength(prefsPath);
    const summarizationEnabled = isSummarizationEnabled(prefsPath);
    const hasKey = !!getApiKey(config, activeProvider);

    const statusLines = [
      `📊 **Status TTS**\n`,
      `Estado: ${enabled ? "✅ Ativado" : "❌ Desativado"}`,
      `Provedor: ${activeProvider} (API Key: ${hasKey ? "✅" : "❌"})`,
      `Limite de texto: ${maxLength} caracteres`,
      `Auto-resumo: ${summarizationEnabled ? "✅ Ativado" : "❌ Desativado"}`,
    ];

    // Snapshot the module-level value once so every line describes the same attempt.
    const attempt = lastTtsAttempt;
    statusLines.push(``);
    if (attempt) {
      const timeAgo = Math.round((Date.now() - attempt.timestamp) / 1000);
      statusLines.push(`**Última tentativa** (há ${timeAgo}s):`);
      statusLines.push(`Resultado: ${attempt.success ? "✅ Sucesso" : "❌ Falha"}`);
      statusLines.push(`Texto: ${attempt.textLength} chars${attempt.summarized ? " (resumido)" : ""}`);
      if (attempt.success) {
        statusLines.push(`Provedor: ${attempt.provider}`);
        statusLines.push(`Latência: ${attempt.latencyMs}ms`);
      } else if (attempt.error) {
        statusLines.push(`Erro: ${attempt.error}`);
      }
    } else {
      statusLines.push(`_Nenhuma tentativa de TTS registrada nesta sessão._`);
    }

    return { text: statusLines.join("\n") };
  },
});
|
||||
|
||||
// ===========================================================================
|
||||
// Auto-TTS Hook (message_sending)
|
||||
// ===========================================================================
|
||||
@@ -763,9 +905,15 @@ Do NOT add extra text around the MEDIA directive.`,
|
||||
|
||||
const maxLength = getTtsMaxLength(prefsPath);
|
||||
let textForAudio = content;
|
||||
const summarizationEnabled = isSummarizationEnabled(prefsPath);
|
||||
|
||||
// If text exceeds limit, summarize it first
|
||||
// If text exceeds limit, summarize it first (if enabled)
|
||||
if (content.length > maxLength) {
|
||||
if (!summarizationEnabled) {
|
||||
log.info(`[${PLUGIN_ID}] Auto-TTS: Text too long (${content.length} > ${maxLength}), summarization disabled, skipping audio`);
|
||||
return; // User disabled summarization, skip audio for long texts
|
||||
}
|
||||
|
||||
log.info(`[${PLUGIN_ID}] Auto-TTS: Text too long (${content.length} > ${maxLength}), summarizing...`);
|
||||
|
||||
const openaiKey = getApiKey(config, "openai");
|
||||
@@ -775,8 +923,11 @@ Do NOT add extra text around the MEDIA directive.`,
|
||||
}
|
||||
|
||||
try {
|
||||
textForAudio = await summarizeText(content, maxLength, openaiKey, config.timeoutMs);
|
||||
log.info(`[${PLUGIN_ID}] Auto-TTS: Summarized to ${textForAudio.length} chars`);
|
||||
const summarizeResult = await summarizeText(content, maxLength, openaiKey, config.timeoutMs);
|
||||
textForAudio = summarizeResult.summary;
|
||||
log.info(
|
||||
`[${PLUGIN_ID}] Auto-TTS: Summarized ${summarizeResult.inputLength} → ${summarizeResult.outputLength} chars in ${summarizeResult.latencyMs}ms`
|
||||
);
|
||||
|
||||
// Safeguard: if summary still exceeds hard limit, truncate
|
||||
const hardLimit = config.maxTextLength || 4000;
|
||||
@@ -793,24 +944,61 @@ Do NOT add extra text around the MEDIA directive.`,
|
||||
log.info(`[${PLUGIN_ID}] Auto-TTS: Converting ${content.length} chars`);
|
||||
}
|
||||
|
||||
const wasSummarized = textForAudio !== content;
|
||||
|
||||
try {
|
||||
const ttsStartTime = Date.now();
|
||||
const result = await textToSpeech(textForAudio, config, prefsPath);
|
||||
|
||||
if (result.success && result.audioPath) {
|
||||
log.info(`[${PLUGIN_ID}] Auto-TTS: Audio generated: ${result.audioPath}`);
|
||||
const totalLatency = Date.now() - ttsStartTime;
|
||||
log.info(
|
||||
`[${PLUGIN_ID}] Auto-TTS: Generated via ${result.provider} in ${result.latencyMs}ms (total: ${totalLatency}ms)`
|
||||
);
|
||||
|
||||
// Track successful attempt
|
||||
lastTtsAttempt = {
|
||||
timestamp: Date.now(),
|
||||
success: true,
|
||||
textLength: content.length,
|
||||
summarized: wasSummarized,
|
||||
provider: result.provider,
|
||||
latencyMs: result.latencyMs,
|
||||
};
|
||||
|
||||
// Return modified content with MEDIA directive
|
||||
// The text is kept for accessibility, audio is appended
|
||||
return {
|
||||
content: `MEDIA:${result.audioPath}`,
|
||||
};
|
||||
} else {
|
||||
log.warn(`[${PLUGIN_ID}] Auto-TTS: Failed - ${result.error}`);
|
||||
log.warn(`[${PLUGIN_ID}] Auto-TTS: TTS conversion failed - ${result.error}`);
|
||||
|
||||
// Track failed attempt
|
||||
lastTtsAttempt = {
|
||||
timestamp: Date.now(),
|
||||
success: false,
|
||||
textLength: content.length,
|
||||
summarized: wasSummarized,
|
||||
error: result.error,
|
||||
};
|
||||
|
||||
// On failure, send original text without audio
|
||||
return;
|
||||
}
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
log.error(`[${PLUGIN_ID}] Auto-TTS error: ${error.message}`);
|
||||
log.error(`[${PLUGIN_ID}] Auto-TTS: Unexpected error - ${error.message}`);
|
||||
|
||||
// Track error
|
||||
lastTtsAttempt = {
|
||||
timestamp: Date.now(),
|
||||
success: false,
|
||||
textLength: content.length,
|
||||
summarized: wasSummarized,
|
||||
error: error.message,
|
||||
};
|
||||
|
||||
// On error, send original text
|
||||
return;
|
||||
}
|
||||
@@ -844,3 +1032,14 @@ export const meta = {
|
||||
description: "Text-to-speech for chat responses using ElevenLabs or OpenAI",
|
||||
version: "0.3.0",
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Test Exports (for unit testing)
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Internal helpers exposed for unit tests only.
 * Not part of the plugin's public API.
 */
export const _test = {
  isValidVoiceId,
  isValidOpenAIVoice,
  isValidOpenAIModel,
  OPENAI_TTS_MODELS,
};
|
||||
|
||||
Reference in New Issue
Block a user