import crypto from "node:crypto";

import type { TwilioConfig } from "../config.js";
import type { MediaStreamHandler } from "../media-stream.js";
import type {
  HangupCallInput,
  InitiateCallInput,
  InitiateCallResult,
  NormalizedEvent,
  PlayTtsInput,
  ProviderWebhookParseResult,
  StartListeningInput,
  StopListeningInput,
  WebhookContext,
  WebhookVerificationResult,
} from "../types.js";
import { escapeXml, mapVoiceToPolly } from "../voice-mapping.js";
import type { VoiceCallProvider } from "./base.js";
import type { OpenAITTSProvider } from "./tts-openai.js";
import { chunkAudio } from "./tts-openai.js";
import { twilioApiRequest } from "./twilio/api.js";
import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";

/**
 * Twilio Voice API provider implementation.
 *
 * Uses Twilio Programmable Voice API with Media Streams for real-time
 * bidirectional audio streaming.
 *
 * @see https://www.twilio.com/docs/voice
 * @see https://www.twilio.com/docs/voice/media-streams
 */
export interface TwilioProviderOptions {
  /** Allow ngrok free tier compatibility mode (less secure) */
  allowNgrokFreeTier?: boolean;
  /** Override public URL for signature verification */
  publicUrl?: string;
  /** Path for media stream WebSocket (e.g., /voice/stream) */
  streamPath?: string;
  /** Skip webhook signature verification (development only) */
  skipVerification?: boolean;
}

/**
 * Voice-call provider backed by Twilio.
 *
 * Keeps a small amount of per-call state in memory:
 * - `callWebhookUrls`: provider call SID -> TwiML webhook URL used for that call
 * - `callStreamMap`: provider call SID -> media stream SID
 * - `twimlStorage` / `notifyCalls`: one-shot TwiML for notify-mode calls, keyed by call id
 */
export class TwilioProvider implements VoiceCallProvider {
  readonly name = "twilio" as const;

  private readonly accountSid: string;
  private readonly authToken: string;
  private readonly baseUrl: string;
  /** Map of provider call SID to the TwiML webhook URL registered for that call */
  private readonly callWebhookUrls = new Map<string, string>();
  private readonly options: TwilioProviderOptions;

  /** Current public webhook URL (set when tunnel starts or from config) */
  private currentPublicUrl: string | null = null;

  /** Optional OpenAI TTS provider for streaming TTS */
  private ttsProvider: OpenAITTSProvider | null = null;

  /** Optional media stream handler for sending audio */
  private mediaStreamHandler: MediaStreamHandler | null = null;

  /** Map of call SID to stream SID for media streams */
  private callStreamMap = new Map<string, string>();

  /** Storage for TwiML content (for notify mode with URL-based TwiML) */
  private readonly twimlStorage = new Map<string, string>();

  /** Track notify-mode calls to avoid streaming on follow-up callbacks */
  private readonly notifyCalls = new Set<string>();

  /**
   * Delete stored TwiML for a given `callId`.
   *
   * We keep TwiML in-memory only long enough to satisfy the initial Twilio
   * webhook request (notify mode). Subsequent webhooks should not reuse it.
   */
  private deleteStoredTwiml(callId: string): void {
    this.twimlStorage.delete(callId);
    this.notifyCalls.delete(callId);
  }

  /**
   * Delete stored TwiML for a call, addressed by Twilio's provider call SID.
   *
   * This is used when we only have `providerCallId` (e.g. hangup). The call id
   * is recovered from the `callId` query parameter of the stored webhook URL.
   */
  private deleteStoredTwimlForProviderCall(providerCallId: string): void {
    const webhookUrl = this.callWebhookUrls.get(providerCallId);
    if (!webhookUrl) return;

    // The URL was built with `searchParams.set`, so the value is percent-encoded.
    // Parse it back with the URL API instead of a regex so the decoded call id
    // matches the key used in `twimlStorage` / `notifyCalls`.
    let callId: string | null;
    try {
      callId = new URL(webhookUrl).searchParams.get("callId");
    } catch {
      // Stored value is not a parseable URL; nothing we can clean up.
      return;
    }
    if (!callId) return;

    this.deleteStoredTwiml(callId);
  }

  /**
   * Drop all webhook-related in-memory state for a call that has ended.
   *
   * @param providerCallId - Twilio call SID
   * @param callId - internal call id, when known (from the webhook query string)
   */
  private releaseCallState(providerCallId: string, callId?: string): void {
    if (callId) {
      this.deleteStoredTwiml(callId);
    }
    // Must run before the webhook URL is removed: it derives the call id from it.
    this.deleteStoredTwimlForProviderCall(providerCallId);
    this.callWebhookUrls.delete(providerCallId);
  }

  /**
   * @param config - Twilio credentials; both `accountSid` and `authToken` are required
   * @param options - optional provider behaviour overrides
   * @throws Error when the account SID or auth token is missing
   */
  constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) {
    if (!config.accountSid) {
      throw new Error("Twilio Account SID is required");
    }
    if (!config.authToken) {
      throw new Error("Twilio Auth Token is required");
    }

    this.accountSid = config.accountSid;
    this.authToken = config.authToken;
    this.baseUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.accountSid}`;
    this.options = options;

    if (options.publicUrl) {
      this.currentPublicUrl = options.publicUrl;
    }
  }

  /** Set the public webhook URL (e.g. once a tunnel has started). */
  setPublicUrl(url: string): void {
    this.currentPublicUrl = url;
  }

  /** Get the current public webhook URL, or `null` when none is set. */
  getPublicUrl(): string | null {
    return this.currentPublicUrl;
  }

  /** Attach the OpenAI TTS provider used for streamed TTS. */
  setTTSProvider(provider: OpenAITTSProvider): void {
    this.ttsProvider = provider;
  }

  /** Attach the media stream handler used to send audio and marks. */
  setMediaStreamHandler(handler: MediaStreamHandler): void {
    this.mediaStreamHandler = handler;
  }

  /** Remember which media stream belongs to a call. */
  registerCallStream(callSid: string, streamSid: string): void {
    this.callStreamMap.set(callSid, streamSid);
  }

  /** Forget the media stream associated with a call. */
  unregisterCallStream(callSid: string): void {
    this.callStreamMap.delete(callSid);
  }

  /**
   * Make an authenticated request to the Twilio API.
   *
   * @param endpoint - path relative to the account base URL (e.g. `/Calls.json`)
   * @param params - form parameters; array values are used for repeated fields
   * @param options - `allowNotFound` is forwarded to `twilioApiRequest`
   * @returns the response from `twilioApiRequest`, typed as `T`
   */
  private async apiRequest<T = unknown>(
    endpoint: string,
    params: Record<string, string | string[]>,
    options?: { allowNotFound?: boolean },
  ): Promise<T> {
    const response: unknown = await twilioApiRequest({
      baseUrl: this.baseUrl,
      accountSid: this.accountSid,
      authToken: this.authToken,
      endpoint,
      body: params,
      allowNotFound: options?.allowNotFound,
    });
    // The response shape is trusted, not validated, at this boundary.
    return response as T;
  }

  /**
   * Verify Twilio webhook signature using HMAC-SHA1.
   *
   * Handles reverse proxy scenarios (Tailscale, nginx, ngrok) by reconstructing
   * the public URL from forwarding headers.
   *
   * @see https://www.twilio.com/docs/usage/webhooks/webhooks-security
   */
  verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
    return verifyTwilioProviderWebhook({
      ctx,
      authToken: this.authToken,
      currentPublicUrl: this.currentPublicUrl,
      options: this.options,
    });
  }

  /**
   * Parse Twilio webhook event into normalized format.
   *
   * @returns the normalized event (if any) plus the TwiML body to send back;
   *          a 400 result with no events when parsing throws
   */
  parseWebhookEvent(ctx: WebhookContext): ProviderWebhookParseResult {
    try {
      const params = new URLSearchParams(ctx.rawBody);
      const callIdFromQuery =
        typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
          ? ctx.query.callId.trim()
          : undefined;
      const event = this.normalizeEvent(params, callIdFromQuery);

      // For Twilio, we must return TwiML. Most actions are driven by Calls API updates,
      // so the webhook response is typically a pause to keep the call alive.
      const twiml = this.generateTwimlResponse(ctx);

      return {
        events: event ? [event] : [],
        providerResponseBody: twiml,
        providerResponseHeaders: { "Content-Type": "application/xml" },
        statusCode: 200,
      };
    } catch {
      return { events: [], statusCode: 400 };
    }
  }

  /**
   * Parse Twilio direction to normalized format.
   */
  private static parseDirection(
    direction: string | null,
  ): "inbound" | "outbound" | undefined {
    if (direction === "inbound") return "inbound";
    if (direction === "outbound-api" || direction === "outbound-dial") {
      return "outbound";
    }
    return undefined;
  }

  /**
   * Convert Twilio webhook params to normalized event format.
   *
   * Precedence: speech result, then DTMF digits, then call status.
   * Terminal statuses also release the in-memory state kept for the call.
   *
   * @param params - form-encoded webhook body
   * @param callIdOverride - internal call id from the webhook query string, if present
   * @returns the normalized event, or `null` when the webhook carries nothing we map
   */
  private normalizeEvent(
    params: URLSearchParams,
    callIdOverride?: string,
  ): NormalizedEvent | null {
    const callSid = params.get("CallSid") || "";

    const baseEvent = {
      id: crypto.randomUUID(),
      callId: callIdOverride || callSid,
      providerCallId: callSid,
      timestamp: Date.now(),
      direction: TwilioProvider.parseDirection(params.get("Direction")),
      from: params.get("From") || undefined,
      to: params.get("To") || undefined,
    };

    // Handle speech result (from <Gather>)
    const speechResult = params.get("SpeechResult");
    if (speechResult) {
      // Missing or unparseable confidence falls back to 0.9 instead of NaN.
      const parsedConfidence = Number.parseFloat(
        params.get("Confidence") ?? "",
      );
      return {
        ...baseEvent,
        type: "call.speech",
        transcript: speechResult,
        isFinal: true,
        confidence: Number.isFinite(parsedConfidence) ? parsedConfidence : 0.9,
      };
    }

    // Handle DTMF
    const digits = params.get("Digits");
    if (digits) {
      return { ...baseEvent, type: "call.dtmf", digits };
    }

    // Handle call status changes
    const callStatus = params.get("CallStatus");
    switch (callStatus) {
      case "initiated":
        return { ...baseEvent, type: "call.initiated" };
      case "ringing":
        return { ...baseEvent, type: "call.ringing" };
      case "in-progress":
        return { ...baseEvent, type: "call.answered" };
      case "completed":
      case "busy":
      case "no-answer":
      case "failed":
        // The call is over: free stored TwiML and the webhook URL entry so
        // calls that end without an explicit hangupCall() do not leak state.
        this.releaseCallState(callSid, callIdOverride);
        return { ...baseEvent, type: "call.ended", reason: callStatus };
      case "canceled":
        this.releaseCallState(callSid, callIdOverride);
        return { ...baseEvent, type: "call.ended", reason: "hangup-bot" };
      default:
        return null;
    }
  }

  /** TwiML document with no verbs. */
  private static readonly EMPTY_TWIML =
    '<?xml version="1.0" encoding="UTF-8"?><Response></Response>';

  /**
   * TwiML that keeps the call alive while no stream URL is available.
   * NOTE(review): the pause length is a reconstruction - confirm the intended value.
   */
  private static readonly PAUSE_TWIML = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Pause length="30"/>
</Response>`;

  /**
   * Generate TwiML response for webhook.
   * When a call is answered, connects to media stream for bidirectional audio.
   *
   * @param ctx - webhook context; when omitted an empty TwiML document is returned
   */
  private generateTwimlResponse(ctx?: WebhookContext): string {
    if (!ctx) return TwilioProvider.EMPTY_TWIML;

    const params = new URLSearchParams(ctx.rawBody);
    const type =
      typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
    const isStatusCallback = type === "status";
    const callStatus = params.get("CallStatus");
    const direction = params.get("Direction");
    const callIdFromQuery =
      typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
        ? ctx.query.callId.trim()
        : undefined;

    // Avoid logging webhook params/TwiML (may contain PII).

    // Status callbacks should not receive TwiML.
    if (isStatusCallback) {
      return TwilioProvider.EMPTY_TWIML;
    }

    // Handle initial TwiML request (when Twilio first initiates the call).
    // Check if we have stored TwiML for this call (notify mode).
    if (callIdFromQuery) {
      const storedTwiml = this.twimlStorage.get(callIdFromQuery);
      if (storedTwiml) {
        // Clean up after serving (one-time use)
        this.deleteStoredTwiml(callIdFromQuery);
        return storedTwiml;
      }
      if (this.notifyCalls.has(callIdFromQuery)) {
        return TwilioProvider.EMPTY_TWIML;
      }
    }

    // Inbound calls are answered immediately with a stream; outbound calls
    // only connect to the stream once the call is in-progress.
    if (direction !== "inbound" && callStatus !== "in-progress") {
      return TwilioProvider.EMPTY_TWIML;
    }

    const streamUrl = this.getStreamUrl();
    return streamUrl
      ? this.getStreamConnectXml(streamUrl)
      : TwilioProvider.PAUSE_TWIML;
  }

  /**
   * Get the WebSocket URL for media streaming.
   * Derives from the public URL origin + stream path.
   *
   * @returns the stream URL, or `null` when the public URL or stream path is unset
   */
  private getStreamUrl(): string | null {
    if (!this.currentPublicUrl || !this.options.streamPath) {
      return null;
    }

    // Extract just the origin (host) from the public URL, ignoring any path
    const origin = new URL(this.currentPublicUrl).origin;

    // Convert https:// to wss:// (and http:// to ws://) for WebSocket
    const wsOrigin = origin
      .replace(/^https:\/\//, "wss://")
      .replace(/^http:\/\//, "ws://");

    // Append the stream path, making sure it starts with a slash
    const streamPath = this.options.streamPath;
    const path = streamPath.startsWith("/") ? streamPath : `/${streamPath}`;

    return `${wsOrigin}${path}`;
  }

  /**
   * Generate TwiML to connect a call to a WebSocket media stream.
   * This enables bidirectional audio streaming for real-time STT/TTS.
   *
   * @param streamUrl - WebSocket URL (wss://...) for the media stream
   */
  getStreamConnectXml(streamUrl: string): string {
    return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Connect>
    <Stream url="${escapeXml(streamUrl)}" />
  </Connect>
</Response>`;
  }

  /**
   * Initiate an outbound call via Twilio API.
   * If inlineTwiml is provided, it is stored and served once from the webhook
   * endpoint (notify mode). Otherwise the webhook URL produces dynamic TwiML.
   *
   * @returns the Twilio call SID and the initial call status
   */
  async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
    const url = new URL(input.webhookUrl);
    url.searchParams.set("callId", input.callId);

    // Create separate URL for status callbacks (required by Twilio)
    const statusUrl = new URL(input.webhookUrl);
    statusUrl.searchParams.set("callId", input.callId);
    statusUrl.searchParams.set("type", "status"); // Differentiate from TwiML requests

    // Store TwiML content if provided (for notify mode)
    // We now serve it from the webhook endpoint instead of sending inline
    if (input.inlineTwiml) {
      this.twimlStorage.set(input.callId, input.inlineTwiml);
      this.notifyCalls.add(input.callId);
    }

    // Build request params - always use URL-based TwiML.
    // Twilio silently ignores `StatusCallback` when using the inline `Twiml` parameter.
    const params: Record<string, string | string[]> = {
      To: input.to,
      From: input.from,
      Url: url.toString(), // TwiML serving endpoint
      StatusCallback: statusUrl.toString(), // Separate status callback endpoint
      StatusCallbackEvent: ["initiated", "ringing", "answered", "completed"],
      Timeout: "30",
    };

    const result = await this.apiRequest<TwilioCallResponse>(
      "/Calls.json",
      params,
    );

    this.callWebhookUrls.set(result.sid, url.toString());

    return {
      providerCallId: result.sid,
      status: result.status === "queued" ? "queued" : "initiated",
    };
  }

  /**
   * Hang up a call via Twilio API.
   *
   * Local state is released first; the API request tolerates an unknown call.
   */
  async hangupCall(input: HangupCallInput): Promise<void> {
    this.releaseCallState(input.providerCallId);

    await this.apiRequest(
      `/Calls/${input.providerCallId}.json`,
      { Status: "completed" },
      { allowNotFound: true },
    );
  }

  /**
   * Play TTS audio via Twilio.
   *
   * Two modes:
   * 1. OpenAI TTS + Media Streams: If TTS provider and media stream are available,
   *    generates audio via OpenAI and streams it through WebSocket (preferred).
   * 2. TwiML <Say>: Falls back to Twilio's native TTS with Polly voices.
   *    Note: This may not work on all Twilio accounts.
   *
   * @throws Error when the fallback is needed but no webhook URL is known for the call
   */
  async playTts(input: PlayTtsInput): Promise<void> {
    // Try OpenAI TTS via media stream first (if configured)
    const streamSid = this.callStreamMap.get(input.providerCallId);
    if (this.ttsProvider && this.mediaStreamHandler && streamSid) {
      try {
        await this.playTtsViaStream(input.text, streamSid);
        return;
      } catch (err) {
        console.warn(
          `[voice-call] OpenAI TTS failed, falling back to Twilio <Say>:`,
          err instanceof Error ? err.message : err,
        );
        // Fall through to TwiML <Say> fallback
      }
    }

    // Fall back to TwiML <Say> (may not work on all accounts)
    const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
    if (!webhookUrl) {
      throw new Error(
        "Missing webhook URL for this call (provider state not initialized)",
      );
    }

    console.warn(
      "[voice-call] Using TwiML <Say> fallback - OpenAI TTS not configured or media stream not active",
    );

    const pollyVoice = mapVoiceToPolly(input.voice);
    // NOTE(review): <Gather> attributes are a reconstruction - confirm against
    // the intended speech-collection settings.
    const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Say voice="${pollyVoice}">${escapeXml(input.text)}</Say>
  <Gather input="speech" speechTimeout="auto" action="${escapeXml(webhookUrl)}" method="POST">
    <Say>.</Say>
  </Gather>
</Response>`;

    await this.apiRequest(`/Calls/${input.providerCallId}.json`, {
      Twiml: twiml,
    });
  }

  /**
   * Play TTS via OpenAI and Twilio Media Streams.
   * Generates audio with the TTS provider and sends it over the media stream
   * in fixed-size chunks, waiting between chunks to pace playback.
   *
   * @throws Error when the TTS provider or media stream handler is not set
   */
  private async playTtsViaStream(
    text: string,
    streamSid: string,
  ): Promise<void> {
    if (!this.ttsProvider || !this.mediaStreamHandler) {
      throw new Error("TTS provider and media stream handler required");
    }

    // Generate audio with OpenAI TTS (returns mu-law at 8kHz)
    const muLawAudio = await this.ttsProvider.synthesizeForTwilio(text);

    // Stream audio in 20ms chunks (160 bytes at 8kHz mu-law)
    const CHUNK_SIZE = 160;
    const CHUNK_DELAY_MS = 20;

    for (const chunk of chunkAudio(muLawAudio, CHUNK_SIZE)) {
      this.mediaStreamHandler.sendAudio(streamSid, chunk);

      // Pace the audio to match real-time playback
      await new Promise<void>((resolve) => setTimeout(resolve, CHUNK_DELAY_MS));
    }

    // Send a mark to track when audio finishes
    this.mediaStreamHandler.sendMark(streamSid, `tts-${Date.now()}`);
  }

  /**
   * Start listening for speech via Twilio <Gather>.
   *
   * @throws Error when no webhook URL is known for the call
   */
  async startListening(input: StartListeningInput): Promise<void> {
    const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
    if (!webhookUrl) {
      throw new Error(
        "Missing webhook URL for this call (provider state not initialized)",
      );
    }

    // NOTE(review): no `language` attribute is set here, so Twilio's default
    // recognition language applies - add one if the input carries a language.
    const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Gather input="speech" speechTimeout="auto" action="${escapeXml(webhookUrl)}" method="POST">
  </Gather>
</Response>`;

    await this.apiRequest(`/Calls/${input.providerCallId}.json`, {
      Twiml: twiml,
    });
  }

  /**
   * Stop listening - for Twilio this is a no-op as <Gather> auto-ends.
   */
  async stopListening(_input: StopListeningInput): Promise<void> {
    // Twilio's <Gather> automatically stops on speech end
    // No explicit action needed
  }
}

// -----------------------------------------------------------------------------
// Twilio-specific types
// -----------------------------------------------------------------------------

/** Subset of the Twilio Call resource fields read from the create-call response. */
interface TwilioCallResponse {
  sid: string;
  status: string;
  direction: string;
  from: string;
  to: string;
  uri: string;
}