From e43f4c0628228bd5ae3a9fe8774881f7c7753f65 Mon Sep 17 00:00:00 2001 From: techboss Date: Mon, 26 Jan 2026 15:25:27 -0700 Subject: [PATCH] fix(telegram): handle network errors gracefully - Add bot.catch() to prevent unhandled rejections from middleware - Add isRecoverableNetworkError() to retry on transient failures - Add maxRetryTime and exponential backoff to grammY runner - Global unhandled rejection handler now logs recoverable errors instead of crashing (fetch failures, timeouts, connection resets) Fixes crash loop when Telegram API is temporarily unreachable. --- src/infra/unhandled-rejections.ts | 37 +++++++++++++++++++++++++++++++ src/telegram/bot.ts | 6 +++++ src/telegram/fetch.ts | 12 ++++++++++ src/telegram/monitor.ts | 31 ++++++++++++++++++++++++-- 4 files changed, 84 insertions(+), 2 deletions(-) diff --git a/src/infra/unhandled-rejections.ts b/src/infra/unhandled-rejections.ts index c444baaa2..c45923c4b 100644 --- a/src/infra/unhandled-rejections.ts +++ b/src/infra/unhandled-rejections.ts @@ -13,6 +13,36 @@ export function registerUnhandledRejectionHandler(handler: UnhandledRejectionHan }; } +/** + * Check if an error is a recoverable/transient error that shouldn't crash the process. + * These include network errors and abort signals during shutdown. + */ +function isRecoverableError(reason: unknown): boolean { + if (!reason) return false; + + // Check error name for AbortError + if (reason instanceof Error && reason.name === "AbortError") { + return true; + } + + const message = reason instanceof Error ? reason.message : String(reason); + const lowerMessage = message.toLowerCase(); + return ( + lowerMessage.includes("fetch failed") || + lowerMessage.includes("network request") || + lowerMessage.includes("econnrefused") || + lowerMessage.includes("econnreset") || + lowerMessage.includes("etimedout") || + lowerMessage.includes("socket hang up") || + lowerMessage.includes("enotfound") || + lowerMessage.includes("network error") || + lowerMessage.includes("getaddrinfo") || + lowerMessage.includes("client network socket disconnected") || + lowerMessage.includes("this operation was aborted") || + lowerMessage.includes("aborted") + ); +} + export function isUnhandledRejectionHandled(reason: unknown): boolean { for (const handler of handlers) { try { @@ -30,6 +60,13 @@ export function isUnhandledRejectionHandled(reason: unknown): boolean { export function installUnhandledRejectionHandler(): void { process.on("unhandledRejection", (reason, _promise) => { if (isUnhandledRejectionHandled(reason)) return; + + // Don't crash on recoverable/transient errors - log them and continue + if (isRecoverableError(reason)) { + console.error("[clawdbot] Recoverable error (not crashing):", formatUncaughtError(reason)); + return; + } + console.error("[clawdbot] Unhandled promise rejection:", formatUncaughtError(reason)); process.exit(1); }); diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts index d958d5616..d1996bade 100644 --- a/src/telegram/bot.ts +++ b/src/telegram/bot.ts @@ -138,6 +138,12 @@ export function createTelegramBot(opts: TelegramBotOptions) { bot.api.config.use(apiThrottler()); bot.use(sequentialize(getTelegramSequentialKey)); + // Catch all errors from bot middleware to prevent unhandled rejections + bot.catch((err) => { + const message = err instanceof Error ? err.message : String(err); + runtime.error?.(danger(`telegram bot error: ${message}`)); + }); + const recentUpdates = createTelegramUpdateDedupe(); let lastUpdateId = typeof opts.updateOffset?.lastUpdateId === "number" ? opts.updateOffset.lastUpdateId : null; diff --git a/src/telegram/fetch.ts b/src/telegram/fetch.ts index 7fdaef301..00a21be9b 100644 --- a/src/telegram/fetch.ts +++ b/src/telegram/fetch.ts @@ -1,5 +1,17 @@ +import { setDefaultAutoSelectFamily } from "net"; import { resolveFetch } from "../infra/fetch.js"; +// Workaround for Node.js 22 "Happy Eyeballs" (autoSelectFamily) bug +// that causes intermittent ETIMEDOUT errors when connecting to Telegram's +// dual-stack servers. Disabling autoSelectFamily forces sequential IPv4/IPv6 +// attempts which works reliably. +// See: https://github.com/nodejs/node/issues/54359 +try { + setDefaultAutoSelectFamily(false); +} catch { + // Ignore if not available (older Node versions) +} + // Prefer wrapped fetch when available to normalize AbortSignal across runtimes. export function resolveTelegramFetch(proxyFetch?: typeof fetch): typeof fetch | undefined { if (proxyFetch) return resolveFetch(proxyFetch); diff --git a/src/telegram/monitor.ts b/src/telegram/monitor.ts index 24c8743df..aeb5aae7c 100644 --- a/src/telegram/monitor.ts +++ b/src/telegram/monitor.ts @@ -40,6 +40,10 @@ export function createTelegramRunnerOptions(cfg: ClawdbotConfig): RunOptions { return haystack.includes("getupdates"); }; +const isRecoverableNetworkError = (err: unknown): boolean => { + if (!err) return false; + const message = err instanceof Error ? err.message : String(err); + const lowerMessage = message.toLowerCase(); + // Recoverable network errors that should trigger retry, not crash + return ( + lowerMessage.includes("fetch failed") || + lowerMessage.includes("network request") || + lowerMessage.includes("econnrefused") || + lowerMessage.includes("econnreset") || + lowerMessage.includes("etimedout") || + lowerMessage.includes("socket hang up") || + lowerMessage.includes("enotfound") || + lowerMessage.includes("abort") + ); +}; + export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) { const cfg = opts.config ?? loadConfig(); const account = resolveTelegramAccount({ @@ -152,12 +173,18 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) { if (opts.abortSignal?.aborted) { throw err; } - if (!isGetUpdatesConflict(err)) { + const isConflict = isGetUpdatesConflict(err); + const isNetworkError = isRecoverableNetworkError(err); + if (!isConflict && !isNetworkError) { throw err; } restartAttempts += 1; const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts); - log(`Telegram getUpdates conflict; retrying in ${formatDurationMs(delayMs)}.`); + const reason = isConflict ? "getUpdates conflict" : "network error"; + const errMsg = err instanceof Error ? err.message : String(err); + (opts.runtime?.error ?? console.error)( + `Telegram ${reason}: ${errMsg}; retrying in ${formatDurationMs(delayMs)}.`, + ); try { await sleepWithAbort(delayMs, opts.abortSignal); } catch (sleepErr) {