fix(telegram): handle network errors gracefully

- Add bot.catch() to prevent unhandled rejections from middleware
- Add isRecoverableNetworkError() to retry on transient failures
- Add maxRetryTime and exponential backoff to grammY runner
- Global unhandled rejection handler now logs recoverable errors
  instead of crashing (fetch failures, timeouts, connection resets)

Fixes crash loop when Telegram API is temporarily unreachable.
This commit is contained in:
techboss
2026-01-26 15:25:27 -07:00
committed by Gustavo Madeira Santana
parent a8ad242f88
commit e43f4c0628
4 changed files with 84 additions and 2 deletions

View File

@@ -13,6 +13,36 @@ export function registerUnhandledRejectionHandler(handler: UnhandledRejectionHan
};
}
/**
 * Lower-cased message fragments that mark a rejection as transient.
 * Matching is by substring, so "aborted" also covers
 * "this operation was aborted".
 */
const RECOVERABLE_ERROR_FRAGMENTS: readonly string[] = [
  "fetch failed",
  "network request",
  "econnrefused",
  "econnreset",
  "etimedout",
  "socket hang up",
  "enotfound",
  "network error",
  "getaddrinfo",
  "client network socket disconnected",
  "aborted",
];

/**
 * Check if an error is a recoverable/transient error that shouldn't crash the process.
 * These include network errors and abort signals during shutdown.
 *
 * Accepts any rejection value: `Error` instances, error-like plain objects
 * (anything carrying a `name` and/or string `message`), and primitives.
 *
 * @param reason - The rejection value to classify.
 * @returns `true` when the value is named "AbortError" or its message contains
 *   one of the known transient-failure fragments; `false` otherwise, including
 *   for falsy values and values that cannot be converted to a string.
 */
function isRecoverableError(reason: unknown): boolean {
  if (!reason) return false;

  // Read `name`/`message` structurally so error-like objects that are not
  // `Error` instances are classified the same way as real errors.
  let name: unknown;
  let message: unknown;
  if (typeof reason === "object") {
    const errorLike = reason as { name?: unknown; message?: unknown };
    name = errorLike.name;
    message = errorLike.message;
  }

  if (name === "AbortError") return true;

  let text: string;
  if (typeof message === "string") {
    text = message;
  } else {
    try {
      text = String(reason);
    } catch {
      // String() throws for values with no usable toString (e.g. null-prototype
      // objects); classification must never throw, so treat as not recoverable.
      return false;
    }
  }

  const haystack = text.toLowerCase();
  return RECOVERABLE_ERROR_FRAGMENTS.some((fragment) => haystack.includes(fragment));
}
export function isUnhandledRejectionHandled(reason: unknown): boolean {
for (const handler of handlers) {
try {
@@ -30,6 +60,13 @@ export function isUnhandledRejectionHandled(reason: unknown): boolean {
export function installUnhandledRejectionHandler(): void {
process.on("unhandledRejection", (reason, _promise) => {
if (isUnhandledRejectionHandled(reason)) return;
// Don't crash on recoverable/transient errors - log them and continue
if (isRecoverableError(reason)) {
console.error("[clawdbot] Recoverable error (not crashing):", formatUncaughtError(reason));
return;
}
console.error("[clawdbot] Unhandled promise rejection:", formatUncaughtError(reason));
process.exit(1);
});

View File

@@ -138,6 +138,12 @@ export function createTelegramBot(opts: TelegramBotOptions) {
bot.api.config.use(apiThrottler());
bot.use(sequentialize(getTelegramSequentialKey));
// Catch all errors from bot middleware to prevent unhandled rejections
bot.catch((err) => {
const message = err instanceof Error ? err.message : String(err);
runtime.error?.(danger(`telegram bot error: ${message}`));
});
const recentUpdates = createTelegramUpdateDedupe();
let lastUpdateId =
typeof opts.updateOffset?.lastUpdateId === "number" ? opts.updateOffset.lastUpdateId : null;

View File

@@ -1,5 +1,17 @@
import { setDefaultAutoSelectFamily } from "net";
import { resolveFetch } from "../infra/fetch.js";
// Workaround for Node.js 22 "Happy Eyeballs" (autoSelectFamily) bug
// that causes intermittent ETIMEDOUT errors when connecting to Telegram's
// dual-stack servers. Disabling autoSelectFamily forces sequential IPv4/IPv6
// attempts which works reliably.
// See: https://github.com/nodejs/node/issues/54359
//
// This is a top-level statement, so it runs once as a side effect of loading
// this module.
// NOTE(review): judging by its name this changes the default for every socket
// opened through `net`, not only Telegram traffic - confirm that is intended.
// NOTE(review): if this file is loaded as native ESM, a missing named export
// would presumably fail at import time, before this try block runs - confirm
// the minimum supported Node version.
try {
setDefaultAutoSelectFamily(false);
} catch {
// Ignore if not available (older Node versions)
}
// Prefer wrapped fetch when available to normalize AbortSignal across runtimes.
export function resolveTelegramFetch(proxyFetch?: typeof fetch): typeof fetch | undefined {
if (proxyFetch) return resolveFetch(proxyFetch);

View File

@@ -40,6 +40,10 @@ export function createTelegramRunnerOptions(cfg: ClawdbotConfig): RunOptions<unk
},
// Suppress grammY getUpdates stack traces; we log concise errors ourselves.
silent: true,
// Retry failed getUpdates calls for up to 5 minutes before giving up
maxRetryTime: 5 * 60 * 1000,
// Use exponential backoff for retries
retryInterval: "exponential",
},
};
}
@@ -69,6 +73,23 @@ const isGetUpdatesConflict = (err: unknown) => {
return haystack.includes("getupdates");
};
/**
 * Decide whether a polling failure looks like a transient network problem
 * that should lead to a retry rather than a thrown error.
 *
 * The check is a case-insensitive substring match against the error message
 * (or the stringified value when `err` is not an `Error`).
 *
 * @param err - The value caught from the polling loop.
 * @returns `true` when the message contains a known network-failure marker;
 *   `false` otherwise, including for falsy values.
 */
const isRecoverableNetworkError = (err: unknown): boolean => {
  if (!err) {
    return false;
  }

  const text = (err instanceof Error ? err.message : String(err)).toLowerCase();

  // Recoverable network errors that should trigger retry, not crash
  const markers = [
    "fetch failed",
    "network request",
    "econnrefused",
    "econnreset",
    "etimedout",
    "socket hang up",
    "enotfound",
    "abort",
  ];
  return markers.some((marker) => text.includes(marker));
};
export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
const cfg = opts.config ?? loadConfig();
const account = resolveTelegramAccount({
@@ -152,12 +173,18 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
if (opts.abortSignal?.aborted) {
throw err;
}
if (!isGetUpdatesConflict(err)) {
const isConflict = isGetUpdatesConflict(err);
const isNetworkError = isRecoverableNetworkError(err);
if (!isConflict && !isNetworkError) {
throw err;
}
restartAttempts += 1;
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
log(`Telegram getUpdates conflict; retrying in ${formatDurationMs(delayMs)}.`);
const reason = isConflict ? "getUpdates conflict" : "network error";
const errMsg = err instanceof Error ? err.message : String(err);
(opts.runtime?.error ?? console.error)(
`Telegram ${reason}: ${errMsg}; retrying in ${formatDurationMs(delayMs)}.`,
);
try {
await sleepWithAbort(delayMs, opts.abortSignal);
} catch (sleepErr) {