fix(telegram): handle network errors gracefully
- Add bot.catch() to prevent unhandled rejections from middleware
- Add isRecoverableNetworkError() to retry on transient failures
- Add maxRetryTime and exponential backoff to grammY runner
- Global unhandled rejection handler now logs recoverable errors (fetch failures, timeouts, connection resets) instead of crashing

Fixes crash loop when Telegram API is temporarily unreachable.
This commit is contained in:
committed by
Gustavo Madeira Santana
parent
a8ad242f88
commit
e43f4c0628
@@ -13,6 +13,36 @@ export function registerUnhandledRejectionHandler(handler: UnhandledRejectionHan
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Check if a rejection reason is a recoverable/transient error that shouldn't
 * crash the process. These include network errors and abort signals during
 * shutdown.
 *
 * The reason and every error reachable through its `cause` chain are
 * inspected. A link is considered recoverable when its `name` is
 * `"AbortError"`, or when its `message` or string `code` contains one of the
 * known transient markers (case-insensitive). Checking `code` matters because
 * some network errors carry an empty message and only identify themselves
 * through `code` (for example `ETIMEDOUT`).
 *
 * @param reason - The value a promise was rejected with; may be anything.
 * @returns `true` when the rejection looks transient, `false` otherwise
 *   (including for falsy reasons).
 */
function isRecoverableError(reason: unknown): boolean {
  if (!reason) return false;

  // Lower-case substrings that identify a transient failure. "aborted" also
  // covers the longer "this operation was aborted" wording.
  const transientMarkers = [
    "fetch failed",
    "network request",
    "econnrefused",
    "econnreset",
    "etimedout",
    "socket hang up",
    "enotfound",
    "network error",
    "getaddrinfo",
    "client network socket disconnected",
    "aborted",
  ];
  const looksTransient = (text: string): boolean => {
    const lowered = text.toLowerCase();
    return transientMarkers.some((marker) => lowered.includes(marker));
  };

  // Walk the cause chain; the depth cap and visited set guard against
  // self-referencing or pathologically long chains.
  const visited = new Set<unknown>();
  let current: unknown = reason;
  for (let depth = 0; depth < 8 && current && !visited.has(current); depth += 1) {
    visited.add(current);

    if (typeof current !== "object") {
      // Primitives (typically strings) are matched on their string form.
      return looksTransient(String(current));
    }

    const {
      name: errName,
      message,
      code,
      cause,
    } = current as { name?: unknown; message?: unknown; code?: unknown; cause?: unknown };

    if (errName === "AbortError") return true;
    if (typeof message === "string" && looksTransient(message)) return true;
    if (typeof code === "string" && looksTransient(code)) return true;

    if (!(current instanceof Error)) {
      // Non-Error objects may describe themselves through a custom toString().
      try {
        if (looksTransient(String(current))) return true;
      } catch {
        // Objects that cannot be stringified carry no usable description.
      }
    }

    current = cause;
  }

  return false;
}
|
||||
|
||||
export function isUnhandledRejectionHandled(reason: unknown): boolean {
|
||||
for (const handler of handlers) {
|
||||
try {
|
||||
@@ -30,6 +60,13 @@ export function isUnhandledRejectionHandled(reason: unknown): boolean {
|
||||
export function installUnhandledRejectionHandler(): void {
|
||||
process.on("unhandledRejection", (reason, _promise) => {
|
||||
if (isUnhandledRejectionHandled(reason)) return;
|
||||
|
||||
// Don't crash on recoverable/transient errors - log them and continue
|
||||
if (isRecoverableError(reason)) {
|
||||
console.error("[clawdbot] Recoverable error (not crashing):", formatUncaughtError(reason));
|
||||
return;
|
||||
}
|
||||
|
||||
console.error("[clawdbot] Unhandled promise rejection:", formatUncaughtError(reason));
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
@@ -138,6 +138,12 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
||||
bot.api.config.use(apiThrottler());
|
||||
bot.use(sequentialize(getTelegramSequentialKey));
|
||||
|
||||
// Catch all errors from bot middleware to prevent unhandled rejections
|
||||
bot.catch((err) => {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
runtime.error?.(danger(`telegram bot error: ${message}`));
|
||||
});
|
||||
|
||||
const recentUpdates = createTelegramUpdateDedupe();
|
||||
let lastUpdateId =
|
||||
typeof opts.updateOffset?.lastUpdateId === "number" ? opts.updateOffset.lastUpdateId : null;
|
||||
|
||||
@@ -1,5 +1,17 @@
|
||||
import { setDefaultAutoSelectFamily } from "net";
|
||||
import { resolveFetch } from "../infra/fetch.js";
|
||||
|
||||
// Workaround for Node.js 22 "Happy Eyeballs" (autoSelectFamily) bug
|
||||
// that causes intermittent ETIMEDOUT errors when connecting to Telegram's
|
||||
// dual-stack servers. Disabling autoSelectFamily forces sequential IPv4/IPv6
|
||||
// attempts which works reliably.
|
||||
// See: https://github.com/nodejs/node/issues/54359
|
||||
try {
|
||||
setDefaultAutoSelectFamily(false);
|
||||
} catch {
|
||||
// Ignore if not available (older Node versions)
|
||||
}
|
||||
|
||||
// Prefer wrapped fetch when available to normalize AbortSignal across runtimes.
|
||||
export function resolveTelegramFetch(proxyFetch?: typeof fetch): typeof fetch | undefined {
|
||||
if (proxyFetch) return resolveFetch(proxyFetch);
|
||||
|
||||
@@ -40,6 +40,10 @@ export function createTelegramRunnerOptions(cfg: ClawdbotConfig): RunOptions<unk
|
||||
},
|
||||
// Suppress grammY getUpdates stack traces; we log concise errors ourselves.
|
||||
silent: true,
|
||||
// Retry failed getUpdates calls for up to 5 minutes before giving up
|
||||
maxRetryTime: 5 * 60 * 1000,
|
||||
// Use exponential backoff for retries
|
||||
retryInterval: "exponential",
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -69,6 +73,23 @@ const isGetUpdatesConflict = (err: unknown) => {
|
||||
return haystack.includes("getupdates");
|
||||
};
|
||||
|
||||
/**
 * Decide whether a polling failure is a transient network problem that should
 * trigger a retry rather than propagate.
 *
 * The error and every error reachable through its `cause` chain are
 * inspected. A link counts as recoverable when its `name` is `"AbortError"`,
 * or when its `message` or string `code` contains one of the known transient
 * markers (case-insensitive). Looking at `code` catches errors whose message
 * is empty and that only identify themselves through `code`.
 *
 * @param err - The caught value; may be anything.
 * @returns `true` for transient network/abort failures, `false` otherwise
 *   (including for falsy values).
 */
const isRecoverableNetworkError = (err: unknown): boolean => {
  if (!err) return false;

  // Lower-case substrings that identify a recoverable network failure.
  const transientMarkers = [
    "fetch failed",
    "network request",
    "econnrefused",
    "econnreset",
    "etimedout",
    "socket hang up",
    "enotfound",
    "network error",
    "getaddrinfo",
    "client network socket disconnected",
    "abort",
  ];
  const looksTransient = (text: string): boolean => {
    const lowered = text.toLowerCase();
    return transientMarkers.some((marker) => lowered.includes(marker));
  };

  // Walk the cause chain; the depth cap and visited set guard against
  // self-referencing or pathologically long chains.
  const visited = new Set<unknown>();
  let current: unknown = err;
  for (let depth = 0; depth < 8 && current && !visited.has(current); depth += 1) {
    visited.add(current);

    if (typeof current !== "object") {
      // Primitives (typically strings) are matched on their string form.
      return looksTransient(String(current));
    }

    const {
      name: errName,
      message,
      code,
      cause,
    } = current as { name?: unknown; message?: unknown; code?: unknown; cause?: unknown };

    if (errName === "AbortError") return true;
    if (typeof message === "string" && looksTransient(message)) return true;
    if (typeof code === "string" && looksTransient(code)) return true;

    if (!(current instanceof Error)) {
      // Non-Error objects may describe themselves through a custom toString().
      try {
        if (looksTransient(String(current))) return true;
      } catch {
        // Objects that cannot be stringified carry no usable description.
      }
    }

    current = cause;
  }

  return false;
};
|
||||
|
||||
export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
|
||||
const cfg = opts.config ?? loadConfig();
|
||||
const account = resolveTelegramAccount({
|
||||
@@ -152,12 +173,18 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
|
||||
if (opts.abortSignal?.aborted) {
|
||||
throw err;
|
||||
}
|
||||
if (!isGetUpdatesConflict(err)) {
|
||||
const isConflict = isGetUpdatesConflict(err);
|
||||
const isNetworkError = isRecoverableNetworkError(err);
|
||||
if (!isConflict && !isNetworkError) {
|
||||
throw err;
|
||||
}
|
||||
restartAttempts += 1;
|
||||
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
|
||||
log(`Telegram getUpdates conflict; retrying in ${formatDurationMs(delayMs)}.`);
|
||||
const reason = isConflict ? "getUpdates conflict" : "network error";
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
(opts.runtime?.error ?? console.error)(
|
||||
`Telegram ${reason}: ${errMsg}; retrying in ${formatDurationMs(delayMs)}.`,
|
||||
);
|
||||
try {
|
||||
await sleepWithAbort(delayMs, opts.abortSignal);
|
||||
} catch (sleepErr) {
|
||||
|
||||
Reference in New Issue
Block a user