fix(telegram): handle network errors gracefully
- Add bot.catch() to prevent unhandled rejections from middleware.
- Add isRecoverableNetworkError() to retry on transient failures.
- Add maxRetryTime and exponential backoff to the grammY runner.
- The global unhandled rejection handler now logs recoverable errors (fetch failures, timeouts, connection resets) instead of crashing.

Fixes a crash loop when the Telegram API is temporarily unreachable.
This commit is contained in:
committed by
Gustavo Madeira Santana
parent
a8ad242f88
commit
e43f4c0628
@@ -13,6 +13,36 @@ export function registerUnhandledRejectionHandler(handler: UnhandledRejectionHan
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Lower-cased fragments that identify a transient network failure or an abort.
 * Each one is matched as a substring of an error's message or `code`.
 */
const RECOVERABLE_REJECTION_MARKERS: readonly string[] = [
  "fetch failed",
  "network request",
  "econnrefused",
  "econnreset",
  "etimedout",
  "socket hang up",
  "enotfound",
  "network error",
  "getaddrinfo",
  "client network socket disconnected",
  // Also covers "this operation was aborted".
  // NOTE(review): this substring is broad; any message containing "aborted" is treated as recoverable.
  "aborted",
];

/** Maximum number of `cause` links followed; bounds the walk if a chain is cyclic. */
const MAX_REJECTION_CAUSE_DEPTH = 8;

/** Reads `key` from an arbitrary thrown value without assuming its shape. */
function readRejectionField(value: unknown, key: string): unknown {
  if (value === null || (typeof value !== "object" && typeof value !== "function")) {
    return undefined;
  }
  return (value as Record<string, unknown>)[key];
}

/** Collects the lower-cased strings of one thrown value that are worth scanning for markers. */
function describeRejection(value: unknown): string[] {
  const parts: string[] = [];
  const message = readRejectionField(value, "message");
  if (typeof message === "string") parts.push(message);
  // Errors may carry the failure kind in a string `code` rather than in the message text.
  const code = readRejectionField(value, "code");
  if (typeof code === "string") parts.push(code);
  if (!(value instanceof Error)) {
    try {
      parts.push(String(value));
    } catch {
      // Value has no usable string form (e.g. null-prototype object); nothing to scan.
    }
  }
  return parts.map((part) => part.toLowerCase());
}

/**
 * Check if an error is a recoverable/transient error that shouldn't crash the process.
 * These include network errors and abort signals during shutdown.
 *
 * The rejection reason and every error reachable through its `cause` chain
 * (up to MAX_REJECTION_CAUSE_DEPTH links) are inspected, so a wrapper error whose own
 * message is generic is still recognised when the underlying failure is transient.
 *
 * @param reason - The value a promise was rejected with; may be any type.
 * @returns `true` when the reason, or one of its causes, is named `AbortError` or has a
 *   message/`code` containing one of RECOVERABLE_REJECTION_MARKERS; `false` otherwise,
 *   including for falsy reasons.
 */
function isRecoverableError(reason: unknown): boolean {
  let current: unknown = reason;
  for (let depth = 0; current && depth < MAX_REJECTION_CAUSE_DEPTH; depth += 1) {
    if (readRejectionField(current, "name") === "AbortError") return true;

    const haystacks = describeRejection(current);
    const matched = RECOVERABLE_REJECTION_MARKERS.some((marker) =>
      haystacks.some((text) => text.includes(marker)),
    );
    if (matched) return true;

    current = readRejectionField(current, "cause");
  }
  return false;
}
|
||||||
|
|
||||||
export function isUnhandledRejectionHandled(reason: unknown): boolean {
|
export function isUnhandledRejectionHandled(reason: unknown): boolean {
|
||||||
for (const handler of handlers) {
|
for (const handler of handlers) {
|
||||||
try {
|
try {
|
||||||
@@ -30,6 +60,13 @@ export function isUnhandledRejectionHandled(reason: unknown): boolean {
|
|||||||
export function installUnhandledRejectionHandler(): void {
|
export function installUnhandledRejectionHandler(): void {
|
||||||
process.on("unhandledRejection", (reason, _promise) => {
|
process.on("unhandledRejection", (reason, _promise) => {
|
||||||
if (isUnhandledRejectionHandled(reason)) return;
|
if (isUnhandledRejectionHandled(reason)) return;
|
||||||
|
|
||||||
|
// Don't crash on recoverable/transient errors - log them and continue
|
||||||
|
if (isRecoverableError(reason)) {
|
||||||
|
console.error("[clawdbot] Recoverable error (not crashing):", formatUncaughtError(reason));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
console.error("[clawdbot] Unhandled promise rejection:", formatUncaughtError(reason));
|
console.error("[clawdbot] Unhandled promise rejection:", formatUncaughtError(reason));
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -138,6 +138,12 @@ export function createTelegramBot(opts: TelegramBotOptions) {
|
|||||||
bot.api.config.use(apiThrottler());
|
bot.api.config.use(apiThrottler());
|
||||||
bot.use(sequentialize(getTelegramSequentialKey));
|
bot.use(sequentialize(getTelegramSequentialKey));
|
||||||
|
|
||||||
|
// Catch all errors from bot middleware to prevent unhandled rejections
|
||||||
|
bot.catch((err) => {
|
||||||
|
const message = err instanceof Error ? err.message : String(err);
|
||||||
|
runtime.error?.(danger(`telegram bot error: ${message}`));
|
||||||
|
});
|
||||||
|
|
||||||
const recentUpdates = createTelegramUpdateDedupe();
|
const recentUpdates = createTelegramUpdateDedupe();
|
||||||
let lastUpdateId =
|
let lastUpdateId =
|
||||||
typeof opts.updateOffset?.lastUpdateId === "number" ? opts.updateOffset.lastUpdateId : null;
|
typeof opts.updateOffset?.lastUpdateId === "number" ? opts.updateOffset.lastUpdateId : null;
|
||||||
|
|||||||
@@ -1,5 +1,17 @@
|
|||||||
|
import { setDefaultAutoSelectFamily } from "net";
|
||||||
import { resolveFetch } from "../infra/fetch.js";
|
import { resolveFetch } from "../infra/fetch.js";
|
||||||
|
|
||||||
|
// Workaround for Node.js 22 "Happy Eyeballs" (autoSelectFamily) bug
|
||||||
|
// that causes intermittent ETIMEDOUT errors when connecting to Telegram's
|
||||||
|
// dual-stack servers. Disabling autoSelectFamily forces sequential IPv4/IPv6
|
||||||
|
// attempts which works reliably.
|
||||||
|
// See: https://github.com/nodejs/node/issues/54359
|
||||||
|
// Best-effort call made once when this module is loaded; any throw is deliberately swallowed.
// NOTE(review): presumably this changes the process-wide default for every socket, not only
// Telegram connections — confirm that is intended.
// NOTE(review): setDefaultAutoSelectFamily is a static named import; under native ESM a missing
// export would likely fail at module link time, before this try/catch runs — confirm the minimum
// supported Node version.
try {
|
||||||
|
setDefaultAutoSelectFamily(false);
|
||||||
|
} catch {
|
||||||
|
// Ignore if not available (older Node versions)
|
||||||
|
}
|
||||||
|
|
||||||
// Prefer wrapped fetch when available to normalize AbortSignal across runtimes.
|
// Prefer wrapped fetch when available to normalize AbortSignal across runtimes.
|
||||||
export function resolveTelegramFetch(proxyFetch?: typeof fetch): typeof fetch | undefined {
|
export function resolveTelegramFetch(proxyFetch?: typeof fetch): typeof fetch | undefined {
|
||||||
if (proxyFetch) return resolveFetch(proxyFetch);
|
if (proxyFetch) return resolveFetch(proxyFetch);
|
||||||
|
|||||||
@@ -40,6 +40,10 @@ export function createTelegramRunnerOptions(cfg: ClawdbotConfig): RunOptions<unk
|
|||||||
},
|
},
|
||||||
// Suppress grammY getUpdates stack traces; we log concise errors ourselves.
|
// Suppress grammY getUpdates stack traces; we log concise errors ourselves.
|
||||||
silent: true,
|
silent: true,
|
||||||
|
// Retry failed getUpdates calls for up to 5 minutes before giving up
|
||||||
|
maxRetryTime: 5 * 60 * 1000,
|
||||||
|
// Use exponential backoff for retries
|
||||||
|
retryInterval: "exponential",
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -69,6 +73,23 @@ const isGetUpdatesConflict = (err: unknown) => {
|
|||||||
return haystack.includes("getupdates");
|
return haystack.includes("getupdates");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Lower-cased fragments that mark a polling failure as a transient network problem.
 * Matched as substrings of an error's message or `code`.
 */
const TELEGRAM_NETWORK_ERROR_MARKERS: readonly string[] = [
  "fetch failed",
  "network request",
  "econnrefused",
  "econnreset",
  "etimedout",
  "socket hang up",
  "enotfound",
  "abort",
];

/** Builds the lower-cased text scanned for markers from one thrown value. */
const collectTelegramErrorText = (value: unknown): string => {
  const parts: string[] = [value instanceof Error ? value.message : String(value)];
  if (typeof value === "object" && value !== null) {
    const { message, code } = value as { message?: unknown; code?: unknown };
    if (!(value instanceof Error) && typeof message === "string") parts.push(message);
    // Errors may carry the failure kind in a string `code` rather than in the message text.
    if (typeof code === "string") parts.push(code);
  }
  // Markers contain no newline, so joining cannot create a match that spans two parts.
  return parts.join("\n").toLowerCase();
};

/**
 * Decide whether a polling failure is a transient network error that should trigger a
 * retry rather than propagate.
 *
 * The error and every error reachable through its `cause` chain are inspected, so a
 * wrapper with a generic message is still recognised when the underlying failure is a
 * connection problem. A visited set stops the walk if the chain is cyclic.
 *
 * @param err - The caught value; may be any type.
 * @returns `true` when the error or one of its causes has a message/`code` containing one
 *   of TELEGRAM_NETWORK_ERROR_MARKERS; `false` otherwise, including for falsy values.
 */
const isRecoverableNetworkError = (err: unknown): boolean => {
  const visited = new Set<unknown>();
  let current: unknown = err;
  while (current && !visited.has(current)) {
    visited.add(current);
    const text = collectTelegramErrorText(current);
    if (TELEGRAM_NETWORK_ERROR_MARKERS.some((marker) => text.includes(marker))) {
      return true;
    }
    current = typeof current === "object" ? (current as { cause?: unknown }).cause : undefined;
  }
  return false;
};
|
||||||
|
|
||||||
export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
|
export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
|
||||||
const cfg = opts.config ?? loadConfig();
|
const cfg = opts.config ?? loadConfig();
|
||||||
const account = resolveTelegramAccount({
|
const account = resolveTelegramAccount({
|
||||||
@@ -152,12 +173,18 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
|
|||||||
if (opts.abortSignal?.aborted) {
|
if (opts.abortSignal?.aborted) {
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
if (!isGetUpdatesConflict(err)) {
|
const isConflict = isGetUpdatesConflict(err);
|
||||||
|
const isNetworkError = isRecoverableNetworkError(err);
|
||||||
|
if (!isConflict && !isNetworkError) {
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
restartAttempts += 1;
|
restartAttempts += 1;
|
||||||
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
|
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
|
||||||
log(`Telegram getUpdates conflict; retrying in ${formatDurationMs(delayMs)}.`);
|
const reason = isConflict ? "getUpdates conflict" : "network error";
|
||||||
|
const errMsg = err instanceof Error ? err.message : String(err);
|
||||||
|
(opts.runtime?.error ?? console.error)(
|
||||||
|
`Telegram ${reason}: ${errMsg}; retrying in ${formatDurationMs(delayMs)}.`,
|
||||||
|
);
|
||||||
try {
|
try {
|
||||||
await sleepWithAbort(delayMs, opts.abortSignal);
|
await sleepWithAbort(delayMs, opts.abortSignal);
|
||||||
} catch (sleepErr) {
|
} catch (sleepErr) {
|
||||||
|
|||||||
Reference in New Issue
Block a user