refactor(agents): centralize failover normalization

This commit is contained in:
Peter Steinberger
2026-01-09 22:15:03 +01:00
parent 6220106ab2
commit 402c35b91c
4 changed files with 290 additions and 186 deletions

View File

@@ -842,6 +842,57 @@ export function calculateAuthProfileCooldownMs(errorCount: number): number {
);
}
/**
 * Auth cooldown settings after defaults and per-provider overrides have been
 * applied. Every field is a duration in milliseconds (the resolver converts
 * the hour-based config values by multiplying with 60 * 60 * 1000).
 */
type ResolvedAuthCooldownConfig = {
/** Passed as `baseMs` when computing how long a billing failure disables a profile. */
billingBackoffMs: number;
/** Passed as `maxMs` when computing how long a billing failure disables a profile. */
billingMaxMs: number;
/** If the previous failure is older than this, failure counters restart from zero. */
failureWindowMs: number;
};
/**
 * Resolve the auth cooldown settings that apply to one provider.
 *
 * Values are read from `cfg.auth.cooldowns` (expressed in hours) and returned
 * in milliseconds. A value that is not a finite number greater than zero is
 * replaced by the built-in default: 5h billing backoff, 24h billing maximum,
 * 24h failure window.
 *
 * For the billing backoff, the first entry of `billingBackoffHoursByProvider`
 * whose normalized key equals `providerId` wins over the global
 * `billingBackoffHours`; the global value is only consulted when that entry is
 * absent or nullish.
 *
 * @param params.cfg - Optional application config; may be omitted entirely.
 * @param params.providerId - Provider id compared against normalized map keys.
 * @returns The resolved durations in milliseconds.
 */
function resolveAuthCooldownConfig(params: {
  cfg?: ClawdbotConfig;
  providerId: string;
}): ResolvedAuthCooldownConfig {
  const { cfg, providerId } = params;
  const settings = cfg?.auth?.cooldowns;

  // Accept only finite, strictly positive numbers; anything else uses the fallback.
  const hoursOr = (candidate: unknown, fallback: number): number => {
    if (typeof candidate !== "number") return fallback;
    if (!Number.isFinite(candidate)) return fallback;
    return candidate > 0 ? candidate : fallback;
  };

  // Keep this exact multiplication order so fractional hours convert identically.
  const toMs = (hours: number): number => hours * 60 * 60 * 1000;

  // First map entry whose normalized key matches the requested provider.
  const perProvider = settings?.billingBackoffHoursByProvider;
  const matchedEntry = perProvider
    ? Object.entries(perProvider).find(
        ([key]) => normalizeProviderId(key) === providerId,
      )
    : undefined;
  const providerOverride = matchedEntry?.[1];

  return {
    billingBackoffMs: toMs(
      hoursOr(providerOverride ?? settings?.billingBackoffHours, 5),
    ),
    billingMaxMs: toMs(hoursOr(settings?.billingMaxHours, 24)),
    failureWindowMs: toMs(hoursOr(settings?.failureWindowHours, 24)),
  };
}
function calculateAuthProfileBillingDisableMsWithConfig(params: {
errorCount: number;
baseMs: number;
@@ -872,6 +923,49 @@ export function resolveProfileUnusableUntilForDisplay(
return resolveProfileUnusableUntil(stats);
}
/**
 * Build the usage stats to record for a profile after one more failure.
 *
 * When the previously recorded failure is older than the configured failure
 * window, the error count and per-reason counts start over before this failure
 * is added. The result copies every other field of `existing` and stamps
 * `lastFailureAt` with `now`.
 *
 * A `"billing"` failure sets `disabledUntil` / `disabledReason`, using the
 * billing failure count and the configured billing base/max durations. Any
 * other reason sets `cooldownUntil` from the overall error count.
 *
 * @param params.existing - Stats currently stored for the profile.
 * @param params.now - Current timestamp; same unit as `lastFailureAt`.
 * @param params.reason - Why the profile failed.
 * @param params.cfgResolved - Resolved cooldown durations in milliseconds.
 * @returns A new stats object; `existing` itself is not modified.
 */
function computeNextProfileUsageStats(params: {
  existing: ProfileUsageStats;
  now: number;
  reason: AuthProfileFailureReason;
  cfgResolved: ResolvedAuthCooldownConfig;
}): ProfileUsageStats {
  const { existing, now, reason, cfgResolved } = params;
  const failureWindowMs = cfgResolved.failureWindowMs;
  const previousFailureAt = existing.lastFailureAt;

  // A missing or non-positive timestamp never counts as an expired window.
  const isStale =
    typeof previousFailureAt === "number" &&
    previousFailureAt > 0 &&
    now - previousFailureAt > failureWindowMs;

  const errorCount = (isStale ? 0 : (existing.errorCount ?? 0)) + 1;

  // Copy the counts so the stored object is never mutated in place.
  const failureCounts = isStale ? {} : { ...existing.failureCounts };
  failureCounts[reason] = (failureCounts[reason] ?? 0) + 1;

  const next: ProfileUsageStats = {
    ...existing,
    errorCount,
    failureCounts,
    lastFailureAt: now,
  };

  if (reason !== "billing") {
    next.cooldownUntil = now + calculateAuthProfileCooldownMs(errorCount);
    return next;
  }

  // Billing failures are driven by the billing-specific count, not the total.
  const disableMs = calculateAuthProfileBillingDisableMsWithConfig({
    errorCount: failureCounts.billing ?? 1,
    baseMs: cfgResolved.billingBackoffMs,
    maxMs: cfgResolved.billingMaxMs,
  });
  next.disabledUntil = now + disableMs;
  next.disabledReason = "billing";
  return next;
}
/**
* Mark a profile as failed for a specific reason. Billing failures are treated
* as "disabled" (longer backoff) vs the regular cooldown window.
@@ -884,44 +978,6 @@ export async function markAuthProfileFailure(params: {
agentDir?: string;
}): Promise<void> {
const { store, profileId, reason, agentDir, cfg } = params;
const defaults = {
billingBackoffHours: 5,
billingMaxHours: 24,
failureWindowHours: 24,
} as const;
const resolveHours = (value: unknown, fallback: number) =>
typeof value === "number" && Number.isFinite(value) && value > 0
? value
: fallback;
const resolveCooldownConfig = (providerId: string) => {
const cooldowns = cfg?.auth?.cooldowns;
const billingOverride = (() => {
const map = cooldowns?.billingBackoffHoursByProvider;
if (!map) return undefined;
for (const [key, value] of Object.entries(map)) {
if (normalizeProviderId(key) === providerId) return value;
}
return undefined;
})();
const billingBackoffHours = resolveHours(
billingOverride ?? cooldowns?.billingBackoffHours,
defaults.billingBackoffHours,
);
const billingMaxHours = resolveHours(
cooldowns?.billingMaxHours,
defaults.billingMaxHours,
);
const failureWindowHours = resolveHours(
cooldowns?.failureWindowHours,
defaults.failureWindowHours,
);
return {
billingBackoffMs: billingBackoffHours * 60 * 60 * 1000,
billingMaxMs: billingMaxHours * 60 * 60 * 1000,
failureWindowMs: failureWindowHours * 60 * 60 * 1000,
};
};
const updated = await updateAuthProfileStoreWithLock({
agentDir,
updater: (freshStore) => {
@@ -932,41 +988,17 @@ export async function markAuthProfileFailure(params: {
const now = Date.now();
const providerKey = normalizeProviderId(profile.provider);
const cfgResolved = resolveCooldownConfig(providerKey);
const cfgResolved = resolveAuthCooldownConfig({
cfg,
providerId: providerKey,
});
const windowMs = cfgResolved.failureWindowMs;
const windowExpired =
typeof existing.lastFailureAt === "number" &&
existing.lastFailureAt > 0 &&
now - existing.lastFailureAt > windowMs;
const baseErrorCount = windowExpired ? 0 : (existing.errorCount ?? 0);
const nextErrorCount = baseErrorCount + 1;
const failureCounts = windowExpired ? {} : { ...existing.failureCounts };
failureCounts[reason] = (failureCounts[reason] ?? 0) + 1;
const updatedStats: ProfileUsageStats = {
...existing,
errorCount: nextErrorCount,
failureCounts,
lastFailureAt: now,
};
if (reason === "billing") {
const billingCount = failureCounts.billing ?? 1;
const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({
errorCount: billingCount,
baseMs: cfgResolved.billingBackoffMs,
maxMs: cfgResolved.billingMaxMs,
});
updatedStats.disabledUntil = now + backoffMs;
updatedStats.disabledReason = "billing";
} else {
const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount);
updatedStats.cooldownUntil = now + backoffMs;
}
freshStore.usageStats[profileId] = updatedStats;
freshStore.usageStats[profileId] = computeNextProfileUsageStats({
existing,
now,
reason,
cfgResolved,
});
return true;
},
});
@@ -982,38 +1014,17 @@ export async function markAuthProfileFailure(params: {
const providerKey = normalizeProviderId(
store.profiles[profileId]?.provider ?? "",
);
const cfgResolved = resolveCooldownConfig(providerKey);
const windowMs = cfgResolved.failureWindowMs;
const windowExpired =
typeof existing.lastFailureAt === "number" &&
existing.lastFailureAt > 0 &&
now - existing.lastFailureAt > windowMs;
const baseErrorCount = windowExpired ? 0 : (existing.errorCount ?? 0);
const nextErrorCount = baseErrorCount + 1;
const failureCounts = windowExpired ? {} : { ...existing.failureCounts };
failureCounts[reason] = (failureCounts[reason] ?? 0) + 1;
const cfgResolved = resolveAuthCooldownConfig({
cfg,
providerId: providerKey,
});
const updatedStats: ProfileUsageStats = {
...existing,
errorCount: nextErrorCount,
failureCounts,
lastFailureAt: now,
};
if (reason === "billing") {
const billingCount = failureCounts.billing ?? 1;
const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({
errorCount: billingCount,
baseMs: cfgResolved.billingBackoffMs,
maxMs: cfgResolved.billingMaxMs,
});
updatedStats.disabledUntil = now + backoffMs;
updatedStats.disabledReason = "billing";
} else {
const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount);
updatedStats.cooldownUntil = now + backoffMs;
}
store.usageStats[profileId] = updatedStats;
store.usageStats[profileId] = computeNextProfileUsageStats({
existing,
now,
reason,
cfgResolved,
});
saveAuthProfileStore(store, agentDir);
}