fix: reduce log noise for node disconnect/late invoke errors (#1607)

* fix: reduce log noise for node disconnect/late invoke errors

- Handle both 'node not connected' and 'node disconnected' errors at info level
- Return success with late:true for unknown invoke IDs instead of error
- Add 30-second throttle to skills change listener to prevent rapid-fire probes
- Add tests for isNodeUnavailableError and late invoke handling

* fix: clean up skills refresh timer and listener on shutdown

Store the return value from registerSkillsChangeListener() and call it
on gateway shutdown. Also clear any pending refresh timer. This follows
the same pattern used for agentUnsub and heartbeatUnsub.

* refactor: simplify per KISS/YAGNI - inline checks, remove unit tests for internal utilities

* fix: reduce gateway log noise (#1607) (thanks @petter-b)

* test: align agent id casing expectations (#1607)

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Petter Blomberg
2026-01-24 21:05:41 +01:00
committed by GitHub
parent 40ef3b5d30
commit 39d8c441eb
5 changed files with 165 additions and 9 deletions

View File

@@ -55,10 +55,10 @@ function extractErrorMessage(err: unknown): string | undefined {
function logRemoteBinProbeFailure(nodeId: string, err: unknown) {
const message = extractErrorMessage(err);
const label = describeNode(nodeId);
if (message?.includes("node not connected")) {
log.info(
`remote bin probe skipped: node not connected (${label}); check nodes list/status for ${label}`,
);
// Node unavailable errors (not connected or disconnected mid-operation) are expected
// when nodes have transient connections - log at info level instead of warn
if (message?.includes("node not connected") || message?.includes("node disconnected")) {
log.info(`remote bin probe skipped: node unavailable (${label})`);
return;
}
if (message?.includes("invoke timed out") || message?.includes("timeout")) {
@@ -213,6 +213,15 @@ function parseBinProbePayload(payloadJSON: string | null | undefined, payload?:
return [];
}
/**
 * Reports whether two sets of bin names contain exactly the same members.
 *
 * @param a - First set; when it is missing the sets are never considered equal.
 * @param b - Second set to compare against.
 * @returns `true` only when `a` is present, both sets have the same size,
 *   and every member of `b` is also in `a`.
 */
function areBinSetsEqual(a: Set<string> | undefined, b: Set<string>): boolean {
  // A missing set or a size mismatch rules out equality without scanning members.
  if (!a || a.size !== b.size) {
    return false;
  }
  // With equal sizes, "every member of b is in a" is sufficient for equality.
  const known = a;
  return Array.from(b).every((name) => known.has(name));
}
export async function refreshRemoteNodeBins(params: {
nodeId: string;
platform?: string;
@@ -261,7 +270,11 @@ export async function refreshRemoteNodeBins(params: {
return;
}
const bins = parseBinProbePayload(res.payloadJSON, res.payload);
const existingBins = remoteNodes.get(params.nodeId)?.bins;
const nextBins = new Set(bins);
const hasChanged = !areBinSetsEqual(existingBins, nextBins);
recordRemoteNodeBins(params.nodeId, bins);
if (!hasChanged) return;
await updatePairedNodeMetadata(params.nodeId, { bins });
bumpSkillsSnapshotVersion({ reason: "remote-node" });
} catch (err) {