gateway: harden ws protocol and liveness

This commit is contained in:
Peter Steinberger
2025-12-09 17:02:58 +01:00
parent 20d247b3f7
commit 72eb240c3b
6 changed files with 108 additions and 42 deletions

View File

@@ -1,5 +1,6 @@
import { randomUUID } from "node:crypto";
import { GatewayClient } from "./client.js";
import { PROTOCOL_VERSION } from "./protocol/index.js";
export type CallGatewayOptions = {
url?: string;
@@ -39,8 +40,8 @@ export async function callGateway<T = unknown>(
clientVersion: opts.clientVersion ?? "dev",
platform: opts.platform,
mode: opts.mode ?? "cli",
minProtocol: opts.minProtocol ?? 1,
maxProtocol: opts.maxProtocol ?? 1,
minProtocol: opts.minProtocol ?? PROTOCOL_VERSION,
maxProtocol: opts.maxProtocol ?? PROTOCOL_VERSION,
onHelloOk: async () => {
try {
const result = await client.request<T>(opts.method, opts.params, {

View File

@@ -39,6 +39,10 @@ export class GatewayClient {
private backoffMs = 1000;
private closed = false;
private lastSeq: number | null = null;
// Track last tick to detect silent stalls.
private lastTick: number | null = null;
private tickIntervalMs = 30_000;
private tickTimer: NodeJS.Timeout | null = null;
constructor(opts: GatewayClientOptions) {
this.opts = opts;
@@ -66,6 +70,10 @@ export class GatewayClient {
stop() {
this.closed = true;
if (this.tickTimer) {
clearInterval(this.tickTimer);
this.tickTimer = null;
}
this.ws?.close();
this.ws = null;
this.flushPendingErrors(new Error("gateway client stopped"));
@@ -94,6 +102,12 @@ export class GatewayClient {
const parsed = JSON.parse(raw);
if (parsed?.type === "hello-ok") {
this.backoffMs = 1000;
this.tickIntervalMs =
typeof parsed.policy?.tickIntervalMs === "number"
? parsed.policy.tickIntervalMs
: 30_000;
this.lastTick = Date.now();
this.startTickWatch();
this.opts.onHelloOk?.(parsed as HelloOk);
return;
}
@@ -111,6 +125,9 @@ export class GatewayClient {
}
this.lastSeq = seq;
}
if (evt.event === "tick") {
this.lastTick = Date.now();
}
this.opts.onEvent?.(evt);
return;
}
@@ -134,6 +151,10 @@ export class GatewayClient {
private scheduleReconnect() {
if (this.closed) return;
if (this.tickTimer) {
clearInterval(this.tickTimer);
this.tickTimer = null;
}
const delay = this.backoffMs;
this.backoffMs = Math.min(this.backoffMs * 2, 30_000);
setTimeout(() => this.start(), delay).unref();
@@ -146,6 +167,19 @@ export class GatewayClient {
this.pending.clear();
}
/**
 * (Re)starts the liveness watchdog for the current connection.
 *
 * Any previously running watchdog timer is cleared first. The timer then
 * periodically compares the time elapsed since `lastTick` against twice the
 * tick interval and, when exceeded, closes the socket with code 4000 and
 * reason "tick timeout". Nothing is closed while `closed` is set or before a
 * first tick timestamp has been recorded.
 */
private startTickWatch() {
  if (this.tickTimer) clearInterval(this.tickTimer);

  // tickIntervalMs can be supplied by the remote side, so do not trust it:
  // zero/negative values would make every check report a timeout, and
  // NaN/Infinity would be coerced by the timer API to a 1 ms period.
  // Fall back to the 30s default in those cases.
  const tickMs =
    Number.isFinite(this.tickIntervalMs) && this.tickIntervalMs > 0
      ? this.tickIntervalMs
      : 30_000;

  // Timer delays above the signed 32-bit maximum are also coerced to 1 ms.
  const maxTimerDelayMs = 2_147_483_647;
  // Never poll more often than once per second.
  const interval = Math.min(Math.max(tickMs, 1000), maxTimerDelayMs);
  // Tolerate one missed tick before declaring the connection stalled.
  const maxGapMs = tickMs * 2;

  this.tickTimer = setInterval(() => {
    if (this.closed) return;
    // Explicit null check: a timestamp of 0 is still a recorded tick.
    if (this.lastTick === null) return;
    const gap = Date.now() - this.lastTick;
    if (gap > maxGapMs) {
      this.ws?.close(4000, "tick timeout");
    }
  }, interval);
}
async request<T = unknown>(
method: string,
params?: unknown,

View File

@@ -40,7 +40,6 @@ const METHODS = [
"status",
"system-presence",
"system-event",
"set-heartbeats",
"send",
"agent",
];
@@ -54,6 +53,8 @@ export type GatewayServer = {
let presenceVersion = 1;
let healthVersion = 1;
let seq = 0;
// Track per-run sequence to detect out-of-order/lost agent events.
const agentRunSeq = new Map<string, number>();
function buildSnapshot(): Snapshot {
const presence = listSystemPresence();
@@ -147,6 +148,21 @@ export async function startGatewayServer(port = 18789): Promise<GatewayServer> {
}, 60_000);
const agentUnsub = onAgentEvent((evt) => {
  // Each run's events are expected to arrive with seq increasing by exactly
  // one; a run with no recorded seq is treated as being at 0.
  const { runId, seq: receivedSeq } = evt;
  const expectedSeq = (agentRunSeq.get(runId) ?? 0) + 1;

  if (receivedSeq !== expectedSeq) {
    // Broadcast an "error" stream entry describing the mismatch before the
    // event itself, so listeners see both what was expected and what arrived.
    broadcast("agent", {
      runId,
      stream: "error",
      ts: Date.now(),
      data: {
        reason: "seq gap",
        expected: expectedSeq,
        received: receivedSeq,
      },
    });
  }

  // Record the received seq (even after a mismatch) and forward the event.
  agentRunSeq.set(runId, receivedSeq);
  broadcast("agent", evt);
});
@@ -247,14 +263,15 @@ export async function startGatewayServer(port = 18789): Promise<GatewayServer> {
client = { socket, hello, connId };
clients.add(client);
// synthesize presence entry for this connection
// synthesize presence entry for this connection (client fingerprint)
const presenceKey = hello.client.instanceId || connId;
const remoteAddr = (
socket as WebSocket & { _socket?: { remoteAddress?: string } }
)._socket?.remoteAddress;
upsertPresence(presenceKey, {
host: os.hostname(),
version:
process.env.CLAWDIS_VERSION ??
process.env.npm_package_version ??
"dev",
host: hello.client.name || os.hostname(),
ip: remoteAddr,
version: hello.client.version,
mode: hello.client.mode,
instanceId: hello.client.instanceId,
reason: "connect",
@@ -352,10 +369,6 @@ export async function startGatewayServer(port = 18789): Promise<GatewayServer> {
respond(true, { ok: true }, undefined);
break;
}
case "set-heartbeats": {
respond(true, { ok: true }, undefined);
break;
}
case "send": {
const p = (req.params ?? {}) as Record<string, unknown>;
if (!validateSendParams(p)) {