diff --git a/src/cli/gateway-cli.coverage.test.ts b/src/cli/gateway-cli.coverage.test.ts
index f43f8f1a9..5b4a7cef4 100644
--- a/src/cli/gateway-cli.coverage.test.ts
+++ b/src/cli/gateway-cli.coverage.test.ts
@@ -8,7 +8,11 @@ const startGatewayServer = vi.fn(async () => ({
 }));
 const setVerbose = vi.fn();
 const createDefaultDeps = vi.fn();
-const forceFreePort = vi.fn(() => []);
+const forceFreePortAndWait = vi.fn(async () => ({
+  killed: [],
+  waitedMs: 0,
+  escalatedToSigkill: false,
+}));
 
 const runtimeLogs: string[] = [];
 const runtimeErrors: string[] = [];
@@ -44,7 +48,7 @@ vi.mock("./deps.js", () => ({
 }));
 
 vi.mock("./ports.js", () => ({
-  forceFreePort: () => forceFreePort(),
+  forceFreePortAndWait: (port: number) => forceFreePortAndWait(port),
 }));
 
 describe("gateway-cli coverage", () => {
@@ -141,7 +145,7 @@ describe("gateway-cli coverage", () => {
     ).rejects.toThrow("__exit__:1");
 
     // Force free failure
-    forceFreePort.mockImplementationOnce(() => {
+    forceFreePortAndWait.mockImplementationOnce(async () => {
       throw new Error("boom");
     });
     const programForceFail = new Command();
diff --git a/src/cli/gateway-cli.ts b/src/cli/gateway-cli.ts
index 2f53d1ea6..944f72314 100644
--- a/src/cli/gateway-cli.ts
+++ b/src/cli/gateway-cli.ts
@@ -10,7 +10,7 @@ import { info, setVerbose } from "../globals.js";
 import { GatewayLockError } from "../infra/gateway-lock.js";
 import { defaultRuntime } from "../runtime.js";
 import { createDefaultDeps } from "./deps.js";
-import { forceFreePort } from "./ports.js";
+import { forceFreePortAndWait } from "./ports.js";
 
 type GatewayRpcOpts = {
   url?: string;
@@ -98,7 +98,12 @@ export function registerGatewayCli(program: Command) {
       }
       if (opts.force) {
         try {
-          const killed = forceFreePort(port);
+          const { killed, waitedMs, escalatedToSigkill } =
+            await forceFreePortAndWait(port, {
+              timeoutMs: 2000,
+              intervalMs: 100,
+              sigtermTimeoutMs: 700,
+            });
           if (killed.length === 0) {
             defaultRuntime.log(info(`Force: no listeners on port ${port}`));
           } else {
@@ -109,7 +114,16 @@ export function registerGatewayCli(program: Command) {
                 ),
               );
             }
-            await new Promise((resolve) => setTimeout(resolve, 200));
+            if (escalatedToSigkill) {
+              defaultRuntime.log(
+                info(`Force: escalated to SIGKILL while freeing port ${port}`),
+              );
+            }
+            if (waitedMs > 0) {
+              defaultRuntime.log(
+                info(`Force: waited ${waitedMs}ms for port ${port} to free`),
+              );
+            }
           }
         } catch (err) {
           defaultRuntime.error(`Force: ${String(err)}`);
diff --git a/src/cli/ports.ts b/src/cli/ports.ts
index dbb61d365..6288076a7 100644
--- a/src/cli/ports.ts
+++ b/src/cli/ports.ts
@@ -2,6 +2,21 @@ import { execFileSync } from "node:child_process";
 
 export type PortProcess = { pid: number; command?: string };
 
+/** Outcome of `forceFreePortAndWait`. */
+export type ForceFreePortResult = {
+  /** Listeners that `forceFreePort` found on the port and signalled. */
+  killed: PortProcess[];
+  /** Nominal time spent polling, as a sum of poll intervals. */
+  waitedMs: number;
+  /** Whether SIGKILL was sent after the SIGTERM grace period. */
+  escalatedToSigkill: boolean;
+};
+
+/** Resolve after `ms` milliseconds. */
+function sleep(ms: number): Promise<void> {
+  return new Promise<void>((resolve) => setTimeout(resolve, ms));
+}
+
 export function parseLsofOutput(output: string): PortProcess[] {
   const lines = output.split(/\r?\n/).filter(Boolean);
   const results: PortProcess[] = [];
@@ -50,3 +65,100 @@ export function forceFreePort(port: number): PortProcess[] {
   }
   return listeners;
 }
+
+/**
+ * Send `signal` to every process in `listeners`.
+ *
+ * A process that has already exited (ESRCH) is skipped rather than reported as
+ * a failure: the goal is to free the port, and a dead process cannot hold it.
+ *
+ * @throws Error naming the pid (and command, when known) if the kill fails for
+ *   any other reason.
+ */
+function killPids(listeners: PortProcess[], signal: NodeJS.Signals) {
+  for (const proc of listeners) {
+    try {
+      process.kill(proc.pid, signal);
+    } catch (err) {
+      // The process can exit between the lsof scan and this call.
+      if ((err as NodeJS.ErrnoException).code === "ESRCH") continue;
+      throw new Error(
+        `failed to kill pid ${proc.pid}${proc.command ? ` (${proc.command})` : ""}: ${String(err)}`,
+      );
+    }
+  }
+}
+
+/**
+ * Free `port` by signalling its listeners, then poll until none are reported.
+ *
+ * Listeners are first signalled by `forceFreePort`. If any are still listed
+ * once `sigtermTimeoutMs` has been spent polling, the remaining ones are sent
+ * SIGKILL and polling continues until the overall `timeoutMs` budget is used.
+ *
+ * @returns The listeners initially signalled, the nominal time spent polling
+ *   (a sum of poll intervals, not wall-clock time), and whether SIGKILL was sent.
+ * @throws Error if listeners remain once the budget is exhausted, or if a
+ *   remaining listener cannot be sent SIGKILL.
+ */
+export async function forceFreePortAndWait(
+  port: number,
+  opts: {
+    /** Total wait budget across signals. */
+    timeoutMs?: number;
+    /** Poll interval for checking whether lsof reports listeners. */
+    intervalMs?: number;
+    /** How long to wait after SIGTERM before escalating to SIGKILL. */
+    sigtermTimeoutMs?: number;
+  } = {},
+): Promise<ForceFreePortResult> {
+  const timeoutMs = Math.max(opts.timeoutMs ?? 1500, 0);
+  // Clamp to at least 1ms so the try counts below never divide by zero.
+  const intervalMs = Math.max(opts.intervalMs ?? 100, 1);
+  const sigtermTimeoutMs = Math.min(
+    Math.max(opts.sigtermTimeoutMs ?? 600, 0),
+    timeoutMs,
+  );
+
+  const killed = forceFreePort(port);
+  if (killed.length === 0) {
+    return { killed, waitedMs: 0, escalatedToSigkill: false };
+  }
+
+  let waitedMs = 0;
+  const triesSigterm = Math.ceil(sigtermTimeoutMs / intervalMs);
+  for (let i = 0; i < triesSigterm; i++) {
+    if (listPortListeners(port).length === 0) {
+      return { killed, waitedMs, escalatedToSigkill: false };
+    }
+    await sleep(intervalMs);
+    waitedMs += intervalMs;
+  }
+
+  // Scan once and reuse the result for both the "already free" check and the
+  // SIGKILL targets, instead of running lsof twice back to back.
+  const remaining = listPortListeners(port);
+  if (remaining.length === 0) {
+    return { killed, waitedMs, escalatedToSigkill: false };
+  }
+  killPids(remaining, "SIGKILL");
+
+  const remainingBudget = Math.max(timeoutMs - waitedMs, 0);
+  const triesSigkill = Math.ceil(remainingBudget / intervalMs);
+  for (let i = 0; i < triesSigkill; i++) {
+    if (listPortListeners(port).length === 0) {
+      return { killed, waitedMs, escalatedToSigkill: true };
+    }
+    await sleep(intervalMs);
+    waitedMs += intervalMs;
+  }
+
+  const still = listPortListeners(port);
+  if (still.length === 0) {
+    return { killed, waitedMs, escalatedToSigkill: true };
+  }
+
+  throw new Error(
+    `port ${port} still has listeners after --force: ${still.map((p) => p.pid).join(", ")}`,
+  );
+}
diff --git a/src/cli/program.force.test.ts b/src/cli/program.force.test.ts
index 0f85d08ee..3ccd8cb7c 100644
--- a/src/cli/program.force.test.ts
+++ b/src/cli/program.force.test.ts
@@ -14,6 +14,7 @@ vi.mock("node:child_process", async () => {
 import { execFileSync } from "node:child_process";
 import {
   forceFreePort,
+  forceFreePortAndWait,
   listPortListeners,
   type PortProcess,
   parseLsofOutput,
@@ -79,4 +80,74 @@ describe("gateway --force helpers", () => {
       { pid: 99, command: "ssh" },
     ]);
   });
+
+  it("retries until the port is free", async () => {
+    vi.useFakeTimers();
+    const originalKill = process.kill;
+    const killMock = vi.fn();
+    // @ts-expect-error override for test
+    process.kill = killMock;
+    try {
+      let call = 0;
+      (execFileSync as unknown as vi.Mock).mockImplementation(() => {
+        call += 1;
+        // 1st call: initial listeners to kill; 2nd call: still listed; 3rd call: gone.
+        if (call === 1) return ["p42", "cnode", ""].join("\n");
+        if (call === 2) return ["p42", "cnode", ""].join("\n");
+        return "";
+      });
+
+      const promise = forceFreePortAndWait(18789, {
+        timeoutMs: 500,
+        intervalMs: 100,
+        sigtermTimeoutMs: 400,
+      });
+
+      await vi.runAllTimersAsync();
+      const res = await promise;
+
+      expect(killMock).toHaveBeenCalledWith(42, "SIGTERM");
+      expect(res.killed).toEqual([{ pid: 42, command: "node" }]);
+      expect(res.escalatedToSigkill).toBe(false);
+      expect(res.waitedMs).toBeGreaterThan(0);
+    } finally {
+      // Restore globals even when an assertion fails so later tests are unaffected.
+      process.kill = originalKill;
+      vi.useRealTimers();
+    }
+  });
+
+  it("escalates to SIGKILL if SIGTERM doesn't free the port", async () => {
+    vi.useFakeTimers();
+    const originalKill = process.kill;
+    const killMock = vi.fn();
+    // @ts-expect-error override for test
+    process.kill = killMock;
+    try {
+      let call = 0;
+      (execFileSync as unknown as vi.Mock).mockImplementation(() => {
+        call += 1;
+        // 1st call: initial kill list; then keep showing until after SIGKILL.
+        if (call <= 6) return ["p42", "cnode", ""].join("\n");
+        return "";
+      });
+
+      const promise = forceFreePortAndWait(18789, {
+        timeoutMs: 800,
+        intervalMs: 100,
+        sigtermTimeoutMs: 300,
+      });
+
+      await vi.runAllTimersAsync();
+      const res = await promise;
+
+      expect(killMock).toHaveBeenCalledWith(42, "SIGTERM");
+      expect(killMock).toHaveBeenCalledWith(42, "SIGKILL");
+      expect(res.escalatedToSigkill).toBe(true);
+    } finally {
+      // Restore globals even when an assertion fails so later tests are unaffected.
+      process.kill = originalKill;
+      vi.useRealTimers();
+    }
+  });
 });