fix: cleanup suspended CLI processes (#978) (thanks @Nachx639)
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import type { CliBackendConfig } from "../config/types.js";
|
||||
import { runCliAgent } from "./cli-runner.js";
|
||||
import { cleanupSuspendedCliProcesses } from "./cli-runner/helpers.js";
|
||||
|
||||
const runCommandWithTimeoutMock = vi.fn();
|
||||
const runExecMock = vi.fn();
|
||||
@@ -17,9 +19,12 @@ describe("runCliAgent resume cleanup", () => {
|
||||
});
|
||||
|
||||
it("kills stale resume processes for codex sessions", async () => {
|
||||
// First call is for cleanupSuspendedCliProcesses (returns count 0)
|
||||
// Second call is for cleanupResumeProcesses (pkill)
|
||||
runExecMock.mockResolvedValue({ stdout: "0", stderr: "" });
|
||||
runExecMock
|
||||
.mockResolvedValueOnce({
|
||||
stdout: " 1 S /bin/launchd\n",
|
||||
stderr: "",
|
||||
}) // cleanupSuspendedCliProcesses (ps)
|
||||
.mockResolvedValueOnce({ stdout: "", stderr: "" }); // cleanupResumeProcesses (pkill)
|
||||
runCommandWithTimeoutMock.mockResolvedValueOnce({
|
||||
stdout: "ok",
|
||||
stderr: "",
|
||||
@@ -45,11 +50,7 @@ describe("runCliAgent resume cleanup", () => {
|
||||
return;
|
||||
}
|
||||
|
||||
// First call: cleanupSuspendedCliProcesses (bash to count)
|
||||
// Second call: cleanupResumeProcesses (pkill)
|
||||
expect(runExecMock).toHaveBeenCalledTimes(2);
|
||||
|
||||
// Verify the pkill call for resume cleanup
|
||||
const pkillCall = runExecMock.mock.calls[1] ?? [];
|
||||
expect(pkillCall[0]).toBe("pkill");
|
||||
const pkillArgs = pkillCall[1] as string[];
|
||||
@@ -58,45 +59,94 @@ describe("runCliAgent resume cleanup", () => {
|
||||
expect(pkillArgs[1]).toContain("resume");
|
||||
expect(pkillArgs[1]).toContain("thread-123");
|
||||
});
|
||||
});
|
||||
|
||||
it("cleans up suspended processes when threshold exceeded", async () => {
|
||||
// Return count > 10 to trigger cleanup
|
||||
runExecMock
|
||||
.mockResolvedValueOnce({ stdout: "15", stderr: "" }) // count suspended
|
||||
.mockResolvedValueOnce({ stdout: "", stderr: "" }); // kill command
|
||||
runCommandWithTimeoutMock.mockResolvedValueOnce({
|
||||
stdout: "ok",
|
||||
stderr: "",
|
||||
code: 0,
|
||||
signal: null,
|
||||
killed: false,
|
||||
});
|
||||
describe("cleanupSuspendedCliProcesses", () => {
|
||||
beforeEach(() => {
|
||||
runExecMock.mockReset();
|
||||
});
|
||||
|
||||
await runCliAgent({
|
||||
sessionId: "s1",
|
||||
sessionFile: "/tmp/session.jsonl",
|
||||
workspaceDir: "/tmp",
|
||||
prompt: "hi",
|
||||
provider: "claude-cli",
|
||||
timeoutMs: 1_000,
|
||||
runId: "run-1",
|
||||
});
|
||||
it("skips when no session tokens are configured", async () => {
|
||||
await cleanupSuspendedCliProcesses(
|
||||
{
|
||||
command: "tool",
|
||||
} as CliBackendConfig,
|
||||
0,
|
||||
);
|
||||
|
||||
if (process.platform === "win32") {
|
||||
expect(runExecMock).not.toHaveBeenCalled();
|
||||
return;
|
||||
}
|
||||
|
||||
expect(runExecMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("matches sessionArg-based commands", async () => {
|
||||
runExecMock
|
||||
.mockResolvedValueOnce({
|
||||
stdout: [
|
||||
" 40 T+ claude --session-id thread-1 -p",
|
||||
" 41 S claude --session-id thread-2 -p",
|
||||
].join("\n"),
|
||||
stderr: "",
|
||||
})
|
||||
.mockResolvedValueOnce({ stdout: "", stderr: "" });
|
||||
|
||||
await cleanupSuspendedCliProcesses(
|
||||
{
|
||||
command: "claude",
|
||||
sessionArg: "--session-id",
|
||||
} as CliBackendConfig,
|
||||
0,
|
||||
);
|
||||
|
||||
if (process.platform === "win32") {
|
||||
expect(runExecMock).not.toHaveBeenCalled();
|
||||
return;
|
||||
}
|
||||
|
||||
// cleanupSuspendedCliProcesses: count + kill (2 calls)
|
||||
// cleanupResumeProcesses: not called for claude-cli (no resumeArgs)
|
||||
expect(runExecMock).toHaveBeenCalledTimes(2);
|
||||
|
||||
// First call: count suspended processes
|
||||
const countCall = runExecMock.mock.calls[0] ?? [];
|
||||
expect(countCall[0]).toBe("bash");
|
||||
|
||||
// Second call: kill suspended processes
|
||||
const killCall = runExecMock.mock.calls[1] ?? [];
|
||||
expect(killCall[0]).toBe("bash");
|
||||
expect(killCall[0]).toBe("kill");
|
||||
expect(killCall[1]).toEqual(["-9", "40"]);
|
||||
});
|
||||
|
||||
it("matches resumeArgs with positional session id", async () => {
|
||||
runExecMock
|
||||
.mockResolvedValueOnce({
|
||||
stdout: [
|
||||
" 50 T codex exec resume thread-99 --color never --sandbox read-only",
|
||||
" 51 T codex exec resume other --color never --sandbox read-only",
|
||||
].join("\n"),
|
||||
stderr: "",
|
||||
})
|
||||
.mockResolvedValueOnce({ stdout: "", stderr: "" });
|
||||
|
||||
await cleanupSuspendedCliProcesses(
|
||||
{
|
||||
command: "codex",
|
||||
resumeArgs: [
|
||||
"exec",
|
||||
"resume",
|
||||
"{sessionId}",
|
||||
"--color",
|
||||
"never",
|
||||
"--sandbox",
|
||||
"read-only",
|
||||
],
|
||||
} as CliBackendConfig,
|
||||
1,
|
||||
);
|
||||
|
||||
if (process.platform === "win32") {
|
||||
expect(runExecMock).not.toHaveBeenCalled();
|
||||
return;
|
||||
}
|
||||
|
||||
expect(runExecMock).toHaveBeenCalledTimes(2);
|
||||
const killCall = runExecMock.mock.calls[1] ?? [];
|
||||
expect(killCall[0]).toBe("kill");
|
||||
expect(killCall[1]).toEqual(["-9", "50", "51"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -209,7 +209,6 @@ export async function runCliAgent(params: {
|
||||
|
||||
// Cleanup suspended processes that have accumulated (regardless of sessionId)
|
||||
await cleanupSuspendedCliProcesses(backend);
|
||||
|
||||
if (useResume && cliSessionIdToSend) {
|
||||
await cleanupResumeProcesses(backend, cliSessionIdToSend);
|
||||
}
|
||||
|
||||
@@ -44,42 +44,82 @@ export async function cleanupResumeProcesses(
|
||||
}
|
||||
}
|
||||
|
||||
function buildSessionMatchers(backend: CliBackendConfig): RegExp[] {
|
||||
const commandToken = path.basename(backend.command ?? "").trim();
|
||||
if (!commandToken) return [];
|
||||
const matchers: RegExp[] = [];
|
||||
const sessionArg = backend.sessionArg?.trim();
|
||||
const sessionArgs = backend.sessionArgs ?? [];
|
||||
const resumeArgs = backend.resumeArgs ?? [];
|
||||
|
||||
const addMatcher = (args: string[]) => {
|
||||
if (args.length === 0) return;
|
||||
const tokens = [commandToken, ...args];
|
||||
const pattern = tokens
|
||||
.map((token, index) => {
|
||||
const tokenPattern = tokenToRegex(token);
|
||||
return index === 0 ? `(?:^|\\s)${tokenPattern}` : `\\s+${tokenPattern}`;
|
||||
})
|
||||
.join("");
|
||||
matchers.push(new RegExp(pattern));
|
||||
};
|
||||
|
||||
if (sessionArgs.some((arg) => arg.includes("{sessionId}"))) {
|
||||
addMatcher(sessionArgs);
|
||||
} else if (sessionArg) {
|
||||
addMatcher([sessionArg, "{sessionId}"]);
|
||||
}
|
||||
|
||||
if (resumeArgs.some((arg) => arg.includes("{sessionId}"))) {
|
||||
addMatcher(resumeArgs);
|
||||
}
|
||||
|
||||
return matchers;
|
||||
}
|
||||
|
||||
function tokenToRegex(token: string): string {
|
||||
if (!token.includes("{sessionId}")) return escapeRegex(token);
|
||||
const parts = token.split("{sessionId}").map((part) => escapeRegex(part));
|
||||
return parts.join("\\S+");
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup suspended Clawdbot CLI processes that have accumulated.
|
||||
* Only cleans up if there are more than the threshold (default: 10).
|
||||
* Uses --session-id pattern to only target Clawdbot processes, not user's Claude Code sessions.
|
||||
*/
|
||||
export async function cleanupSuspendedCliProcesses(
|
||||
backend: CliBackendConfig,
|
||||
threshold = 10,
|
||||
): Promise<void> {
|
||||
if (process.platform === "win32") return;
|
||||
const commandToken = path.basename(backend.command ?? "").trim();
|
||||
if (!commandToken) return;
|
||||
|
||||
// Pattern includes --session-id to only match Clawdbot processes
|
||||
const pattern = `[${commandToken[0]}]${commandToken.slice(1)}.*--session-id`;
|
||||
const matchers = buildSessionMatchers(backend);
|
||||
if (matchers.length === 0) return;
|
||||
|
||||
try {
|
||||
// Count suspended Clawdbot processes
|
||||
const { stdout } = await runExec("bash", [
|
||||
"-c",
|
||||
`ps aux | grep -E '${pattern}' | grep -E '\\s+T\\s+' | wc -l`,
|
||||
]);
|
||||
const count = parseInt(stdout.trim(), 10) || 0;
|
||||
const { stdout } = await runExec("ps", ["-ax", "-o", "pid=,stat=,command="]);
|
||||
const suspended: number[] = [];
|
||||
for (const line of stdout.split("\n")) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) continue;
|
||||
const match = /^(\d+)\s+(\S+)\s+(.*)$/.exec(trimmed);
|
||||
if (!match) continue;
|
||||
const pid = Number(match[1]);
|
||||
const stat = match[2] ?? "";
|
||||
const command = match[3] ?? "";
|
||||
if (!Number.isFinite(pid)) continue;
|
||||
if (!stat.includes("T")) continue;
|
||||
if (!matchers.some((matcher) => matcher.test(command))) continue;
|
||||
suspended.push(pid);
|
||||
}
|
||||
|
||||
if (count > threshold) {
|
||||
// Kill suspended Clawdbot processes only
|
||||
await runExec("bash", [
|
||||
"-c",
|
||||
`ps aux | grep -E '${pattern}' | grep -E '\\s+T\\s+' | awk '{print $2}' | xargs kill -9 2>/dev/null`,
|
||||
]);
|
||||
if (suspended.length > threshold) {
|
||||
// Verified locally: stopped (T) processes ignore SIGTERM, so use SIGKILL.
|
||||
await runExec("kill", ["-9", ...suspended.map((pid) => String(pid))]);
|
||||
}
|
||||
} catch {
|
||||
// ignore errors - best effort cleanup
|
||||
}
|
||||
}
|
||||
|
||||
export function enqueueCliRun<T>(key: string, task: () => Promise<T>): Promise<T> {
|
||||
const prior = CLI_RUN_QUEUE.get(key) ?? Promise.resolve();
|
||||
const chained = prior.catch(() => undefined).then(task);
|
||||
|
||||
Reference in New Issue
Block a user