fix: cleanup suspended Clawdbot CLI processes
Add cleanupSuspendedCliProcesses() to kill accumulated suspended processes from isolated sessions that don't share sessionIds (e.g., cron jobs).

- Only targets Clawdbot processes (matched via the --session-id pattern)
- Only kills suspended processes (state T)
- Only triggers when more than 10 such processes have accumulated
- Does not affect the user's Claude Code sessions

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
a70fcc8ae0
commit
8befe7f8a7
@@ -17,7 +17,9 @@ describe("runCliAgent resume cleanup", () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it("kills stale resume processes for codex sessions", async () => {
|
it("kills stale resume processes for codex sessions", async () => {
|
||||||
runExecMock.mockResolvedValue({ stdout: "", stderr: "" });
|
// First call is for cleanupSuspendedCliProcesses (returns count 0)
|
||||||
|
// Second call is for cleanupResumeProcesses (pkill)
|
||||||
|
runExecMock.mockResolvedValue({ stdout: "0", stderr: "" });
|
||||||
runCommandWithTimeoutMock.mockResolvedValueOnce({
|
runCommandWithTimeoutMock.mockResolvedValueOnce({
|
||||||
stdout: "ok",
|
stdout: "ok",
|
||||||
stderr: "",
|
stderr: "",
|
||||||
@@ -43,13 +45,58 @@ describe("runCliAgent resume cleanup", () => {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(runExecMock).toHaveBeenCalledTimes(1);
|
// First call: cleanupSuspendedCliProcesses (bash to count)
|
||||||
const args = runExecMock.mock.calls[0] ?? [];
|
// Second call: cleanupResumeProcesses (pkill)
|
||||||
expect(args[0]).toBe("pkill");
|
expect(runExecMock).toHaveBeenCalledTimes(2);
|
||||||
const pkillArgs = args[1] as string[];
|
|
||||||
|
// Verify the pkill call for resume cleanup
|
||||||
|
const pkillCall = runExecMock.mock.calls[1] ?? [];
|
||||||
|
expect(pkillCall[0]).toBe("pkill");
|
||||||
|
const pkillArgs = pkillCall[1] as string[];
|
||||||
expect(pkillArgs[0]).toBe("-f");
|
expect(pkillArgs[0]).toBe("-f");
|
||||||
expect(pkillArgs[1]).toContain("codex");
|
expect(pkillArgs[1]).toContain("codex");
|
||||||
expect(pkillArgs[1]).toContain("resume");
|
expect(pkillArgs[1]).toContain("resume");
|
||||||
expect(pkillArgs[1]).toContain("thread-123");
|
expect(pkillArgs[1]).toContain("thread-123");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("cleans up suspended processes when threshold exceeded", async () => {
  // The CLI run itself succeeds; only the cleanup side effects are under test.
  const cliRunResult = {
    stdout: "ok",
    stderr: "",
    code: 0,
    signal: null,
    killed: false,
  };
  runCommandWithTimeoutMock.mockResolvedValueOnce(cliRunResult);

  // First exec reports 15 suspended processes (above the default threshold
  // of 10), second exec stands in for the kill command.
  runExecMock.mockResolvedValueOnce({ stdout: "15", stderr: "" });
  runExecMock.mockResolvedValueOnce({ stdout: "", stderr: "" });

  const agentParams = {
    sessionId: "s1",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    prompt: "hi",
    provider: "claude-cli",
    timeoutMs: 1_000,
    runId: "run-1",
  };
  await runCliAgent(agentParams);

  // Cleanup is skipped entirely on Windows, so nothing may have been exec'd.
  if (process.platform === "win32") {
    expect(runExecMock).not.toHaveBeenCalled();
    return;
  }

  // Exactly two execs are expected: the count and the kill issued by
  // cleanupSuspendedCliProcesses. No resume cleanup (pkill) is expected for
  // this claude-cli run.
  expect(runExecMock).toHaveBeenCalledTimes(2);

  const recordedCalls = runExecMock.mock.calls;

  // Call #1 counts the suspended processes through bash.
  const countInvocation = recordedCalls[0] ?? [];
  expect(countInvocation[0]).toBe("bash");

  // Call #2 kills the suspended processes, also through bash.
  const killInvocation = recordedCalls[1] ?? [];
  expect(killInvocation[0]).toBe("bash");
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import {
|
|||||||
buildCliArgs,
|
buildCliArgs,
|
||||||
buildSystemPrompt,
|
buildSystemPrompt,
|
||||||
cleanupResumeProcesses,
|
cleanupResumeProcesses,
|
||||||
|
cleanupSuspendedCliProcesses,
|
||||||
enqueueCliRun,
|
enqueueCliRun,
|
||||||
normalizeCliModel,
|
normalizeCliModel,
|
||||||
parseCliJson,
|
parseCliJson,
|
||||||
@@ -206,6 +207,9 @@ export async function runCliAgent(params: {
|
|||||||
return next;
|
return next;
|
||||||
})();
|
})();
|
||||||
|
|
||||||
|
// Cleanup suspended processes that have accumulated (regardless of sessionId)
|
||||||
|
await cleanupSuspendedCliProcesses(backend);
|
||||||
|
|
||||||
if (useResume && cliSessionIdToSend) {
|
if (useResume && cliSessionIdToSend) {
|
||||||
await cleanupResumeProcesses(backend, cliSessionIdToSend);
|
await cleanupResumeProcesses(backend, cliSessionIdToSend);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,6 +44,42 @@ export async function cleanupResumeProcesses(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the extended-regex (`grep -E`) pattern that selects Clawdbot-spawned
 * CLI processes: the command name followed, later on the command line, by
 * `--session-id`.
 *
 * The first character is wrapped in a bracket expression (`[c]laude`) so the
 * pattern text does not match itself when it shows up in the process list as
 * an argument of the `bash`/`grep` processes doing the search. Every other
 * character is escaped so regex metacharacters in the command name
 * (e.g. `claude.js`, `g++`) are matched literally.
 */
function buildSuspendedCliPattern(commandToken: string): string {
  const escapeEre = (text: string): string => text.replace(/[.[\]{}()*+?^$|\\]/g, "\\$&");
  const head = commandToken.charAt(0);
  // "^" would negate a bracket expression and "\" is not portable inside one,
  // so those two are backslash-escaped instead of bracketed.
  const guardedHead = head === "^" || head === "\\" ? escapeEre(head) : `[${head}]`;
  return `${guardedHead}${escapeEre(commandToken.slice(1))}.*--session-id`;
}

/**
 * Cleanup suspended Clawdbot CLI processes that have accumulated.
 *
 * Best-effort: does nothing on Windows or when the backend has no command,
 * and swallows any failure of the underlying shell commands.
 *
 * Only processes whose command line matches `<command> ... --session-id` are
 * considered, so the user's own Claude Code sessions are left alone, and only
 * those in a stopped state (STAT starting with `T`) are counted and killed.
 *
 * @param backend - CLI backend config; only `backend.command` is read.
 * @param threshold - Cleanup runs only when strictly more than this many
 *   suspended processes are found (default: 10).
 */
export async function cleanupSuspendedCliProcesses(
  backend: CliBackendConfig,
  threshold = 10,
): Promise<void> {
  if (process.platform === "win32") return;
  const commandToken = path.basename(backend.command ?? "").trim();
  if (!commandToken) return;

  const pattern = buildSuspendedCliPattern(commandToken);

  // Shared pipeline that prints one PID per suspended Clawdbot process.
  // - The pattern is handed to bash as positional parameter $1 instead of
  //   being spliced into the script, so quotes or other shell metacharacters
  //   in the command name cannot alter the script.
  // - The state is checked on the STAT column itself and by prefix, so
  //   modified states such as "T+" or "Ts" are included and a stray "T"
  //   argument on a command line is not mistaken for a state.
  const listSuspendedPids = `ps -A -o pid=,stat=,args= | grep -E -e "$1" | awk '$2 ~ /^T/ { print $1 }'`;
  const withPattern = (script: string): string[] => ["-c", script, "bash", pattern];

  try {
    // Count suspended Clawdbot processes
    const { stdout } = await runExec("bash", withPattern(`${listSuspendedPids} | wc -l`));
    const count = Number.parseInt(stdout.trim(), 10) || 0;
    if (count <= threshold) return;

    // Kill suspended Clawdbot processes only
    await runExec("bash", withPattern(`${listSuspendedPids} | xargs kill -9 2>/dev/null`));
  } catch {
    // ignore errors - best effort cleanup
  }
}
|
||||||
|
|
||||||
export function enqueueCliRun<T>(key: string, task: () => Promise<T>): Promise<T> {
|
export function enqueueCliRun<T>(key: string, task: () => Promise<T>): Promise<T> {
|
||||||
const prior = CLI_RUN_QUEUE.get(key) ?? Promise.resolve();
|
const prior = CLI_RUN_QUEUE.get(key) ?? Promise.resolve();
|
||||||
const chained = prior.catch(() => undefined).then(task);
|
const chained = prior.catch(() => undefined).then(task);
|
||||||
|
|||||||
Reference in New Issue
Block a user