fix: clean up stale resume CLI processes

This commit is contained in:
Peter Steinberger
2026-01-13 02:21:20 +00:00
parent 3c8d0083cb
commit 8edf2146ae
2 changed files with 92 additions and 1 deletions

View File

@@ -0,0 +1,56 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { runCliAgent } from "./cli-runner.js";
// Stand-ins for the two helpers exported by ../process/exec.js. Each test
// configures their resolved values and inspects their recorded calls.
const runCommandWithTimeoutMock = vi.fn();
const runExecMock = vi.fn();
// Replace the exec module's exports with thin wrappers that forward every
// argument to the mocks above, so calls made through that module reach the mocks.
vi.mock("../process/exec.js", () => ({
runCommandWithTimeout: (...args: unknown[]) =>
runCommandWithTimeoutMock(...args),
runExec: (...args: unknown[]) => runExecMock(...args),
}));
// Checks that resuming an existing CLI session issues a single `pkill -f`
// whose pattern names the command, the resume verb and the session id.
describe("runCliAgent resume cleanup", () => {
  beforeEach(() => {
    // Start every test with clean call history and no queued results.
    for (const mock of [runCommandWithTimeoutMock, runExecMock]) {
      mock.mockReset();
    }
  });

  it("kills stale resume processes for codex sessions", async () => {
    const successfulRun = {
      stdout: "ok",
      stderr: "",
      code: 0,
      signal: null,
      killed: false,
    };
    runExecMock.mockResolvedValue({ stdout: "", stderr: "" });
    runCommandWithTimeoutMock.mockResolvedValueOnce(successfulRun);

    await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: "/tmp",
      prompt: "hi",
      provider: "codex-cli",
      model: "gpt-5.2-codex",
      timeoutMs: 1_000,
      runId: "run-1",
      cliSessionId: "thread-123",
    });

    // The cleanup step is skipped on Windows, so nothing may have been executed.
    if (process.platform === "win32") {
      expect(runExecMock).not.toHaveBeenCalled();
      return;
    }

    expect(runExecMock).toHaveBeenCalledTimes(1);
    const [command, commandArgs] = runExecMock.mock.calls[0] ?? [];
    expect(command).toBe("pkill");

    const [flag, pattern] = commandArgs as string[];
    expect(flag).toBe("-f");
    for (const fragment of ["codex", "resume", "thread-123"]) {
      expect(pattern).toContain(fragment);
    }
  });
});

View File

@@ -11,7 +11,7 @@ import type { ClawdbotConfig } from "../config/config.js";
import type { CliBackendConfig } from "../config/types.js";
import { shouldLogVerbose } from "../globals.js";
import { createSubsystemLogger } from "../logging.js";
import { runCommandWithTimeout } from "../process/exec.js";
import { runCommandWithTimeout, runExec } from "../process/exec.js";
import { resolveUserPath } from "../utils.js";
import { resolveSessionAgentIds } from "./agent-scope.js";
import { resolveCliBackendConfig } from "./cli-backends.js";
@@ -32,6 +32,37 @@ import {
const log = createSubsystemLogger("agent/claude-cli");
const CLI_RUN_QUEUE = new Map<string, Promise<unknown>>();
/**
 * Returns `value` with every regular-expression metacharacter prefixed by a
 * backslash, so the result matches the input literally when used in a pattern.
 */
function escapeRegex(value: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (matched) => "\\" + matched);
}
/**
 * Best-effort termination of CLI processes that were started by an earlier
 * resume of the given session.
 *
 * Builds an extended-regex pattern from the basename of the backend command
 * followed by the backend's resume arguments (with `{sessionId}` substituted)
 * and hands it to `pkill -f`, which matches against full command lines.
 *
 * Does nothing on Windows, when `sessionId` is blank, when no resume argument
 * references `{sessionId}`, or when the command has no basename.
 *
 * @param backend - Backend whose `command` and `resumeArgs` describe the
 *   process to look for.
 * @param sessionId - CLI session id substituted for `{sessionId}`.
 * @returns Resolves once `pkill` has finished; failures are swallowed.
 */
async function cleanupResumeProcesses(
  backend: CliBackendConfig,
  sessionId: string,
): Promise<void> {
  if (process.platform === "win32") return;
  // A blank id would vanish from the pattern, leaving one that matches every
  // resume process of this backend regardless of session.
  if (!sessionId.trim()) return;
  const resumeArgs = backend.resumeArgs ?? [];
  if (!resumeArgs.some((arg) => arg.includes("{sessionId}"))) return;
  const commandToken = path.basename(backend.command ?? "").trim();
  if (!commandToken) return;

  // Arguments appear space-separated on the matched command line. Requiring
  // whitespace or end-of-line after a session-id argument keeps id "thread-1"
  // from also matching a process resumed with "thread-10".
  const argumentEnd = "([[:space:]]|$)";
  const resumePatterns = resumeArgs
    .map((arg) => {
      const token = arg.replaceAll("{sessionId}", sessionId);
      if (!token) return "";
      const escaped = escapeRegex(token);
      return arg.includes("{sessionId}") ? escaped + argumentEnd : escaped;
    })
    .filter(Boolean);

  // The command token is left unanchored so wrapper and native binaries that
  // share the same name prefix are both matched, as before.
  const pattern = [escapeRegex(commandToken), ...resumePatterns].join(".*");
  try {
    await runExec("pkill", ["-f", pattern]);
  } catch {
    // Best effort: ignore a missing pkill binary or "no processes matched".
  }
}
function enqueueCliRun<T>(key: string, task: () => Promise<T>): Promise<T> {
const prior = CLI_RUN_QUEUE.get(key) ?? Promise.resolve();
const chained = prior.catch(() => undefined).then(task);
@@ -602,6 +633,10 @@ export async function runCliAgent(params: {
return next;
})();
if (useResume && cliSessionIdToSend) {
await cleanupResumeProcesses(backend, cliSessionIdToSend);
}
const result = await runCommandWithTimeout([backend.command, ...args], {
timeoutMs: params.timeoutMs,
cwd: workspaceDir,