diff --git a/CHANGELOG.md b/CHANGELOG.md index 96c2b7f35..61183c1ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - Auth/Status: keep auth profiles sticky per session (rotate on compaction/new), surface provider usage headers in `/status` and `clawdbot models status`, and update docs. - Fix: guard model fallback against undefined provider/model values. (#954) — thanks @roshanasingh4. - Fix: refactor session store updates, add chat.inject, and harden subagent cleanup flow. (#944) — thanks @tyler6204. +- Fix: clean up suspended CLI processes across backends. (#978) — thanks @Nachx639. - Memory: make `node-llama-cpp` an optional dependency (avoid Node 25 install failures) and improve local-embeddings fallback/errors. - Browser: add `snapshot refs=aria` (Playwright aria-ref ids) for self-resolving refs across `snapshot` → `act`. - Browser: `profile="chrome"` now defaults to host control and returns clearer “attach a tab” errors. diff --git a/src/agents/cli-runner.test.ts b/src/agents/cli-runner.test.ts index d1fa88316..b7a505e11 100644 --- a/src/agents/cli-runner.test.ts +++ b/src/agents/cli-runner.test.ts @@ -1,6 +1,8 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { CliBackendConfig } from "../config/types.js"; import { runCliAgent } from "./cli-runner.js"; +import { cleanupSuspendedCliProcesses } from "./cli-runner/helpers.js"; const runCommandWithTimeoutMock = vi.fn(); const runExecMock = vi.fn(); @@ -17,9 +19,12 @@ describe("runCliAgent resume cleanup", () => { }); it("kills stale resume processes for codex sessions", async () => { - // First call is for cleanupSuspendedCliProcesses (returns count 0) - // Second call is for cleanupResumeProcesses (pkill) - runExecMock.mockResolvedValue({ stdout: "0", stderr: "" }); + runExecMock + .mockResolvedValueOnce({ + stdout: " 1 S /bin/launchd\n", + stderr: "", + }) // cleanupSuspendedCliProcesses (ps) + .mockResolvedValueOnce({ stdout: "", stderr: "" }); // cleanupResumeProcesses (pkill) runCommandWithTimeoutMock.mockResolvedValueOnce({ stdout: "ok", stderr: "", @@ -45,11 +50,7 @@ describe("runCliAgent resume cleanup", () => { return; } - // First call: cleanupSuspendedCliProcesses (bash to count) - // Second call: cleanupResumeProcesses (pkill) expect(runExecMock).toHaveBeenCalledTimes(2); - - // Verify the pkill call for resume cleanup const pkillCall = runExecMock.mock.calls[1] ?? []; expect(pkillCall[0]).toBe("pkill"); const pkillArgs = pkillCall[1] as string[]; @@ -58,45 +59,94 @@ describe("runCliAgent resume cleanup", () => { expect(pkillArgs[1]).toContain("resume"); expect(pkillArgs[1]).toContain("thread-123"); }); +}); - it("cleans up suspended processes when threshold exceeded", async () => { - // Return count > 10 to trigger cleanup - runExecMock - .mockResolvedValueOnce({ stdout: "15", stderr: "" }) // count suspended - .mockResolvedValueOnce({ stdout: "", stderr: "" }); // kill command - runCommandWithTimeoutMock.mockResolvedValueOnce({ - stdout: "ok", - stderr: "", - code: 0, - signal: null, - killed: false, - }); +describe("cleanupSuspendedCliProcesses", () => { + beforeEach(() => { + runExecMock.mockReset(); + }); - await runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "hi", - provider: "claude-cli", - timeoutMs: 1_000, - runId: "run-1", - }); + it("skips when no session tokens are configured", async () => { + await cleanupSuspendedCliProcesses( + { + command: "tool", + } as CliBackendConfig, + 0, + ); + + if (process.platform === "win32") { + expect(runExecMock).not.toHaveBeenCalled(); + return; + } + + expect(runExecMock).not.toHaveBeenCalled(); + }); + + it("matches sessionArg-based commands", async () => { + runExecMock + .mockResolvedValueOnce({ + stdout: [ + " 40 T+ claude --session-id thread-1 -p", + " 41 S claude --session-id thread-2 -p", + ].join("\n"), + stderr: "", + }) + .mockResolvedValueOnce({ stdout: "", stderr: "" }); + + await cleanupSuspendedCliProcesses( + { + command: "claude", + sessionArg: "--session-id", + } as CliBackendConfig, + 0, + ); if (process.platform === "win32") { expect(runExecMock).not.toHaveBeenCalled(); return; } - // cleanupSuspendedCliProcesses: count + kill (2 calls) - // cleanupResumeProcesses: not called for claude-cli (no resumeArgs) expect(runExecMock).toHaveBeenCalledTimes(2); - - // First call: count suspended processes - const countCall = runExecMock.mock.calls[0] ?? []; - expect(countCall[0]).toBe("bash"); - - // Second call: kill suspended processes const killCall = runExecMock.mock.calls[1] ?? []; - expect(killCall[0]).toBe("bash"); + expect(killCall[0]).toBe("kill"); + expect(killCall[1]).toEqual(["-9", "40"]); + }); + + it("matches resumeArgs with positional session id", async () => { + runExecMock + .mockResolvedValueOnce({ + stdout: [ + " 50 T codex exec resume thread-99 --color never --sandbox read-only", + " 51 T codex exec resume other --color never --sandbox read-only", + ].join("\n"), + stderr: "", + }) + .mockResolvedValueOnce({ stdout: "", stderr: "" }); + + await cleanupSuspendedCliProcesses( + { + command: "codex", + resumeArgs: [ + "exec", + "resume", + "{sessionId}", + "--color", + "never", + "--sandbox", + "read-only", + ], + } as CliBackendConfig, + 1, + ); + + if (process.platform === "win32") { + expect(runExecMock).not.toHaveBeenCalled(); + return; + } + + expect(runExecMock).toHaveBeenCalledTimes(2); + const killCall = runExecMock.mock.calls[1] ?? []; + expect(killCall[0]).toBe("kill"); + expect(killCall[1]).toEqual(["-9", "50", "51"]); }); }); diff --git a/src/agents/cli-runner.ts b/src/agents/cli-runner.ts index ab62a2e9f..8902aa573 100644 --- a/src/agents/cli-runner.ts +++ b/src/agents/cli-runner.ts @@ -209,7 +209,6 @@ export async function runCliAgent(params: { // Cleanup suspended processes that have accumulated (regardless of sessionId) await cleanupSuspendedCliProcesses(backend); - if (useResume && cliSessionIdToSend) { await cleanupResumeProcesses(backend, cliSessionIdToSend); } diff --git a/src/agents/cli-runner/helpers.ts b/src/agents/cli-runner/helpers.ts index ea9e1cf91..6cb3278c9 100644 --- a/src/agents/cli-runner/helpers.ts +++ b/src/agents/cli-runner/helpers.ts @@ -44,42 +44,82 @@ export async function cleanupResumeProcesses( } } +function buildSessionMatchers(backend: CliBackendConfig): RegExp[] { + const commandToken = path.basename(backend.command ?? "").trim(); + if (!commandToken) return []; + const matchers: RegExp[] = []; + const sessionArg = backend.sessionArg?.trim(); + const sessionArgs = backend.sessionArgs ?? []; + const resumeArgs = backend.resumeArgs ?? []; + + const addMatcher = (args: string[]) => { + if (args.length === 0) return; + const tokens = [commandToken, ...args]; + const pattern = tokens + .map((token, index) => { + const tokenPattern = tokenToRegex(token); + return index === 0 ? `(?:^|\\s)${tokenPattern}` : `\\s+${tokenPattern}`; + }) + .join(""); + matchers.push(new RegExp(pattern)); + }; + + if (sessionArgs.some((arg) => arg.includes("{sessionId}"))) { + addMatcher(sessionArgs); + } else if (sessionArg) { + addMatcher([sessionArg, "{sessionId}"]); + } + + if (resumeArgs.some((arg) => arg.includes("{sessionId}"))) { + addMatcher(resumeArgs); + } + + return matchers; +} + +function tokenToRegex(token: string): string { + if (!token.includes("{sessionId}")) return escapeRegex(token); + const parts = token.split("{sessionId}").map((part) => escapeRegex(part)); + return parts.join("\\S+"); +} + /** * Cleanup suspended Clawdbot CLI processes that have accumulated. * Only cleans up if there are more than the threshold (default: 10). - * Uses --session-id pattern to only target Clawdbot processes, not user's Claude Code sessions. */ export async function cleanupSuspendedCliProcesses( backend: CliBackendConfig, threshold = 10, ): Promise { if (process.platform === "win32") return; - const commandToken = path.basename(backend.command ?? "").trim(); - if (!commandToken) return; - - // Pattern includes --session-id to only match Clawdbot processes - const pattern = `[${commandToken[0]}]${commandToken.slice(1)}.*--session-id`; + const matchers = buildSessionMatchers(backend); + if (matchers.length === 0) return; try { - // Count suspended Clawdbot processes - const { stdout } = await runExec("bash", [ - "-c", - `ps aux | grep -E '${pattern}' | grep -E '\\s+T\\s+' | wc -l`, - ]); - const count = parseInt(stdout.trim(), 10) || 0; + const { stdout } = await runExec("ps", ["-ax", "-o", "pid=,stat=,command="]); + const suspended: number[] = []; + for (const line of stdout.split("\n")) { + const trimmed = line.trim(); + if (!trimmed) continue; + const match = /^(\d+)\s+(\S+)\s+(.*)$/.exec(trimmed); + if (!match) continue; + const pid = Number(match[1]); + const stat = match[2] ?? ""; + const command = match[3] ?? ""; + if (!Number.isFinite(pid)) continue; + if (!stat.includes("T")) continue; + if (!matchers.some((matcher) => matcher.test(command))) continue; + suspended.push(pid); + } - if (count > threshold) { - // Kill suspended Clawdbot processes only - await runExec("bash", [ - "-c", - `ps aux | grep -E '${pattern}' | grep -E '\\s+T\\s+' | awk '{print $2}' | xargs kill -9 2>/dev/null`, - ]); + if (suspended.length > threshold) { + // Verified locally: stopped (T) processes ignore SIGTERM, so use SIGKILL. + await runExec("kill", ["-9", ...suspended.map((pid) => String(pid))]); } } catch { // ignore errors - best effort cleanup } } - export function enqueueCliRun(key: string, task: () => Promise): Promise { const prior = CLI_RUN_QUEUE.get(key) ?? Promise.resolve(); const chained = prior.catch(() => undefined).then(task);