fix(agent): serialize runs per session

This commit is contained in:
Peter Steinberger
2025-12-25 23:50:52 +01:00
parent 9fa9199747
commit 198f8ea700
6 changed files with 288 additions and 255 deletions

View File

@@ -131,6 +131,7 @@ Controls the embedded agent runtime (provider/model/thinking/verbose/timeouts).
timeoutSeconds: 600, timeoutSeconds: 600,
mediaMaxMb: 5, mediaMaxMb: 5,
heartbeatMinutes: 30, heartbeatMinutes: 30,
maxConcurrent: 3,
bash: { bash: {
backgroundMs: 20000, backgroundMs: 20000,
timeoutSec: 1800, timeoutSec: 1800,
@@ -146,6 +147,10 @@ Controls the embedded agent runtime (provider/model/thinking/verbose/timeouts).
- `timeoutSec`: auto-kill after this runtime (seconds, default 1800) - `timeoutSec`: auto-kill after this runtime (seconds, default 1800)
- `cleanupMs`: how long to keep finished sessions in memory (ms, default 1800000) - `cleanupMs`: how long to keep finished sessions in memory (ms, default 1800000)
`agent.maxConcurrent` sets the maximum number of embedded agent runs that can
execute in parallel across sessions. Each session is still serialized (one run
per session key at a time). Default: 1.
### `models` (custom providers + base URLs) ### `models` (custom providers + base URLs)
Clawdis uses the **pi-coding-agent** model catalog. You can add custom providers Clawdis uses the **pi-coding-agent** model catalog. You can add custom providers

View File

@@ -5,22 +5,24 @@ read_when:
--- ---
# Command Queue (2025-11-25) # Command Queue (2025-11-25)
We now serialize all command-based auto-replies (WhatsApp Web listener) through a tiny in-process queue to prevent multiple commands from running at once. We now serialize command-based auto-replies (WhatsApp Web listener) through a tiny in-process queue to prevent multiple commands from running at once within a session, while allowing safe parallelism across sessions.
## Why ## Why
- Some auto-reply commands are expensive (LLM calls) and can collide when multiple inbound messages arrive close together. - Some auto-reply commands are expensive (LLM calls) and can collide when multiple inbound messages arrive close together.
- Serializing avoids competing for terminal/stdin, keeps logs readable, and reduces the chance of rate limits from upstream tools. - Serializing avoids competing for terminal/stdin, keeps logs readable, and reduces the chance of rate limits from upstream tools.
## How it works ## How it works
- `src/process/command-queue.ts` holds a single FIFO queue and drains it synchronously; only one task runs at a time. - `src/process/command-queue.ts` holds a lane-aware FIFO queue and drains each lane synchronously.
- `getReplyFromConfig` wraps command execution with `enqueueCommand(...)`, so every config-driven command reply flows through the queue automatically. - `runEmbeddedPiAgent` enqueues by **session key** (lane `session:<key>`) to guarantee only one active run per session.
- Each session run is then queued into a **global lane** (`main` by default) so overall parallelism is capped by `agent.maxConcurrent`.
- When verbose logging is enabled, queued commands emit a short notice if they waited more than ~2s before starting. - When verbose logging is enabled, queued commands emit a short notice if they waited more than ~2s before starting.
- Typing indicators (`onReplyStart`) still fire immediately on enqueue so user experience is unchanged while we wait our turn. - Typing indicators (`onReplyStart`) still fire immediately on enqueue so user experience is unchanged while we wait our turn.
## Scope and guarantees ## Scope and guarantees
- Applies only to config-driven command replies; plain text replies are unaffected. - Applies only to config-driven command replies; plain text replies are unaffected.
- Default lane (`main`) is process-wide for inbound + main heartbeats to keep the primary workflow serialized. - Default lane (`main`) is process-wide for inbound + main heartbeats; set `agent.maxConcurrent` to allow multiple sessions in parallel.
- Additional lanes may exist (e.g. `cron`) so background jobs can run in parallel without blocking inbound replies. - Additional lanes may exist (e.g. `cron`) so background jobs can run in parallel without blocking inbound replies.
- Per-session lanes guarantee that only one agent run touches a given session at a time.
- No external dependencies or background worker threads; pure TypeScript + promises. - No external dependencies or background worker threads; pure TypeScript + promises.
## Troubleshooting ## Troubleshooting

View File

@@ -25,7 +25,10 @@ import { formatToolAggregate } from "../auto-reply/tool-meta.js";
import type { ClawdisConfig } from "../config/config.js"; import type { ClawdisConfig } from "../config/config.js";
import { getMachineDisplayName } from "../infra/machine-name.js"; import { getMachineDisplayName } from "../infra/machine-name.js";
import { splitMediaFromOutput } from "../media/parse.js"; import { splitMediaFromOutput } from "../media/parse.js";
import { enqueueCommand } from "../process/command-queue.js"; import {
enqueueCommand,
enqueueCommandInLane,
} from "../process/command-queue.js";
import { CONFIG_DIR, resolveUserPath } from "../utils.js"; import { CONFIG_DIR, resolveUserPath } from "../utils.js";
import { resolveClawdisAgentDir } from "./agent-paths.js"; import { resolveClawdisAgentDir } from "./agent-paths.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
@@ -90,6 +93,16 @@ const DEFAULT_OAUTH_DIR = path.join(CONFIG_DIR, "credentials");
let oauthStorageConfigured = false; let oauthStorageConfigured = false;
let cachedDefaultApiKey: ReturnType<typeof defaultGetApiKey> | null = null; let cachedDefaultApiKey: ReturnType<typeof defaultGetApiKey> | null = null;
/**
 * Builds the queue lane name for a session key.
 *
 * The key is trimmed; a blank key falls back to `"main"`. Keys that already
 * start with `session:` are returned as-is (after trimming) so the prefix is
 * never applied twice.
 *
 * @param key - Session key (or session id) identifying the lane.
 * @returns A lane name of the form `session:<key>`.
 */
function resolveSessionLane(key: string) {
  const prefix = "session:";
  const trimmed = key.trim();
  const base = trimmed.length > 0 ? trimmed : "main";
  if (base.startsWith(prefix)) {
    return base;
  }
  return `${prefix}${base}`;
}
/**
 * Picks the global lane name for a run.
 *
 * @param lane - Optional lane name; surrounding whitespace is ignored.
 * @returns The trimmed lane name, or `"main"` when the lane is missing or blank.
 */
function resolveGlobalLane(lane?: string) {
  const trimmed = (lane ?? "").trim();
  return trimmed === "" ? "main" : trimmed;
}
function resolveClawdisOAuthPath(): string { function resolveClawdisOAuthPath(): string {
const overrideDir = const overrideDir =
process.env.CLAWDIS_OAUTH_DIR?.trim() || DEFAULT_OAUTH_DIR; process.env.CLAWDIS_OAUTH_DIR?.trim() || DEFAULT_OAUTH_DIR;
@@ -242,6 +255,7 @@ function resolvePromptSkills(
export async function runEmbeddedPiAgent(params: { export async function runEmbeddedPiAgent(params: {
sessionId: string; sessionId: string;
sessionKey?: string;
sessionFile: string; sessionFile: string;
workspaceDir: string; workspaceDir: string;
config?: ClawdisConfig; config?: ClawdisConfig;
@@ -267,13 +281,21 @@ export async function runEmbeddedPiAgent(params: {
stream: string; stream: string;
data: Record<string, unknown>; data: Record<string, unknown>;
}) => void; }) => void;
lane?: string;
enqueue?: typeof enqueueCommand; enqueue?: typeof enqueueCommand;
extraSystemPrompt?: string; extraSystemPrompt?: string;
ownerNumbers?: string[]; ownerNumbers?: string[];
enforceFinalTag?: boolean; enforceFinalTag?: boolean;
}): Promise<EmbeddedPiRunResult> { }): Promise<EmbeddedPiRunResult> {
const enqueue = params.enqueue ?? enqueueCommand; const sessionLane = resolveSessionLane(
return enqueue(async () => { params.sessionKey?.trim() || params.sessionId,
);
const globalLane = resolveGlobalLane(params.lane);
const enqueueGlobal =
params.enqueue ??
((task, opts) => enqueueCommandInLane(globalLane, task, opts));
return enqueueCommandInLane(sessionLane, () =>
enqueueGlobal(async () => {
const started = Date.now(); const started = Date.now();
const resolvedWorkspace = resolveUserPath(params.workspaceDir); const resolvedWorkspace = resolveUserPath(params.workspaceDir);
const prevCwd = process.cwd(); const prevCwd = process.cwd();
@@ -530,5 +552,6 @@ export async function runEmbeddedPiAgent(params: {
restoreSkillEnv?.(); restoreSkillEnv?.();
process.chdir(prevCwd); process.chdir(prevCwd);
} }
}); }),
);
} }

View File

@@ -983,6 +983,7 @@ export async function getReplyFromConfig(
const runId = crypto.randomUUID(); const runId = crypto.randomUUID();
const runResult = await runEmbeddedPiAgent({ const runResult = await runEmbeddedPiAgent({
sessionId: sessionIdFinal, sessionId: sessionIdFinal,
sessionKey,
sessionFile, sessionFile,
workspaceDir, workspaceDir,
config: cfg, config: cfg,

View File

@@ -313,6 +313,7 @@ export async function agentCommand(
try { try {
result = await runEmbeddedPiAgent({ result = await runEmbeddedPiAgent({
sessionId, sessionId,
sessionKey,
sessionFile, sessionFile,
workspaceDir, workspaceDir,
config: cfg, config: cfg,

View File

@@ -146,7 +146,6 @@ export async function runCronIsolatedAgentTurn(params: {
lane?: string; lane?: string;
}): Promise<RunCronAgentTurnResult> { }): Promise<RunCronAgentTurnResult> {
const agentCfg = params.cfg.agent; const agentCfg = params.cfg.agent;
void params.lane;
const workspaceDirRaw = const workspaceDirRaw =
params.cfg.agent?.workspace ?? DEFAULT_AGENT_WORKSPACE_DIR; params.cfg.agent?.workspace ?? DEFAULT_AGENT_WORKSPACE_DIR;
const workspace = await ensureAgentWorkspace({ const workspace = await ensureAgentWorkspace({
@@ -236,11 +235,13 @@ export async function runCronIsolatedAgentTurn(params: {
); );
runResult = await runEmbeddedPiAgent({ runResult = await runEmbeddedPiAgent({
sessionId: cronSession.sessionEntry.sessionId, sessionId: cronSession.sessionEntry.sessionId,
sessionKey: params.sessionKey,
sessionFile, sessionFile,
workspaceDir, workspaceDir,
config: params.cfg, config: params.cfg,
skillsSnapshot, skillsSnapshot,
prompt: commandBody, prompt: commandBody,
lane: params.lane ?? "cron",
provider, provider,
model, model,
thinkLevel, thinkLevel,