diff --git a/CHANGELOG.md b/CHANGELOG.md index e288f110c..64d343056 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Sessions: primary session key is fixed to `main` (or `global` for global scope); `session.mainKey` is ignored. ### Features +- Highlight: agent-to-agent ping-pong (reply-back loop) with `REPLY_SKIP` plus target announce step with `ANNOUNCE_SKIP` (max turns configurable, 0–5). - Gateway: support `gateway.port` + `CLAWDIS_GATEWAY_PORT` across CLI, TUI, and macOS app. - Gateway: add config hot reload with hybrid restart strategy (`gateway.reload`) and per-section reload handling. - UI: centralize tool display metadata and show action/detail summaries across Web Chat, SwiftUI, Android, and the TUI. @@ -20,7 +21,6 @@ - Agent: add optional per-session Docker sandbox for tool execution (`agent.sandbox`) with allow/deny policy and auto-pruning. - Agent: add sandboxed Chromium browser (CDP + optional noVNC observer) for sandboxed sessions. - Nodes: add `location.get` with Always/Precise settings on macOS/iOS/Android plus CLI/tool support. -- Sessions: add agent‑to‑agent post step with `ANNOUNCE_SKIP` to suppress channel announcements. ### Fixes - CI: fix lint ordering after merge cleanup (#156) — thanks @steipete. @@ -72,7 +72,8 @@ - Queue: clarify steer-backlog behavior with inline commands and update examples for streaming surfaces. - Sandbox: document per-session agent sandbox setup, browser image, and Docker build. - macOS: clarify menu bar uses sessionKey from agent events. -- Sessions: document agent-to-agent post step and `ANNOUNCE_SKIP`. +- Sessions: document agent-to-agent reply loop (`REPLY_SKIP`) and announce step (`ANNOUNCE_SKIP`). +- Skills: clarify wacli third-party messaging scope and JID format examples. 
## 2.0.0-beta5 — 2026-01-03 diff --git a/docs/configuration.md b/docs/configuration.md index 297aebbaf..74f49d3b9 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -621,6 +621,10 @@ Controls session scoping, idle expiry, reset triggers, and where the session sto resetTriggers: ["/new", "/reset"], store: "~/.clawdis/sessions/sessions.json", // mainKey is ignored; primary key is fixed to "main" + agentToAgent: { + // Max ping-pong reply turns between requester/target (0–5). + maxPingPongTurns: 5 + }, sendPolicy: { rules: [ { action: "deny", match: { surface: "discord", chatType: "group" } } @@ -632,6 +636,7 @@ Controls session scoping, idle expiry, reset triggers, and where the session sto ``` Fields: +- `agentToAgent.maxPingPongTurns`: max reply-back turns between requester/target (0–5, default 5). - `sendPolicy.default`: `allow` or `deny` fallback when no rule matches. - `sendPolicy.rules[]`: match by `surface` (provider), `chatType` (`direct|group|room`), or `keyPrefix` (e.g. `cron:`). First deny wins; otherwise allow. diff --git a/docs/session-tool.md b/docs/session-tool.md index ef12fa776..5b3c11d37 100644 --- a/docs/session-tool.md +++ b/docs/session-tool.md @@ -76,11 +76,14 @@ Behavior: - If the run fails: `{ runId, status: "error", error }`. - Waits via gateway `agent.wait` (server-side) so reconnects don't drop the wait. - Agent-to-agent message context is injected for the primary run. -- After the primary run completes, Clawdis starts an **agent-to-agent post step**: - - The agent can reply with the announcement to post to the target session. - - To stay silent, reply exactly `ANNOUNCE_SKIP`. +- After the primary run completes, Clawdis runs a **reply-back loop**: + - Round 2+ alternates between requester and target agents. + - Reply exactly `REPLY_SKIP` to stop the ping‑pong. + - Max turns is `session.agentToAgent.maxPingPongTurns` (0–5, default 5). 
+- Once the loop ends, Clawdis runs the **agent‑to‑agent announce step** (target agent only): + - Reply exactly `ANNOUNCE_SKIP` to stay silent. - Any other reply is sent to the target channel. - - The post step includes the original request and round‑1 reply in context. + - The announce step includes the original request, the round‑1 reply, and the latest ping‑pong reply in context. ## Provider Field - For groups, `provider` is the `surface` recorded on the session entry. diff --git a/docs/tools.md b/docs/tools.md index fc2a92996..4d43706ff 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -119,7 +119,8 @@ Notes: - `main` is the canonical direct-chat key; global/unknown are hidden. - `messageLimit > 0` fetches last N messages per session (tool messages filtered). - `sessions_send` waits for final completion when `timeoutSeconds > 0`. -- `sessions_send` always runs a follow‑up **agent‑to‑agent post step**; reply `ANNOUNCE_SKIP` to suppress the announcement. +- `sessions_send` runs a reply‑back ping‑pong (reply `REPLY_SKIP` to stop; max turns via `session.agentToAgent.maxPingPongTurns`, 0–5, default 5; 0 disables the loop). +- After the ping‑pong, the target agent runs an **announce step**; reply `ANNOUNCE_SKIP` to suppress the announcement. ### `discord` Send Discord reactions, stickers, or polls. 
diff --git a/src/agents/clawdis-tools.sessions.test.ts b/src/agents/clawdis-tools.sessions.test.ts index e764e20f8..93dc225af 100644 --- a/src/agents/clawdis-tools.sessions.test.ts +++ b/src/agents/clawdis-tools.sessions.test.ts @@ -7,7 +7,11 @@ vi.mock("../gateway/call.js", () => ({ vi.mock("../config/config.js", () => ({ loadConfig: () => ({ - session: { mainKey: "main", scope: "per-sender" }, + session: { + mainKey: "main", + scope: "per-sender", + agentToAgent: { maxPingPongTurns: 2 }, + }, }), resolveGatewayPort: () => 18789, })); @@ -127,18 +131,28 @@ describe("sessions tools", () => { let agentCallCount = 0; let historyCallCount = 0; let sendCallCount = 0; - let waitRunId: string | undefined; - let nextHistoryIsWaitReply = false; + let lastWaitedRunId: string | undefined; + const replyByRunId = new Map(); + const requesterKey = "discord:group:req"; callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string; params?: unknown }; calls.push(request); if (request.method === "agent") { agentCallCount += 1; const runId = `run-${agentCallCount}`; - const params = request.params as { message?: string } | undefined; - if (params?.message === "wait") { - waitRunId = runId; + const params = request.params as + | { message?: string; sessionKey?: string } + | undefined; + const message = params?.message ?? 
""; + let reply = "REPLY_SKIP"; + if (message === "ping" || message === "wait") { + reply = "done"; + } else if (message === "Agent-to-agent announce step.") { + reply = "ANNOUNCE_SKIP"; + } else if (params?.sessionKey === requesterKey) { + reply = "pong"; } + replyByRunId.set(runId, reply); return { runId, status: "accepted", @@ -147,15 +161,13 @@ describe("sessions tools", () => { } if (request.method === "agent.wait") { const params = request.params as { runId?: string } | undefined; - if (params?.runId && params.runId === waitRunId) { - nextHistoryIsWaitReply = true; - } + lastWaitedRunId = params?.runId; return { runId: params?.runId ?? "run-1", status: "ok" }; } if (request.method === "chat.history") { historyCallCount += 1; - const text = nextHistoryIsWaitReply ? "done" : "ANNOUNCE_SKIP"; - nextHistoryIsWaitReply = false; + const text = + (lastWaitedRunId && replyByRunId.get(lastWaitedRunId)) ?? ""; return { messages: [ { @@ -178,7 +190,10 @@ describe("sessions tools", () => { return {}; }); - const tool = createClawdisTools().find( + const tool = createClawdisTools({ + agentSessionKey: requesterKey, + agentSurface: "discord", + }).find( (candidate) => candidate.name === "sessions_send", ); expect(tool).toBeDefined(); @@ -191,6 +206,7 @@ describe("sessions tools", () => { }); expect(fire.details).toMatchObject({ status: "accepted", runId: "run-1" }); await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); const waitPromise = tool.execute("call6", { sessionKey: "main", @@ -204,13 +220,14 @@ describe("sessions tools", () => { }); expect(typeof (waited.details as { runId?: string }).runId).toBe("string"); await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); const agentCalls = calls.filter((call) => call.method === "agent"); const waitCalls = calls.filter((call) => call.method === "agent.wait"); const historyOnlyCalls = calls.filter( (call) => 
call.method === "chat.history", ); - expect(agentCalls).toHaveLength(4); + expect(agentCalls).toHaveLength(8); for (const call of agentCalls) { expect(call.params).toMatchObject({ lane: "nested" }); } @@ -229,11 +246,20 @@ describe("sessions tools", () => { typeof (call.params as { extraSystemPrompt?: string }) ?.extraSystemPrompt === "string" && (call.params as { extraSystemPrompt?: string }) - ?.extraSystemPrompt?.includes("Agent-to-agent post step"), + ?.extraSystemPrompt?.includes("Agent-to-agent reply step"), ), ).toBe(true); - expect(waitCalls).toHaveLength(3); - expect(historyOnlyCalls).toHaveLength(3); + expect( + agentCalls.some( + (call) => + typeof (call.params as { extraSystemPrompt?: string }) + ?.extraSystemPrompt === "string" && + (call.params as { extraSystemPrompt?: string }) + ?.extraSystemPrompt?.includes("Agent-to-agent announce step"), + ), + ).toBe(true); + expect(waitCalls).toHaveLength(8); + expect(historyOnlyCalls).toHaveLength(8); expect( waitCalls.some( (call) => diff --git a/src/agents/clawdis-tools.ts b/src/agents/clawdis-tools.ts index 7f785baba..062272a25 100644 --- a/src/agents/clawdis-tools.ts +++ b/src/agents/clawdis-tools.ts @@ -2713,6 +2713,9 @@ function createSessionsHistoryTool(): AnyAgentTool { } const ANNOUNCE_SKIP_TOKEN = "ANNOUNCE_SKIP"; +const REPLY_SKIP_TOKEN = "REPLY_SKIP"; +const DEFAULT_PING_PONG_TURNS = 5; +const MAX_PING_PONG_TURNS = 5; type AnnounceTarget = { channel: string; @@ -2747,38 +2750,72 @@ function buildAgentToAgentMessageContext(params: { const lines = [ "Agent-to-agent message context:", params.requesterSessionKey - ? `Requester session: ${params.requesterSessionKey}.` + ? `Agent 1 (requester) session: ${params.requesterSessionKey}.` : undefined, params.requesterSurface - ? `Requester surface: ${params.requesterSurface}.` + ? 
`Agent 1 (requester) surface: ${params.requesterSurface}.` : undefined, - `Target session: ${params.targetSessionKey}.`, + `Agent 2 (target) session: ${params.targetSessionKey}.`, ].filter(Boolean); return lines.join("\n"); } -function buildAgentToAgentPostContext(params: { +function buildAgentToAgentReplyContext(params: { + requesterSessionKey?: string; + requesterSurface?: string; + targetSessionKey: string; + targetChannel?: string; + currentRole: "requester" | "target"; + turn: number; + maxTurns: number; +}) { + const currentLabel = + params.currentRole === "requester" + ? "Agent 1 (requester)" + : "Agent 2 (target)"; + const lines = [ + "Agent-to-agent reply step:", + `Current agent: ${currentLabel}.`, + `Turn ${params.turn} of ${params.maxTurns}.`, + params.requesterSessionKey + ? `Agent 1 (requester) session: ${params.requesterSessionKey}.` + : undefined, + params.requesterSurface + ? `Agent 1 (requester) surface: ${params.requesterSurface}.` + : undefined, + `Agent 2 (target) session: ${params.targetSessionKey}.`, + params.targetChannel ? `Agent 2 (target) surface: ${params.targetChannel}.` : undefined, + `If you want to stop the ping-pong, reply exactly "${REPLY_SKIP_TOKEN}".`, + ].filter(Boolean); + return lines.join("\n"); +} + +function buildAgentToAgentAnnounceContext(params: { requesterSessionKey?: string; requesterSurface?: string; targetSessionKey: string; targetChannel?: string; originalMessage: string; roundOneReply?: string; + latestReply?: string; }) { const lines = [ - "Agent-to-agent post step:", + "Agent-to-agent announce step:", params.requesterSessionKey - ? `Requester session: ${params.requesterSessionKey}.` + ? `Agent 1 (requester) session: ${params.requesterSessionKey}.` : undefined, params.requesterSurface - ? `Requester surface: ${params.requesterSurface}.` + ? `Agent 1 (requester) surface: ${params.requesterSurface}.` : undefined, - `Target session: ${params.targetSessionKey}.`, - params.targetChannel ? 
`Target surface: ${params.targetChannel}.` : undefined, + `Agent 2 (target) session: ${params.targetSessionKey}.`, + params.targetChannel ? `Agent 2 (target) surface: ${params.targetChannel}.` : undefined, `Original request: ${params.originalMessage}`, params.roundOneReply ? `Round 1 reply: ${params.roundOneReply}` : "Round 1 reply: (not available).", + params.latestReply + ? `Latest reply: ${params.latestReply}` + : "Latest reply: (not available).", `If you want to remain silent, reply exactly "${ANNOUNCE_SKIP_TOKEN}".`, "Any other reply will be posted to the target channel.", "After this reply, the agent-to-agent conversation is over.", @@ -2790,6 +2827,18 @@ function isAnnounceSkip(text?: string) { return (text ?? "").trim() === ANNOUNCE_SKIP_TOKEN; } +function isReplySkip(text?: string) { + return (text ?? "").trim() === REPLY_SKIP_TOKEN; +} + +function resolvePingPongTurns(cfg?: ClawdisConfig) { + const raw = cfg?.session?.agentToAgent?.maxPingPongTurns; + const fallback = DEFAULT_PING_PONG_TURNS; + if (typeof raw !== "number" || !Number.isFinite(raw)) return fallback; + const rounded = Math.floor(raw); + return Math.max(0, Math.min(MAX_PING_PONG_TURNS, rounded)); +} + function createSessionsSendTool(opts?: { agentSessionKey?: string; agentSurface?: string; @@ -2839,6 +2888,9 @@ function createSessionsSendTool(opts?: { lane: "nested", extraSystemPrompt: agentMessageContext, }; + const requesterSessionKey = opts?.agentSessionKey; + const requesterSurface = opts?.agentSurface; + const maxPingPongTurns = resolvePingPongTurns(cfg); const resolveAnnounceTarget = async (): Promise => { const parsed = resolveAnnounceTargetFromKey(resolvedKey); @@ -2869,85 +2921,160 @@ function createSessionsSendTool(opts?: { return null; }; - const runAgentToAgentPost = async (roundOneReply?: string) => { - const announceTarget = await resolveAnnounceTarget(); + const readLatestAssistantReply = async ( + sessionKeyToRead: string, + ): Promise => { + const history = (await 
callGateway({ + method: "chat.history", + params: { sessionKey: sessionKeyToRead, limit: 50 }, + })) as { messages?: unknown[] }; + const filtered = stripToolMessages( + Array.isArray(history?.messages) ? history.messages : [], + ); + const last = + filtered.length > 0 ? filtered[filtered.length - 1] : undefined; + return last ? extractAssistantText(last) : undefined; + }; + + const runAgentStep = async (params: { + sessionKey: string; + message: string; + extraSystemPrompt: string; + timeoutMs: number; + }): Promise => { + const stepIdem = crypto.randomUUID(); + const response = (await callGateway({ + method: "agent", + params: { + message: params.message, + sessionKey: params.sessionKey, + idempotencyKey: stepIdem, + deliver: false, + lane: "nested", + extraSystemPrompt: params.extraSystemPrompt, + }, + timeoutMs: 10_000, + })) as { runId?: string; acceptedAt?: number }; + const stepRunId = + typeof response?.runId === "string" && response.runId + ? response.runId + : stepIdem; + const stepAcceptedAt = + typeof response?.acceptedAt === "number" + ? response.acceptedAt + : undefined; + const stepWaitMs = Math.min(params.timeoutMs, 60_000); + const wait = (await callGateway({ + method: "agent.wait", + params: { + runId: stepRunId, + afterMs: stepAcceptedAt, + timeoutMs: stepWaitMs, + }, + timeoutMs: stepWaitMs + 2000, + })) as { status?: string }; + if (wait?.status !== "ok") return undefined; + return readLatestAssistantReply(params.sessionKey); + }; + + const runAgentToAgentFlow = async ( + roundOneReply?: string, + runInfo?: { runId: string; acceptedAt?: number }, + ) => { try { - const postPrompt = buildAgentToAgentPostContext({ - requesterSessionKey: opts?.agentSessionKey, - requesterSurface: opts?.agentSurface, - targetSessionKey: displayKey, - targetChannel: announceTarget?.channel ?? 
"unknown", - originalMessage: message, - roundOneReply, - }); - const postIdem = crypto.randomUUID(); - const postResponse = (await callGateway({ - method: "agent", - params: { - message: "Agent-to-agent post step.", - sessionKey: resolvedKey, - idempotencyKey: postIdem, - deliver: false, - lane: "nested", - extraSystemPrompt: postPrompt, - }, - timeoutMs: 10_000, - })) as { runId?: string; acceptedAt?: number }; - const postRunId = - typeof postResponse?.runId === "string" && postResponse.runId - ? postResponse.runId - : postIdem; - const postAcceptedAt = - typeof postResponse?.acceptedAt === "number" - ? postResponse.acceptedAt - : undefined; - const postWaitMs = Math.min(announceTimeoutMs, 60_000); - const postWait = (await callGateway({ - method: "agent.wait", - params: { - runId: postRunId, - afterMs: postAcceptedAt, - timeoutMs: postWaitMs, - }, - timeoutMs: postWaitMs + 2000, - })) as { status?: string }; - if (postWait?.status === "ok") { - const postHistory = (await callGateway({ - method: "chat.history", - params: { sessionKey: resolvedKey, limit: 50 }, - })) as { messages?: unknown[] }; - const postFiltered = stripToolMessages( - Array.isArray(postHistory?.messages) - ? postHistory.messages - : [], - ); - const postLast = - postFiltered.length > 0 - ? postFiltered[postFiltered.length - 1] - : undefined; - const postReply = postLast - ? 
extractAssistantText(postLast) - : undefined; - if ( - announceTarget && - postReply && - postReply.trim() && - !isAnnounceSkip(postReply) - ) { - await callGateway({ - method: "send", - params: { - to: announceTarget.to, - message: postReply.trim(), - provider: announceTarget.channel, - idempotencyKey: crypto.randomUUID(), - }, - timeoutMs: 10_000, - }); + let primaryReply = roundOneReply; + let latestReply = roundOneReply; + if (!primaryReply && runInfo?.runId) { + const waitMs = Math.min(announceTimeoutMs, 60_000); + const wait = (await callGateway({ + method: "agent.wait", + params: { + runId: runInfo.runId, + afterMs: runInfo.acceptedAt, + timeoutMs: waitMs, + }, + timeoutMs: waitMs + 2000, + })) as { status?: string }; + if (wait?.status === "ok") { + primaryReply = await readLatestAssistantReply(resolvedKey); + latestReply = primaryReply; } } + if (!latestReply) return; + const announceTarget = await resolveAnnounceTarget(); + const targetChannel = announceTarget?.channel ?? "unknown"; + if ( + maxPingPongTurns > 0 && + requesterSessionKey && + requesterSessionKey !== resolvedKey + ) { + let currentSessionKey = requesterSessionKey; + let nextSessionKey = resolvedKey; + let incomingMessage = latestReply; + for (let turn = 1; turn <= maxPingPongTurns; turn += 1) { + const currentRole = + currentSessionKey === requesterSessionKey + ? 
"requester" + : "target"; + const replyPrompt = buildAgentToAgentReplyContext({ + requesterSessionKey, + requesterSurface, + targetSessionKey: displayKey, + targetChannel, + currentRole, + turn, + maxTurns: maxPingPongTurns, + }); + const replyText = await runAgentStep({ + sessionKey: currentSessionKey, + message: incomingMessage, + extraSystemPrompt: replyPrompt, + timeoutMs: announceTimeoutMs, + }); + if (!replyText || isReplySkip(replyText)) { + break; + } + latestReply = replyText; + incomingMessage = replyText; + const swap = currentSessionKey; + currentSessionKey = nextSessionKey; + nextSessionKey = swap; + } + } + const announcePrompt = buildAgentToAgentAnnounceContext({ + requesterSessionKey, + requesterSurface, + targetSessionKey: displayKey, + targetChannel, + originalMessage: message, + roundOneReply: primaryReply, + latestReply, + }); + const announceReply = await runAgentStep({ + sessionKey: resolvedKey, + message: "Agent-to-agent announce step.", + extraSystemPrompt: announcePrompt, + timeoutMs: announceTimeoutMs, + }); + if ( + announceTarget && + announceReply && + announceReply.trim() && + !isAnnounceSkip(announceReply) + ) { + await callGateway({ + method: "send", + params: { + to: announceTarget.to, + message: announceReply.trim(), + provider: announceTarget.channel, + idempotencyKey: crypto.randomUUID(), + }, + timeoutMs: 10_000, + }); + } } catch { - // Best-effort announce; ignore failures to avoid breaking the caller response. + // Best-effort follow-ups; ignore failures to avoid breaking the caller response. } }; @@ -2957,11 +3084,15 @@ function createSessionsSendTool(opts?: { method: "agent", params: sendParams, timeoutMs: 10_000, - })) as { runId?: string }; + })) as { runId?: string; acceptedAt?: number }; + const acceptedAt = + typeof response?.acceptedAt === "number" + ? 
response.acceptedAt + : undefined; if (typeof response?.runId === "string" && response.runId) { runId = response.runId; } - void runAgentToAgentPost(); + void runAgentToAgentFlow(undefined, { runId, acceptedAt }); return jsonResult({ runId, status: "accepted", @@ -3067,7 +3198,7 @@ function createSessionsSendTool(opts?: { const last = filtered.length > 0 ? filtered[filtered.length - 1] : undefined; const reply = last ? extractAssistantText(last) : undefined; - void runAgentToAgentPost(reply ?? undefined); + void runAgentToAgentFlow(reply ?? undefined); return jsonResult({ runId, diff --git a/src/config/config.ts b/src/config/config.ts index 051b6daf9..68af18c09 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -44,6 +44,10 @@ export type SessionConfig = { typingIntervalSeconds?: number; mainKey?: string; sendPolicy?: SessionSendPolicyConfig; + agentToAgent?: { + /** Max ping-pong turns between requester/target (0–5). Default: 5. */ + maxPingPongTurns?: number; + }; }; export type LoggingConfig = { @@ -894,6 +898,11 @@ const SessionSchema = z .optional(), }) .optional(), + agentToAgent: z + .object({ + maxPingPongTurns: z.number().int().min(0).max(5).optional(), + }) + .optional(), }) .optional(); diff --git a/src/config/schema.ts b/src/config/schema.ts index 25f5c153b..f076c9ba6 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -88,6 +88,7 @@ const FIELD_LABELS: Record = { "agent.model": "Default Model", "ui.seamColor": "Accent Color", "browser.controlUrl": "Browser Control URL", + "session.agentToAgent.maxPingPongTurns": "Agent-to-Agent Ping-Pong Turns", "talk.apiKey": "Talk API Key", "telegram.botToken": "Telegram Bot Token", "discord.token": "Discord Bot Token", @@ -106,6 +107,8 @@ const FIELD_HELP: Record = { 'Hot reload strategy for config changes ("hybrid" recommended).', "gateway.reload.debounceMs": "Debounce window (ms) before applying config changes.", + "session.agentToAgent.maxPingPongTurns": + "Max reply-back turns between 
requester and target (0–5).", }; const FIELD_PLACEHOLDERS: Record = {