From f31e89d5af2ec84357a9e23ab748dc36c9fe6133 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 2 Dec 2025 10:42:27 +0000 Subject: [PATCH] Agents: add pluggable CLIs Co-authored-by: RealSid08 --- docs/agent.md | 77 ++++++++++++++++ src/agents/claude.ts | 67 ++++++++++++++ src/agents/codex.ts | 66 ++++++++++++++ src/agents/index.ts | 19 ++++ src/agents/opencode.ts | 54 +++++++++++ src/agents/pi.ts | 65 ++++++++++++++ src/agents/types.ts | 42 +++++++++ src/auto-reply/claude.ts | 3 + src/auto-reply/command-reply.test.ts | 29 +++--- src/auto-reply/command-reply.ts | 129 +++++++++------------------ src/auto-reply/opencode.ts | 105 ++++++++++++++++++++++ src/auto-reply/reply.ts | 4 +- src/config/config.ts | 47 ++++++---- src/index.core.test.ts | 6 +- src/web/auto-reply.ts | 61 +++++++------ 15 files changed, 624 insertions(+), 150 deletions(-) create mode 100644 docs/agent.md create mode 100644 src/agents/claude.ts create mode 100644 src/agents/codex.ts create mode 100644 src/agents/index.ts create mode 100644 src/agents/opencode.ts create mode 100644 src/agents/pi.ts create mode 100644 src/agents/types.ts create mode 100644 src/auto-reply/opencode.ts diff --git a/docs/agent.md b/docs/agent.md new file mode 100644 index 000000000..eacd853b5 --- /dev/null +++ b/docs/agent.md @@ -0,0 +1,77 @@ +# Agent Abstraction Refactor Plan + +Goal: support multiple agent CLIs (Claude, Codex, Pi, Opencode) cleanly, without legacy flags, and make parsing/injection per-agent. Keep WhatsApp/Twilio plumbing intact. + +## Overview +- Introduce a pluggable agent layer (`src/agents/*`), selected by config. +- Normalize config (`agent` block) and remove `claudeOutputFormat` legacy knobs. +- Provide per-agent argv builders and output parsers (including NDJSON streams). +- Preserve MEDIA-token handling and shared queue/heartbeat behavior. 
+ +## Configuration +- New shape (no backward compat): + ```json5 + inbound: { + reply: { + mode: "command", + agent: { + kind: "claude" | "opencode" | "pi" | "codex", + format?: "text" | "json", + identityPrefix?: string + }, + command: ["claude", "{{Body}}"], + cwd?: string, + session?: { ... }, + timeoutSeconds?: number, + bodyPrefix?: string, + mediaUrl?: string, + mediaMaxMb?: number, + typingIntervalSeconds?: number, + heartbeatMinutes?: number + } + } + ``` +- Validation moves to `config.ts` (new `AgentKind`/`AgentConfig` types). +- If `agent` is missing → config error. + +## Agent modules +- `src/agents/types.ts` – `AgentKind`, `AgentSpec` (`kind`, plus `isInvocation(argv: string[]): boolean` to detect the agent's binary): + - `buildArgs(ctx: BuildArgsContext): string[]` – `ctx` carries `argv`, `bodyIndex`, `isNewSession`, `sessionId?`, `sendSystemOnce`, `systemSent`, `identityPrefix?`, `format?`, `sessionArgNew?`, `sessionArgResume?` + - `parseOutput(rawStdout: string): { text?: string; mediaUrls?: string[]; meta?: AgentMeta }` +- `src/agents/claude.ts` – current flag injection (`--output-format`, `-p`), identity prepend. +- `src/agents/opencode.ts` – reuse `parseOpencodeJson` (from PR #5), inject `--format json`, session flag `--session` defaults, identity prefix. +- `src/agents/pi.ts` – parse NDJSON `AssistantMessageEvent` (final `message_end.message.content[text]`), inject `--mode json`/`-p` defaults, session flags. +- `src/agents/codex.ts` – parse Codex JSONL (last `item` with `type:"agent_message"`; usage from `turn.completed`), inject `codex exec --json --skip-git-repo-check`, sandbox default read-only. +- Shared MEDIA extraction stays in `media/parse.ts`. + +## Command runner changes +- `runCommandReply`: + - Resolve agent spec from config. + - Inject session args in the runner (agent-specific defaults), then apply `buildArgs` when `isInvocation(argv)` matches (handles flag injection and identity prepend per agent). + - Run command; send stdout to `spec.parseOutput` → `text`, `mediaUrls`, `meta` (stored as `agentMeta`). + - Remove `claudeMeta` naming; tests updated to `agentMeta`. + +## Sessions +- Session arg defaults become agent-specific (Claude: `--resume/--session-id`; Opencode/Pi/Codex: `--session`). 
+- Still overridable via `sessionArgNew/sessionArgResume` in config. + +## Tests +- Update existing tests to new config (no `claudeOutputFormat`). +- Add fixtures: + - Opencode NDJSON sample (from PR #5) → parsed text + meta. + - Codex NDJSON sample (captured: thread/turn/item/usage) → parsed text. + - Pi NDJSON sample (AssistantMessageEvent) → parsed text. +- Ensure MEDIA token parsing works on agent text output. + +## Docs +- README: rename “Claude-aware” → “Multi-agent (Claude, Codex, Pi, Opencode)”. +- New short guide per agent (Opencode doc from PR #5; add Codex/Pi snippets). +- Mention identityPrefix override and session arg differences. + +## Migration +- Breaking change: configs must specify `agent`. Remove old `claudeOutputFormat` keys. +- Provide migration note in CHANGELOG 1.3.x. + +## Out of scope +- No media binary support; still relies on MEDIA tokens in text. +- No UI changes; WhatsApp/Twilio plumbing unchanged. diff --git a/src/agents/claude.ts b/src/agents/claude.ts new file mode 100644 index 000000000..80cd767bb --- /dev/null +++ b/src/agents/claude.ts @@ -0,0 +1,67 @@ +import path from "node:path"; + +import { + CLAUDE_BIN, + CLAUDE_IDENTITY_PREFIX, + parseClaudeJson, + summarizeClaudeMetadata, + type ClaudeJsonParseResult, +} from "../auto-reply/claude.js"; +import type { + AgentMeta, + AgentParseResult, + AgentSpec, + BuildArgsContext, +} from "./types.js"; + +function toMeta(parsed?: ClaudeJsonParseResult): AgentMeta | undefined { + if (!parsed?.parsed) return undefined; + const summary = summarizeClaudeMetadata(parsed.parsed); + return summary ? { extra: { summary } } : undefined; +} + +export const claudeSpec: AgentSpec = { + kind: "claude", + isInvocation: (argv) => argv.length > 0 && path.basename(argv[0]) === CLAUDE_BIN, + buildArgs: (ctx) => { + // Work off a split of "before body" and "after body" so we don't lose the + // body index when inserting flags. + const argv = [...ctx.argv]; + const body = argv[ctx.bodyIndex] ?? 
""; + const beforeBody = argv.slice(0, ctx.bodyIndex); + const afterBody = argv.slice(ctx.bodyIndex + 1); + + const wantsOutputFormat = typeof ctx.format === "string"; + if (wantsOutputFormat) { + const hasOutputFormat = argv.some( + (part) => part === "--output-format" || part.startsWith("--output-format="), + ); + if (!hasOutputFormat) { + beforeBody.push("--output-format", ctx.format!); + } + } + + const hasPrintFlag = argv.some((part) => part === "-p" || part === "--print"); + if (!hasPrintFlag) { + beforeBody.push("-p"); + } + + const shouldPrependIdentity = !(ctx.sendSystemOnce && ctx.systemSent); + const bodyWithIdentity = + shouldPrependIdentity && body + ? [ctx.identityPrefix ?? CLAUDE_IDENTITY_PREFIX, body] + .filter(Boolean) + .join("\n\n") + : body; + + return [...beforeBody, bodyWithIdentity, ...afterBody]; + }, + parseOutput: (rawStdout) => { + const parsed = parseClaudeJson(rawStdout); + const text = parsed?.text ?? rawStdout.trim(); + return { + text: text?.trim(), + meta: toMeta(parsed), + }; + }, +}; diff --git a/src/agents/codex.ts b/src/agents/codex.ts new file mode 100644 index 000000000..3b2066d05 --- /dev/null +++ b/src/agents/codex.ts @@ -0,0 +1,66 @@ +import path from "node:path"; + +import type { AgentMeta, AgentParseResult, AgentSpec, BuildArgsContext } from "./types.js"; + +function parseCodexJson(raw: string): AgentParseResult { + const lines = raw.split(/\n+/).filter((l) => l.trim().startsWith("{")); + let text: string | undefined; + let meta: AgentMeta | undefined; + + for (const line of lines) { + try { + const ev = JSON.parse(line) as { type?: string; item?: { type?: string; text?: string }; usage?: unknown }; + if (ev.type === "item.completed" && ev.item?.type === "agent_message" && typeof ev.item.text === "string") { + text = ev.item.text; + } + if (ev.type === "turn.completed" && ev.usage && typeof ev.usage === "object") { + const u = ev.usage as { + input_tokens?: number; + cached_input_tokens?: number; + output_tokens?: number; 
+ }; + meta = { + usage: { + input: u.input_tokens, + output: u.output_tokens, + cacheRead: u.cached_input_tokens, + total: + (u.input_tokens ?? 0) + + (u.output_tokens ?? 0) + + (u.cached_input_tokens ?? 0), + }, + }; + } + } catch { + // ignore + } + } + + return { text: text?.trim(), meta }; +} + +export const codexSpec: AgentSpec = { + kind: "codex", + isInvocation: (argv) => argv.length > 0 && path.basename(argv[0]) === "codex", + buildArgs: (ctx) => { + const argv = [...ctx.argv]; + const hasExec = argv.length > 0 && argv[1] === "exec"; + if (!hasExec) { + argv.splice(1, 0, "exec"); + } + // Ensure JSON output + if (!argv.includes("--json")) { + argv.splice(argv.length - 1, 0, "--json"); + } + // Safety defaults + if (!argv.includes("--skip-git-repo-check")) { + argv.splice(argv.length - 1, 0, "--skip-git-repo-check"); + } + if (!argv.some((p) => p === "--sandbox" || p.startsWith("--sandbox="))) { + argv.splice(argv.length - 1, 0, "--sandbox", "read-only"); + } + return argv; + }, + parseOutput: parseCodexJson, +}; + diff --git a/src/agents/index.ts b/src/agents/index.ts new file mode 100644 index 000000000..508b3811c --- /dev/null +++ b/src/agents/index.ts @@ -0,0 +1,19 @@ +import { claudeSpec } from "./claude.js"; +import { codexSpec } from "./codex.js"; +import { opencodeSpec } from "./opencode.js"; +import { piSpec } from "./pi.js"; +import type { AgentKind, AgentSpec } from "./types.js"; + +const specs: Record = { + claude: claudeSpec, + codex: codexSpec, + opencode: opencodeSpec, + pi: piSpec, +}; + +export function getAgentSpec(kind: AgentKind): AgentSpec { + return specs[kind]; +} + +export { AgentKind, AgentMeta, AgentParseResult } from "./types.js"; + diff --git a/src/agents/opencode.ts b/src/agents/opencode.ts new file mode 100644 index 000000000..a19bfae7b --- /dev/null +++ b/src/agents/opencode.ts @@ -0,0 +1,54 @@ +import path from "node:path"; + +import { + OPENCODE_BIN, + OPENCODE_IDENTITY_PREFIX, + parseOpencodeJson, + 
summarizeOpencodeMetadata, +} from "../auto-reply/opencode.js"; +import type { AgentMeta, AgentParseResult, AgentSpec, BuildArgsContext } from "./types.js"; + +function toMeta(parsed: ReturnType): AgentMeta | undefined { + const summary = summarizeOpencodeMetadata(parsed.meta); + return summary ? { extra: { summary } } : undefined; +} + +export const opencodeSpec: AgentSpec = { + kind: "opencode", + isInvocation: (argv) => argv.length > 0 && path.basename(argv[0]) === OPENCODE_BIN, + buildArgs: (ctx) => { + const argv = [...ctx.argv]; + const wantsJson = ctx.format === "json"; + + // Ensure format json for parsing + if (wantsJson) { + const hasFormat = argv.some( + (part) => part === "--format" || part.startsWith("--format="), + ); + if (!hasFormat) { + const insertBeforeBody = Math.max(argv.length - 1, 0); + argv.splice(insertBeforeBody, 0, "--format", "json"); + } + } + + // Session args default to --session + // Identity prefix + const shouldPrependIdentity = !(ctx.sendSystemOnce && ctx.systemSent); + if (shouldPrependIdentity && argv[ctx.bodyIndex]) { + const existingBody = argv[ctx.bodyIndex]; + argv[ctx.bodyIndex] = [ctx.identityPrefix ?? OPENCODE_IDENTITY_PREFIX, existingBody] + .filter(Boolean) + .join("\n\n"); + } + + return argv; + }, + parseOutput: (rawStdout) => { + const parsed = parseOpencodeJson(rawStdout); + const text = parsed.text ?? 
rawStdout.trim(); + return { + text: text?.trim(), + meta: toMeta(parsed), + }; + }, +}; diff --git a/src/agents/pi.ts b/src/agents/pi.ts new file mode 100644 index 000000000..18efc0531 --- /dev/null +++ b/src/agents/pi.ts @@ -0,0 +1,65 @@ +import path from "node:path"; + +import type { AgentMeta, AgentParseResult, AgentSpec, BuildArgsContext } from "./types.js"; + +type PiAssistantMessage = { + role?: string; + content?: Array<{ type?: string; text?: string }>; + usage?: { input?: number; output?: number }; + model?: string; + provider?: string; + stopReason?: string; +}; + +function parsePiJson(raw: string): AgentParseResult { + const lines = raw.split(/\n+/).filter((l) => l.trim().startsWith("{")); + let lastMessage: PiAssistantMessage | undefined; + for (const line of lines) { + try { + const ev = JSON.parse(line) as { type?: string; message?: PiAssistantMessage }; + if (ev.type === "message_end" && ev.message?.role === "assistant") { + lastMessage = ev.message; + } + } catch { + // ignore + } + } + const text = + lastMessage?.content + ?.filter((c) => c?.type === "text" && typeof c.text === "string") + .map((c) => c.text) + .join("\n") + ?.trim() ?? undefined; + const meta: AgentMeta | undefined = lastMessage + ? 
{ + model: lastMessage.model, + provider: lastMessage.provider, + stopReason: lastMessage.stopReason, + usage: lastMessage.usage, + } + : undefined; + return { text, meta }; +} + +export const piSpec: AgentSpec = { + kind: "pi", + isInvocation: (argv) => argv.length > 0 && path.basename(argv[0]) === "pi", + buildArgs: (ctx) => { + const argv = [...ctx.argv]; + // Non-interactive print + JSON + if (!argv.includes("-p") && !argv.includes("--print")) { + argv.splice(argv.length - 1, 0, "-p"); + } + if (ctx.format === "json" && !argv.includes("--mode") && !argv.some((a) => a === "--mode")) { + argv.splice(argv.length - 1, 0, "--mode", "json"); + } + // Session defaults + // Identity prefix optional; Pi usually doesn't need, but allow + if (!(ctx.sendSystemOnce && ctx.systemSent) && argv[ctx.bodyIndex]) { + const existingBody = argv[ctx.bodyIndex]; + argv[ctx.bodyIndex] = [ctx.identityPrefix, existingBody].filter(Boolean).join("\n\n"); + } + return argv; + }, + parseOutput: parsePiJson, +}; diff --git a/src/agents/types.ts b/src/agents/types.ts new file mode 100644 index 000000000..76b6ba1d2 --- /dev/null +++ b/src/agents/types.ts @@ -0,0 +1,42 @@ +export type AgentKind = "claude" | "opencode" | "pi" | "codex"; + +export type AgentMeta = { + model?: string; + provider?: string; + stopReason?: string; + usage?: { + input?: number; + output?: number; + cacheRead?: number; + cacheWrite?: number; + total?: number; + }; + extra?: Record; +}; + +export type AgentParseResult = { + text?: string; + mediaUrls?: string[]; + meta?: AgentMeta; +}; + +export type BuildArgsContext = { + argv: string[]; + bodyIndex: number; // index of prompt/body argument in argv + isNewSession: boolean; + sessionId?: string; + sendSystemOnce: boolean; + systemSent: boolean; + identityPrefix?: string; + format?: "text" | "json"; + sessionArgNew?: string[]; + sessionArgResume?: string[]; +}; + +export interface AgentSpec { + kind: AgentKind; + isInvocation: (argv: string[]) => boolean; + buildArgs: 
(ctx: BuildArgsContext) => string[]; + parseOutput: (rawStdout: string) => AgentParseResult; +} + diff --git a/src/auto-reply/claude.ts b/src/auto-reply/claude.ts index b1a6518f4..ca3cfa9df 100644 --- a/src/auto-reply/claude.ts +++ b/src/auto-reply/claude.ts @@ -160,3 +160,6 @@ export function parseClaudeJsonText(raw: string): string | undefined { const parsed = parseClaudeJson(raw); return parsed?.text; } + +// Re-export from command-reply for backwards compatibility +export { summarizeClaudeMetadata } from "./command-reply.js"; diff --git a/src/auto-reply/command-reply.test.ts b/src/auto-reply/command-reply.test.ts index db1e64767..39ca340b8 100644 --- a/src/auto-reply/command-reply.test.ts +++ b/src/auto-reply/command-reply.test.ts @@ -70,7 +70,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, @@ -98,7 +98,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: true, @@ -121,7 +121,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: true, @@ -144,7 +144,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: true, @@ -167,6 +167,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["cli", "{{Body}}"], + agent: { kind: "claude" }, session: { sessionArgNew: ["--new", "{{SessionId}}"], sessionArgResume: ["--resume", "{{SessionId}}"], @@ -192,7 +193,7 @@ 
describe("runCommandReply", () => { throw { stdout: "partial output here", killed: true, signal: "SIGKILL" }; }); const { payload, meta } = await runCommandReply({ - reply: { mode: "command", command: ["echo", "hi"] }, + reply: { mode: "command", command: ["echo", "hi"], agent: { kind: "claude" } }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, isNewSession: true, @@ -213,7 +214,7 @@ describe("runCommandReply", () => { throw { stdout: "", killed: true, signal: "SIGKILL" }; }); const { payload } = await runCommandReply({ - reply: { mode: "command", command: ["echo", "hi"], cwd: "/tmp/work" }, + reply: { mode: "command", command: ["echo", "hi"], cwd: "/tmp/work", agent: { kind: "claude" } }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, isNewSession: true, @@ -235,7 +236,7 @@ describe("runCommandReply", () => { stdout: `hi\nMEDIA:${tmp}\nMEDIA:https://example.com/img.jpg`, }); const { payload } = await runCommandReply({ - reply: { mode: "command", command: ["echo", "hi"], mediaMaxMb: 1 }, + reply: { mode: "command", command: ["echo", "hi"], mediaMaxMb: 1, agent: { kind: "claude" } }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, isNewSession: true, @@ -259,7 +260,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, @@ -271,14 +272,14 @@ describe("runCommandReply", () => { commandRunner: runner, enqueue: enqueueImmediate, }); - expect(meta.claudeMeta).toContain("duration=50ms"); - expect(meta.claudeMeta).toContain("tool_calls=1"); + expect(meta.agentMeta?.extra?.summary).toContain("duration=50ms"); + expect(meta.agentMeta?.extra?.summary).toContain("tool_calls=1"); }); it("captures queue wait metrics in meta", async () => { const runner = makeRunner({ stdout: "ok" }); const { meta } = await runCommandReply({ - reply: { mode: "command", command: ["echo", "{{Body}}"] 
}, + reply: { mode: "command", command: ["echo", "{{Body}}"], agent: { kind: "claude" } }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, isNewSession: true, @@ -303,7 +304,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, @@ -328,7 +329,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, @@ -353,7 +354,7 @@ describe("runCommandReply", () => { reply: { mode: "command", command: ["claude", "{{Body}}"], - claudeOutputFormat: "json", + agent: { kind: "claude", format: "json" }, }, templatingCtx: noopTemplateCtx, sendSystemOnce: false, diff --git a/src/auto-reply/command-reply.ts b/src/auto-reply/command-reply.ts index 71e564948..a897520ba 100644 --- a/src/auto-reply/command-reply.ts +++ b/src/auto-reply/command-reply.ts @@ -1,18 +1,14 @@ import fs from "node:fs/promises"; import path from "node:path"; +import { getAgentSpec } from "../agents/index.js"; +import type { AgentMeta } from "../agents/types.js"; import type { WarelayConfig } from "../config/config.js"; import { isVerbose, logVerbose } from "../globals.js"; import { logError } from "../logger.js"; import { splitMediaFromOutput } from "../media/parse.js"; import { enqueueCommand } from "../process/command-queue.js"; import type { runCommandWithTimeout } from "../process/exec.js"; -import { - CLAUDE_BIN, - CLAUDE_IDENTITY_PREFIX, - type ClaudeJsonParseResult, - parseClaudeJson, -} from "./claude.js"; import { applyTemplate, type TemplateContext } from "./templating.js"; import type { ReplyPayload } from "./types.js"; @@ -42,7 +38,7 @@ export type CommandReplyMeta = { exitCode?: number | null; signal?: string | null; killed?: boolean; - 
claudeMeta?: string; + agentMeta?: AgentMeta; }; export type CommandReplyResult = { @@ -119,6 +115,8 @@ export async function runCommandReply( if (!reply.command?.length) { throw new Error("reply.command is required for mode=command"); } + const agentCfg = reply.agent ?? { kind: "claude" }; + const agent = getAgentSpec(agentCfg.kind as any); let argv = reply.command.map((part) => applyTemplate(part, templatingCtx)); const templatePrefix = @@ -129,66 +127,47 @@ export async function runCommandReply( argv = [argv[0], templatePrefix, ...argv.slice(1)]; } - // Ensure Claude commands can emit plain text by forcing --output-format when configured. - if ( - reply.claudeOutputFormat && - argv.length > 0 && - path.basename(argv[0]) === CLAUDE_BIN - ) { - const hasOutputFormat = argv.some( - (part) => - part === "--output-format" || part.startsWith("--output-format="), - ); - const insertBeforeBody = Math.max(argv.length - 1, 0); - if (!hasOutputFormat) { - argv = [ - ...argv.slice(0, insertBeforeBody), - "--output-format", - reply.claudeOutputFormat, - ...argv.slice(insertBeforeBody), - ]; - } - const hasPrintFlag = argv.some( - (part) => part === "-p" || part === "--print", - ); - if (!hasPrintFlag) { - const insertIdx = Math.max(argv.length - 1, 0); - argv = [...argv.slice(0, insertIdx), "-p", ...argv.slice(insertIdx)]; - } - } + // Default body index is last arg + let bodyIndex = Math.max(argv.length - 1, 0); - // Inject session args if configured (use resume for existing, session-id for new) + // Session args prepared (templated) and injected generically if (reply.session) { + const defaultNew = + agentCfg.kind === "claude" + ? ["--session-id", "{{SessionId}}"] + : ["--session", "{{SessionId}}"]; + const defaultResume = + agentCfg.kind === "claude" + ? ["--resume", "{{SessionId}}"] + : ["--session", "{{SessionId}}"]; const sessionArgList = ( isNewSession - ? (reply.session.sessionArgNew ?? ["--session-id", "{{SessionId}}"]) - : (reply.session.sessionArgResume ?? 
["--resume", "{{SessionId}}"]) - ).map((part) => applyTemplate(part, templatingCtx)); + ? reply.session.sessionArgNew ?? defaultNew + : reply.session.sessionArgResume ?? defaultResume + ).map((p) => applyTemplate(p, templatingCtx)); if (sessionArgList.length) { const insertBeforeBody = reply.session.sessionArgBeforeBody ?? true; const insertAt = insertBeforeBody && argv.length > 1 ? argv.length - 1 : argv.length; - argv = [ - ...argv.slice(0, insertAt), - ...sessionArgList, - ...argv.slice(insertAt), - ]; + argv = [...argv.slice(0, insertAt), ...sessionArgList, ...argv.slice(insertAt)]; + bodyIndex = Math.max(argv.length - 1, 0); } } - let finalArgv = argv; - const isClaudeInvocation = - finalArgv.length > 0 && path.basename(finalArgv[0]) === CLAUDE_BIN; - const shouldPrependIdentity = - isClaudeInvocation && !(sendSystemOnce && systemSent); - if (shouldPrependIdentity && finalArgv.length > 0) { - const bodyIdx = finalArgv.length - 1; - const existingBody = finalArgv[bodyIdx] ?? ""; - finalArgv = [ - ...finalArgv.slice(0, bodyIdx), - [CLAUDE_IDENTITY_PREFIX, existingBody].filter(Boolean).join("\n\n"), - ]; - } + const shouldApplyAgent = agent.isInvocation(argv); + const finalArgv = shouldApplyAgent + ? agent.buildArgs({ + argv, + bodyIndex, + isNewSession, + sessionId: templatingCtx.SessionId, + sendSystemOnce, + systemSent, + identityPrefix: agentCfg.identityPrefix, + format: agentCfg.format, + }) + : argv; + logVerbose( `Running command auto-reply: ${finalArgv.join(" ")}${reply.cwd ? 
` (cwd: ${reply.cwd})` : ""}`, ); @@ -217,28 +196,12 @@ export async function runCommandReply( if (stderr?.trim()) { logVerbose(`Command auto-reply stderr: ${stderr.trim()}`); } - let parsed: ClaudeJsonParseResult | undefined; - if ( - trimmed && - (reply.claudeOutputFormat === "json" || isClaudeInvocation) - ) { - parsed = parseClaudeJson(trimmed); - if (parsed?.parsed && isVerbose()) { - const summary = summarizeClaudeMetadata(parsed.parsed); - if (summary) logVerbose(`Claude JSON meta: ${summary}`); - logVerbose( - `Claude JSON raw: ${JSON.stringify(parsed.parsed, null, 2)}`, - ); - } - if (typeof parsed?.text === "string") { - logVerbose( - `Claude JSON parsed -> ${parsed.text.slice(0, 120)}${parsed.text.length > 120 ? "…" : ""}`, - ); - trimmed = parsed.text.trim(); - } else { - logVerbose("Claude JSON parse failed; returning raw stdout"); - } + + const parsed = trimmed ? agent.parseOutput(trimmed) : undefined; + if (parsed && parsed.text !== undefined) { + trimmed = parsed.text.trim(); } + const { text: cleanedText, mediaUrls: mediaFound } = splitMediaFromOutput(trimmed); trimmed = cleanedText; @@ -249,7 +212,7 @@ export async function runCommandReply( logVerbose("No MEDIA token extracted from final text"); } if (!trimmed && !mediaFromCommand) { - const meta = parsed ? summarizeClaudeMetadata(parsed.parsed) : undefined; + const meta = parsed?.meta?.extra?.summary ?? undefined; trimmed = `(command produced no output${meta ? `; ${meta}` : ""})`; logVerbose("No text/media produced; injecting fallback notice to user"); } @@ -271,9 +234,7 @@ export async function runCommandReply( exitCode: code, signal, killed, - claudeMeta: parsed - ? summarizeClaudeMetadata(parsed.parsed) - : undefined, + agentMeta: parsed?.meta, }, }; } @@ -291,9 +252,7 @@ export async function runCommandReply( exitCode: code, signal, killed, - claudeMeta: parsed - ? 
summarizeClaudeMetadata(parsed.parsed) - : undefined, + agentMeta: parsed?.meta, }, }; } @@ -341,7 +300,7 @@ export async function runCommandReply( exitCode: code, signal, killed, - claudeMeta: parsed ? summarizeClaudeMetadata(parsed.parsed) : undefined, + agentMeta: parsed?.meta, }; if (isVerbose()) { logVerbose(`Command auto-reply meta: ${JSON.stringify(meta)}`); diff --git a/src/auto-reply/opencode.ts b/src/auto-reply/opencode.ts new file mode 100644 index 000000000..19b16055d --- /dev/null +++ b/src/auto-reply/opencode.ts @@ -0,0 +1,105 @@ +// Helpers specific to Opencode CLI output/argv handling. + +// Preferred binary name for Opencode CLI invocations. +export const OPENCODE_BIN = "opencode"; + +export const OPENCODE_IDENTITY_PREFIX = + "You are Openclawd running on the user's Mac via warelay. Your scratchpad is /Users/steipete/openclawd; this is your folder and you can add what you like in markdown files and/or images. You don't need to be concise, but WhatsApp replies must stay under ~1500 characters. Media you can send: images ≤6MB, audio/video ≤16MB, documents ≤100MB. The prompt may include a media path and an optional Transcript: section—use them when present. 
If a prompt is a heartbeat poll and nothing needs attention, reply with exactly HEARTBEAT_OK and nothing else; for any alert, do not include HEARTBEAT_OK."; + +export type OpencodeJsonParseResult = { + text?: string; + parsed: unknown[]; + valid: boolean; + meta?: { + durationMs?: number; + cost?: number; + tokens?: { + input?: number; + output?: number; + }; + }; +}; + +export function parseOpencodeJson(raw: string): OpencodeJsonParseResult { + const lines = raw.split(/\n+/).filter((s) => s.trim()); + const parsed: unknown[] = []; + let text = ""; + let valid = false; + let startTime: number | undefined; + let endTime: number | undefined; + let cost = 0; + let inputTokens = 0; + let outputTokens = 0; + + for (const line of lines) { + try { + const event = JSON.parse(line); + parsed.push(event); + if (event && typeof event === "object") { + // Opencode emits a stream of events. + if (event.type === "step_start") { + valid = true; + if (typeof event.timestamp === "number") { + if (startTime === undefined || event.timestamp < startTime) { + startTime = event.timestamp; + } + } + } + + if (event.type === "text" && event.part?.text) { + text += event.part.text; + valid = true; + } + + if (event.type === "step_finish") { + valid = true; + if (typeof event.timestamp === "number") { + endTime = event.timestamp; + } + if (event.part) { + if (typeof event.part.cost === "number") { + cost += event.part.cost; + } + if (event.part.tokens) { + inputTokens += event.part.tokens.input || 0; + outputTokens += event.part.tokens.output || 0; + } + } + } + } + } catch { + // ignore non-JSON lines + } + } + + const meta: OpencodeJsonParseResult["meta"] = {}; + if (startTime !== undefined && endTime !== undefined) { + meta.durationMs = endTime - startTime; + } + if (cost > 0) meta.cost = cost; + if (inputTokens > 0 || outputTokens > 0) { + meta.tokens = { input: inputTokens, output: outputTokens }; + } + + return { + text: text || undefined, + parsed, + valid: valid && parsed.length > 
0, + meta: Object.keys(meta).length > 0 ? meta : undefined, + }; +} + +export function summarizeOpencodeMetadata( + meta: OpencodeJsonParseResult["meta"], +): string | undefined { + if (!meta) return undefined; + const parts: string[] = []; + if (meta.durationMs !== undefined) + parts.push(`duration=${meta.durationMs}ms`); + if (meta.cost !== undefined) parts.push(`cost=$${meta.cost.toFixed(4)}`); + if (meta.tokens) { + parts.push(`tokens=${meta.tokens.input}+${meta.tokens.output}`); + } + return parts.length ? parts.join(", ") : undefined; +} + diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 20aef572c..e59f2c683 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -265,8 +265,8 @@ export async function getReplyFromConfig( timeoutSeconds, commandRunner, }); - if (meta.claudeMeta && isVerbose()) { - logVerbose(`Claude JSON meta: ${meta.claudeMeta}`); + if (meta.agentMeta && isVerbose()) { + logVerbose(`Agent meta: ${JSON.stringify(meta.agentMeta)}`); } return payload; } finally { diff --git a/src/config/config.ts b/src/config/config.ts index f0e46c6c4..ccb7c06b1 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -5,8 +5,9 @@ import path from "node:path"; import JSON5 from "json5"; import { z } from "zod"; +import type { AgentKind } from "../agents/index.js"; + export type ReplyMode = "text" | "command"; -export type ClaudeOutputFormat = "text" | "json" | "stream-json"; export type SessionScope = "per-sender" | "global"; export type SessionConfig = { @@ -56,18 +57,22 @@ export type WarelayConfig = { }; reply?: { mode: ReplyMode; - text?: string; // for mode=text, can contain {{Body}} - command?: string[]; // for mode=command, argv with templates - cwd?: string; // working directory for command execution - template?: string; // prepend template string when building command/prompt - timeoutSeconds?: number; // optional command timeout; defaults to 600s - bodyPrefix?: string; // optional string prepended to Body before 
templating - mediaUrl?: string; // optional media attachment (path or URL) + text?: string; + command?: string[]; + cwd?: string; + template?: string; + timeoutSeconds?: number; + bodyPrefix?: string; + mediaUrl?: string; session?: SessionConfig; - claudeOutputFormat?: ClaudeOutputFormat; // when command starts with `claude`, force an output format - mediaMaxMb?: number; // optional cap for outbound media (default 5MB) - typingIntervalSeconds?: number; // how often to refresh typing indicator while command runs - heartbeatMinutes?: number; // auto-ping cadence for command mode + mediaMaxMb?: number; + typingIntervalSeconds?: number; + heartbeatMinutes?: number; + agent?: { + kind: AgentKind; + format?: "text" | "json"; + identityPrefix?: string; + }; }; }; web?: WebConfig; @@ -105,13 +110,17 @@ const ReplySchema = z }) .optional(), heartbeatMinutes: z.number().int().nonnegative().optional(), - claudeOutputFormat: z - .union([ - z.literal("text"), - z.literal("json"), - z.literal("stream-json"), - z.undefined(), - ]) + agent: z + .object({ + kind: z.union([ + z.literal("claude"), + z.literal("opencode"), + z.literal("pi"), + z.literal("codex"), + ]), + format: z.union([z.literal("text"), z.literal("json")]).optional(), + identityPrefix: z.string().optional(), + }) .optional(), }) .refine( diff --git a/src/index.core.test.ts b/src/index.core.test.ts index 63093b801..f213719d4 100644 --- a/src/index.core.test.ts +++ b/src/index.core.test.ts @@ -762,7 +762,7 @@ describe("config and templating", () => { reply: { mode: "command" as const, command: ["claude", "{{Body}}"], - claudeOutputFormat: "text" as const, + agent: { kind: "claude", format: "text" as const }, }, }, }; @@ -802,7 +802,7 @@ describe("config and templating", () => { reply: { mode: "command" as const, command: ["claude", "{{Body}}"], - claudeOutputFormat: "json" as const, + agent: { kind: "claude", format: "json" as const }, }, }, }; @@ -830,7 +830,7 @@ describe("config and templating", () => { reply: { 
mode: "command" as const, command: ["claude", "{{Body}}"], - // No claudeOutputFormat set on purpose + agent: { kind: "claude" }, }, }, }; diff --git a/src/web/auto-reply.ts b/src/web/auto-reply.ts index 19634a3f1..7862f25cf 100644 --- a/src/web/auto-reply.ts +++ b/src/web/auto-reply.ts @@ -18,7 +18,7 @@ import { monitorWebInbox } from "./inbound.js"; import { sendViaIpc, startIpcServer, stopIpcServer } from "./ipc.js"; import { loadWebMedia } from "./media.js"; import { sendMessageWeb } from "./outbound.js"; -import { getQueueSize } from "../process/command-queue.js"; +import { enqueueCommand, getQueueSize } from "../process/command-queue.js"; import { computeBackoff, newConnectionId, @@ -621,19 +621,21 @@ export async function monitorWebProvider( : new Date().toISOString(); console.log(`\n[${tsDisplay}] ${from} -> ${latest.to}: ${combinedBody}`); - const replyResult = await (replyResolver ?? getReplyFromConfig)( - { - Body: combinedBody, - From: latest.from, - To: latest.to, - MessageSid: latest.id, - MediaPath: latest.mediaPath, - MediaUrl: latest.mediaUrl, - MediaType: latest.mediaType, - }, - { - onReplyStart: latest.sendComposing, - }, + const replyResult = await enqueueCommand(() => + (replyResolver ?? getReplyFromConfig)( + { + Body: combinedBody, + From: latest.from, + To: latest.to, + MessageSid: latest.id, + MediaPath: latest.mediaPath, + MediaUrl: latest.mediaUrl, + MediaType: latest.mediaType, + }, + { + onReplyStart: latest.sendComposing, + }, + ), ); if ( @@ -917,19 +919,24 @@ export async function monitorWebProvider( "reply heartbeat start", ); } - const replyResult = await (replyResolver ?? 
getReplyFromConfig)( - { - Body: HEARTBEAT_PROMPT, - From: lastInboundMsg.from, - To: lastInboundMsg.to, - MessageSid: snapshot.entry?.sessionId, - MediaPath: undefined, - MediaUrl: undefined, - MediaType: undefined, - }, - { - onReplyStart: lastInboundMsg.sendComposing, - }, + const hbFrom = lastInboundMsg.from; + const hbTo = lastInboundMsg.to; + const hbComposing = lastInboundMsg.sendComposing; + const replyResult = await enqueueCommand(() => + (replyResolver ?? getReplyFromConfig)( + { + Body: HEARTBEAT_PROMPT, + From: hbFrom, + To: hbTo, + MessageSid: snapshot.entry?.sessionId, + MediaPath: undefined, + MediaUrl: undefined, + MediaType: undefined, + }, + { + onReplyStart: hbComposing, + }, + ), ); if (