From 086dd284d610ad6a1a39677fb44f5925b203828c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 3 Dec 2025 09:04:37 +0000 Subject: [PATCH] Auto-reply: add /verbose directives and tool result replies --- CHANGELOG.md | 1 + README.md | 6 ++ docs/thinking.md | 6 ++ src/agents/pi.ts | 28 ++++++-- src/agents/types.ts | 1 + src/auto-reply/command-reply.ts | 16 +++++ src/auto-reply/reply.ts | 80 ++++++++++++++++++++++- src/config/config.ts | 2 + src/config/sessions.ts | 1 + src/index.core.test.ts | 109 ++++++++++++++++++++++++++++++++ 10 files changed, 242 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e9c02e7c..fa615eb8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Highlights - **Thinking directives & state:** `/t|/think|/thinking <level>` (aliases off|minimal|low|medium|high|max/highest). Inline applies to that message; directive-only message pins the level for the session; `/think:off` clears. Resolution: inline > session override > `inbound.reply.thinkingDefault` > off. Pi/Tau get `--thinking <level>` (except off); other agents append cue words (`think` → `think hard` → `think harder` → `ultrathink`). Heartbeat probe uses `HEARTBEAT /think:high`. +- **Verbose directives:** `/v|/verbose on|full|off` mirrors thinking: inline > session > config default. A directive-only message replies with an acknowledgement; invalid levels return a hint. When enabled, tool results from JSON-emitting agents (Pi/Tau, etc.) are forwarded as `🛠️ …` messages. - **Directive confirmations:** Directive-only messages now reply with an acknowledgement (`Thinking level set to high.` / `Thinking disabled.`) and reject unknown levels with a helpful hint (state is unchanged). - **Pi/Tau stability:** RPC replies buffered until the assistant turn finishes; parsers return consistent `texts[]`; web auto-replies keep a warm Tau RPC process to avoid cold starts. 
- **Claude prompt flow:** One-time `sessionIntro` with per-message `/think:high` bodyPrefix; system prompt always sent on first turn even with `sendSystemOnce`. diff --git a/README.md b/README.md index c16f81dd5..52c8b7e84 100644 --- a/README.md +++ b/README.md @@ -162,6 +162,12 @@ warelay supports running on the same phone number you message from—you chat wi - Resolution order: inline directive > session default > `inbound.reply.thinkingDefault` (config) > off. - `/think:off` (or no directive) leaves the prompt unchanged. +#### Verbose directives (`/verbose` or `/v`) +- Levels: `on|full` (equivalent) or `off` (default). Use `/v on`, `/verbose:full`, `/v off`, etc.; colon optional. +- Directive-only message sets a session-level verbose flag and replies `Verbose logging enabled.` / `Verbose logging disabled.`; invalid levels reply with a hint and don’t change state. +- Inline directive applies only to that message; resolution: inline > session default > `inbound.reply.verboseDefault` (config) > off. +- When verbose is on **and the agent emits structured tool results (Pi/Tau and other JSON-emitting agents)**, tool results are sent back as separate messages prefixed with `🛠️`. + ### Logging (optional) - File logs are written to `/tmp/warelay/warelay-YYYY-MM-DD.log` by default (rotated daily; files older than 24h are pruned). Levels: `silent | fatal | error | warn | info | debug | trace` (CLI `--verbose` forces `debug`). Web-provider inbound/outbound entries include message bodies and auto-reply text for easier auditing. - Override in `~/.warelay/warelay.json`: diff --git a/docs/thinking.md b/docs/thinking.md index 2baa04325..311608a9f 100644 --- a/docs/thinking.md +++ b/docs/thinking.md @@ -24,5 +24,11 @@ - **Pi/Tau**: injects `--thinking <level>` (skipped for `off`). - **Claude & other text agents**: appends the cue word to the prompt text as above. +## Verbose directives (/verbose or /v) +- Levels: `on|full` or `off` (default). 
+- Directive-only message toggles session verbose and replies `Verbose logging enabled.` / `Verbose logging disabled.`; invalid levels return a hint without changing state. +- Inline directive affects only that message; session/global defaults apply otherwise. +- When verbose is on, agents that emit structured tool results (Pi/Tau, other JSON agents) send each tool result back as its own message, prefixed with `🛠️`. + ## Heartbeats - Heartbeat probe body is `HEARTBEAT /think:high`, so it always asks for max thinking on the probe. Inline directive wins; session/global defaults are used only when no directive is present. diff --git a/src/agents/pi.ts b/src/agents/pi.ts index a9a75ff07..d2cbc2fd6 100644 --- a/src/agents/pi.ts +++ b/src/agents/pi.ts @@ -9,6 +9,7 @@ type PiAssistantMessage = { model?: string; provider?: string; stopReason?: string; + toolCallId?: string; }; function parsePiJson(raw: string): AgentParseResult { @@ -16,6 +17,7 @@ function parsePiJson(raw: string): AgentParseResult { // Collect only completed assistant messages (skip streaming updates/toolcalls). 
const texts: string[] = []; + const toolResults: string[] = []; let lastAssistant: PiAssistantMessage | undefined; let lastPushed: string | undefined; @@ -26,12 +28,17 @@ function parsePiJson(raw: string): AgentParseResult { message?: PiAssistantMessage; }; + const isToolResult = + (ev.type === "message" || ev.type === "message_end") && + ev.message?.role && + typeof ev.message.role === "string" && + ev.message.role.toLowerCase().includes("tool"); const isAssistantMessage = (ev.type === "message" || ev.type === "message_end") && ev.message?.role === "assistant" && Array.isArray(ev.message.content); - if (!isAssistantMessage) continue; + if (!isAssistantMessage && !isToolResult) continue; const msg = ev.message as PiAssistantMessage; const msgText = msg.content @@ -40,10 +47,19 @@ function parsePiJson(raw: string): AgentParseResult { .join("\n") .trim(); - if (msgText && msgText !== lastPushed) { - texts.push(msgText); - lastPushed = msgText; - lastAssistant = msg; + if (isAssistantMessage) { + if (msgText && msgText !== lastPushed) { + texts.push(msgText); + lastPushed = msgText; + lastAssistant = msg; + } + } else if (isToolResult && msg.content) { + const toolText = msg.content + ?.filter((c) => c?.type === "text" && typeof c.text === "string") + .map((c) => c.text) + .join("\n") + .trim(); + if (toolText) toolResults.push(toolText); } } catch { // ignore malformed lines @@ -60,7 +76,7 @@ function parsePiJson(raw: string): AgentParseResult { } : undefined; - return { texts, meta }; + return { texts, toolResults: toolResults.length ? toolResults : undefined, meta }; } export const piSpec: AgentSpec = { diff --git a/src/agents/types.ts b/src/agents/types.ts index 2c868847f..7d16b2b47 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -19,6 +19,7 @@ export type AgentParseResult = { // Plural to support agents that emit multiple assistant turns per prompt. 
texts?: string[]; mediaUrls?: string[]; + toolResults?: string[]; meta?: AgentMeta; }; diff --git a/src/auto-reply/command-reply.ts b/src/auto-reply/command-reply.ts index c89d5f4d6..8a7c42fd3 100644 --- a/src/auto-reply/command-reply.ts +++ b/src/auto-reply/command-reply.ts @@ -34,6 +34,7 @@ type CommandReplyParams = { commandRunner: typeof runCommandWithTimeout; enqueue?: EnqueueRunner; thinkLevel?: ThinkLevel; + verboseLevel?: "off" | "on"; }; export type CommandReplyMeta = { @@ -141,6 +142,7 @@ export async function runCommandReply( commandRunner, enqueue = enqueueCommand, thinkLevel, + verboseLevel, } = params; if (!reply.command?.length) { @@ -301,6 +303,8 @@ export async function runCommandReply( // Collect one message per assistant text from parseOutput (tau RPC can emit many). const parsedTexts = parsed?.texts?.map((t) => t.trim()).filter(Boolean) ?? []; + const parsedToolResults = + parsed?.toolResults?.map((t) => t.trim()).filter(Boolean) ?? []; type ReplyItem = { text: string; media?: string[] }; const replyItems: ReplyItem[] = []; @@ -314,6 +318,18 @@ export async function runCommandReply( }); } + if (verboseLevel === "on") { + for (const tr of parsedToolResults) { + const prefixed = `🛠️ ${tr}`; + const { text: cleanedText, mediaUrls: mediaFound } = + splitMediaFromOutput(prefixed); + replyItems.push({ + text: cleanedText, + media: mediaFound?.length ? mediaFound : undefined, + }); + } + } + // If parser gave nothing, fall back to raw stdout as a single message. 
if (replyItems.length === 0 && trimmed && !parserProvided) { const { text: cleanedText, mediaUrls: mediaFound } = diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 94ea4688e..2d6f95d83 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -34,6 +34,7 @@ const ABORT_TRIGGERS = new Set(["stop", "esc", "abort", "wait", "exit"]); const ABORT_MEMORY = new Map(); type ThinkLevel = "off" | "minimal" | "low" | "medium" | "high"; +type VerboseLevel = "off" | "on"; function normalizeThinkLevel(raw?: string | null): ThinkLevel | undefined { if (!raw) return undefined; @@ -50,6 +51,14 @@ function normalizeThinkLevel(raw?: string | null): ThinkLevel | undefined { return undefined; } +function normalizeVerboseLevel(raw?: string | null): VerboseLevel | undefined { + if (!raw) return undefined; + const key = raw.toLowerCase(); + if (["off", "false", "no", "0"].includes(key)) return "off"; + if (["on", "full", "true", "yes", "1"].includes(key)) return "on"; + return undefined; +} + function extractThinkDirective(body?: string): { cleaned: string; thinkLevel?: ThinkLevel; @@ -73,6 +82,26 @@ function extractThinkDirective(body?: string): { }; } +function extractVerboseDirective(body?: string): { + cleaned: string; + verboseLevel?: VerboseLevel; + rawLevel?: string; + hasDirective: boolean; +} { + if (!body) return { cleaned: "", hasDirective: false }; + const match = body.match(/\/(?:verbose|v)\s*:?\s*([a-zA-Z-]+)\b/i); + const verboseLevel = normalizeVerboseLevel(match?.[1]); + const cleaned = match + ? 
body.replace(match[0], "").replace(/\s+/g, " ").trim() + : body.trim(); + return { + cleaned, + verboseLevel, + rawLevel: match?.[1], + hasDirective: !!match, + }; +} + function isAbortTrigger(text?: string): boolean { if (!text) return false; const normalized = text.trim().toLowerCase(); @@ -156,6 +185,7 @@ export async function getReplyFromConfig( let abortedLastRun = false; let persistedThinking: string | undefined; + let persistedVerbose: string | undefined; if (sessionCfg) { const trimmedBody = (ctx.Body ?? "").trim(); @@ -185,6 +215,7 @@ export async function getReplyFromConfig( systemSent = entry.systemSent ?? false; abortedLastRun = entry.abortedLastRun ?? false; persistedThinking = entry.thinkingLevel; + persistedVerbose = entry.verboseLevel; } else { sessionId = crypto.randomUUID(); isNewSession = true; @@ -198,6 +229,7 @@ export async function getReplyFromConfig( systemSent, abortedLastRun, thinkingLevel: persistedThinking, + verboseLevel: persistedVerbose, }; sessionStore[sessionKey] = sessionEntry; await saveSessionStore(storePath, sessionStore); @@ -216,14 +248,25 @@ export async function getReplyFromConfig( rawLevel: rawThinkLevel, hasDirective: hasThinkDirective, } = extractThinkDirective(sessionCtx.BodyStripped ?? sessionCtx.Body ?? ""); - sessionCtx.Body = thinkCleaned; - sessionCtx.BodyStripped = thinkCleaned; + const { + cleaned: verboseCleaned, + verboseLevel: inlineVerbose, + rawLevel: rawVerboseLevel, + hasDirective: hasVerboseDirective, + } = extractVerboseDirective(thinkCleaned); + sessionCtx.Body = verboseCleaned; + sessionCtx.BodyStripped = verboseCleaned; let resolvedThinkLevel = inlineThink ?? (sessionEntry?.thinkingLevel as ThinkLevel | undefined) ?? (reply?.thinkingDefault as ThinkLevel | undefined); + let resolvedVerboseLevel = + inlineVerbose ?? + (sessionEntry?.verboseLevel as VerboseLevel | undefined) ?? 
+ (reply?.verboseDefault as VerboseLevel | undefined); + const directiveOnly = (() => { if (!hasThinkDirective) return false; if (!thinkCleaned) return true; @@ -258,6 +301,38 @@ export async function getReplyFromConfig( return { text: ack }; } + const verboseDirectiveOnly = (() => { + if (!hasVerboseDirective) return false; + if (!verboseCleaned) return true; + const stripped = verboseCleaned.replace(/\[[^\]]+\]\s*/g, "").trim(); + return stripped.length === 0; + })(); + + if (verboseDirectiveOnly) { + if (!inlineVerbose) { + cleanupTyping(); + return { + text: `Unrecognized verbose level "${rawVerboseLevel ?? ""}". Valid levels: off, on.`, + }; + } + if (sessionEntry && sessionStore && sessionKey) { + if (inlineVerbose === "off") { + delete sessionEntry.verboseLevel; + } else { + sessionEntry.verboseLevel = inlineVerbose; + } + sessionEntry.updatedAt = Date.now(); + sessionStore[sessionKey] = sessionEntry; + await saveSessionStore(storePath, sessionStore); + } + const ack = + inlineVerbose === "off" + ? "Verbose logging disabled." + : "Verbose logging enabled."; + cleanupTyping(); + return { text: ack }; + } + // Optional allowlist by origin number (E.164 without whatsapp: prefix) const allowFrom = cfg.inbound?.allowFrom; const from = (ctx.From ?? "").replace(/^whatsapp:/, ""); @@ -445,6 +520,7 @@ export async function getReplyFromConfig( timeoutSeconds, commandRunner, thinkLevel: resolvedThinkLevel, + verboseLevel: resolvedVerboseLevel, }); const payloadArray = runResult.payloads ?? 
[]; const meta = runResult.meta; diff --git a/src/config/config.ts b/src/config/config.ts index cd23696b0..bba0b86a5 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -61,6 +61,7 @@ export type WarelayConfig = { command?: string[]; heartbeatCommand?: string[]; thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high"; + verboseDefault?: "off" | "on"; cwd?: string; template?: string; timeoutSeconds?: number; @@ -97,6 +98,7 @@ const ReplySchema = z z.literal("high"), ]) .optional(), + verboseDefault: z.union([z.literal("off"), z.literal("on")]).optional(), cwd: z.string().optional(), template: z.string().optional(), timeoutSeconds: z.number().int().positive().optional(), diff --git a/src/config/sessions.ts b/src/config/sessions.ts index ccd7ea540..cb437f315 100644 --- a/src/config/sessions.ts +++ b/src/config/sessions.ts @@ -14,6 +14,7 @@ export type SessionEntry = { systemSent?: boolean; abortedLastRun?: boolean; thinkingLevel?: string; + verboseLevel?: string; }; export const SESSION_STORE_DEFAULT = path.join(CONFIG_DIR, "sessions.json"); diff --git a/src/index.core.test.ts b/src/index.core.test.ts index fbcd1854b..a6ca9420a 100644 --- a/src/index.core.test.ts +++ b/src/index.core.test.ts @@ -641,6 +641,115 @@ describe("config and templating", () => { expect(ack?.text).toBe("Thinking level set to high."); }); + it("enables verbose via directive-only and skips command", async () => { + const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({ + stdout: "ok", + stderr: "", + code: 0, + signal: null, + killed: false, + }); + const cfg = { + inbound: { + reply: { + mode: "command" as const, + command: ["echo", "{{Body}}"], + agent: { kind: "claude" }, + }, + }, + }; + + const ack = await index.getReplyFromConfig( + { Body: "/v:on", From: "+1", To: "+2" }, + undefined, + cfg, + runSpy, + ); + + expect(runSpy).not.toHaveBeenCalled(); + expect(ack?.text).toBe("Verbose logging enabled."); + }); + + it("rejects invalid verbose 
directive-only and preserves state", async () => { + const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({ + stdout: "ok", + stderr: "", + code: 0, + signal: null, + killed: false, + }); + const storeDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), "warelay-session-"), + ); + const storePath = path.join(storeDir, "sessions.json"); + const cfg = { + inbound: { + reply: { + mode: "command" as const, + command: ["echo", "{{Body}}"], + agent: { kind: "claude" }, + session: { store: storePath }, + }, + }, + }; + + const ack = await index.getReplyFromConfig( + { Body: "/verbose maybe", From: "+1", To: "+2" }, + undefined, + cfg, + runSpy, + ); + + expect(runSpy).not.toHaveBeenCalled(); + expect(ack?.text).toContain("Unrecognized verbose level"); + + await index.getReplyFromConfig( + { Body: "hi", From: "+1", To: "+2" }, + undefined, + cfg, + runSpy, + ); + expect(runSpy).toHaveBeenCalledTimes(1); + const args = runSpy.mock.calls[0][0] as string[]; + const bodyArg = args[args.length - 1]; + expect(bodyArg).toBe("hi"); + }); + + it("shows tool results when verbose is on for pi", async () => { + const rpcSpy = vi.spyOn(tauRpc, "runPiRpc").mockResolvedValue({ + stdout: + '{"type":"message","message":{"role":"assistant","content":[{"type":"text","text":"summary"}]}}\n' + + '{"type":"message_end","message":{"role":"tool_result","content":[{"type":"text","text":"ls output"}]}}', + stderr: "", + code: 0, + signal: null, + killed: false, + }); + const cfg = { + inbound: { + reply: { + mode: "command" as const, + command: ["pi", "--mode", "json", "{{Body}}"], + agent: { kind: "pi" }, + }, + }, + }; + + const res = await index.getReplyFromConfig( + { Body: "/v on hi", From: "+1", To: "+2" }, + undefined, + cfg, + ); + + expect(rpcSpy).toHaveBeenCalled(); + const payloads = Array.isArray(res) ? res : res ? 
[res] : []; + expect(payloads.length).toBeGreaterThanOrEqual(2); + expect(payloads[0]?.text).toContain("summary"); + expect(payloads.find((p) => p.text?.includes("🛠️"))?.text).toContain( + "ls output", + ); + }); + it("treats directive-only even when bracket prefixes are present", async () => { const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({ stdout: "ok",