From 086dd284d610ad6a1a39677fb44f5925b203828c Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 3 Dec 2025 09:04:37 +0000
Subject: [PATCH] Auto-reply: add /verbose directives and tool result replies

---
 CHANGELOG.md                    |   1 +
 README.md                       |   6 ++
 docs/thinking.md                |   6 ++
 src/agents/pi.ts                |  28 ++++++--
 src/agents/types.ts             |   1 +
 src/auto-reply/command-reply.ts |  16 +++++
 src/auto-reply/reply.ts         |  80 ++++++++++++++++++++++-
 src/config/config.ts            |   2 +
 src/config/sessions.ts          |   1 +
 src/index.core.test.ts          | 109 ++++++++++++++++++++++++++++++++
 10 files changed, 242 insertions(+), 8 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e9c02e7c..fa615eb8a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### Highlights
 - **Thinking directives & state:** `/t|/think|/thinking <level>` (aliases off|minimal|low|medium|high|max/highest). Inline applies to that message; directive-only message pins the level for the session; `/think:off` clears. Resolution: inline > session override > `inbound.reply.thinkingDefault` > off. Pi/Tau get `--thinking <level>` (except off); other agents append cue words (`think` → `think hard` → `think harder` → `ultrathink`). Heartbeat probe uses `HEARTBEAT /think:high`.
+- **Verbose directives:** `/v|/verbose on|full|off` uses the same resolution order as thinking: inline > session > config default. A directive-only message replies with an acknowledgement; invalid levels return a hint and leave state unchanged. When enabled, tool results from JSON-emitting agents (Pi/Tau, etc.) are forwarded as `🛠️ …` messages.
 - **Directive confirmations:** Directive-only messages now reply with an acknowledgement (`Thinking level set to high.` / `Thinking disabled.`) and reject unknown levels with a helpful hint (state is unchanged).
 - **Pi/Tau stability:** RPC replies buffered until the assistant turn finishes; parsers return consistent `texts[]`; web auto-replies keep a warm Tau RPC process to avoid cold starts.
 - **Claude prompt flow:** One-time `sessionIntro` with per-message `/think:high` bodyPrefix; system prompt always sent on first turn even with `sendSystemOnce`.
diff --git a/README.md b/README.md
index c16f81dd5..52c8b7e84 100644
--- a/README.md
+++ b/README.md
@@ -162,6 +162,12 @@ warelay supports running on the same phone number you message from—you chat wi
 - Resolution order: inline directive > session default > `inbound.reply.thinkingDefault` (config) > off.
 - `/think:off` (or no directive) leaves the prompt unchanged.
 
+#### Verbose directives (`/verbose` or `/v`)
+- Levels: `on` or `full` (equivalent), or `off` (default). Use `/v on`, `/verbose:full`, `/v off`, etc.; the colon is optional.
+- Directive-only message sets a session-level verbose flag and replies `Verbose logging enabled.` / `Verbose logging disabled.`; invalid levels reply with a hint and don’t change state.
+- Inline directive applies only to that message; resolution: inline > session default > `inbound.reply.verboseDefault` (config) > off.
+- When verbose is on **and the agent emits structured tool results (Pi/Tau and other JSON-emitting agents)**, tool results are sent back as separate messages prefixed with `🛠️`.
+
 ### Logging (optional)
 - File logs are written to `/tmp/warelay/warelay-YYYY-MM-DD.log` by default (rotated daily; files older than 24h are pruned). Levels: `silent | fatal | error | warn | info | debug | trace` (CLI `--verbose` forces `debug`). Web-provider inbound/outbound entries include message bodies and auto-reply text for easier auditing.
 - Override in `~/.warelay/warelay.json`:
diff --git a/docs/thinking.md b/docs/thinking.md
index 2baa04325..311608a9f 100644
--- a/docs/thinking.md
+++ b/docs/thinking.md
@@ -24,5 +24,11 @@
 - **Pi/Tau**: injects `--thinking <level>` (skipped for `off`).
 - **Claude & other text agents**: appends the cue word to the prompt text as above.
 
+## Verbose directives (/verbose or /v)
+- Levels: `on|full` or `off` (default).
+- Directive-only message sets the session verbose level and replies `Verbose logging enabled.` / `Verbose logging disabled.`; invalid levels return a hint without changing state.
+- Inline directive affects only that message; session/global defaults apply otherwise.
+- When verbose is on, agents that emit structured tool results (Pi/Tau, other JSON agents) send each tool result back as its own message, prefixed with `🛠️`.
+
 ## Heartbeats
 - Heartbeat probe body is `HEARTBEAT /think:high`, so it always asks for max thinking on the probe. Inline directive wins; session/global defaults are used only when no directive is present.
diff --git a/src/agents/pi.ts b/src/agents/pi.ts
index a9a75ff07..d2cbc2fd6 100644
--- a/src/agents/pi.ts
+++ b/src/agents/pi.ts
@@ -9,6 +9,7 @@ type PiAssistantMessage = {
   model?: string;
   provider?: string;
   stopReason?: string;
+  toolCallId?: string;
 };
 
 function parsePiJson(raw: string): AgentParseResult {
@@ -16,6 +17,7 @@ function parsePiJson(raw: string): AgentParseResult {
 
   // Collect only completed assistant messages (skip streaming updates/toolcalls).
   const texts: string[] = [];
+  const toolResults: string[] = [];
   let lastAssistant: PiAssistantMessage | undefined;
   let lastPushed: string | undefined;
 
@@ -26,12 +28,17 @@ function parsePiJson(raw: string): AgentParseResult {
         message?: PiAssistantMessage;
       };
 
+      const isToolResult =
+        (ev.type === "message" || ev.type === "message_end") &&
+        ev.message?.role &&
+        typeof ev.message.role === "string" &&
+        ev.message.role.toLowerCase().includes("tool");
       const isAssistantMessage =
         (ev.type === "message" || ev.type === "message_end") &&
         ev.message?.role === "assistant" &&
         Array.isArray(ev.message.content);
 
-      if (!isAssistantMessage) continue;
+      if (!isAssistantMessage && !isToolResult) continue;
 
       const msg = ev.message as PiAssistantMessage;
       const msgText = msg.content
@@ -40,10 +47,19 @@ function parsePiJson(raw: string): AgentParseResult {
         .join("\n")
         .trim();
 
-      if (msgText && msgText !== lastPushed) {
-        texts.push(msgText);
-        lastPushed = msgText;
-        lastAssistant = msg;
+      if (isAssistantMessage) {
+        if (msgText && msgText !== lastPushed) {
+          texts.push(msgText);
+          lastPushed = msgText;
+          lastAssistant = msg;
+        }
+      } else if (isToolResult && msg.content) {
+        const toolText = msg.content
+          ?.filter((c) => c?.type === "text" && typeof c.text === "string")
+          .map((c) => c.text)
+          .join("\n")
+          .trim();
+        if (toolText) toolResults.push(toolText);
       }
     } catch {
       // ignore malformed lines
@@ -60,7 +76,7 @@ function parsePiJson(raw: string): AgentParseResult {
         }
       : undefined;
 
-  return { texts, meta };
+  return { texts, toolResults: toolResults.length ? toolResults : undefined, meta };
 }
 
 export const piSpec: AgentSpec = {
diff --git a/src/agents/types.ts b/src/agents/types.ts
index 2c868847f..7d16b2b47 100644
--- a/src/agents/types.ts
+++ b/src/agents/types.ts
@@ -19,6 +19,7 @@ export type AgentParseResult = {
   // Plural to support agents that emit multiple assistant turns per prompt.
   texts?: string[];
   mediaUrls?: string[];
+  toolResults?: string[];
   meta?: AgentMeta;
 };
 
diff --git a/src/auto-reply/command-reply.ts b/src/auto-reply/command-reply.ts
index c89d5f4d6..8a7c42fd3 100644
--- a/src/auto-reply/command-reply.ts
+++ b/src/auto-reply/command-reply.ts
@@ -34,6 +34,7 @@ type CommandReplyParams = {
   commandRunner: typeof runCommandWithTimeout;
   enqueue?: EnqueueRunner;
   thinkLevel?: ThinkLevel;
+  verboseLevel?: "off" | "on";
 };
 
 export type CommandReplyMeta = {
@@ -141,6 +142,7 @@ export async function runCommandReply(
     commandRunner,
     enqueue = enqueueCommand,
     thinkLevel,
+    verboseLevel,
   } = params;
 
   if (!reply.command?.length) {
@@ -301,6 +303,8 @@ export async function runCommandReply(
     // Collect one message per assistant text from parseOutput (tau RPC can emit many).
     const parsedTexts =
       parsed?.texts?.map((t) => t.trim()).filter(Boolean) ?? [];
+    const parsedToolResults =
+      parsed?.toolResults?.map((t) => t.trim()).filter(Boolean) ?? [];
 
     type ReplyItem = { text: string; media?: string[] };
     const replyItems: ReplyItem[] = [];
@@ -314,6 +318,18 @@ export async function runCommandReply(
       });
     }
 
+    if (verboseLevel === "on") {
+      for (const tr of parsedToolResults) {
+        const prefixed = `🛠️ ${tr}`;
+        const { text: cleanedText, mediaUrls: mediaFound } =
+          splitMediaFromOutput(prefixed);
+        replyItems.push({
+          text: cleanedText,
+          media: mediaFound?.length ? mediaFound : undefined,
+        });
+      }
+    }
+
     // If parser gave nothing, fall back to raw stdout as a single message.
     if (replyItems.length === 0 && trimmed && !parserProvided) {
       const { text: cleanedText, mediaUrls: mediaFound } =
diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts
index 94ea4688e..2d6f95d83 100644
--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@@ -34,6 +34,7 @@ const ABORT_TRIGGERS = new Set(["stop", "esc", "abort", "wait", "exit"]);
 const ABORT_MEMORY = new Map<string, boolean>();
 
 type ThinkLevel = "off" | "minimal" | "low" | "medium" | "high";
+type VerboseLevel = "off" | "on";
 
 function normalizeThinkLevel(raw?: string | null): ThinkLevel | undefined {
 	if (!raw) return undefined;
@@ -50,6 +51,14 @@ function normalizeThinkLevel(raw?: string | null): ThinkLevel | undefined {
 	return undefined;
 }
 
+// Maps a verbose keyword (case-insensitive) to "on"/"off"; empty or unknown input yields undefined.
+function normalizeVerboseLevel(raw?: string | null): VerboseLevel | undefined {
+	const token = raw ? raw.toLowerCase() : "";
+	const isOff = ["off", "false", "no", "0"].includes(token);
+	const isOn = ["on", "full", "true", "yes", "1"].includes(token);
+	return isOff ? "off" : isOn ? "on" : undefined;
+}
+
 function extractThinkDirective(body?: string): {
 	cleaned: string;
 	thinkLevel?: ThinkLevel;
@@ -73,6 +82,26 @@ function extractThinkDirective(body?: string): {
 	};
 }
 
+// Pulls a `/verbose` or `/v` directive plus its level word out of `body` and returns the remaining text.
+// The slash must begin a token (start of text, after whitespace, or after a `]` prefix) and the level must
+// follow whitespace or a colon, so paths/URLs such as `/var/log` or `host/video` are not treated as directives.
+function extractVerboseDirective(body?: string): {
+	cleaned: string;
+	verboseLevel?: VerboseLevel;
+	rawLevel?: string;
+	hasDirective: boolean;
+} {
+	if (!body) return { cleaned: "", hasDirective: false };
+	const match = body.match(/(?<![^\s\]])\/(?:verbose|v)(?:\s*:\s*|\s+)([a-zA-Z-]+)\b/i);
+	const rawLevel = match?.[1];
+	// Remove the directive and collapse whitespace; an unknown level still reports hasDirective with rawLevel.
+	const cleaned = match
+		? body.replace(match[0], "").replace(/\s+/g, " ").trim()
+		: body.trim();
+	const verboseLevel = normalizeVerboseLevel(rawLevel);
+	return { cleaned, verboseLevel, rawLevel, hasDirective: !!match };
+}
+
 function isAbortTrigger(text?: string): boolean {
   if (!text) return false;
   const normalized = text.trim().toLowerCase();
@@ -156,6 +185,7 @@ export async function getReplyFromConfig(
   let abortedLastRun = false;
 
   let persistedThinking: string | undefined;
+  let persistedVerbose: string | undefined;
 
   if (sessionCfg) {
     const trimmedBody = (ctx.Body ?? "").trim();
@@ -185,6 +215,7 @@ export async function getReplyFromConfig(
       systemSent = entry.systemSent ?? false;
       abortedLastRun = entry.abortedLastRun ?? false;
       persistedThinking = entry.thinkingLevel;
+      persistedVerbose = entry.verboseLevel;
     } else {
       sessionId = crypto.randomUUID();
       isNewSession = true;
@@ -198,6 +229,7 @@ export async function getReplyFromConfig(
       systemSent,
       abortedLastRun,
       thinkingLevel: persistedThinking,
+      verboseLevel: persistedVerbose,
     };
     sessionStore[sessionKey] = sessionEntry;
     await saveSessionStore(storePath, sessionStore);
@@ -216,14 +248,25 @@ export async function getReplyFromConfig(
 		rawLevel: rawThinkLevel,
 		hasDirective: hasThinkDirective,
 	} = extractThinkDirective(sessionCtx.BodyStripped ?? sessionCtx.Body ?? "");
-	sessionCtx.Body = thinkCleaned;
-	sessionCtx.BodyStripped = thinkCleaned;
+	const {
+		cleaned: verboseCleaned,
+		verboseLevel: inlineVerbose,
+		rawLevel: rawVerboseLevel,
+		hasDirective: hasVerboseDirective,
+	} = extractVerboseDirective(thinkCleaned);
+	sessionCtx.Body = verboseCleaned;
+	sessionCtx.BodyStripped = verboseCleaned;
 
 	let resolvedThinkLevel =
 		inlineThink ??
 		(sessionEntry?.thinkingLevel as ThinkLevel | undefined) ??
 		(reply?.thinkingDefault as ThinkLevel | undefined);
 
+	let resolvedVerboseLevel =
+		inlineVerbose ??
+		(sessionEntry?.verboseLevel as VerboseLevel | undefined) ??
+		(reply?.verboseDefault as VerboseLevel | undefined);
+
 	const directiveOnly = (() => {
 		if (!hasThinkDirective) return false;
 		if (!thinkCleaned) return true;
@@ -258,6 +301,38 @@ export async function getReplyFromConfig(
 		return { text: ack };
 	}
 
+	const verboseDirectiveOnly = (() => {
+		if (!hasVerboseDirective) return false;
+		if (!verboseCleaned) return true;
+		const stripped = verboseCleaned.replace(/\[[^\]]+\]\s*/g, "").trim();
+		return stripped.length === 0;
+	})();
+
+	if (verboseDirectiveOnly) {
+		if (!inlineVerbose) {
+			cleanupTyping();
+			return {
+				text: `Unrecognized verbose level "${rawVerboseLevel ?? ""}". Valid levels: off, on.`,
+			};
+		}
+		if (sessionEntry && sessionStore && sessionKey) {
+			if (inlineVerbose === "off") {
+				delete sessionEntry.verboseLevel;
+			} else {
+				sessionEntry.verboseLevel = inlineVerbose;
+			}
+			sessionEntry.updatedAt = Date.now();
+			sessionStore[sessionKey] = sessionEntry;
+			await saveSessionStore(storePath, sessionStore);
+		}
+		const ack =
+			inlineVerbose === "off"
+				? "Verbose logging disabled."
+				: "Verbose logging enabled.";
+		cleanupTyping();
+		return { text: ack };
+	}
+
   // Optional allowlist by origin number (E.164 without whatsapp: prefix)
   const allowFrom = cfg.inbound?.allowFrom;
   const from = (ctx.From ?? "").replace(/^whatsapp:/, "");
@@ -445,6 +520,7 @@ export async function getReplyFromConfig(
         timeoutSeconds,
         commandRunner,
         thinkLevel: resolvedThinkLevel,
+        verboseLevel: resolvedVerboseLevel,
       });
       const payloadArray = runResult.payloads ?? [];
       const meta = runResult.meta;
diff --git a/src/config/config.ts b/src/config/config.ts
index cd23696b0..bba0b86a5 100644
--- a/src/config/config.ts
+++ b/src/config/config.ts
@@ -61,6 +61,7 @@ export type WarelayConfig = {
       command?: string[];
       heartbeatCommand?: string[];
       thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high";
+      verboseDefault?: "off" | "on";
       cwd?: string;
       template?: string;
       timeoutSeconds?: number;
@@ -97,6 +98,7 @@ const ReplySchema = z
         z.literal("high"),
       ])
       .optional(),
+    verboseDefault: z.union([z.literal("off"), z.literal("on")]).optional(),
     cwd: z.string().optional(),
     template: z.string().optional(),
     timeoutSeconds: z.number().int().positive().optional(),
diff --git a/src/config/sessions.ts b/src/config/sessions.ts
index ccd7ea540..cb437f315 100644
--- a/src/config/sessions.ts
+++ b/src/config/sessions.ts
@@ -14,6 +14,7 @@ export type SessionEntry = {
 	systemSent?: boolean;
 	abortedLastRun?: boolean;
 	thinkingLevel?: string;
+	verboseLevel?: string;
 };
 
 export const SESSION_STORE_DEFAULT = path.join(CONFIG_DIR, "sessions.json");
diff --git a/src/index.core.test.ts b/src/index.core.test.ts
index fbcd1854b..a6ca9420a 100644
--- a/src/index.core.test.ts
+++ b/src/index.core.test.ts
@@ -641,6 +641,115 @@ describe("config and templating", () => {
     expect(ack?.text).toBe("Thinking level set to high.");
   });
 
+  it("enables verbose via directive-only and skips command", async () => {
+    const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({
+      stdout: "ok",
+      stderr: "",
+      code: 0,
+      signal: null,
+      killed: false,
+    });
+    const cfg = {
+      inbound: {
+        reply: {
+          mode: "command" as const,
+          command: ["echo", "{{Body}}"],
+          agent: { kind: "claude" },
+        },
+      },
+    };
+
+    const ack = await index.getReplyFromConfig(
+      { Body: "/v:on", From: "+1", To: "+2" },
+      undefined,
+      cfg,
+      runSpy,
+    );
+
+    expect(runSpy).not.toHaveBeenCalled();
+    expect(ack?.text).toBe("Verbose logging enabled.");
+  });
+
+  it("rejects invalid verbose directive-only and preserves state", async () => {
+    const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({
+      stdout: "ok",
+      stderr: "",
+      code: 0,
+      signal: null,
+      killed: false,
+    });
+    const storeDir = await fs.promises.mkdtemp(
+      path.join(os.tmpdir(), "warelay-session-"),
+    );
+    const storePath = path.join(storeDir, "sessions.json");
+    const cfg = {
+      inbound: {
+        reply: {
+          mode: "command" as const,
+          command: ["echo", "{{Body}}"],
+          agent: { kind: "claude" },
+          session: { store: storePath },
+        },
+      },
+    };
+
+    const ack = await index.getReplyFromConfig(
+      { Body: "/verbose maybe", From: "+1", To: "+2" },
+      undefined,
+      cfg,
+      runSpy,
+    );
+
+    expect(runSpy).not.toHaveBeenCalled();
+    expect(ack?.text).toContain("Unrecognized verbose level");
+
+    await index.getReplyFromConfig(
+      { Body: "hi", From: "+1", To: "+2" },
+      undefined,
+      cfg,
+      runSpy,
+    );
+    expect(runSpy).toHaveBeenCalledTimes(1);
+    const args = runSpy.mock.calls[0][0] as string[];
+    const bodyArg = args[args.length - 1];
+    expect(bodyArg).toBe("hi");
+  });
+
+  it("shows tool results when verbose is on for pi", async () => {
+    const rpcSpy = vi.spyOn(tauRpc, "runPiRpc").mockResolvedValue({
+      stdout:
+        '{"type":"message","message":{"role":"assistant","content":[{"type":"text","text":"summary"}]}}\n' +
+        '{"type":"message_end","message":{"role":"tool_result","content":[{"type":"text","text":"ls output"}]}}',
+      stderr: "",
+      code: 0,
+      signal: null,
+      killed: false,
+    });
+    const cfg = {
+      inbound: {
+        reply: {
+          mode: "command" as const,
+          command: ["pi", "--mode", "json", "{{Body}}"],
+          agent: { kind: "pi" },
+        },
+      },
+    };
+
+    const res = await index.getReplyFromConfig(
+      { Body: "/v on hi", From: "+1", To: "+2" },
+      undefined,
+      cfg,
+    );
+
+    expect(rpcSpy).toHaveBeenCalled();
+    const payloads = Array.isArray(res) ? res : res ? [res] : [];
+    expect(payloads.length).toBeGreaterThanOrEqual(2);
+    expect(payloads[0]?.text).toContain("summary");
+    expect(payloads.find((p) => p.text?.includes("🛠️"))?.text).toContain(
+      "ls output",
+    );
+  });
+
   it("treats directive-only even when bracket prefixes are present", async () => {
     const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({
       stdout: "ok",