diff --git a/CHANGELOG.md b/CHANGELOG.md index a105657ea..068493ae4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Pending - Web auto-replies now resize/recompress media and honor `inbound.reply.mediaMaxMb` in `~/.warelay/warelay.json` (default 5 MB) to avoid provider/API limits. - Web provider now detects media kind (image/audio/video/document), logs the source path, and enforces provider caps: images ≤6 MB, audio/video ≤16 MB, documents ≤100 MB; images still target the configurable cap above with resize + JPEG recompress. +- Sessions can now send the system prompt only once: set `inbound.reply.session.sendSystemOnce` (optional `sessionIntro` for the first turn) to avoid re-sending large prompts every message. - Optional voice-note transcription: set `inbound.transcribeAudio.command` (e.g., OpenAI Whisper CLI) to turn inbound audio into text before templating/Claude; verbose logs surface when transcription runs. Prompts now include the original media path plus a `Transcript:` block so models see both. ## 1.0.4 — 2025-11-25 diff --git a/README.md b/README.md index 4e143d4a4..194d2a3bc 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ Best practice: use a dedicated WhatsApp account (separate SIM/eSIM or business a ### Claude CLI setup (how we run it) 1) Install the official Claude CLI (e.g., `brew install anthropic-ai/cli/claude` or follow the Anthropic docs) and run `claude login` so it can read your API key. 2) In `warelay.json`, set `reply.mode` to `"command"` and point `command[0]` to `"claude"`; set `claudeOutputFormat` to `"text"` (or `"json"`/`"stream-json"` if you want warelay to parse and trim the JSON output). -3) (Optional) Add `bodyPrefix` to inject a system prompt and `session` settings to keep multi-turn context (`/new` resets by default). +3) (Optional) Add `bodyPrefix` to inject a system prompt and `session` settings to keep multi-turn context (`/new` resets by default). 
Set `sendSystemOnce: true` (plus an optional `sessionIntro`) to only send that prompt on the first turn of each session. 4) Run `pnpm warelay relay --provider auto` (or `--provider web|twilio`) and send a WhatsApp message; warelay will queue the Claude call, stream typing indicators (Twilio provider), parse the result, and send back the text. ### Auto-reply parameter table (compact) @@ -151,6 +151,8 @@ Best practice: use a dedicated WhatsApp account (separate SIM/eSIM or business a | `inbound.reply.session.resetTriggers` | `string[]` (default: `["/new"]`) | Exact match or prefix (`/new hi`) resets session. | | `inbound.reply.session.idleMinutes` | `number` (default: `60`) | Session expires after idle period. | | `inbound.reply.session.store` | `string` (default: `~/.warelay/sessions.json`) | Custom session store path. | +| `inbound.reply.session.sendSystemOnce` | `boolean` (default: `false`) | If `true`, only include the system prompt/template on the first turn of a session. | +| `inbound.reply.session.sessionIntro` | `string` | Optional intro text prepended before the body on the first turn of a session; it is limited to once per session only when `sendSystemOnce` is `true` (otherwise it is prepended on every turn). | | `inbound.reply.session.sessionArgNew` | `string[]` (default: `["--session-id","{{SessionId}}"]`) | Args injected for a new session run. | | `inbound.reply.session.sessionArgResume` | `string[]` (default: `["--resume","{{SessionId}}"]`) | Args for resumed sessions. | | `inbound.reply.session.sessionArgBeforeBody` | `boolean` (default: `true`) | Place session args before final body arg. | diff --git a/docs/claude-config.md b/docs/claude-config.md index 63e3f612e..56aa07434 100644 --- a/docs/claude-config.md +++ b/docs/claude-config.md @@ -61,6 +61,7 @@ Notes on this configuration: - Inbound media is downloaded (≤5 MB) and exposed to your templates as `{{MediaPath}}`, `{{MediaUrl}}`, and `{{MediaType}}`. You can mention this in your prompt if you want Claude to reason about the attachment. 
- Outbound media from Claude (via `MEDIA:`) follows provider caps: Web resizes images to the configured target (`inbound.reply.mediaMaxMb`, default 5 MB) within hard limits of 6 MB (image), 16 MB (audio/video voice notes), and 100 MB (documents); Twilio still uses the Funnel host with a 5 MB guard. - Voice notes: set `inbound.transcribeAudio.command` to run a CLI that emits the transcript to stdout (e.g., OpenAI Whisper: `openai api audio.transcriptions.create -m whisper-1 -f {{MediaPath}} --response-format text`). If it succeeds, warelay replaces `Body` with the transcript and adds the original media path plus a `Transcript:` block into the prompt before invoking Claude. +- To avoid re-sending long system prompts every turn, set `inbound.reply.session.sendSystemOnce: true` and keep your prompt in `bodyPrefix` or `sessionIntro`; they are sent only on the first message of each session (resets on `/new` or idle expiry). ## Testing the setup 1. Start a relay (auto-selects Web when logged in, otherwise Twilio polling): diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 264fc8b6c..854525222 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -134,10 +134,13 @@ export async function getReplyFromConfig( ); const sessionScope = sessionCfg?.scope ?? "per-sender"; const storePath = resolveStorePath(sessionCfg?.store); + let sessionStore: ReturnType<typeof loadSessionStore> | undefined; + let sessionKey: string | undefined; let sessionId: string | undefined; let isNewSession = false; let bodyStripped: string | undefined; + let systemSent = false; if (sessionCfg) { const trimmedBody = (ctx.Body ?? 
"").trim(); @@ -156,21 +159,23 @@ export async function getReplyFromConfig( } } - const sessionKey = deriveSessionKey(sessionScope, ctx); - const store = loadSessionStore(storePath); - const entry = store[sessionKey]; + sessionKey = deriveSessionKey(sessionScope, ctx); + sessionStore = loadSessionStore(storePath); + const entry = sessionStore[sessionKey]; const idleMs = idleMinutes * 60_000; const freshEntry = entry && Date.now() - entry.updatedAt <= idleMs; if (!isNewSession && freshEntry) { sessionId = entry.sessionId; + systemSent = entry.systemSent ?? false; } else { sessionId = crypto.randomUUID(); isNewSession = true; + systemSent = false; } - store[sessionKey] = { sessionId, updatedAt: Date.now() }; - await saveSessionStore(storePath, store); + sessionStore[sessionKey] = { sessionId, updatedAt: Date.now(), systemSent }; + await saveSessionStore(storePath, sessionStore); } const sessionCtx: TemplateContext = { @@ -193,12 +198,43 @@ export async function getReplyFromConfig( } // Optional prefix injected before Body for templating/command prompts. + const sendSystemOnce = sessionCfg?.sendSystemOnce === true; + const isFirstTurnInSession = isNewSession || !systemSent; + const sessionIntro = + isFirstTurnInSession && sessionCfg?.sessionIntro + ? applyTemplate(sessionCfg.sessionIntro, sessionCtx) + : ""; const bodyPrefix = reply?.bodyPrefix ? applyTemplate(reply.bodyPrefix, sessionCtx) : ""; - const prefixedBodyBase = bodyPrefix - ? `${bodyPrefix}${sessionCtx.BodyStripped ?? sessionCtx.Body ?? ""}` - : (sessionCtx.BodyStripped ?? sessionCtx.Body); + const baseBody = sessionCtx.BodyStripped ?? sessionCtx.Body ?? ""; + const prefixedBodyBase = (() => { + let body = baseBody; + if (!sendSystemOnce || isFirstTurnInSession) { + body = bodyPrefix ? 
`${bodyPrefix}${body}` : body; + } + if (sessionIntro) { + body = `${sessionIntro}\n\n${body}`; + } + return body; + })(); + if ( + sessionCfg && + sendSystemOnce && + isFirstTurnInSession && + sessionStore && + sessionKey + ) { + sessionStore[sessionKey] = { + ...(sessionStore[sessionKey] ?? {}), + sessionId: sessionId ?? crypto.randomUUID(), + updatedAt: Date.now(), + systemSent: true, + }; + await saveSessionStore(storePath, sessionStore); + systemSent = true; + } + const prefixedBody = transcribedText && reply?.mode === "command" ? [prefixedBodyBase, `Transcript:\n${transcribedText}`] @@ -241,9 +277,10 @@ export async function getReplyFromConfig( if (reply.mode === "command" && reply.command?.length) { await onReplyStart(); let argv = reply.command.map((part) => applyTemplate(part, templatingCtx)); - const templatePrefix = reply.template - ? applyTemplate(reply.template, templatingCtx) - : ""; + const templatePrefix = + reply.template && (!sendSystemOnce || isFirstTurnInSession || !systemSent) + ? 
applyTemplate(reply.template, templatingCtx) + : ""; if (templatePrefix && argv.length > 0) { argv = [argv[0], templatePrefix, ...argv.slice(1)]; } diff --git a/src/config/config.ts b/src/config/config.ts index 8e19d63f4..3f0fa14e5 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -17,6 +17,8 @@ export type SessionConfig = { sessionArgNew?: string[]; sessionArgResume?: string[]; sessionArgBeforeBody?: boolean; + sendSystemOnce?: boolean; + sessionIntro?: string; }; export type LoggingConfig = { @@ -73,6 +75,8 @@ const ReplySchema = z sessionArgNew: z.array(z.string()).optional(), sessionArgResume: z.array(z.string()).optional(), sessionArgBeforeBody: z.boolean().optional(), + sendSystemOnce: z.boolean().optional(), + sessionIntro: z.string().optional(), }) .optional(), claudeOutputFormat: z diff --git a/src/config/sessions.ts b/src/config/sessions.ts index 261d34fc6..60265d95f 100644 --- a/src/config/sessions.ts +++ b/src/config/sessions.ts @@ -8,7 +8,11 @@ import { CONFIG_DIR, normalizeE164 } from "../utils.js"; export type SessionScope = "per-sender" | "global"; -export type SessionEntry = { sessionId: string; updatedAt: number }; +export type SessionEntry = { + sessionId: string; + updatedAt: number; + systemSent?: boolean; +}; export const SESSION_STORE_DEFAULT = path.join(CONFIG_DIR, "sessions.json"); export const DEFAULT_RESET_TRIGGER = "/new"; diff --git a/src/index.core.test.ts b/src/index.core.test.ts index 2d6a68772..2f9acfc59 100644 --- a/src/index.core.test.ts +++ b/src/index.core.test.ts @@ -1,5 +1,6 @@ import crypto from "node:crypto"; import net from "node:net"; +import fs from "node:fs"; import os from "node:os"; import path from "node:path"; import type { MessageInstance } from "twilio/lib/rest/api/v2010/account/message.js"; @@ -499,6 +500,64 @@ describe("config and templating", () => { expect(argvSecond[2]).toBe("--resume"); }); + it("only sends system prompt once per session when configured", async () => { + const tmpStore = 
path.join(os.tmpdir(), `warelay-store-${Date.now()}.json`); + vi.spyOn(crypto, "randomUUID").mockReturnValue("sid-1"); + const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({ + stdout: "ok\n", + stderr: "", + code: 0, + signal: null, + killed: false, + }); + const cfg = { + inbound: { + reply: { + mode: "command" as const, + command: ["echo", "{{Body}}"], + template: "[tmpl]", + bodyPrefix: "[pfx] ", + session: { + sendSystemOnce: true, + sessionIntro: "SYS", + store: tmpStore, + sessionArgNew: ["--sid", "{{SessionId}}"], + sessionArgResume: ["--resume", "{{SessionId}}"], + }, + }, + }, + }; + + await index.getReplyFromConfig( + { Body: "/new hi", From: "+1", To: "+2" }, + undefined, + cfg, + runSpy, + ); + await index.getReplyFromConfig( + { Body: "next", From: "+1", To: "+2" }, + undefined, + cfg, + runSpy, + ); + + const firstArgv = runSpy.mock.calls[0][0]; + expect(firstArgv).toEqual([ + "echo", + "[tmpl]", + "--sid", + "sid-1", + "SYS\n\n[pfx] hi", + ]); + + const secondArgv = runSpy.mock.calls[1][0]; + expect(secondArgv).toEqual(["echo", "--resume", "sid-1", "next"]); + + const persisted = JSON.parse(fs.readFileSync(tmpStore, "utf-8")); + const firstEntry = Object.values(persisted)[0] as { systemSent?: boolean }; + expect(firstEntry.systemSent).toBe(true); + }); + it("injects Claude output format + print flag when configured", async () => { const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({ stdout: "ok",