diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fb41e6da..4b1bada54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - Config: remove `routing.groupChat.requireMention` + `telegram.requireMention`; use `whatsapp.groups`, `imessage.groups`, and `telegram.groups` defaults instead (run `clawdis doctor` to migrate). ### Features +- Discord/Telegram: add reply tags (`[[reply_to_current]]`, `[[reply_to:]]`) with per-provider `replyToMode` (off|first|all) for native threaded replies. - Talk mode: continuous speech conversations (macOS/iOS/Android) with ElevenLabs TTS, reply directives, and optional interrupt-on-speech. - UI: add optional `ui.seamColor` accent to tint the Talk Mode side bubble (macOS/iOS/Android). - Nix mode: opt-in declarative config + read-only settings UI when `CLAWDIS_NIX_MODE=1` (thanks @joshp123 for the persistence — earned my trust; I'll merge these going forward). diff --git a/docs/configuration.md b/docs/configuration.md index be6c2cf1c..9e08617eb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -169,6 +169,7 @@ Set `telegram.enabled: false` to disable automatic startup. telegram: { enabled: true, botToken: "your-bot-token", + replyToMode: "off", groups: { "*": { requireMention: true }, "123456789": { requireMention: false } // group chat id @@ -183,6 +184,7 @@ Set `telegram.enabled: false` to disable automatic startup. } ``` Mention gating precedence (most specific wins): `telegram.groups..requireMention` → `telegram.groups."*".requireMention` → default `true`. +Reply threading is controlled via `telegram.replyToMode` (`off` | `first` | `all`) and reply tags in the model output. ### `discord` (bot transport) @@ -195,6 +197,7 @@ Configure the Discord bot by setting the bot token and optional gating: token: "your-bot-token", mediaMaxMb: 8, // clamp inbound media size enableReactions: true, // allow agent-triggered reactions + replyToMode: "off", // off | first | all slashCommand: { // user-installed app slash commands enabled: true, name: "clawd", @@ -225,6 +228,7 @@ Configure the Discord bot by setting the bot token and optional gating: ``` Clawdis starts Discord only when a `discord` config section exists. The token is resolved from `DISCORD_BOT_TOKEN` or `discord.token` (unless `discord.enabled` is `false`). Use `user:` (DM) or `channel:` (guild channel) when specifying delivery targets for cron/CLI commands. +Reply threading is controlled via `discord.replyToMode` (`off` | `first` | `all`) and reply tags in the model output. Guild slugs are lowercase with spaces replaced by `-`; channel keys use the slugged channel name (no leading `#`). Prefer guild ids as keys to avoid rename ambiguity. Use `discord.guilds."*"` for default per-guild settings. diff --git a/docs/discord.md b/docs/discord.md index 9c6a4b139..2d75bb584 100644 --- a/docs/discord.md +++ b/docs/discord.md @@ -40,6 +40,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea - File uploads supported up to the configured `discord.mediaMaxMb` (default 8 MB). - Mention-gated guild replies by default to avoid noisy bots. - Reply context is injected when a message references another message (quoted content + ids). +- Native reply threading is **off by default**; enable with `discord.replyToMode` and reply tags. ## Config @@ -50,6 +51,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea token: "abc.123", mediaMaxMb: 8, enableReactions: true, + replyToMode: "off", slashCommand: { enabled: true, name: "clawd", @@ -92,6 +94,18 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea - `mediaMaxMb`: clamp inbound media saved to disk. - `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20, `0` disables). - `enableReactions`: allow agent-triggered reactions via the `clawdis_discord` tool (default `true`). +- `replyToMode`: `off` (default), `first`, or `all`. Applies only when the model includes a reply tag. + +## Reply tags +To request a threaded reply, the model can include one tag in its output: +- `[[reply_to_current]]` — reply to the triggering Discord message. +- `[[reply_to:]]` — reply to a specific message id from context/history. +Current message ids are appended to prompts as `[message_id: …]`; history entries already include ids. + +Behavior is controlled by `discord.replyToMode`: +- `off`: ignore tags. +- `first`: only the first outbound chunk/attachment is a reply. +- `all`: every outbound chunk/attachment is a reply. Allowlist matching notes: - `allowFrom`/`users`/`groupChannels` accept ids, names, tags, or mentions like `<@id>`. diff --git a/docs/setup.md b/docs/setup.md index 614d197b6..45164f24d 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -113,5 +113,6 @@ pnpm clawdis health - `docs/gateway.md` (Gateway runbook; flags, supervision, ports) - `docs/configuration.md` (config schema + examples) +- `docs/discord.md` and `docs/telegram.md` (reply tags + replyToMode settings) - `docs/clawd.md` (personal assistant setup) - `docs/clawdis-mac.md` (macOS app behavior; gateway lifecycle + “Attach only”) diff --git a/docs/telegram.md b/docs/telegram.md index fd6c3f018..035e1fb76 100644 --- a/docs/telegram.md +++ b/docs/telegram.md @@ -31,13 +31,13 @@ Status: ready for bot-mode use with grammY (long-polling by default; webhook sup - Sees only messages sent after it’s added to a chat; no pre-history access. - Cannot DM users first; they must initiate. Channels are receive-only unless the bot is an admin poster. - File size caps follow Telegram Bot API (up to 2 GB for documents; smaller for some media types). -- Typing indicators (`sendChatAction`) supported; outbound replies are sent as native replies to the triggering message (threaded where Telegram allows). +- Typing indicators (`sendChatAction`) supported; native replies are **off by default** and enabled via `telegram.replyToMode` + reply tags. ## Planned implementation details - Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits. -- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block; groups require @bot mention by default (override per chat in config). +- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block (includes `id:` when available); groups require @bot mention by default (override per chat in config). - Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort. -- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported. +- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported. - Mention gating precedence (most specific wins): `telegram.groups..requireMention` → `telegram.groups."*".requireMention` → default `true`. Example config: @@ -46,6 +46,7 @@ Example config: telegram: { enabled: true, botToken: "123:abc", + replyToMode: "off", groups: { "*": { requireMention: true }, "123456789": { requireMention: false } // group chat id @@ -66,6 +67,17 @@ Example config: - Make the bot an admin if you need it to send in restricted groups or channels. - Mention the bot (`@yourbot`) or use commands to trigger; per-group overrides live in `telegram.groups` if you want always-on behavior. +## Reply tags +To request a threaded reply, the model can include one tag in its output: +- `[[reply_to_current]]` — reply to the triggering Telegram message. +- `[[reply_to:]]` — reply to a specific message id from context. +Current message ids are appended to prompts as `[message_id: …]`; reply context includes `id:` when available. + +Behavior is controlled by `telegram.replyToMode`: +- `off`: ignore tags. +- `first`: only the first outbound chunk/attachment is a reply. +- `all`: every outbound chunk/attachment is a reply. + ## Roadmap - ✅ Design and defaults (this doc) - ✅ grammY long-poll gateway + text/media send diff --git a/docs/whatsapp.md b/docs/whatsapp.md index 4f04fdf15..c4c09ab2f 100644 --- a/docs/whatsapp.md +++ b/docs/whatsapp.md @@ -54,7 +54,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number - `Body` is the current message body with envelope. - Quoted reply context is **always appended**: ``` - [Replying to +1555] + [Replying to +1555 id:ABC123] > [/Replying] ``` @@ -81,8 +81,8 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number - Group metadata cached 5 min (subject + participants). ## Reply delivery (threading) -- Outbound replies are sent as **native replies** (quoted message). -- Model does not need IDs for threading; gateway attaches quote. +- WhatsApp Web sends standard messages (no quoted reply threading in the current gateway). +- Reply tags are ignored on this surface. ## Outbound send (text + media) - Uses active web listener; error if gateway not running. diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 2af132927..1e880125b 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -79,6 +79,7 @@ export type EmbeddedPiRunResult = { text?: string; mediaUrl?: string; mediaUrls?: string[]; + replyToId?: string; }>; meta: EmbeddedPiRunMeta; }; diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index 796cb763b..facddb19f 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -82,6 +82,12 @@ export function buildAgentSystemPromptAppend(params: { "Never send streaming/partial replies to external messaging surfaces; only final replies should be delivered there.", "Clawdis handles message transport automatically; respond normally and your reply will be delivered to the current chat.", "", + "## Reply Tags", + "To request a native reply/quote on supported surfaces, include one tag in your reply:", + "- [[reply_to_current]] replies to the triggering message.", + "- [[reply_to:]] replies to a specific message id when you have it.", + "Tags are stripped before sending; support depends on the current provider config.", + "", ]; if (extraSystemPrompt) { diff --git a/src/auto-reply/reply.directive.test.ts b/src/auto-reply/reply.directive.test.ts index d5c339530..709a2fa8c 100644 --- a/src/auto-reply/reply.directive.test.ts +++ b/src/auto-reply/reply.directive.test.ts @@ -14,6 +14,7 @@ import { import { drainSystemEvents } from "../infra/system-events.js"; import { extractQueueDirective, + extractReplyToTag, extractThinkDirective, extractVerboseDirective, getReplyFromConfig, @@ -96,6 +97,90 @@ describe("directive parsing", () => { expect(res.cleaned).toBe("please now"); }); + it("extracts reply_to_current tag", () => { + const res = extractReplyToTag("ok [[reply_to_current]]", "msg-1"); + expect(res.replyToId).toBe("msg-1"); + expect(res.cleaned).toBe("ok"); + }); + + it("extracts reply_to id tag", () => { + const res = extractReplyToTag("see [[reply_to:12345]] now", "msg-1"); + expect(res.replyToId).toBe("12345"); + expect(res.cleaned).toBe("see now"); + }); + + it("strips reply tags and maps reply_to_current to MessageSid", async () => { + await withTempHome(async (home) => { + vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ + payloads: [{ text: "hello [[reply_to_current]]" }], + meta: { + durationMs: 5, + agentMeta: { sessionId: "s", provider: "p", model: "m" }, + }, + }); + + const res = await getReplyFromConfig( + { + Body: "ping", + From: "+1004", + To: "+2000", + MessageSid: "msg-123", + }, + {}, + { + agent: { + model: "anthropic/claude-opus-4-5", + workspace: path.join(home, "clawd"), + }, + whatsapp: { allowFrom: ["*"] }, + session: { store: path.join(home, "sessions.json") }, + }, + ); + + const payload = Array.isArray(res) ? res[0] : res; + expect(payload?.text).toBe("hello"); + expect(payload?.replyToId).toBe("msg-123"); + }); + }); + + it("prefers explicit reply_to id over reply_to_current", async () => { + await withTempHome(async (home) => { + vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ + payloads: [ + { + text: "hi [[reply_to_current]] [[reply_to:abc-456]]", + }, + ], + meta: { + durationMs: 5, + agentMeta: { sessionId: "s", provider: "p", model: "m" }, + }, + }); + + const res = await getReplyFromConfig( + { + Body: "ping", + From: "+1004", + To: "+2000", + MessageSid: "msg-123", + }, + {}, + { + agent: { + model: "anthropic/claude-opus-4-5", + workspace: path.join(home, "clawd"), + }, + whatsapp: { allowFrom: ["*"] }, + session: { store: path.join(home, "sessions.json") }, + }, + ); + + const payload = Array.isArray(res) ? res[0] : res; + expect(payload?.text).toBe("hi"); + expect(payload?.replyToId).toBe("abc-456"); + }); + }); + it("applies inline think and still runs agent content", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 10cd9eda7..f6d8c5703 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -164,6 +164,39 @@ export function extractQueueDirective(body?: string): { }; } +export function extractReplyToTag( + text?: string, + currentMessageId?: string, +): { + cleaned: string; + replyToId?: string; + hasTag: boolean; +} { + if (!text) return { cleaned: "", hasTag: false }; + let cleaned = text; + let replyToId: string | undefined; + let hasTag = false; + + const currentMatch = cleaned.match(/\[\[reply_to_current\]\]/i); + if (currentMatch) { + cleaned = cleaned.replace(/\[\[reply_to_current\]\]/gi, " "); + hasTag = true; + if (currentMessageId?.trim()) { + replyToId = currentMessageId.trim(); + } + } + + const idMatch = cleaned.match(/\[\[reply_to:([^\]\n]+)\]\]/i); + if (idMatch?.[1]) { + cleaned = cleaned.replace(/\[\[reply_to:[^\]\n]+\]\]/gi, " "); + replyToId = idMatch[1].trim(); + hasTag = true; + } + + cleaned = cleaned.replace(/\s+/g, " ").trim(); + return { cleaned, replyToId, hasTag }; +} + function isAbortTrigger(text?: string): boolean { if (!text) return false; const normalized = text.trim().toLowerCase(); @@ -1123,6 +1156,12 @@ export async function getReplyFromConfig( ABORT_MEMORY.set(abortKey, false); } } + const messageIdHint = sessionCtx.MessageSid?.trim() + ? `[message_id: ${sessionCtx.MessageSid.trim()}]` + : ""; + if (messageIdHint) { + prefixedBodyBase = `${prefixedBodyBase}\n${messageIdHint}`; + } // Prepend queued system events (transitions only) and (for new main sessions) a provider snapshot. // Token efficiency: we filter out periodic/heartbeat noise and keep the lines compact. @@ -1399,9 +1438,28 @@ export async function getReplyFromConfig( return [{ ...payload, text: stripped.text }]; }); - if (sanitizedPayloads.length === 0) return undefined; + const replyTaggedPayloads: ReplyPayload[] = sanitizedPayloads + .map((payload) => { + const { cleaned, replyToId } = extractReplyToTag( + payload.text, + sessionCtx.MessageSid, + ); + return { + ...payload, + text: cleaned ? cleaned : undefined, + replyToId: replyToId ?? payload.replyToId, + }; + }) + .filter( + (payload) => + payload.text || + payload.mediaUrl || + (payload.mediaUrls && payload.mediaUrls.length > 0), + ); - const shouldSignalTyping = sanitizedPayloads.some((payload) => { + if (replyTaggedPayloads.length === 0) return undefined; + + const shouldSignalTyping = replyTaggedPayloads.some((payload) => { const trimmed = payload.text?.trim(); if (trimmed && trimmed !== SILENT_REPLY_TOKEN) return true; if (payload.mediaUrl) return true; @@ -1456,7 +1514,7 @@ export async function getReplyFromConfig( } // If verbose is enabled and this is a new session, prepend a session hint. - let finalPayloads = sanitizedPayloads; + let finalPayloads = replyTaggedPayloads; if (resolvedVerboseLevel === "on" && isNewSession) { finalPayloads = [ { text: `🧭 New session: ${sessionIdFinal}` }, diff --git a/src/auto-reply/types.ts b/src/auto-reply/types.ts index 27ddace0b..0bfe335fd 100644 --- a/src/auto-reply/types.ts +++ b/src/auto-reply/types.ts @@ -9,4 +9,5 @@ export type ReplyPayload = { text?: string; mediaUrl?: string; mediaUrls?: string[]; + replyToId?: string; }; diff --git a/src/config/config.ts b/src/config/config.ts index f5508d497..ab5189748 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -18,6 +18,7 @@ export const isNixMode = process.env.CLAWDIS_NIX_MODE === "1"; export type ReplyMode = "text" | "command"; export type SessionScope = "per-sender" | "global"; +export type ReplyToMode = "off" | "first" | "all"; export type SessionConfig = { scope?: SessionScope; @@ -166,6 +167,8 @@ export type TelegramConfig = { botToken?: string; /** Path to file containing bot token (for secret managers like agenix) */ tokenFile?: string; + /** Control reply threading when reply tags are present (off|first|all). */ + replyToMode?: ReplyToMode; groups?: Record< string, { @@ -222,6 +225,8 @@ export type DiscordConfig = { historyLimit?: number; /** Allow agent-triggered Discord reactions (default: true). */ enableReactions?: boolean; + /** Control reply threading when reply tags are present (off|first|all). */ + replyToMode?: ReplyToMode; slashCommand?: DiscordSlashCommandConfig; dm?: DiscordDmConfig; /** New per-guild config keyed by guild id or slug. */ @@ -650,6 +655,11 @@ const GroupChatSchema = z .optional(); const QueueModeSchema = z.union([z.literal("queue"), z.literal("interrupt")]); +const ReplyToModeSchema = z.union([ + z.literal("off"), + z.literal("first"), + z.literal("all"), +]); const QueueModeBySurfaceSchema = z .object({ @@ -962,6 +972,7 @@ const ClawdisSchema = z.object({ enabled: z.boolean().optional(), botToken: z.string().optional(), tokenFile: z.string().optional(), + replyToMode: ReplyToModeSchema.optional(), groups: z .record( z.string(), @@ -995,6 +1006,7 @@ const ClawdisSchema = z.object({ mediaMaxMb: z.number().positive().optional(), historyLimit: z.number().int().min(0).optional(), enableReactions: z.boolean().optional(), + replyToMode: ReplyToModeSchema.optional(), dm: z .object({ enabled: z.boolean().optional(), diff --git a/src/discord/monitor.ts b/src/discord/monitor.ts index c34c338ec..817d5a4ca 100644 --- a/src/discord/monitor.ts +++ b/src/discord/monitor.ts @@ -14,7 +14,7 @@ import { formatAgentEnvelope } from "../auto-reply/envelope.js"; import { getReplyFromConfig } from "../auto-reply/reply.js"; import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js"; import type { ReplyPayload } from "../auto-reply/types.js"; -import type { DiscordSlashCommandConfig } from "../config/config.js"; +import type { DiscordSlashCommandConfig, ReplyToMode } from "../config/config.js"; import { loadConfig } from "../config/config.js"; import { resolveStorePath, updateLastRoute } from "../config/sessions.js"; import { danger, isVerbose, logVerbose, warn } from "../globals.js"; @@ -32,6 +32,7 @@ export type MonitorDiscordOpts = { slashCommand?: DiscordSlashCommandConfig; mediaMaxMb?: number; historyLimit?: number; + replyToMode?: ReplyToMode; }; type DiscordMediaInfo = { @@ -117,6 +118,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { 0, opts.historyLimit ?? cfg.discord?.historyLimit ?? 20, ); + const replyToMode = opts.replyToMode ?? cfg.discord?.replyToMode ?? "off"; const dmEnabled = dmConfig?.enabled ?? true; const groupDmEnabled = dmConfig?.groupEnabled ?? false; const groupDmChannels = dmConfig?.groupChannels; @@ -417,6 +419,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { target: ctxPayload.To, token, runtime, + replyToMode, }); if (isVerbose()) { logVerbose( @@ -984,20 +987,34 @@ async function deliverReplies({ target, token, runtime, + replyToMode, }: { replies: ReplyPayload[]; target: string; token: string; runtime: RuntimeEnv; + replyToMode: ReplyToMode; }) { + let hasReplied = false; for (const payload of replies) { const mediaList = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); const text = payload.text ?? ""; + const replyToId = + replyToMode === "off" ? undefined : payload.replyToId?.trim(); if (!text && mediaList.length === 0) continue; if (mediaList.length === 0) { for (const chunk of chunkText(text, 2000)) { - await sendMessageDiscord(target, chunk, { token }); + await sendMessageDiscord(target, chunk, { + token, + replyTo: + replyToId && (replyToMode === "all" || !hasReplied) + ? replyToId + : undefined, + }); + if (replyToId && !hasReplied) { + hasReplied = true; + } } } else { let first = true; @@ -1007,7 +1024,14 @@ async function deliverReplies({ await sendMessageDiscord(target, caption, { token, mediaUrl, + replyTo: + replyToId && (replyToMode === "all" || !hasReplied) + ? replyToId + : undefined, }); + if (replyToId && !hasReplied) { + hasReplied = true; + } } } runtime.log?.(`delivered reply to ${target}`); diff --git a/src/discord/send.test.ts b/src/discord/send.test.ts index 7de4dd0c4..9e469c198 100644 --- a/src/discord/send.test.ts +++ b/src/discord/send.test.ts @@ -82,4 +82,37 @@ describe("sendMessageDiscord", () => { }), ); }); + + it("includes message_reference when replying", async () => { + const { rest, postMock } = makeRest(); + postMock.mockResolvedValue({ id: "msg1", channel_id: "789" }); + await sendMessageDiscord("channel:789", "hello", { + rest, + token: "t", + replyTo: "orig-123", + }); + const body = postMock.mock.calls[0]?.[1]?.body; + expect(body?.message_reference).toEqual({ + message_id: "orig-123", + fail_if_not_exists: false, + }); + }); + + it("replies only on the first chunk", async () => { + const { rest, postMock } = makeRest(); + postMock.mockResolvedValue({ id: "msg1", channel_id: "789" }); + await sendMessageDiscord("channel:789", "a".repeat(2001), { + rest, + token: "t", + replyTo: "orig-123", + }); + expect(postMock).toHaveBeenCalledTimes(2); + const firstBody = postMock.mock.calls[0]?.[1]?.body; + const secondBody = postMock.mock.calls[1]?.[1]?.body; + expect(firstBody?.message_reference).toEqual({ + message_id: "orig-123", + fail_if_not_exists: false, + }); + expect(secondBody?.message_reference).toBeUndefined(); + }); }); diff --git a/src/discord/send.ts b/src/discord/send.ts index 1b92f5be9..9763dfc94 100644 --- a/src/discord/send.ts +++ b/src/discord/send.ts @@ -22,6 +22,7 @@ type DiscordSendOpts = { mediaUrl?: string; verbose?: boolean; rest?: REST; + replyTo?: string; }; export type DiscordSendResult = { @@ -105,22 +106,35 @@ async function resolveChannelId( return { channelId: dmChannel.id, dm: true }; } -async function sendDiscordText(rest: REST, channelId: string, text: string) { +async function sendDiscordText( + rest: REST, + channelId: string, + text: string, + replyTo?: string, +) { if (!text.trim()) { throw new Error("Message must be non-empty for Discord sends"); } + const messageReference = replyTo + ? { message_id: replyTo, fail_if_not_exists: false } + : undefined; if (text.length <= DISCORD_TEXT_LIMIT) { const res = (await rest.post(Routes.channelMessages(channelId), { - body: { content: text }, + body: { content: text, message_reference: messageReference }, })) as { id: string; channel_id: string }; return res; } const chunks = chunkText(text, DISCORD_TEXT_LIMIT); let last: { id: string; channel_id: string } | null = null; + let isFirst = true; for (const chunk of chunks) { last = (await rest.post(Routes.channelMessages(channelId), { - body: { content: chunk }, + body: { + content: chunk, + message_reference: isFirst ? messageReference : undefined, + }, })) as { id: string; channel_id: string }; + isFirst = false; } if (!last) { throw new Error("Discord send failed (empty chunk result)"); @@ -133,13 +147,18 @@ async function sendDiscordMedia( channelId: string, text: string, mediaUrl: string, + replyTo?: string, ) { const media = await loadWebMedia(mediaUrl); const caption = text.length > DISCORD_TEXT_LIMIT ? text.slice(0, DISCORD_TEXT_LIMIT) : text; + const messageReference = replyTo + ? { message_id: replyTo, fail_if_not_exists: false } + : undefined; const res = (await rest.post(Routes.channelMessages(channelId), { body: { content: caption || undefined, + message_reference: messageReference, }, files: [ { @@ -171,9 +190,15 @@ export async function sendMessageDiscord( | { id: string | null; channel_id: string }; if (opts.mediaUrl) { - result = await sendDiscordMedia(rest, channelId, text, opts.mediaUrl); + result = await sendDiscordMedia( + rest, + channelId, + text, + opts.mediaUrl, + opts.replyTo, + ); } else { - result = await sendDiscordText(rest, channelId, text); + result = await sendDiscordText(rest, channelId, text, opts.replyTo); } return { diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts index a6cade3e4..383fd9386 100644 --- a/src/telegram/bot.test.ts +++ b/src/telegram/bot.test.ts @@ -152,7 +152,7 @@ describe("createTelegramBot", () => { expect(replySpy).toHaveBeenCalledTimes(1); const payload = replySpy.mock.calls[0][0]; - expect(payload.Body).toContain("[Replying to Ada]"); + expect(payload.Body).toContain("[Replying to Ada id:9001]"); expect(payload.Body).toContain("Can you summarize this?"); expect(payload.ReplyToId).toBe("9001"); expect(payload.ReplyToBody).toBe("Can you summarize this?"); diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts index 341401c14..d5f3eb335 100644 --- a/src/telegram/bot.ts +++ b/src/telegram/bot.ts @@ -9,6 +9,7 @@ import { chunkText } from "../auto-reply/chunk.js"; import { formatAgentEnvelope } from "../auto-reply/envelope.js"; import { getReplyFromConfig } from "../auto-reply/reply.js"; import type { ReplyPayload } from "../auto-reply/types.js"; +import type { ReplyToMode } from "../config/config.js"; import { loadConfig } from "../config/config.js"; import { resolveStorePath, updateLastRoute } from "../config/sessions.js"; import { danger, isVerbose, logVerbose } from "../globals.js"; @@ -39,6 +40,7 @@ export type TelegramBotOptions = { requireMention?: boolean; allowFrom?: Array; mediaMaxMb?: number; + replyToMode?: ReplyToMode; proxyFetch?: typeof fetch; }; @@ -59,6 +61,7 @@ export function createTelegramBot(opts: TelegramBotOptions) { const cfg = loadConfig(); const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom; + const replyToMode = opts.replyToMode ?? cfg.telegram?.replyToMode ?? "off"; const mediaMaxBytes = (opts.mediaMaxMb ?? cfg.telegram?.mediaMaxMb ?? 5) * 1024 * 1024; const logger = getChildLogger({ module: "telegram-auto-reply" }); @@ -137,7 +140,9 @@ export function createTelegramBot(opts: TelegramBotOptions) { ).trim(); if (!rawBody) return; const replySuffix = replyTarget - ? `\n\n[Replying to ${replyTarget.sender}]\n${replyTarget.body}\n[/Replying]` + ? `\n\n[Replying to ${replyTarget.sender}${ + replyTarget.id ? ` id:${replyTarget.id}` : "" + }]\n${replyTarget.body}\n[/Replying]` : ""; const body = formatAgentEnvelope({ surface: "Telegram", @@ -211,6 +216,7 @@ export function createTelegramBot(opts: TelegramBotOptions) { token: opts.token, runtime, bot, + replyToMode, }); } catch (err) { runtime.error?.(danger(`handler failed: ${String(err)}`)); @@ -233,13 +239,17 @@ async function deliverReplies(params: { token: string; runtime: RuntimeEnv; bot: Bot; + replyToMode: ReplyToMode; }) { - const { replies, chatId, runtime, bot } = params; + const { replies, chatId, runtime, bot, replyToMode } = params; + let hasReplied = false; for (const reply of replies) { if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) { runtime.error?.(danger("reply missing text/media")); continue; } + const replyToId = + replyToMode === "off" ? undefined : resolveTelegramReplyId(reply.replyToId); const mediaList = reply.mediaUrls?.length ? reply.mediaUrls : reply.mediaUrl @@ -247,7 +257,15 @@ async function deliverReplies(params: { : []; if (mediaList.length === 0) { for (const chunk of chunkText(reply.text || "", 4000)) { - await sendTelegramText(bot, chatId, chunk, runtime); + await sendTelegramText(bot, chatId, chunk, runtime, { + replyToMessageId: + replyToId && (replyToMode === "all" || !hasReplied) + ? replyToId + : undefined, + }); + if (replyToId && !hasReplied) { + hasReplied = true; + } } continue; } @@ -259,14 +277,33 @@ async function deliverReplies(params: { const file = new InputFile(media.buffer, media.fileName ?? "file"); const caption = first ? (reply.text ?? undefined) : undefined; first = false; + const replyToMessageId = + replyToId && (replyToMode === "all" || !hasReplied) + ? replyToId + : undefined; if (kind === "image") { - await bot.api.sendPhoto(chatId, file, { caption }); + await bot.api.sendPhoto(chatId, file, { + caption, + reply_to_message_id: replyToMessageId, + }); } else if (kind === "video") { - await bot.api.sendVideo(chatId, file, { caption }); + await bot.api.sendVideo(chatId, file, { + caption, + reply_to_message_id: replyToMessageId, + }); } else if (kind === "audio") { - await bot.api.sendAudio(chatId, file, { caption }); + await bot.api.sendAudio(chatId, file, { + caption, + reply_to_message_id: replyToMessageId, + }); } else { - await bot.api.sendDocument(chatId, file, { caption }); + await bot.api.sendDocument(chatId, file, { + caption, + reply_to_message_id: replyToMessageId, + }); + } + if (replyToId && !hasReplied) { + hasReplied = true; } } } @@ -315,6 +352,13 @@ function hasBotMention(msg: TelegramMessage, botUsername: string) { return false; } +function resolveTelegramReplyId(raw?: string): number | undefined { + if (!raw) return undefined; + const parsed = Number(raw); + if (!Number.isFinite(parsed)) return undefined; + return parsed; +} + async function resolveMedia( ctx: TelegramContext, maxBytes: number, @@ -363,10 +407,12 @@ async function sendTelegramText( chatId: string, text: string, runtime: RuntimeEnv, + opts?: { replyToMessageId?: number }, ): Promise { try { const res = await bot.api.sendMessage(chatId, text, { parse_mode: "Markdown", + reply_to_message_id: opts?.replyToMessageId, }); return res.message_id; } catch (err) { @@ -375,7 +421,9 @@ async function sendTelegramText( runtime.log?.( `telegram markdown parse failed; retrying without formatting: ${errText}`, ); - const res = await bot.api.sendMessage(chatId, text, {}); + const res = await bot.api.sendMessage(chatId, text, { + reply_to_message_id: opts?.replyToMessageId, + }); return res.message_id; } throw err; diff --git a/src/web/auto-reply.test.ts b/src/web/auto-reply.test.ts index bb61f81c3..7731da36a 100644 --- a/src/web/auto-reply.test.ts +++ b/src/web/auto-reply.test.ts @@ -1417,7 +1417,7 @@ describe("web auto-reply", () => { expect(callArg.ReplyToId).toBe("q1"); expect(callArg.ReplyToBody).toBe("original"); expect(callArg.ReplyToSender).toBe("+1999"); - expect(callArg.Body).toContain("[Replying to +1999]"); + expect(callArg.Body).toContain("[Replying to +1999 id:q1]"); expect(callArg.Body).toContain("original"); }); diff --git a/src/web/auto-reply.ts b/src/web/auto-reply.ts index 37f6e2df2..236154621 100644 --- a/src/web/auto-reply.ts +++ b/src/web/auto-reply.ts @@ -921,7 +921,8 @@ export async function monitorWebProvider( const formatReplyContext = (msg: WebInboundMsg) => { if (!msg.replyToBody) return null; const sender = msg.replyToSender ?? "unknown sender"; - return `[Replying to ${sender}]\n${msg.replyToBody}\n[/Replying]`; + const idPart = msg.replyToId ? ` id:${msg.replyToId}` : ""; + return `[Replying to ${sender}${idPart}]\n${msg.replyToBody}\n[/Replying]`; }; const buildLine = (msg: WebInboundMsg) => {