feat: add reply tags and replyToMode

This commit is contained in:
Peter Steinberger
2026-01-02 23:18:41 +01:00
parent a9ff03acaf
commit 2c92ccd66e
19 changed files with 353 additions and 27 deletions

View File

@@ -18,6 +18,7 @@
- Config: remove `routing.groupChat.requireMention` + `telegram.requireMention`; use `whatsapp.groups`, `imessage.groups`, and `telegram.groups` defaults instead (run `clawdis doctor` to migrate).
### Features
- Discord/Telegram: add reply tags (`[[reply_to_current]]`, `[[reply_to:<id>]]`) with per-provider `replyToMode` (off|first|all) for native threaded replies.
- Talk mode: continuous speech conversations (macOS/iOS/Android) with ElevenLabs TTS, reply directives, and optional interrupt-on-speech.
- UI: add optional `ui.seamColor` accent to tint the Talk Mode side bubble (macOS/iOS/Android).
- Nix mode: opt-in declarative config + read-only settings UI when `CLAWDIS_NIX_MODE=1` (thanks @joshp123 for the persistence — earned my trust; I'll merge these going forward).

View File

@@ -169,6 +169,7 @@ Set `telegram.enabled: false` to disable automatic startup.
telegram: {
enabled: true,
botToken: "your-bot-token",
replyToMode: "off",
groups: {
"*": { requireMention: true },
"123456789": { requireMention: false } // group chat id
@@ -183,6 +184,7 @@ Set `telegram.enabled: false` to disable automatic startup.
}
```
Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention` → `telegram.groups."*".requireMention` → default `true`.
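A minimal sketch of that lookup (the helper is illustrative, not part of the Clawdis API):
```ts
// Illustrative only: mention gating resolved with the documented precedence,
// exact chat id first, then the "*" wildcard, then the default of true.
type TelegramGroupSettings = { requireMention?: boolean };

function requiresMention(
  groups: Record<string, TelegramGroupSettings> | undefined,
  chatId: string,
): boolean {
  return (
    groups?.[chatId]?.requireMention ??
    groups?.["*"]?.requireMention ??
    true
  );
}
```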
Reply threading is controlled via `telegram.replyToMode` (`off` | `first` | `all`) and reply tags in the model output.
### `discord` (bot transport)
@@ -195,6 +197,7 @@ Configure the Discord bot by setting the bot token and optional gating:
token: "your-bot-token",
mediaMaxMb: 8, // clamp inbound media size
enableReactions: true, // allow agent-triggered reactions
replyToMode: "off", // off | first | all
slashCommand: { // user-installed app slash commands
enabled: true,
name: "clawd",
@@ -225,6 +228,7 @@ Configure the Discord bot by setting the bot token and optional gating:
```
Clawdis starts Discord only when a `discord` config section exists. The token is resolved from `DISCORD_BOT_TOKEN` or `discord.token` (unless `discord.enabled` is `false`). Use `user:<id>` (DM) or `channel:<id>` (guild channel) when specifying delivery targets for cron/CLI commands.
Reply threading is controlled via `discord.replyToMode` (`off` | `first` | `all`) and reply tags in the model output.
Guild slugs are lowercase with spaces replaced by `-`; channel keys use the slugged channel name (no leading `#`). Prefer guild ids as keys to avoid rename ambiguity.
Use `discord.guilds."*"` for default per-guild settings.
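The slug rule can be sketched as follows (the helper name is illustrative):
```ts
// Illustrative only: lowercase the name and replace whitespace runs with "-".
// Channel keys use the same slug form without a leading "#".
function toSlug(name: string): string {
  return name.toLowerCase().replace(/\s+/g, "-");
}

// toSlug("My Cool Server") === "my-cool-server"
// toSlug("Dev Chat")       === "dev-chat"
```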

View File

@@ -40,6 +40,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
- File uploads supported up to the configured `discord.mediaMaxMb` (default 8 MB).
- Mention-gated guild replies by default to avoid noisy bots.
- Reply context is injected when a message references another message (quoted content + ids).
- Native reply threading is **off by default**; enable with `discord.replyToMode` and reply tags.
## Config
@@ -50,6 +51,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
token: "abc.123",
mediaMaxMb: 8,
enableReactions: true,
replyToMode: "off",
slashCommand: {
enabled: true,
name: "clawd",
@@ -92,6 +94,18 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
- `mediaMaxMb`: clamp inbound media saved to disk.
- `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20, `0` disables).
- `enableReactions`: allow agent-triggered reactions via the `clawdis_discord` tool (default `true`).
- `replyToMode`: `off` (default), `first`, or `all`. Applies only when the model includes a reply tag.
## Reply tags
To request a threaded reply, the model can include one tag in its output:
- `[[reply_to_current]]` — reply to the triggering Discord message.
- `[[reply_to:<id>]]` — reply to a specific message id from context/history.
Current message ids are appended to prompts as `[message_id: …]`; history entries already include ids.
Behavior is controlled by `discord.replyToMode`:
- `off`: ignore tags.
- `first`: only the first outbound chunk/attachment is a reply.
- `all`: every outbound chunk/attachment is a reply.
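For example, a model reply like the following (message id is a placeholder) is delivered as a native reply under `first` or `all`:
```
Here's the summary you asked for. [[reply_to:123456789012345678]]
```
The tag is stripped before sending in every mode; with `off` the text still goes out, just without a reply reference.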
Allowlist matching notes:
- `allowFrom`/`users`/`groupChannels` accept ids, names, tags, or mentions like `<@id>`.

View File

@@ -113,5 +113,6 @@ pnpm clawdis health
- `docs/gateway.md` (Gateway runbook; flags, supervision, ports)
- `docs/configuration.md` (config schema + examples)
- `docs/discord.md` and `docs/telegram.md` (reply tags + replyToMode settings)
- `docs/clawd.md` (personal assistant setup)
- `docs/clawdis-mac.md` (macOS app behavior; gateway lifecycle + “Attach only”)

View File

@@ -31,13 +31,13 @@ Status: ready for bot-mode use with grammY (long-polling by default; webhook sup
- Sees only messages sent after it's added to a chat; no pre-history access.
- Cannot DM users first; they must initiate. Channels are receive-only unless the bot is an admin poster.
- File size caps follow Telegram Bot API (up to 2 GB for documents; smaller for some media types).
- Typing indicators (`sendChatAction`) supported; outbound replies are sent as native replies to the triggering message (threaded where Telegram allows).
- Typing indicators (`sendChatAction`) supported; native replies are **off by default** and enabled via `telegram.replyToMode` + reply tags.
## Planned implementation details
- Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits.
- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block; groups require @bot mention by default (override per chat in config).
- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block (includes `id:` when available); groups require @bot mention by default (override per chat in config).
- Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort.
- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
- Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention` → `telegram.groups."*".requireMention` → default `true`.
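For reference, the reply-context block appended to `Body` looks like this (sender and id illustrative):
```
[Replying to Ada id:9001]
Can you summarize this?
[/Replying]
```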
Example config:
@@ -46,6 +46,7 @@ Example config:
telegram: {
enabled: true,
botToken: "123:abc",
replyToMode: "off",
groups: {
"*": { requireMention: true },
"123456789": { requireMention: false } // group chat id
@@ -66,6 +67,17 @@ Example config:
- Make the bot an admin if you need it to send in restricted groups or channels.
- Mention the bot (`@yourbot`) or use commands to trigger; per-group overrides live in `telegram.groups` if you want always-on behavior.
## Reply tags
To request a threaded reply, the model can include one tag in its output:
- `[[reply_to_current]]` — reply to the triggering Telegram message.
- `[[reply_to:<id>]]` — reply to a specific message id from context.
Current message ids are appended to prompts as `[message_id: …]`; reply context includes `id:` when available.
Behavior is controlled by `telegram.replyToMode`:
- `off`: ignore tags.
- `first`: only the first outbound chunk/attachment is a reply.
- `all`: every outbound chunk/attachment is a reply.
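For example (message id illustrative):
```
Done! Summary attached. [[reply_to:9001]]
```
With `replyToMode` set to `first` or `all`, the gateway sends this as a native Telegram reply (via `reply_to_message_id: 9001`); the tag itself is always stripped before delivery.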
## Roadmap
- ✅ Design and defaults (this doc)
- ✅ grammY long-poll gateway + text/media send

View File

@@ -54,7 +54,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
- `Body` is the current message body with envelope.
- Quoted reply context is **always appended**:
```
[Replying to +1555]
[Replying to +1555 id:ABC123]
<quoted text or <media:...>>
[/Replying]
```
@@ -81,8 +81,8 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
- Group metadata cached 5 min (subject + participants).
## Reply delivery (threading)
- Outbound replies are sent as **native replies** (quoted message).
- Model does not need IDs for threading; gateway attaches quote.
- WhatsApp Web sends standard messages (no quoted reply threading in the current gateway).
- Reply tags are ignored on this surface.
## Outbound send (text + media)
- Uses active web listener; error if gateway not running.

View File

@@ -79,6 +79,7 @@ export type EmbeddedPiRunResult = {
text?: string;
mediaUrl?: string;
mediaUrls?: string[];
replyToId?: string;
}>;
meta: EmbeddedPiRunMeta;
};

View File

@@ -82,6 +82,12 @@ export function buildAgentSystemPromptAppend(params: {
"Never send streaming/partial replies to external messaging surfaces; only final replies should be delivered there.",
"Clawdis handles message transport automatically; respond normally and your reply will be delivered to the current chat.",
"",
"## Reply Tags",
"To request a native reply/quote on supported surfaces, include one tag in your reply:",
"- [[reply_to_current]] replies to the triggering message.",
"- [[reply_to:<id>]] replies to a specific message id when you have it.",
"Tags are stripped before sending; support depends on the current provider config.",
"",
];
if (extraSystemPrompt) {

View File

@@ -14,6 +14,7 @@ import {
import { drainSystemEvents } from "../infra/system-events.js";
import {
extractQueueDirective,
extractReplyToTag,
extractThinkDirective,
extractVerboseDirective,
getReplyFromConfig,
@@ -96,6 +97,90 @@ describe("directive parsing", () => {
expect(res.cleaned).toBe("please now");
});
it("extracts reply_to_current tag", () => {
const res = extractReplyToTag("ok [[reply_to_current]]", "msg-1");
expect(res.replyToId).toBe("msg-1");
expect(res.cleaned).toBe("ok");
});
it("extracts reply_to id tag", () => {
const res = extractReplyToTag("see [[reply_to:12345]] now", "msg-1");
expect(res.replyToId).toBe("12345");
expect(res.cleaned).toBe("see now");
});
it("strips reply tags and maps reply_to_current to MessageSid", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
payloads: [{ text: "hello [[reply_to_current]]" }],
meta: {
durationMs: 5,
agentMeta: { sessionId: "s", provider: "p", model: "m" },
},
});
const res = await getReplyFromConfig(
{
Body: "ping",
From: "+1004",
To: "+2000",
MessageSid: "msg-123",
},
{},
{
agent: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
},
whatsapp: { allowFrom: ["*"] },
session: { store: path.join(home, "sessions.json") },
},
);
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.text).toBe("hello");
expect(payload?.replyToId).toBe("msg-123");
});
});
it("prefers explicit reply_to id over reply_to_current", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
payloads: [
{
text: "hi [[reply_to_current]] [[reply_to:abc-456]]",
},
],
meta: {
durationMs: 5,
agentMeta: { sessionId: "s", provider: "p", model: "m" },
},
});
const res = await getReplyFromConfig(
{
Body: "ping",
From: "+1004",
To: "+2000",
MessageSid: "msg-123",
},
{},
{
agent: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
},
whatsapp: { allowFrom: ["*"] },
session: { store: path.join(home, "sessions.json") },
},
);
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.text).toBe("hi");
expect(payload?.replyToId).toBe("abc-456");
});
});
it("applies inline think and still runs agent content", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({

View File

@@ -164,6 +164,39 @@ export function extractQueueDirective(body?: string): {
};
}
export function extractReplyToTag(
text?: string,
currentMessageId?: string,
): {
cleaned: string;
replyToId?: string;
hasTag: boolean;
} {
if (!text) return { cleaned: "", hasTag: false };
let cleaned = text;
let replyToId: string | undefined;
let hasTag = false;
const currentMatch = cleaned.match(/\[\[reply_to_current\]\]/i);
if (currentMatch) {
cleaned = cleaned.replace(/\[\[reply_to_current\]\]/gi, " ");
hasTag = true;
if (currentMessageId?.trim()) {
replyToId = currentMessageId.trim();
}
}
const idMatch = cleaned.match(/\[\[reply_to:([^\]\n]+)\]\]/i);
if (idMatch?.[1]) {
cleaned = cleaned.replace(/\[\[reply_to:[^\]\n]+\]\]/gi, " ");
replyToId = idMatch[1].trim();
hasTag = true;
}
cleaned = cleaned.replace(/\s+/g, " ").trim();
return { cleaned, replyToId, hasTag };
}
function isAbortTrigger(text?: string): boolean {
if (!text) return false;
const normalized = text.trim().toLowerCase();
@@ -1123,6 +1156,12 @@ export async function getReplyFromConfig(
ABORT_MEMORY.set(abortKey, false);
}
}
const messageIdHint = sessionCtx.MessageSid?.trim()
? `[message_id: ${sessionCtx.MessageSid.trim()}]`
: "";
if (messageIdHint) {
prefixedBodyBase = `${prefixedBodyBase}\n${messageIdHint}`;
}
// Prepend queued system events (transitions only) and (for new main sessions) a provider snapshot.
// Token efficiency: we filter out periodic/heartbeat noise and keep the lines compact.
@@ -1399,9 +1438,28 @@ export async function getReplyFromConfig(
return [{ ...payload, text: stripped.text }];
});
if (sanitizedPayloads.length === 0) return undefined;
const replyTaggedPayloads: ReplyPayload[] = sanitizedPayloads
.map((payload) => {
const { cleaned, replyToId } = extractReplyToTag(
payload.text,
sessionCtx.MessageSid,
);
return {
...payload,
text: cleaned ? cleaned : undefined,
replyToId: replyToId ?? payload.replyToId,
};
})
.filter(
(payload) =>
payload.text ||
payload.mediaUrl ||
(payload.mediaUrls && payload.mediaUrls.length > 0),
);
const shouldSignalTyping = sanitizedPayloads.some((payload) => {
if (replyTaggedPayloads.length === 0) return undefined;
const shouldSignalTyping = replyTaggedPayloads.some((payload) => {
const trimmed = payload.text?.trim();
if (trimmed && trimmed !== SILENT_REPLY_TOKEN) return true;
if (payload.mediaUrl) return true;
@@ -1456,7 +1514,7 @@ export async function getReplyFromConfig(
}
// If verbose is enabled and this is a new session, prepend a session hint.
let finalPayloads = sanitizedPayloads;
let finalPayloads = replyTaggedPayloads;
if (resolvedVerboseLevel === "on" && isNewSession) {
finalPayloads = [
{ text: `🧭 New session: ${sessionIdFinal}` },

View File

@@ -9,4 +9,5 @@ export type ReplyPayload = {
text?: string;
mediaUrl?: string;
mediaUrls?: string[];
replyToId?: string;
};

View File

@@ -18,6 +18,7 @@ export const isNixMode = process.env.CLAWDIS_NIX_MODE === "1";
export type ReplyMode = "text" | "command";
export type SessionScope = "per-sender" | "global";
export type ReplyToMode = "off" | "first" | "all";
export type SessionConfig = {
scope?: SessionScope;
@@ -166,6 +167,8 @@ export type TelegramConfig = {
botToken?: string;
/** Path to file containing bot token (for secret managers like agenix) */
tokenFile?: string;
/** Control reply threading when reply tags are present (off|first|all). */
replyToMode?: ReplyToMode;
groups?: Record<
string,
{
@@ -222,6 +225,8 @@ export type DiscordConfig = {
historyLimit?: number;
/** Allow agent-triggered Discord reactions (default: true). */
enableReactions?: boolean;
/** Control reply threading when reply tags are present (off|first|all). */
replyToMode?: ReplyToMode;
slashCommand?: DiscordSlashCommandConfig;
dm?: DiscordDmConfig;
/** New per-guild config keyed by guild id or slug. */
@@ -650,6 +655,11 @@ const GroupChatSchema = z
.optional();
const QueueModeSchema = z.union([z.literal("queue"), z.literal("interrupt")]);
const ReplyToModeSchema = z.union([
z.literal("off"),
z.literal("first"),
z.literal("all"),
]);
const QueueModeBySurfaceSchema = z
.object({
@@ -962,6 +972,7 @@ const ClawdisSchema = z.object({
enabled: z.boolean().optional(),
botToken: z.string().optional(),
tokenFile: z.string().optional(),
replyToMode: ReplyToModeSchema.optional(),
groups: z
.record(
z.string(),
@@ -995,6 +1006,7 @@ const ClawdisSchema = z.object({
mediaMaxMb: z.number().positive().optional(),
historyLimit: z.number().int().min(0).optional(),
enableReactions: z.boolean().optional(),
replyToMode: ReplyToModeSchema.optional(),
dm: z
.object({
enabled: z.boolean().optional(),

View File

@@ -14,7 +14,7 @@ import { formatAgentEnvelope } from "../auto-reply/envelope.js";
import { getReplyFromConfig } from "../auto-reply/reply.js";
import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
import type { ReplyPayload } from "../auto-reply/types.js";
import type { DiscordSlashCommandConfig } from "../config/config.js";
import type { DiscordSlashCommandConfig, ReplyToMode } from "../config/config.js";
import { loadConfig } from "../config/config.js";
import { resolveStorePath, updateLastRoute } from "../config/sessions.js";
import { danger, isVerbose, logVerbose, warn } from "../globals.js";
@@ -32,6 +32,7 @@ export type MonitorDiscordOpts = {
slashCommand?: DiscordSlashCommandConfig;
mediaMaxMb?: number;
historyLimit?: number;
replyToMode?: ReplyToMode;
};
type DiscordMediaInfo = {
@@ -117,6 +118,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
0,
opts.historyLimit ?? cfg.discord?.historyLimit ?? 20,
);
const replyToMode = opts.replyToMode ?? cfg.discord?.replyToMode ?? "off";
const dmEnabled = dmConfig?.enabled ?? true;
const groupDmEnabled = dmConfig?.groupEnabled ?? false;
const groupDmChannels = dmConfig?.groupChannels;
@@ -417,6 +419,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
target: ctxPayload.To,
token,
runtime,
replyToMode,
});
if (isVerbose()) {
logVerbose(
@@ -984,20 +987,34 @@ async function deliverReplies({
target,
token,
runtime,
replyToMode,
}: {
replies: ReplyPayload[];
target: string;
token: string;
runtime: RuntimeEnv;
replyToMode: ReplyToMode;
}) {
let hasReplied = false;
for (const payload of replies) {
const mediaList =
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
const text = payload.text ?? "";
const replyToId =
replyToMode === "off" ? undefined : payload.replyToId?.trim();
if (!text && mediaList.length === 0) continue;
if (mediaList.length === 0) {
for (const chunk of chunkText(text, 2000)) {
await sendMessageDiscord(target, chunk, { token });
await sendMessageDiscord(target, chunk, {
token,
replyTo:
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined,
});
if (replyToId && !hasReplied) {
hasReplied = true;
}
}
} else {
let first = true;
@@ -1007,7 +1024,14 @@ async function deliverReplies({
await sendMessageDiscord(target, caption, {
token,
mediaUrl,
replyTo:
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined,
});
if (replyToId && !hasReplied) {
hasReplied = true;
}
}
}
runtime.log?.(`delivered reply to ${target}`);

View File

@@ -82,4 +82,37 @@ describe("sendMessageDiscord", () => {
}),
);
});
it("includes message_reference when replying", async () => {
const { rest, postMock } = makeRest();
postMock.mockResolvedValue({ id: "msg1", channel_id: "789" });
await sendMessageDiscord("channel:789", "hello", {
rest,
token: "t",
replyTo: "orig-123",
});
const body = postMock.mock.calls[0]?.[1]?.body;
expect(body?.message_reference).toEqual({
message_id: "orig-123",
fail_if_not_exists: false,
});
});
it("replies only on the first chunk", async () => {
const { rest, postMock } = makeRest();
postMock.mockResolvedValue({ id: "msg1", channel_id: "789" });
await sendMessageDiscord("channel:789", "a".repeat(2001), {
rest,
token: "t",
replyTo: "orig-123",
});
expect(postMock).toHaveBeenCalledTimes(2);
const firstBody = postMock.mock.calls[0]?.[1]?.body;
const secondBody = postMock.mock.calls[1]?.[1]?.body;
expect(firstBody?.message_reference).toEqual({
message_id: "orig-123",
fail_if_not_exists: false,
});
expect(secondBody?.message_reference).toBeUndefined();
});
});

View File

@@ -22,6 +22,7 @@ type DiscordSendOpts = {
mediaUrl?: string;
verbose?: boolean;
rest?: REST;
replyTo?: string;
};
export type DiscordSendResult = {
@@ -105,22 +106,35 @@ async function resolveChannelId(
return { channelId: dmChannel.id, dm: true };
}
async function sendDiscordText(rest: REST, channelId: string, text: string) {
async function sendDiscordText(
rest: REST,
channelId: string,
text: string,
replyTo?: string,
) {
if (!text.trim()) {
throw new Error("Message must be non-empty for Discord sends");
}
const messageReference = replyTo
? { message_id: replyTo, fail_if_not_exists: false }
: undefined;
if (text.length <= DISCORD_TEXT_LIMIT) {
const res = (await rest.post(Routes.channelMessages(channelId), {
body: { content: text },
body: { content: text, message_reference: messageReference },
})) as { id: string; channel_id: string };
return res;
}
const chunks = chunkText(text, DISCORD_TEXT_LIMIT);
let last: { id: string; channel_id: string } | null = null;
let isFirst = true;
for (const chunk of chunks) {
last = (await rest.post(Routes.channelMessages(channelId), {
body: { content: chunk },
body: {
content: chunk,
message_reference: isFirst ? messageReference : undefined,
},
})) as { id: string; channel_id: string };
isFirst = false;
}
if (!last) {
throw new Error("Discord send failed (empty chunk result)");
@@ -133,13 +147,18 @@ async function sendDiscordMedia(
channelId: string,
text: string,
mediaUrl: string,
replyTo?: string,
) {
const media = await loadWebMedia(mediaUrl);
const caption =
text.length > DISCORD_TEXT_LIMIT ? text.slice(0, DISCORD_TEXT_LIMIT) : text;
const messageReference = replyTo
? { message_id: replyTo, fail_if_not_exists: false }
: undefined;
const res = (await rest.post(Routes.channelMessages(channelId), {
body: {
content: caption || undefined,
message_reference: messageReference,
},
files: [
{
@@ -171,9 +190,15 @@ export async function sendMessageDiscord(
| { id: string | null; channel_id: string };
if (opts.mediaUrl) {
result = await sendDiscordMedia(rest, channelId, text, opts.mediaUrl);
result = await sendDiscordMedia(
rest,
channelId,
text,
opts.mediaUrl,
opts.replyTo,
);
} else {
result = await sendDiscordText(rest, channelId, text);
result = await sendDiscordText(rest, channelId, text, opts.replyTo);
}
return {

View File

@@ -152,7 +152,7 @@ describe("createTelegramBot", () => {
expect(replySpy).toHaveBeenCalledTimes(1);
const payload = replySpy.mock.calls[0][0];
expect(payload.Body).toContain("[Replying to Ada]");
expect(payload.Body).toContain("[Replying to Ada id:9001]");
expect(payload.Body).toContain("Can you summarize this?");
expect(payload.ReplyToId).toBe("9001");
expect(payload.ReplyToBody).toBe("Can you summarize this?");

View File

@@ -9,6 +9,7 @@ import { chunkText } from "../auto-reply/chunk.js";
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
import { getReplyFromConfig } from "../auto-reply/reply.js";
import type { ReplyPayload } from "../auto-reply/types.js";
import type { ReplyToMode } from "../config/config.js";
import { loadConfig } from "../config/config.js";
import { resolveStorePath, updateLastRoute } from "../config/sessions.js";
import { danger, isVerbose, logVerbose } from "../globals.js";
@@ -39,6 +40,7 @@ export type TelegramBotOptions = {
requireMention?: boolean;
allowFrom?: Array<string | number>;
mediaMaxMb?: number;
replyToMode?: ReplyToMode;
proxyFetch?: typeof fetch;
};
@@ -59,6 +61,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
const cfg = loadConfig();
const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom;
const replyToMode = opts.replyToMode ?? cfg.telegram?.replyToMode ?? "off";
const mediaMaxBytes =
(opts.mediaMaxMb ?? cfg.telegram?.mediaMaxMb ?? 5) * 1024 * 1024;
const logger = getChildLogger({ module: "telegram-auto-reply" });
@@ -137,7 +140,9 @@ export function createTelegramBot(opts: TelegramBotOptions) {
).trim();
if (!rawBody) return;
const replySuffix = replyTarget
? `\n\n[Replying to ${replyTarget.sender}]\n${replyTarget.body}\n[/Replying]`
? `\n\n[Replying to ${replyTarget.sender}${
replyTarget.id ? ` id:${replyTarget.id}` : ""
}]\n${replyTarget.body}\n[/Replying]`
: "";
const body = formatAgentEnvelope({
surface: "Telegram",
@@ -211,6 +216,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
token: opts.token,
runtime,
bot,
replyToMode,
});
} catch (err) {
runtime.error?.(danger(`handler failed: ${String(err)}`));
@@ -233,13 +239,17 @@ async function deliverReplies(params: {
token: string;
runtime: RuntimeEnv;
bot: Bot;
replyToMode: ReplyToMode;
}) {
const { replies, chatId, runtime, bot } = params;
const { replies, chatId, runtime, bot, replyToMode } = params;
let hasReplied = false;
for (const reply of replies) {
if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) {
runtime.error?.(danger("reply missing text/media"));
continue;
}
const replyToId =
replyToMode === "off" ? undefined : resolveTelegramReplyId(reply.replyToId);
const mediaList = reply.mediaUrls?.length
? reply.mediaUrls
: reply.mediaUrl
@@ -247,7 +257,15 @@ async function deliverReplies(params: {
: [];
if (mediaList.length === 0) {
for (const chunk of chunkText(reply.text || "", 4000)) {
await sendTelegramText(bot, chatId, chunk, runtime);
await sendTelegramText(bot, chatId, chunk, runtime, {
replyToMessageId:
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined,
});
if (replyToId && !hasReplied) {
hasReplied = true;
}
}
continue;
}
@@ -259,14 +277,33 @@ async function deliverReplies(params: {
const file = new InputFile(media.buffer, media.fileName ?? "file");
const caption = first ? (reply.text ?? undefined) : undefined;
first = false;
const replyToMessageId =
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined;
if (kind === "image") {
await bot.api.sendPhoto(chatId, file, { caption });
await bot.api.sendPhoto(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
} else if (kind === "video") {
await bot.api.sendVideo(chatId, file, { caption });
await bot.api.sendVideo(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
} else if (kind === "audio") {
await bot.api.sendAudio(chatId, file, { caption });
await bot.api.sendAudio(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
} else {
await bot.api.sendDocument(chatId, file, { caption });
await bot.api.sendDocument(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
}
if (replyToId && !hasReplied) {
hasReplied = true;
}
}
}
@@ -315,6 +352,13 @@ function hasBotMention(msg: TelegramMessage, botUsername: string) {
return false;
}
function resolveTelegramReplyId(raw?: string): number | undefined {
if (!raw) return undefined;
const parsed = Number(raw);
if (!Number.isFinite(parsed)) return undefined;
return parsed;
}
async function resolveMedia(
ctx: TelegramContext,
maxBytes: number,
@@ -363,10 +407,12 @@ async function sendTelegramText(
chatId: string,
text: string,
runtime: RuntimeEnv,
opts?: { replyToMessageId?: number },
): Promise<number | undefined> {
try {
const res = await bot.api.sendMessage(chatId, text, {
parse_mode: "Markdown",
reply_to_message_id: opts?.replyToMessageId,
});
return res.message_id;
} catch (err) {
@@ -375,7 +421,9 @@ async function sendTelegramText(
runtime.log?.(
`telegram markdown parse failed; retrying without formatting: ${errText}`,
);
const res = await bot.api.sendMessage(chatId, text, {});
const res = await bot.api.sendMessage(chatId, text, {
reply_to_message_id: opts?.replyToMessageId,
});
return res.message_id;
}
throw err;

View File

@@ -1417,7 +1417,7 @@ describe("web auto-reply", () => {
expect(callArg.ReplyToId).toBe("q1");
expect(callArg.ReplyToBody).toBe("original");
expect(callArg.ReplyToSender).toBe("+1999");
expect(callArg.Body).toContain("[Replying to +1999]");
expect(callArg.Body).toContain("[Replying to +1999 id:q1]");
expect(callArg.Body).toContain("original");
});

View File

@@ -921,7 +921,8 @@ export async function monitorWebProvider(
const formatReplyContext = (msg: WebInboundMsg) => {
if (!msg.replyToBody) return null;
const sender = msg.replyToSender ?? "unknown sender";
return `[Replying to ${sender}]\n${msg.replyToBody}\n[/Replying]`;
const idPart = msg.replyToId ? ` id:${msg.replyToId}` : "";
return `[Replying to ${sender}${idPart}]\n${msg.replyToBody}\n[/Replying]`;
};
const buildLine = (msg: WebInboundMsg) => {