diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6fb41e6da..4b1bada54 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@
 - Config: remove `routing.groupChat.requireMention` + `telegram.requireMention`; use `whatsapp.groups`, `imessage.groups`, and `telegram.groups` defaults instead (run `clawdis doctor` to migrate).
 
 ### Features
+- Discord/Telegram: add reply tags (`[[reply_to_current]]`, `[[reply_to:<id>]]`) with per-provider `replyToMode` (off|first|all) for native threaded replies.
 - Talk mode: continuous speech conversations (macOS/iOS/Android) with ElevenLabs TTS, reply directives, and optional interrupt-on-speech.
 - UI: add optional `ui.seamColor` accent to tint the Talk Mode side bubble (macOS/iOS/Android).
 - Nix mode: opt-in declarative config + read-only settings UI when `CLAWDIS_NIX_MODE=1` (thanks @joshp123 for the persistence — earned my trust; I'll merge these going forward).
diff --git a/docs/configuration.md b/docs/configuration.md
index be6c2cf1c..9e08617eb 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -169,6 +169,7 @@ Set `telegram.enabled: false` to disable automatic startup.
   telegram: {
     enabled: true,
     botToken: "your-bot-token",
+    replyToMode: "off",
     groups: {
       "*": { requireMention: true },
       "123456789": { requireMention: false } // group chat id
@@ -183,6 +184,7 @@ Set `telegram.enabled: false` to disable automatic startup.
 }
 ```
 Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention` → `telegram.groups."*".requireMention` → default `true`.
+Reply threading is controlled via `telegram.replyToMode` (`off` | `first` | `all`) and reply tags in the model output.
 
 ### `discord` (bot transport)
 
@@ -195,6 +197,7 @@ Configure the Discord bot by setting the bot token and optional gating:
     token: "your-bot-token",
     mediaMaxMb: 8,                          // clamp inbound media size
     enableReactions: true,                  // allow agent-triggered reactions
+    replyToMode: "off",                     // off | first | all
     slashCommand: {                         // user-installed app slash commands
       enabled: true,
       name: "clawd",
@@ -225,6 +228,7 @@ Configure the Discord bot by setting the bot token and optional gating:
 ```
 
 Clawdis starts Discord only when a `discord` config section exists. The token is resolved from `DISCORD_BOT_TOKEN` or `discord.token` (unless `discord.enabled` is `false`). Use `user:<id>` (DM) or `channel:<id>` (guild channel) when specifying delivery targets for cron/CLI commands.
+Reply threading is controlled via `discord.replyToMode` (`off` | `first` | `all`) and reply tags in the model output.
 Guild slugs are lowercase with spaces replaced by `-`; channel keys use the slugged channel name (no leading `#`). Prefer guild ids as keys to avoid rename ambiguity.
 Use `discord.guilds."*"` for default per-guild settings.
 
diff --git a/docs/discord.md b/docs/discord.md
index 9c6a4b139..2d75bb584 100644
--- a/docs/discord.md
+++ b/docs/discord.md
@@ -40,6 +40,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
 - File uploads supported up to the configured `discord.mediaMaxMb` (default 8 MB).
 - Mention-gated guild replies by default to avoid noisy bots.
 - Reply context is injected when a message references another message (quoted content + ids).
+- Native reply threading is **off by default**; enable with `discord.replyToMode` and reply tags.
 
 ## Config
 
@@ -50,6 +51,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
     token: "abc.123",
     mediaMaxMb: 8,
     enableReactions: true,
+    replyToMode: "off",
     slashCommand: {
       enabled: true,
       name: "clawd",
@@ -92,6 +94,18 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
 - `mediaMaxMb`: clamp inbound media saved to disk.
 - `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20, `0` disables).
 - `enableReactions`: allow agent-triggered reactions via the `clawdis_discord` tool (default `true`).
+- `replyToMode`: `off` (default), `first`, or `all`. Applies only when the model includes a reply tag.
+
+## Reply tags
+To request a threaded reply, the model can include one tag in its output:
+- `[[reply_to_current]]` — reply to the triggering Discord message.
+- `[[reply_to:<id>]]` — reply to a specific message id from context/history.
+Current message ids are appended to prompts as `[message_id: …]`; history entries already include ids.
+
+Behavior is controlled by `discord.replyToMode`:
+- `off`: ignore tags.
+- `first`: only the first outbound chunk/attachment is a reply.
+- `all`: every outbound chunk/attachment is a reply.
 
 Allowlist matching notes:
 - `allowFrom`/`users`/`groupChannels` accept ids, names, tags, or mentions like `<@id>`.
diff --git a/docs/setup.md b/docs/setup.md
index 614d197b6..45164f24d 100644
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -113,5 +113,6 @@ pnpm clawdis health
 
 - `docs/gateway.md` (Gateway runbook; flags, supervision, ports)
 - `docs/configuration.md` (config schema + examples)
+- `docs/discord.md` and `docs/telegram.md` (reply tags + replyToMode settings)
 - `docs/clawd.md` (personal assistant setup)
 - `docs/clawdis-mac.md` (macOS app behavior; gateway lifecycle + “Attach only”)
diff --git a/docs/telegram.md b/docs/telegram.md
index fd6c3f018..035e1fb76 100644
--- a/docs/telegram.md
+++ b/docs/telegram.md
@@ -31,13 +31,13 @@ Status: ready for bot-mode use with grammY (long-polling by default; webhook sup
 - Sees only messages sent after it’s added to a chat; no pre-history access.
 - Cannot DM users first; they must initiate. Channels are receive-only unless the bot is an admin poster.
 - File size caps follow Telegram Bot API (up to 2 GB for documents; smaller for some media types).
-- Typing indicators (`sendChatAction`) supported; outbound replies are sent as native replies to the triggering message (threaded where Telegram allows).
+- Typing indicators (`sendChatAction`) supported; native replies are **off by default** and enabled via `telegram.replyToMode` + reply tags.
 
 ## Planned implementation details
 - Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits.
-- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block; groups require @bot mention by default (override per chat in config).
+- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block (includes `id:` when available); groups require @bot mention by default (override per chat in config).
 - Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort.
-- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
+- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
   - Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention` → `telegram.groups."*".requireMention` → default `true`.
 
 Example config:
@@ -46,6 +46,7 @@ Example config:
   telegram: {
     enabled: true,
     botToken: "123:abc",
+    replyToMode: "off",
     groups: {
       "*": { requireMention: true },
       "123456789": { requireMention: false } // group chat id
@@ -66,6 +67,17 @@ Example config:
 - Make the bot an admin if you need it to send in restricted groups or channels.
 - Mention the bot (`@yourbot`) or use commands to trigger; per-group overrides live in `telegram.groups` if you want always-on behavior.
 
+## Reply tags
+To request a threaded reply, the model can include one tag in its output:
+- `[[reply_to_current]]` — reply to the triggering Telegram message.
+- `[[reply_to:<id>]]` — reply to a specific message id from context.
+Current message ids are appended to prompts as `[message_id: …]`; reply context includes `id:` when available.
+
+Behavior is controlled by `telegram.replyToMode`:
+- `off`: ignore tags.
+- `first`: only the first outbound chunk/attachment is a reply.
+- `all`: every outbound chunk/attachment is a reply.
+
 ## Roadmap
 - ✅ Design and defaults (this doc)
 - ✅ grammY long-poll gateway + text/media send
diff --git a/docs/whatsapp.md b/docs/whatsapp.md
index 4f04fdf15..c4c09ab2f 100644
--- a/docs/whatsapp.md
+++ b/docs/whatsapp.md
@@ -54,7 +54,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
 - `Body` is the current message body with envelope.
 - Quoted reply context is **always appended**:
   ```
-  [Replying to +1555]
+  [Replying to +1555 id:ABC123]
   <quoted text or <media:...>>
   [/Replying]
   ```
@@ -81,8 +81,8 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
 - Group metadata cached 5 min (subject + participants).
 
 ## Reply delivery (threading)
-- Outbound replies are sent as **native replies** (quoted message).
-- Model does not need IDs for threading; gateway attaches quote.
+- WhatsApp Web sends standard messages (no quoted reply threading in the current gateway).
+- Reply tags are ignored on this surface.
 
 ## Outbound send (text + media)
 - Uses active web listener; error if gateway not running.
diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts
index 2af132927..1e880125b 100644
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -79,6 +79,7 @@ export type EmbeddedPiRunResult = {
     text?: string;
     mediaUrl?: string;
     mediaUrls?: string[];
+    replyToId?: string;
   }>;
   meta: EmbeddedPiRunMeta;
 };
diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts
index 796cb763b..facddb19f 100644
--- a/src/agents/system-prompt.ts
+++ b/src/agents/system-prompt.ts
@@ -82,6 +82,12 @@ export function buildAgentSystemPromptAppend(params: {
     "Never send streaming/partial replies to external messaging surfaces; only final replies should be delivered there.",
     "Clawdis handles message transport automatically; respond normally and your reply will be delivered to the current chat.",
     "",
+    "## Reply Tags",
+    "To request a native reply/quote on supported surfaces, include one tag in your reply:",
+    "- [[reply_to_current]] replies to the triggering message.",
+    "- [[reply_to:<id>]] replies to a specific message id when you have it.",
+    "Tags are stripped before sending; support depends on the current provider config.",
+    "",
   ];
 
   if (extraSystemPrompt) {
diff --git a/src/auto-reply/reply.directive.test.ts b/src/auto-reply/reply.directive.test.ts
index d5c339530..709a2fa8c 100644
--- a/src/auto-reply/reply.directive.test.ts
+++ b/src/auto-reply/reply.directive.test.ts
@@ -14,6 +14,7 @@ import {
 import { drainSystemEvents } from "../infra/system-events.js";
 import {
   extractQueueDirective,
+  extractReplyToTag,
   extractThinkDirective,
   extractVerboseDirective,
   getReplyFromConfig,
@@ -96,6 +97,90 @@ describe("directive parsing", () => {
     expect(res.cleaned).toBe("please now");
   });
 
+  it("extracts reply_to_current tag", () => {
+    const res = extractReplyToTag("ok [[reply_to_current]]", "msg-1");
+    expect(res.replyToId).toBe("msg-1");
+    expect(res.cleaned).toBe("ok");
+  });
+
+  it("extracts reply_to id tag", () => {
+    const res = extractReplyToTag("see [[reply_to:12345]] now", "msg-1");
+    expect(res.replyToId).toBe("12345");
+    expect(res.cleaned).toBe("see now");
+  });
+
+  it("strips reply tags and maps reply_to_current to MessageSid", async () => {
+    await withTempHome(async (home) => {
+      vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
+        payloads: [{ text: "hello [[reply_to_current]]" }],
+        meta: {
+          durationMs: 5,
+          agentMeta: { sessionId: "s", provider: "p", model: "m" },
+        },
+      });
+
+      const res = await getReplyFromConfig(
+        {
+          Body: "ping",
+          From: "+1004",
+          To: "+2000",
+          MessageSid: "msg-123",
+        },
+        {},
+        {
+          agent: {
+            model: "anthropic/claude-opus-4-5",
+            workspace: path.join(home, "clawd"),
+          },
+          whatsapp: { allowFrom: ["*"] },
+          session: { store: path.join(home, "sessions.json") },
+        },
+      );
+
+      const payload = Array.isArray(res) ? res[0] : res;
+      expect(payload?.text).toBe("hello");
+      expect(payload?.replyToId).toBe("msg-123");
+    });
+  });
+
+  it("prefers explicit reply_to id over reply_to_current", async () => {
+    await withTempHome(async (home) => {
+      vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
+        payloads: [
+          {
+            text: "hi [[reply_to_current]] [[reply_to:abc-456]]",
+          },
+        ],
+        meta: {
+          durationMs: 5,
+          agentMeta: { sessionId: "s", provider: "p", model: "m" },
+        },
+      });
+
+      const res = await getReplyFromConfig(
+        {
+          Body: "ping",
+          From: "+1004",
+          To: "+2000",
+          MessageSid: "msg-123",
+        },
+        {},
+        {
+          agent: {
+            model: "anthropic/claude-opus-4-5",
+            workspace: path.join(home, "clawd"),
+          },
+          whatsapp: { allowFrom: ["*"] },
+          session: { store: path.join(home, "sessions.json") },
+        },
+      );
+
+      const payload = Array.isArray(res) ? res[0] : res;
+      expect(payload?.text).toBe("hi");
+      expect(payload?.replyToId).toBe("abc-456");
+    });
+  });
+
   it("applies inline think and still runs agent content", async () => {
     await withTempHome(async (home) => {
       vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts
index 10cd9eda7..f6d8c5703 100644
--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@@ -164,6 +164,39 @@ export function extractQueueDirective(body?: string): {
   };
 }
 
+export function extractReplyToTag(
+  text?: string,
+  currentMessageId?: string,
+): {
+  cleaned: string;
+  replyToId?: string;
+  hasTag: boolean;
+} {
+  if (!text) return { cleaned: "", hasTag: false };
+  let cleaned = text;
+  let replyToId: string | undefined;
+  let hasTag = false;
+
+  const currentMatch = cleaned.match(/\[\[reply_to_current\]\]/i);
+  if (currentMatch) {
+    cleaned = cleaned.replace(/\[\[reply_to_current\]\]/gi, " ");
+    hasTag = true;
+    if (currentMessageId?.trim()) {
+      replyToId = currentMessageId.trim();
+    }
+  }
+
+  const idMatch = cleaned.match(/\[\[reply_to:([^\]\n]+)\]\]/i);
+  if (idMatch?.[1]) {
+    cleaned = cleaned.replace(/\[\[reply_to:[^\]\n]+\]\]/gi, " ");
+    replyToId = idMatch[1].trim();
+    hasTag = true;
+  }
+
+  cleaned = cleaned.replace(/\s+/g, " ").trim();
+  return { cleaned, replyToId, hasTag };
+}
+
 function isAbortTrigger(text?: string): boolean {
   if (!text) return false;
   const normalized = text.trim().toLowerCase();
@@ -1123,6 +1156,12 @@ export async function getReplyFromConfig(
       ABORT_MEMORY.set(abortKey, false);
     }
   }
+  const messageIdHint = sessionCtx.MessageSid?.trim()
+    ? `[message_id: ${sessionCtx.MessageSid.trim()}]`
+    : "";
+  if (messageIdHint) {
+    prefixedBodyBase = `${prefixedBodyBase}\n${messageIdHint}`;
+  }
 
   // Prepend queued system events (transitions only) and (for new main sessions) a provider snapshot.
   // Token efficiency: we filter out periodic/heartbeat noise and keep the lines compact.
@@ -1399,9 +1438,28 @@ export async function getReplyFromConfig(
           return [{ ...payload, text: stripped.text }];
         });
 
-    if (sanitizedPayloads.length === 0) return undefined;
+    const replyTaggedPayloads: ReplyPayload[] = sanitizedPayloads
+      .map((payload) => {
+        const { cleaned, replyToId } = extractReplyToTag(
+          payload.text,
+          sessionCtx.MessageSid,
+        );
+        return {
+          ...payload,
+          text: cleaned ? cleaned : undefined,
+          replyToId: replyToId ?? payload.replyToId,
+        };
+      })
+      .filter(
+        (payload) =>
+          payload.text ||
+          payload.mediaUrl ||
+          (payload.mediaUrls && payload.mediaUrls.length > 0),
+      );
 
-    const shouldSignalTyping = sanitizedPayloads.some((payload) => {
+    if (replyTaggedPayloads.length === 0) return undefined;
+
+    const shouldSignalTyping = replyTaggedPayloads.some((payload) => {
       const trimmed = payload.text?.trim();
       if (trimmed && trimmed !== SILENT_REPLY_TOKEN) return true;
       if (payload.mediaUrl) return true;
@@ -1456,7 +1514,7 @@ export async function getReplyFromConfig(
     }
 
     // If verbose is enabled and this is a new session, prepend a session hint.
-    let finalPayloads = sanitizedPayloads;
+    let finalPayloads = replyTaggedPayloads;
     if (resolvedVerboseLevel === "on" && isNewSession) {
       finalPayloads = [
         { text: `🧭 New session: ${sessionIdFinal}` },
diff --git a/src/auto-reply/types.ts b/src/auto-reply/types.ts
index 27ddace0b..0bfe335fd 100644
--- a/src/auto-reply/types.ts
+++ b/src/auto-reply/types.ts
@@ -9,4 +9,5 @@ export type ReplyPayload = {
   text?: string;
   mediaUrl?: string;
   mediaUrls?: string[];
+  replyToId?: string;
 };
diff --git a/src/config/config.ts b/src/config/config.ts
index f5508d497..ab5189748 100644
--- a/src/config/config.ts
+++ b/src/config/config.ts
@@ -18,6 +18,7 @@ export const isNixMode = process.env.CLAWDIS_NIX_MODE === "1";
 
 export type ReplyMode = "text" | "command";
 export type SessionScope = "per-sender" | "global";
+export type ReplyToMode = "off" | "first" | "all";
 
 export type SessionConfig = {
   scope?: SessionScope;
@@ -166,6 +167,8 @@ export type TelegramConfig = {
   botToken?: string;
   /** Path to file containing bot token (for secret managers like agenix) */
   tokenFile?: string;
+  /** Control reply threading when reply tags are present (off|first|all). */
+  replyToMode?: ReplyToMode;
   groups?: Record<
     string,
     {
@@ -222,6 +225,8 @@ export type DiscordConfig = {
   historyLimit?: number;
   /** Allow agent-triggered Discord reactions (default: true). */
   enableReactions?: boolean;
+  /** Control reply threading when reply tags are present (off|first|all). */
+  replyToMode?: ReplyToMode;
   slashCommand?: DiscordSlashCommandConfig;
   dm?: DiscordDmConfig;
   /** New per-guild config keyed by guild id or slug. */
@@ -650,6 +655,11 @@ const GroupChatSchema = z
   .optional();
 
 const QueueModeSchema = z.union([z.literal("queue"), z.literal("interrupt")]);
+const ReplyToModeSchema = z.union([
+  z.literal("off"),
+  z.literal("first"),
+  z.literal("all"),
+]);
 
 const QueueModeBySurfaceSchema = z
   .object({
@@ -962,6 +972,7 @@ const ClawdisSchema = z.object({
       enabled: z.boolean().optional(),
       botToken: z.string().optional(),
       tokenFile: z.string().optional(),
+      replyToMode: ReplyToModeSchema.optional(),
       groups: z
         .record(
           z.string(),
@@ -995,6 +1006,7 @@ const ClawdisSchema = z.object({
       mediaMaxMb: z.number().positive().optional(),
       historyLimit: z.number().int().min(0).optional(),
       enableReactions: z.boolean().optional(),
+      replyToMode: ReplyToModeSchema.optional(),
       dm: z
         .object({
           enabled: z.boolean().optional(),
diff --git a/src/discord/monitor.ts b/src/discord/monitor.ts
index c34c338ec..817d5a4ca 100644
--- a/src/discord/monitor.ts
+++ b/src/discord/monitor.ts
@@ -14,7 +14,7 @@ import { formatAgentEnvelope } from "../auto-reply/envelope.js";
 import { getReplyFromConfig } from "../auto-reply/reply.js";
 import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
 import type { ReplyPayload } from "../auto-reply/types.js";
-import type { DiscordSlashCommandConfig } from "../config/config.js";
+import type { DiscordSlashCommandConfig, ReplyToMode } from "../config/config.js";
 import { loadConfig } from "../config/config.js";
 import { resolveStorePath, updateLastRoute } from "../config/sessions.js";
 import { danger, isVerbose, logVerbose, warn } from "../globals.js";
@@ -32,6 +32,7 @@ export type MonitorDiscordOpts = {
   slashCommand?: DiscordSlashCommandConfig;
   mediaMaxMb?: number;
   historyLimit?: number;
+  replyToMode?: ReplyToMode;
 };
 
 type DiscordMediaInfo = {
@@ -117,6 +118,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
     0,
     opts.historyLimit ?? cfg.discord?.historyLimit ?? 20,
   );
+  const replyToMode = opts.replyToMode ?? cfg.discord?.replyToMode ?? "off";
   const dmEnabled = dmConfig?.enabled ?? true;
   const groupDmEnabled = dmConfig?.groupEnabled ?? false;
   const groupDmChannels = dmConfig?.groupChannels;
@@ -417,6 +419,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
         target: ctxPayload.To,
         token,
         runtime,
+        replyToMode,
       });
       if (isVerbose()) {
         logVerbose(
@@ -984,20 +987,34 @@ async function deliverReplies({
   target,
   token,
   runtime,
+  replyToMode,
 }: {
   replies: ReplyPayload[];
   target: string;
   token: string;
   runtime: RuntimeEnv;
+  replyToMode: ReplyToMode;
 }) {
+  let hasReplied = false;
   for (const payload of replies) {
     const mediaList =
       payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
     const text = payload.text ?? "";
+    const replyToId =
+      replyToMode === "off" ? undefined : payload.replyToId?.trim();
     if (!text && mediaList.length === 0) continue;
     if (mediaList.length === 0) {
       for (const chunk of chunkText(text, 2000)) {
-        await sendMessageDiscord(target, chunk, { token });
+        await sendMessageDiscord(target, chunk, {
+          token,
+          replyTo:
+            replyToId && (replyToMode === "all" || !hasReplied)
+              ? replyToId
+              : undefined,
+        });
+        if (replyToId && !hasReplied) {
+          hasReplied = true;
+        }
       }
     } else {
       let first = true;
@@ -1007,7 +1024,14 @@ async function deliverReplies({
         await sendMessageDiscord(target, caption, {
           token,
           mediaUrl,
+          replyTo:
+            replyToId && (replyToMode === "all" || !hasReplied)
+              ? replyToId
+              : undefined,
         });
+        if (replyToId && !hasReplied) {
+          hasReplied = true;
+        }
       }
     }
     runtime.log?.(`delivered reply to ${target}`);
diff --git a/src/discord/send.test.ts b/src/discord/send.test.ts
index 7de4dd0c4..9e469c198 100644
--- a/src/discord/send.test.ts
+++ b/src/discord/send.test.ts
@@ -82,4 +82,37 @@ describe("sendMessageDiscord", () => {
       }),
     );
   });
+
+  it("includes message_reference when replying", async () => {
+    const { rest, postMock } = makeRest();
+    postMock.mockResolvedValue({ id: "msg1", channel_id: "789" });
+    await sendMessageDiscord("channel:789", "hello", {
+      rest,
+      token: "t",
+      replyTo: "orig-123",
+    });
+    const body = postMock.mock.calls[0]?.[1]?.body;
+    expect(body?.message_reference).toEqual({
+      message_id: "orig-123",
+      fail_if_not_exists: false,
+    });
+  });
+
+  it("replies only on the first chunk", async () => {
+    const { rest, postMock } = makeRest();
+    postMock.mockResolvedValue({ id: "msg1", channel_id: "789" });
+    await sendMessageDiscord("channel:789", "a".repeat(2001), {
+      rest,
+      token: "t",
+      replyTo: "orig-123",
+    });
+    expect(postMock).toHaveBeenCalledTimes(2);
+    const firstBody = postMock.mock.calls[0]?.[1]?.body;
+    const secondBody = postMock.mock.calls[1]?.[1]?.body;
+    expect(firstBody?.message_reference).toEqual({
+      message_id: "orig-123",
+      fail_if_not_exists: false,
+    });
+    expect(secondBody?.message_reference).toBeUndefined();
+  });
 });
diff --git a/src/discord/send.ts b/src/discord/send.ts
index 1b92f5be9..9763dfc94 100644
--- a/src/discord/send.ts
+++ b/src/discord/send.ts
@@ -22,6 +22,7 @@ type DiscordSendOpts = {
   mediaUrl?: string;
   verbose?: boolean;
   rest?: REST;
+  replyTo?: string;
 };
 
 export type DiscordSendResult = {
@@ -105,22 +106,35 @@ async function resolveChannelId(
   return { channelId: dmChannel.id, dm: true };
 }
 
-async function sendDiscordText(rest: REST, channelId: string, text: string) {
+async function sendDiscordText(
+  rest: REST,
+  channelId: string,
+  text: string,
+  replyTo?: string,
+) {
   if (!text.trim()) {
     throw new Error("Message must be non-empty for Discord sends");
   }
+  const messageReference = replyTo
+    ? { message_id: replyTo, fail_if_not_exists: false }
+    : undefined;
   if (text.length <= DISCORD_TEXT_LIMIT) {
     const res = (await rest.post(Routes.channelMessages(channelId), {
-      body: { content: text },
+      body: { content: text, message_reference: messageReference },
     })) as { id: string; channel_id: string };
     return res;
   }
   const chunks = chunkText(text, DISCORD_TEXT_LIMIT);
   let last: { id: string; channel_id: string } | null = null;
+  let isFirst = true;
   for (const chunk of chunks) {
     last = (await rest.post(Routes.channelMessages(channelId), {
-      body: { content: chunk },
+      body: {
+        content: chunk,
+        message_reference: isFirst ? messageReference : undefined,
+      },
     })) as { id: string; channel_id: string };
+    isFirst = false;
   }
   if (!last) {
     throw new Error("Discord send failed (empty chunk result)");
@@ -133,13 +147,18 @@ async function sendDiscordMedia(
   channelId: string,
   text: string,
   mediaUrl: string,
+  replyTo?: string,
 ) {
   const media = await loadWebMedia(mediaUrl);
   const caption =
     text.length > DISCORD_TEXT_LIMIT ? text.slice(0, DISCORD_TEXT_LIMIT) : text;
+  const messageReference = replyTo
+    ? { message_id: replyTo, fail_if_not_exists: false }
+    : undefined;
   const res = (await rest.post(Routes.channelMessages(channelId), {
     body: {
       content: caption || undefined,
+      message_reference: messageReference,
     },
     files: [
       {
@@ -171,9 +190,15 @@ export async function sendMessageDiscord(
     | { id: string | null; channel_id: string };
 
   if (opts.mediaUrl) {
-    result = await sendDiscordMedia(rest, channelId, text, opts.mediaUrl);
+    result = await sendDiscordMedia(
+      rest,
+      channelId,
+      text,
+      opts.mediaUrl,
+      opts.replyTo,
+    );
   } else {
-    result = await sendDiscordText(rest, channelId, text);
+    result = await sendDiscordText(rest, channelId, text, opts.replyTo);
   }
 
   return {
diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts
index a6cade3e4..383fd9386 100644
--- a/src/telegram/bot.test.ts
+++ b/src/telegram/bot.test.ts
@@ -152,7 +152,7 @@ describe("createTelegramBot", () => {
 
     expect(replySpy).toHaveBeenCalledTimes(1);
     const payload = replySpy.mock.calls[0][0];
-    expect(payload.Body).toContain("[Replying to Ada]");
+    expect(payload.Body).toContain("[Replying to Ada id:9001]");
     expect(payload.Body).toContain("Can you summarize this?");
     expect(payload.ReplyToId).toBe("9001");
     expect(payload.ReplyToBody).toBe("Can you summarize this?");
diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts
index 341401c14..d5f3eb335 100644
--- a/src/telegram/bot.ts
+++ b/src/telegram/bot.ts
@@ -9,6 +9,7 @@ import { chunkText } from "../auto-reply/chunk.js";
 import { formatAgentEnvelope } from "../auto-reply/envelope.js";
 import { getReplyFromConfig } from "../auto-reply/reply.js";
 import type { ReplyPayload } from "../auto-reply/types.js";
+import type { ReplyToMode } from "../config/config.js";
 import { loadConfig } from "../config/config.js";
 import { resolveStorePath, updateLastRoute } from "../config/sessions.js";
 import { danger, isVerbose, logVerbose } from "../globals.js";
@@ -39,6 +40,7 @@ export type TelegramBotOptions = {
   requireMention?: boolean;
   allowFrom?: Array<string | number>;
   mediaMaxMb?: number;
+  replyToMode?: ReplyToMode;
   proxyFetch?: typeof fetch;
 };
 
@@ -59,6 +61,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
 
   const cfg = loadConfig();
   const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom;
+  const replyToMode = opts.replyToMode ?? cfg.telegram?.replyToMode ?? "off";
   const mediaMaxBytes =
     (opts.mediaMaxMb ?? cfg.telegram?.mediaMaxMb ?? 5) * 1024 * 1024;
   const logger = getChildLogger({ module: "telegram-auto-reply" });
@@ -137,7 +140,9 @@ export function createTelegramBot(opts: TelegramBotOptions) {
       ).trim();
       if (!rawBody) return;
       const replySuffix = replyTarget
-        ? `\n\n[Replying to ${replyTarget.sender}]\n${replyTarget.body}\n[/Replying]`
+        ? `\n\n[Replying to ${replyTarget.sender}${
+            replyTarget.id ? ` id:${replyTarget.id}` : ""
+          }]\n${replyTarget.body}\n[/Replying]`
         : "";
       const body = formatAgentEnvelope({
         surface: "Telegram",
@@ -211,6 +216,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
         token: opts.token,
         runtime,
         bot,
+        replyToMode,
       });
     } catch (err) {
       runtime.error?.(danger(`handler failed: ${String(err)}`));
@@ -233,13 +239,17 @@ async function deliverReplies(params: {
   token: string;
   runtime: RuntimeEnv;
   bot: Bot;
+  replyToMode: ReplyToMode;
 }) {
-  const { replies, chatId, runtime, bot } = params;
+  const { replies, chatId, runtime, bot, replyToMode } = params;
+  let hasReplied = false;
   for (const reply of replies) {
     if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) {
       runtime.error?.(danger("reply missing text/media"));
       continue;
     }
+    const replyToId =
+      replyToMode === "off" ? undefined : resolveTelegramReplyId(reply.replyToId);
     const mediaList = reply.mediaUrls?.length
       ? reply.mediaUrls
       : reply.mediaUrl
@@ -247,7 +257,15 @@ async function deliverReplies(params: {
         : [];
     if (mediaList.length === 0) {
       for (const chunk of chunkText(reply.text || "", 4000)) {
-        await sendTelegramText(bot, chatId, chunk, runtime);
+        await sendTelegramText(bot, chatId, chunk, runtime, {
+          replyToMessageId:
+            replyToId && (replyToMode === "all" || !hasReplied)
+              ? replyToId
+              : undefined,
+        });
+        if (replyToId && !hasReplied) {
+          hasReplied = true;
+        }
       }
       continue;
     }
@@ -259,14 +277,33 @@ async function deliverReplies(params: {
       const file = new InputFile(media.buffer, media.fileName ?? "file");
       const caption = first ? (reply.text ?? undefined) : undefined;
       first = false;
+      const replyToMessageId =
+        replyToId && (replyToMode === "all" || !hasReplied)
+          ? replyToId
+          : undefined;
       if (kind === "image") {
-        await bot.api.sendPhoto(chatId, file, { caption });
+        await bot.api.sendPhoto(chatId, file, {
+          caption,
+          reply_to_message_id: replyToMessageId,
+        });
       } else if (kind === "video") {
-        await bot.api.sendVideo(chatId, file, { caption });
+        await bot.api.sendVideo(chatId, file, {
+          caption,
+          reply_to_message_id: replyToMessageId,
+        });
       } else if (kind === "audio") {
-        await bot.api.sendAudio(chatId, file, { caption });
+        await bot.api.sendAudio(chatId, file, {
+          caption,
+          reply_to_message_id: replyToMessageId,
+        });
       } else {
-        await bot.api.sendDocument(chatId, file, { caption });
+        await bot.api.sendDocument(chatId, file, {
+          caption,
+          reply_to_message_id: replyToMessageId,
+        });
+      }
+      if (replyToId && !hasReplied) {
+        hasReplied = true;
       }
     }
   }
@@ -315,6 +352,13 @@ function hasBotMention(msg: TelegramMessage, botUsername: string) {
   return false;
 }
 
+function resolveTelegramReplyId(raw?: string): number | undefined {
+  if (!raw) return undefined;
+  const parsed = Number(raw);
+  if (!Number.isFinite(parsed)) return undefined;
+  return parsed;
+}
+
 async function resolveMedia(
   ctx: TelegramContext,
   maxBytes: number,
@@ -363,10 +407,12 @@ async function sendTelegramText(
   chatId: string,
   text: string,
   runtime: RuntimeEnv,
+  opts?: { replyToMessageId?: number },
 ): Promise<number | undefined> {
   try {
     const res = await bot.api.sendMessage(chatId, text, {
       parse_mode: "Markdown",
+      reply_to_message_id: opts?.replyToMessageId,
     });
     return res.message_id;
   } catch (err) {
@@ -375,7 +421,9 @@ async function sendTelegramText(
       runtime.log?.(
         `telegram markdown parse failed; retrying without formatting: ${errText}`,
       );
-      const res = await bot.api.sendMessage(chatId, text, {});
+      const res = await bot.api.sendMessage(chatId, text, {
+        reply_to_message_id: opts?.replyToMessageId,
+      });
       return res.message_id;
     }
     throw err;
diff --git a/src/web/auto-reply.test.ts b/src/web/auto-reply.test.ts
index bb61f81c3..7731da36a 100644
--- a/src/web/auto-reply.test.ts
+++ b/src/web/auto-reply.test.ts
@@ -1417,7 +1417,7 @@ describe("web auto-reply", () => {
     expect(callArg.ReplyToId).toBe("q1");
     expect(callArg.ReplyToBody).toBe("original");
     expect(callArg.ReplyToSender).toBe("+1999");
-    expect(callArg.Body).toContain("[Replying to +1999]");
+    expect(callArg.Body).toContain("[Replying to +1999 id:q1]");
     expect(callArg.Body).toContain("original");
   });
 
diff --git a/src/web/auto-reply.ts b/src/web/auto-reply.ts
index 37f6e2df2..236154621 100644
--- a/src/web/auto-reply.ts
+++ b/src/web/auto-reply.ts
@@ -921,7 +921,8 @@ export async function monitorWebProvider(
     const formatReplyContext = (msg: WebInboundMsg) => {
       if (!msg.replyToBody) return null;
       const sender = msg.replyToSender ?? "unknown sender";
-      return `[Replying to ${sender}]\n${msg.replyToBody}\n[/Replying]`;
+      const idPart = msg.replyToId ? ` id:${msg.replyToId}` : "";
+      return `[Replying to ${sender}${idPart}]\n${msg.replyToBody}\n[/Replying]`;
     };
 
     const buildLine = (msg: WebInboundMsg) => {