feat: add reply tags and replyToMode

This commit is contained in:
Peter Steinberger
2026-01-02 23:18:41 +01:00
parent a9ff03acaf
commit 2c92ccd66e
19 changed files with 353 additions and 27 deletions

View File

@@ -18,6 +18,7 @@
- Config: remove `routing.groupChat.requireMention` + `telegram.requireMention`; use `whatsapp.groups`, `imessage.groups`, and `telegram.groups` defaults instead (run `clawdis doctor` to migrate). - Config: remove `routing.groupChat.requireMention` + `telegram.requireMention`; use `whatsapp.groups`, `imessage.groups`, and `telegram.groups` defaults instead (run `clawdis doctor` to migrate).
### Features ### Features
- Discord/Telegram: add reply tags (`[[reply_to_current]]`, `[[reply_to:<id>]]`) with per-provider `replyToMode` (off|first|all) for native threaded replies.
- Talk mode: continuous speech conversations (macOS/iOS/Android) with ElevenLabs TTS, reply directives, and optional interrupt-on-speech. - Talk mode: continuous speech conversations (macOS/iOS/Android) with ElevenLabs TTS, reply directives, and optional interrupt-on-speech.
- UI: add optional `ui.seamColor` accent to tint the Talk Mode side bubble (macOS/iOS/Android). - UI: add optional `ui.seamColor` accent to tint the Talk Mode side bubble (macOS/iOS/Android).
- Nix mode: opt-in declarative config + read-only settings UI when `CLAWDIS_NIX_MODE=1` (thanks @joshp123 for the persistence — earned my trust; I'll merge these going forward). - Nix mode: opt-in declarative config + read-only settings UI when `CLAWDIS_NIX_MODE=1` (thanks @joshp123 for the persistence — earned my trust; I'll merge these going forward).

View File

@@ -169,6 +169,7 @@ Set `telegram.enabled: false` to disable automatic startup.
telegram: { telegram: {
enabled: true, enabled: true,
botToken: "your-bot-token", botToken: "your-bot-token",
replyToMode: "off",
groups: { groups: {
"*": { requireMention: true }, "*": { requireMention: true },
"123456789": { requireMention: false } // group chat id "123456789": { requireMention: false } // group chat id
@@ -183,6 +184,7 @@ Set `telegram.enabled: false` to disable automatic startup.
} }
``` ```
Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention``telegram.groups."*".requireMention` → default `true`. Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention``telegram.groups."*".requireMention` → default `true`.
Reply threading is controlled via `telegram.replyToMode` (`off` | `first` | `all`) and reply tags in the model output.
### `discord` (bot transport) ### `discord` (bot transport)
@@ -195,6 +197,7 @@ Configure the Discord bot by setting the bot token and optional gating:
token: "your-bot-token", token: "your-bot-token",
mediaMaxMb: 8, // clamp inbound media size mediaMaxMb: 8, // clamp inbound media size
enableReactions: true, // allow agent-triggered reactions enableReactions: true, // allow agent-triggered reactions
replyToMode: "off", // off | first | all
slashCommand: { // user-installed app slash commands slashCommand: { // user-installed app slash commands
enabled: true, enabled: true,
name: "clawd", name: "clawd",
@@ -225,6 +228,7 @@ Configure the Discord bot by setting the bot token and optional gating:
``` ```
Clawdis starts Discord only when a `discord` config section exists. The token is resolved from `DISCORD_BOT_TOKEN` or `discord.token` (unless `discord.enabled` is `false`). Use `user:<id>` (DM) or `channel:<id>` (guild channel) when specifying delivery targets for cron/CLI commands. Clawdis starts Discord only when a `discord` config section exists. The token is resolved from `DISCORD_BOT_TOKEN` or `discord.token` (unless `discord.enabled` is `false`). Use `user:<id>` (DM) or `channel:<id>` (guild channel) when specifying delivery targets for cron/CLI commands.
Reply threading is controlled via `discord.replyToMode` (`off` | `first` | `all`) and reply tags in the model output.
Guild slugs are lowercase with spaces replaced by `-`; channel keys use the slugged channel name (no leading `#`). Prefer guild ids as keys to avoid rename ambiguity. Guild slugs are lowercase with spaces replaced by `-`; channel keys use the slugged channel name (no leading `#`). Prefer guild ids as keys to avoid rename ambiguity.
Use `discord.guilds."*"` for default per-guild settings. Use `discord.guilds."*"` for default per-guild settings.

View File

@@ -40,6 +40,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
- File uploads supported up to the configured `discord.mediaMaxMb` (default 8 MB). - File uploads supported up to the configured `discord.mediaMaxMb` (default 8 MB).
- Mention-gated guild replies by default to avoid noisy bots. - Mention-gated guild replies by default to avoid noisy bots.
- Reply context is injected when a message references another message (quoted content + ids). - Reply context is injected when a message references another message (quoted content + ids).
- Native reply threading is **off by default**; enable with `discord.replyToMode` and reply tags.
## Config ## Config
@@ -50,6 +51,7 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
token: "abc.123", token: "abc.123",
mediaMaxMb: 8, mediaMaxMb: 8,
enableReactions: true, enableReactions: true,
replyToMode: "off",
slashCommand: { slashCommand: {
enabled: true, enabled: true,
name: "clawd", name: "clawd",
@@ -92,6 +94,18 @@ Note: Guild context `[from:]` lines include `author.tag` + `id` to make ping-rea
- `mediaMaxMb`: clamp inbound media saved to disk. - `mediaMaxMb`: clamp inbound media saved to disk.
- `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20, `0` disables). - `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20, `0` disables).
- `enableReactions`: allow agent-triggered reactions via the `clawdis_discord` tool (default `true`). - `enableReactions`: allow agent-triggered reactions via the `clawdis_discord` tool (default `true`).
- `replyToMode`: `off` (default), `first`, or `all`. Applies only when the model includes a reply tag.
## Reply tags
To request a threaded reply, the model can include one tag in its output:
- `[[reply_to_current]]` — reply to the triggering Discord message.
- `[[reply_to:<id>]]` — reply to a specific message id from context/history.
Current message ids are appended to prompts as `[message_id: …]`; history entries already include ids.
Behavior is controlled by `discord.replyToMode`:
- `off`: ignore tags.
- `first`: only the first outbound chunk/attachment is a reply.
- `all`: every outbound chunk/attachment is a reply.
Allowlist matching notes: Allowlist matching notes:
- `allowFrom`/`users`/`groupChannels` accept ids, names, tags, or mentions like `<@id>`. - `allowFrom`/`users`/`groupChannels` accept ids, names, tags, or mentions like `<@id>`.

View File

@@ -113,5 +113,6 @@ pnpm clawdis health
- `docs/gateway.md` (Gateway runbook; flags, supervision, ports) - `docs/gateway.md` (Gateway runbook; flags, supervision, ports)
- `docs/configuration.md` (config schema + examples) - `docs/configuration.md` (config schema + examples)
- `docs/discord.md` and `docs/telegram.md` (reply tags + replyToMode settings)
- `docs/clawd.md` (personal assistant setup) - `docs/clawd.md` (personal assistant setup)
- `docs/clawdis-mac.md` (macOS app behavior; gateway lifecycle + “Attach only”) - `docs/clawdis-mac.md` (macOS app behavior; gateway lifecycle + “Attach only”)

View File

@@ -31,13 +31,13 @@ Status: ready for bot-mode use with grammY (long-polling by default; webhook sup
- Sees only messages sent after its added to a chat; no pre-history access. - Sees only messages sent after its added to a chat; no pre-history access.
- Cannot DM users first; they must initiate. Channels are receive-only unless the bot is an admin poster. - Cannot DM users first; they must initiate. Channels are receive-only unless the bot is an admin poster.
- File size caps follow Telegram Bot API (up to 2 GB for documents; smaller for some media types). - File size caps follow Telegram Bot API (up to 2 GB for documents; smaller for some media types).
- Typing indicators (`sendChatAction`) supported; outbound replies are sent as native replies to the triggering message (threaded where Telegram allows). - Typing indicators (`sendChatAction`) supported; native replies are **off by default** and enabled via `telegram.replyToMode` + reply tags.
## Planned implementation details ## Planned implementation details
- Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits. - Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits.
- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block; groups require @bot mention by default (override per chat in config). - Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block (includes `id:` when available); groups require @bot mention by default (override per chat in config).
- Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort. - Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort.
- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported. - Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups`, `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
- Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention``telegram.groups."*".requireMention` → default `true`. - Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention``telegram.groups."*".requireMention` → default `true`.
Example config: Example config:
@@ -46,6 +46,7 @@ Example config:
telegram: { telegram: {
enabled: true, enabled: true,
botToken: "123:abc", botToken: "123:abc",
replyToMode: "off",
groups: { groups: {
"*": { requireMention: true }, "*": { requireMention: true },
"123456789": { requireMention: false } // group chat id "123456789": { requireMention: false } // group chat id
@@ -66,6 +67,17 @@ Example config:
- Make the bot an admin if you need it to send in restricted groups or channels. - Make the bot an admin if you need it to send in restricted groups or channels.
- Mention the bot (`@yourbot`) or use commands to trigger; per-group overrides live in `telegram.groups` if you want always-on behavior. - Mention the bot (`@yourbot`) or use commands to trigger; per-group overrides live in `telegram.groups` if you want always-on behavior.
## Reply tags
To request a threaded reply, the model can include one tag in its output:
- `[[reply_to_current]]` — reply to the triggering Telegram message.
- `[[reply_to:<id>]]` — reply to a specific message id from context.
Current message ids are appended to prompts as `[message_id: …]`; reply context includes `id:` when available.
Behavior is controlled by `telegram.replyToMode`:
- `off`: ignore tags.
- `first`: only the first outbound chunk/attachment is a reply.
- `all`: every outbound chunk/attachment is a reply.
## Roadmap ## Roadmap
- ✅ Design and defaults (this doc) - ✅ Design and defaults (this doc)
- ✅ grammY long-poll gateway + text/media send - ✅ grammY long-poll gateway + text/media send

View File

@@ -54,7 +54,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
- `Body` is the current message body with envelope. - `Body` is the current message body with envelope.
- Quoted reply context is **always appended**: - Quoted reply context is **always appended**:
``` ```
[Replying to +1555] [Replying to +1555 id:ABC123]
<quoted text or <media:...>> <quoted text or <media:...>>
[/Replying] [/Replying]
``` ```
@@ -81,8 +81,8 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number
- Group metadata cached 5 min (subject + participants). - Group metadata cached 5 min (subject + participants).
## Reply delivery (threading) ## Reply delivery (threading)
- Outbound replies are sent as **native replies** (quoted message). - WhatsApp Web sends standard messages (no quoted reply threading in the current gateway).
- Model does not need IDs for threading; gateway attaches quote. - Reply tags are ignored on this surface.
## Outbound send (text + media) ## Outbound send (text + media)
- Uses active web listener; error if gateway not running. - Uses active web listener; error if gateway not running.

View File

@@ -79,6 +79,7 @@ export type EmbeddedPiRunResult = {
text?: string; text?: string;
mediaUrl?: string; mediaUrl?: string;
mediaUrls?: string[]; mediaUrls?: string[];
replyToId?: string;
}>; }>;
meta: EmbeddedPiRunMeta; meta: EmbeddedPiRunMeta;
}; };

View File

@@ -82,6 +82,12 @@ export function buildAgentSystemPromptAppend(params: {
"Never send streaming/partial replies to external messaging surfaces; only final replies should be delivered there.", "Never send streaming/partial replies to external messaging surfaces; only final replies should be delivered there.",
"Clawdis handles message transport automatically; respond normally and your reply will be delivered to the current chat.", "Clawdis handles message transport automatically; respond normally and your reply will be delivered to the current chat.",
"", "",
"## Reply Tags",
"To request a native reply/quote on supported surfaces, include one tag in your reply:",
"- [[reply_to_current]] replies to the triggering message.",
"- [[reply_to:<id>]] replies to a specific message id when you have it.",
"Tags are stripped before sending; support depends on the current provider config.",
"",
]; ];
if (extraSystemPrompt) { if (extraSystemPrompt) {

View File

@@ -14,6 +14,7 @@ import {
import { drainSystemEvents } from "../infra/system-events.js"; import { drainSystemEvents } from "../infra/system-events.js";
import { import {
extractQueueDirective, extractQueueDirective,
extractReplyToTag,
extractThinkDirective, extractThinkDirective,
extractVerboseDirective, extractVerboseDirective,
getReplyFromConfig, getReplyFromConfig,
@@ -96,6 +97,90 @@ describe("directive parsing", () => {
expect(res.cleaned).toBe("please now"); expect(res.cleaned).toBe("please now");
}); });
it("extracts reply_to_current tag", () => {
const res = extractReplyToTag("ok [[reply_to_current]]", "msg-1");
expect(res.replyToId).toBe("msg-1");
expect(res.cleaned).toBe("ok");
});
it("extracts reply_to id tag", () => {
const res = extractReplyToTag("see [[reply_to:12345]] now", "msg-1");
expect(res.replyToId).toBe("12345");
expect(res.cleaned).toBe("see now");
});
it("strips reply tags and maps reply_to_current to MessageSid", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
payloads: [{ text: "hello [[reply_to_current]]" }],
meta: {
durationMs: 5,
agentMeta: { sessionId: "s", provider: "p", model: "m" },
},
});
const res = await getReplyFromConfig(
{
Body: "ping",
From: "+1004",
To: "+2000",
MessageSid: "msg-123",
},
{},
{
agent: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
},
whatsapp: { allowFrom: ["*"] },
session: { store: path.join(home, "sessions.json") },
},
);
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.text).toBe("hello");
expect(payload?.replyToId).toBe("msg-123");
});
});
it("prefers explicit reply_to id over reply_to_current", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
payloads: [
{
text: "hi [[reply_to_current]] [[reply_to:abc-456]]",
},
],
meta: {
durationMs: 5,
agentMeta: { sessionId: "s", provider: "p", model: "m" },
},
});
const res = await getReplyFromConfig(
{
Body: "ping",
From: "+1004",
To: "+2000",
MessageSid: "msg-123",
},
{},
{
agent: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
},
whatsapp: { allowFrom: ["*"] },
session: { store: path.join(home, "sessions.json") },
},
);
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.text).toBe("hi");
expect(payload?.replyToId).toBe("abc-456");
});
});
it("applies inline think and still runs agent content", async () => { it("applies inline think and still runs agent content", async () => {
await withTempHome(async (home) => { await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ vi.mocked(runEmbeddedPiAgent).mockResolvedValue({

View File

@@ -164,6 +164,39 @@ export function extractQueueDirective(body?: string): {
}; };
} }
export function extractReplyToTag(
text?: string,
currentMessageId?: string,
): {
cleaned: string;
replyToId?: string;
hasTag: boolean;
} {
if (!text) return { cleaned: "", hasTag: false };
let cleaned = text;
let replyToId: string | undefined;
let hasTag = false;
const currentMatch = cleaned.match(/\[\[reply_to_current\]\]/i);
if (currentMatch) {
cleaned = cleaned.replace(/\[\[reply_to_current\]\]/gi, " ");
hasTag = true;
if (currentMessageId?.trim()) {
replyToId = currentMessageId.trim();
}
}
const idMatch = cleaned.match(/\[\[reply_to:([^\]\n]+)\]\]/i);
if (idMatch?.[1]) {
cleaned = cleaned.replace(/\[\[reply_to:[^\]\n]+\]\]/gi, " ");
replyToId = idMatch[1].trim();
hasTag = true;
}
cleaned = cleaned.replace(/\s+/g, " ").trim();
return { cleaned, replyToId, hasTag };
}
function isAbortTrigger(text?: string): boolean { function isAbortTrigger(text?: string): boolean {
if (!text) return false; if (!text) return false;
const normalized = text.trim().toLowerCase(); const normalized = text.trim().toLowerCase();
@@ -1123,6 +1156,12 @@ export async function getReplyFromConfig(
ABORT_MEMORY.set(abortKey, false); ABORT_MEMORY.set(abortKey, false);
} }
} }
const messageIdHint = sessionCtx.MessageSid?.trim()
? `[message_id: ${sessionCtx.MessageSid.trim()}]`
: "";
if (messageIdHint) {
prefixedBodyBase = `${prefixedBodyBase}\n${messageIdHint}`;
}
// Prepend queued system events (transitions only) and (for new main sessions) a provider snapshot. // Prepend queued system events (transitions only) and (for new main sessions) a provider snapshot.
// Token efficiency: we filter out periodic/heartbeat noise and keep the lines compact. // Token efficiency: we filter out periodic/heartbeat noise and keep the lines compact.
@@ -1399,9 +1438,28 @@ export async function getReplyFromConfig(
return [{ ...payload, text: stripped.text }]; return [{ ...payload, text: stripped.text }];
}); });
if (sanitizedPayloads.length === 0) return undefined; const replyTaggedPayloads: ReplyPayload[] = sanitizedPayloads
.map((payload) => {
const { cleaned, replyToId } = extractReplyToTag(
payload.text,
sessionCtx.MessageSid,
);
return {
...payload,
text: cleaned ? cleaned : undefined,
replyToId: replyToId ?? payload.replyToId,
};
})
.filter(
(payload) =>
payload.text ||
payload.mediaUrl ||
(payload.mediaUrls && payload.mediaUrls.length > 0),
);
const shouldSignalTyping = sanitizedPayloads.some((payload) => { if (replyTaggedPayloads.length === 0) return undefined;
const shouldSignalTyping = replyTaggedPayloads.some((payload) => {
const trimmed = payload.text?.trim(); const trimmed = payload.text?.trim();
if (trimmed && trimmed !== SILENT_REPLY_TOKEN) return true; if (trimmed && trimmed !== SILENT_REPLY_TOKEN) return true;
if (payload.mediaUrl) return true; if (payload.mediaUrl) return true;
@@ -1456,7 +1514,7 @@ export async function getReplyFromConfig(
} }
// If verbose is enabled and this is a new session, prepend a session hint. // If verbose is enabled and this is a new session, prepend a session hint.
let finalPayloads = sanitizedPayloads; let finalPayloads = replyTaggedPayloads;
if (resolvedVerboseLevel === "on" && isNewSession) { if (resolvedVerboseLevel === "on" && isNewSession) {
finalPayloads = [ finalPayloads = [
{ text: `🧭 New session: ${sessionIdFinal}` }, { text: `🧭 New session: ${sessionIdFinal}` },

View File

@@ -9,4 +9,5 @@ export type ReplyPayload = {
text?: string; text?: string;
mediaUrl?: string; mediaUrl?: string;
mediaUrls?: string[]; mediaUrls?: string[];
replyToId?: string;
}; };

View File

@@ -18,6 +18,7 @@ export const isNixMode = process.env.CLAWDIS_NIX_MODE === "1";
export type ReplyMode = "text" | "command"; export type ReplyMode = "text" | "command";
export type SessionScope = "per-sender" | "global"; export type SessionScope = "per-sender" | "global";
export type ReplyToMode = "off" | "first" | "all";
export type SessionConfig = { export type SessionConfig = {
scope?: SessionScope; scope?: SessionScope;
@@ -166,6 +167,8 @@ export type TelegramConfig = {
botToken?: string; botToken?: string;
/** Path to file containing bot token (for secret managers like agenix) */ /** Path to file containing bot token (for secret managers like agenix) */
tokenFile?: string; tokenFile?: string;
/** Control reply threading when reply tags are present (off|first|all). */
replyToMode?: ReplyToMode;
groups?: Record< groups?: Record<
string, string,
{ {
@@ -222,6 +225,8 @@ export type DiscordConfig = {
historyLimit?: number; historyLimit?: number;
/** Allow agent-triggered Discord reactions (default: true). */ /** Allow agent-triggered Discord reactions (default: true). */
enableReactions?: boolean; enableReactions?: boolean;
/** Control reply threading when reply tags are present (off|first|all). */
replyToMode?: ReplyToMode;
slashCommand?: DiscordSlashCommandConfig; slashCommand?: DiscordSlashCommandConfig;
dm?: DiscordDmConfig; dm?: DiscordDmConfig;
/** New per-guild config keyed by guild id or slug. */ /** New per-guild config keyed by guild id or slug. */
@@ -650,6 +655,11 @@ const GroupChatSchema = z
.optional(); .optional();
const QueueModeSchema = z.union([z.literal("queue"), z.literal("interrupt")]); const QueueModeSchema = z.union([z.literal("queue"), z.literal("interrupt")]);
const ReplyToModeSchema = z.union([
z.literal("off"),
z.literal("first"),
z.literal("all"),
]);
const QueueModeBySurfaceSchema = z const QueueModeBySurfaceSchema = z
.object({ .object({
@@ -962,6 +972,7 @@ const ClawdisSchema = z.object({
enabled: z.boolean().optional(), enabled: z.boolean().optional(),
botToken: z.string().optional(), botToken: z.string().optional(),
tokenFile: z.string().optional(), tokenFile: z.string().optional(),
replyToMode: ReplyToModeSchema.optional(),
groups: z groups: z
.record( .record(
z.string(), z.string(),
@@ -995,6 +1006,7 @@ const ClawdisSchema = z.object({
mediaMaxMb: z.number().positive().optional(), mediaMaxMb: z.number().positive().optional(),
historyLimit: z.number().int().min(0).optional(), historyLimit: z.number().int().min(0).optional(),
enableReactions: z.boolean().optional(), enableReactions: z.boolean().optional(),
replyToMode: ReplyToModeSchema.optional(),
dm: z dm: z
.object({ .object({
enabled: z.boolean().optional(), enabled: z.boolean().optional(),

View File

@@ -14,7 +14,7 @@ import { formatAgentEnvelope } from "../auto-reply/envelope.js";
import { getReplyFromConfig } from "../auto-reply/reply.js"; import { getReplyFromConfig } from "../auto-reply/reply.js";
import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js"; import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
import type { ReplyPayload } from "../auto-reply/types.js"; import type { ReplyPayload } from "../auto-reply/types.js";
import type { DiscordSlashCommandConfig } from "../config/config.js"; import type { DiscordSlashCommandConfig, ReplyToMode } from "../config/config.js";
import { loadConfig } from "../config/config.js"; import { loadConfig } from "../config/config.js";
import { resolveStorePath, updateLastRoute } from "../config/sessions.js"; import { resolveStorePath, updateLastRoute } from "../config/sessions.js";
import { danger, isVerbose, logVerbose, warn } from "../globals.js"; import { danger, isVerbose, logVerbose, warn } from "../globals.js";
@@ -32,6 +32,7 @@ export type MonitorDiscordOpts = {
slashCommand?: DiscordSlashCommandConfig; slashCommand?: DiscordSlashCommandConfig;
mediaMaxMb?: number; mediaMaxMb?: number;
historyLimit?: number; historyLimit?: number;
replyToMode?: ReplyToMode;
}; };
type DiscordMediaInfo = { type DiscordMediaInfo = {
@@ -117,6 +118,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
0, 0,
opts.historyLimit ?? cfg.discord?.historyLimit ?? 20, opts.historyLimit ?? cfg.discord?.historyLimit ?? 20,
); );
const replyToMode = opts.replyToMode ?? cfg.discord?.replyToMode ?? "off";
const dmEnabled = dmConfig?.enabled ?? true; const dmEnabled = dmConfig?.enabled ?? true;
const groupDmEnabled = dmConfig?.groupEnabled ?? false; const groupDmEnabled = dmConfig?.groupEnabled ?? false;
const groupDmChannels = dmConfig?.groupChannels; const groupDmChannels = dmConfig?.groupChannels;
@@ -417,6 +419,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
target: ctxPayload.To, target: ctxPayload.To,
token, token,
runtime, runtime,
replyToMode,
}); });
if (isVerbose()) { if (isVerbose()) {
logVerbose( logVerbose(
@@ -984,20 +987,34 @@ async function deliverReplies({
target, target,
token, token,
runtime, runtime,
replyToMode,
}: { }: {
replies: ReplyPayload[]; replies: ReplyPayload[];
target: string; target: string;
token: string; token: string;
runtime: RuntimeEnv; runtime: RuntimeEnv;
replyToMode: ReplyToMode;
}) { }) {
let hasReplied = false;
for (const payload of replies) { for (const payload of replies) {
const mediaList = const mediaList =
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []); payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
const text = payload.text ?? ""; const text = payload.text ?? "";
const replyToId =
replyToMode === "off" ? undefined : payload.replyToId?.trim();
if (!text && mediaList.length === 0) continue; if (!text && mediaList.length === 0) continue;
if (mediaList.length === 0) { if (mediaList.length === 0) {
for (const chunk of chunkText(text, 2000)) { for (const chunk of chunkText(text, 2000)) {
await sendMessageDiscord(target, chunk, { token }); await sendMessageDiscord(target, chunk, {
token,
replyTo:
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined,
});
if (replyToId && !hasReplied) {
hasReplied = true;
}
} }
} else { } else {
let first = true; let first = true;
@@ -1007,7 +1024,14 @@ async function deliverReplies({
await sendMessageDiscord(target, caption, { await sendMessageDiscord(target, caption, {
token, token,
mediaUrl, mediaUrl,
replyTo:
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined,
}); });
if (replyToId && !hasReplied) {
hasReplied = true;
}
} }
} }
runtime.log?.(`delivered reply to ${target}`); runtime.log?.(`delivered reply to ${target}`);

View File

@@ -82,4 +82,37 @@ describe("sendMessageDiscord", () => {
}), }),
); );
}); });
it("includes message_reference when replying", async () => {
const { rest, postMock } = makeRest();
postMock.mockResolvedValue({ id: "msg1", channel_id: "789" });
await sendMessageDiscord("channel:789", "hello", {
rest,
token: "t",
replyTo: "orig-123",
});
const body = postMock.mock.calls[0]?.[1]?.body;
expect(body?.message_reference).toEqual({
message_id: "orig-123",
fail_if_not_exists: false,
});
});
it("replies only on the first chunk", async () => {
const { rest, postMock } = makeRest();
postMock.mockResolvedValue({ id: "msg1", channel_id: "789" });
await sendMessageDiscord("channel:789", "a".repeat(2001), {
rest,
token: "t",
replyTo: "orig-123",
});
expect(postMock).toHaveBeenCalledTimes(2);
const firstBody = postMock.mock.calls[0]?.[1]?.body;
const secondBody = postMock.mock.calls[1]?.[1]?.body;
expect(firstBody?.message_reference).toEqual({
message_id: "orig-123",
fail_if_not_exists: false,
});
expect(secondBody?.message_reference).toBeUndefined();
});
}); });

View File

@@ -22,6 +22,7 @@ type DiscordSendOpts = {
mediaUrl?: string; mediaUrl?: string;
verbose?: boolean; verbose?: boolean;
rest?: REST; rest?: REST;
replyTo?: string;
}; };
export type DiscordSendResult = { export type DiscordSendResult = {
@@ -105,22 +106,35 @@ async function resolveChannelId(
return { channelId: dmChannel.id, dm: true }; return { channelId: dmChannel.id, dm: true };
} }
async function sendDiscordText(rest: REST, channelId: string, text: string) { async function sendDiscordText(
rest: REST,
channelId: string,
text: string,
replyTo?: string,
) {
if (!text.trim()) { if (!text.trim()) {
throw new Error("Message must be non-empty for Discord sends"); throw new Error("Message must be non-empty for Discord sends");
} }
const messageReference = replyTo
? { message_id: replyTo, fail_if_not_exists: false }
: undefined;
if (text.length <= DISCORD_TEXT_LIMIT) { if (text.length <= DISCORD_TEXT_LIMIT) {
const res = (await rest.post(Routes.channelMessages(channelId), { const res = (await rest.post(Routes.channelMessages(channelId), {
body: { content: text }, body: { content: text, message_reference: messageReference },
})) as { id: string; channel_id: string }; })) as { id: string; channel_id: string };
return res; return res;
} }
const chunks = chunkText(text, DISCORD_TEXT_LIMIT); const chunks = chunkText(text, DISCORD_TEXT_LIMIT);
let last: { id: string; channel_id: string } | null = null; let last: { id: string; channel_id: string } | null = null;
let isFirst = true;
for (const chunk of chunks) { for (const chunk of chunks) {
last = (await rest.post(Routes.channelMessages(channelId), { last = (await rest.post(Routes.channelMessages(channelId), {
body: { content: chunk }, body: {
content: chunk,
message_reference: isFirst ? messageReference : undefined,
},
})) as { id: string; channel_id: string }; })) as { id: string; channel_id: string };
isFirst = false;
} }
if (!last) { if (!last) {
throw new Error("Discord send failed (empty chunk result)"); throw new Error("Discord send failed (empty chunk result)");
@@ -133,13 +147,18 @@ async function sendDiscordMedia(
channelId: string, channelId: string,
text: string, text: string,
mediaUrl: string, mediaUrl: string,
replyTo?: string,
) { ) {
const media = await loadWebMedia(mediaUrl); const media = await loadWebMedia(mediaUrl);
const caption = const caption =
text.length > DISCORD_TEXT_LIMIT ? text.slice(0, DISCORD_TEXT_LIMIT) : text; text.length > DISCORD_TEXT_LIMIT ? text.slice(0, DISCORD_TEXT_LIMIT) : text;
const messageReference = replyTo
? { message_id: replyTo, fail_if_not_exists: false }
: undefined;
const res = (await rest.post(Routes.channelMessages(channelId), { const res = (await rest.post(Routes.channelMessages(channelId), {
body: { body: {
content: caption || undefined, content: caption || undefined,
message_reference: messageReference,
}, },
files: [ files: [
{ {
@@ -171,9 +190,15 @@ export async function sendMessageDiscord(
| { id: string | null; channel_id: string }; | { id: string | null; channel_id: string };
if (opts.mediaUrl) { if (opts.mediaUrl) {
result = await sendDiscordMedia(rest, channelId, text, opts.mediaUrl); result = await sendDiscordMedia(
rest,
channelId,
text,
opts.mediaUrl,
opts.replyTo,
);
} else { } else {
result = await sendDiscordText(rest, channelId, text); result = await sendDiscordText(rest, channelId, text, opts.replyTo);
} }
return { return {

View File

@@ -152,7 +152,7 @@ describe("createTelegramBot", () => {
expect(replySpy).toHaveBeenCalledTimes(1); expect(replySpy).toHaveBeenCalledTimes(1);
const payload = replySpy.mock.calls[0][0]; const payload = replySpy.mock.calls[0][0];
expect(payload.Body).toContain("[Replying to Ada]"); expect(payload.Body).toContain("[Replying to Ada id:9001]");
expect(payload.Body).toContain("Can you summarize this?"); expect(payload.Body).toContain("Can you summarize this?");
expect(payload.ReplyToId).toBe("9001"); expect(payload.ReplyToId).toBe("9001");
expect(payload.ReplyToBody).toBe("Can you summarize this?"); expect(payload.ReplyToBody).toBe("Can you summarize this?");

View File

@@ -9,6 +9,7 @@ import { chunkText } from "../auto-reply/chunk.js";
import { formatAgentEnvelope } from "../auto-reply/envelope.js"; import { formatAgentEnvelope } from "../auto-reply/envelope.js";
import { getReplyFromConfig } from "../auto-reply/reply.js"; import { getReplyFromConfig } from "../auto-reply/reply.js";
import type { ReplyPayload } from "../auto-reply/types.js"; import type { ReplyPayload } from "../auto-reply/types.js";
import type { ReplyToMode } from "../config/config.js";
import { loadConfig } from "../config/config.js"; import { loadConfig } from "../config/config.js";
import { resolveStorePath, updateLastRoute } from "../config/sessions.js"; import { resolveStorePath, updateLastRoute } from "../config/sessions.js";
import { danger, isVerbose, logVerbose } from "../globals.js"; import { danger, isVerbose, logVerbose } from "../globals.js";
@@ -39,6 +40,7 @@ export type TelegramBotOptions = {
requireMention?: boolean; requireMention?: boolean;
allowFrom?: Array<string | number>; allowFrom?: Array<string | number>;
mediaMaxMb?: number; mediaMaxMb?: number;
replyToMode?: ReplyToMode;
proxyFetch?: typeof fetch; proxyFetch?: typeof fetch;
}; };
@@ -59,6 +61,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
const cfg = loadConfig(); const cfg = loadConfig();
const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom; const allowFrom = opts.allowFrom ?? cfg.telegram?.allowFrom;
const replyToMode = opts.replyToMode ?? cfg.telegram?.replyToMode ?? "off";
const mediaMaxBytes = const mediaMaxBytes =
(opts.mediaMaxMb ?? cfg.telegram?.mediaMaxMb ?? 5) * 1024 * 1024; (opts.mediaMaxMb ?? cfg.telegram?.mediaMaxMb ?? 5) * 1024 * 1024;
const logger = getChildLogger({ module: "telegram-auto-reply" }); const logger = getChildLogger({ module: "telegram-auto-reply" });
@@ -137,7 +140,9 @@ export function createTelegramBot(opts: TelegramBotOptions) {
).trim(); ).trim();
if (!rawBody) return; if (!rawBody) return;
const replySuffix = replyTarget const replySuffix = replyTarget
? `\n\n[Replying to ${replyTarget.sender}]\n${replyTarget.body}\n[/Replying]` ? `\n\n[Replying to ${replyTarget.sender}${
replyTarget.id ? ` id:${replyTarget.id}` : ""
}]\n${replyTarget.body}\n[/Replying]`
: ""; : "";
const body = formatAgentEnvelope({ const body = formatAgentEnvelope({
surface: "Telegram", surface: "Telegram",
@@ -211,6 +216,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
token: opts.token, token: opts.token,
runtime, runtime,
bot, bot,
replyToMode,
}); });
} catch (err) { } catch (err) {
runtime.error?.(danger(`handler failed: ${String(err)}`)); runtime.error?.(danger(`handler failed: ${String(err)}`));
@@ -233,13 +239,17 @@ async function deliverReplies(params: {
token: string; token: string;
runtime: RuntimeEnv; runtime: RuntimeEnv;
bot: Bot; bot: Bot;
replyToMode: ReplyToMode;
}) { }) {
const { replies, chatId, runtime, bot } = params; const { replies, chatId, runtime, bot, replyToMode } = params;
let hasReplied = false;
for (const reply of replies) { for (const reply of replies) {
if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) { if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) {
runtime.error?.(danger("reply missing text/media")); runtime.error?.(danger("reply missing text/media"));
continue; continue;
} }
const replyToId =
replyToMode === "off" ? undefined : resolveTelegramReplyId(reply.replyToId);
const mediaList = reply.mediaUrls?.length const mediaList = reply.mediaUrls?.length
? reply.mediaUrls ? reply.mediaUrls
: reply.mediaUrl : reply.mediaUrl
@@ -247,7 +257,15 @@ async function deliverReplies(params: {
: []; : [];
if (mediaList.length === 0) { if (mediaList.length === 0) {
for (const chunk of chunkText(reply.text || "", 4000)) { for (const chunk of chunkText(reply.text || "", 4000)) {
await sendTelegramText(bot, chatId, chunk, runtime); await sendTelegramText(bot, chatId, chunk, runtime, {
replyToMessageId:
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined,
});
if (replyToId && !hasReplied) {
hasReplied = true;
}
} }
continue; continue;
} }
@@ -259,14 +277,33 @@ async function deliverReplies(params: {
const file = new InputFile(media.buffer, media.fileName ?? "file"); const file = new InputFile(media.buffer, media.fileName ?? "file");
const caption = first ? (reply.text ?? undefined) : undefined; const caption = first ? (reply.text ?? undefined) : undefined;
first = false; first = false;
const replyToMessageId =
replyToId && (replyToMode === "all" || !hasReplied)
? replyToId
: undefined;
if (kind === "image") { if (kind === "image") {
await bot.api.sendPhoto(chatId, file, { caption }); await bot.api.sendPhoto(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
} else if (kind === "video") { } else if (kind === "video") {
await bot.api.sendVideo(chatId, file, { caption }); await bot.api.sendVideo(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
} else if (kind === "audio") { } else if (kind === "audio") {
await bot.api.sendAudio(chatId, file, { caption }); await bot.api.sendAudio(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
} else { } else {
await bot.api.sendDocument(chatId, file, { caption }); await bot.api.sendDocument(chatId, file, {
caption,
reply_to_message_id: replyToMessageId,
});
}
if (replyToId && !hasReplied) {
hasReplied = true;
} }
} }
} }
@@ -315,6 +352,13 @@ function hasBotMention(msg: TelegramMessage, botUsername: string) {
return false; return false;
} }
function resolveTelegramReplyId(raw?: string): number | undefined {
if (!raw) return undefined;
const parsed = Number(raw);
if (!Number.isFinite(parsed)) return undefined;
return parsed;
}
async function resolveMedia( async function resolveMedia(
ctx: TelegramContext, ctx: TelegramContext,
maxBytes: number, maxBytes: number,
@@ -363,10 +407,12 @@ async function sendTelegramText(
chatId: string, chatId: string,
text: string, text: string,
runtime: RuntimeEnv, runtime: RuntimeEnv,
opts?: { replyToMessageId?: number },
): Promise<number | undefined> { ): Promise<number | undefined> {
try { try {
const res = await bot.api.sendMessage(chatId, text, { const res = await bot.api.sendMessage(chatId, text, {
parse_mode: "Markdown", parse_mode: "Markdown",
reply_to_message_id: opts?.replyToMessageId,
}); });
return res.message_id; return res.message_id;
} catch (err) { } catch (err) {
@@ -375,7 +421,9 @@ async function sendTelegramText(
runtime.log?.( runtime.log?.(
`telegram markdown parse failed; retrying without formatting: ${errText}`, `telegram markdown parse failed; retrying without formatting: ${errText}`,
); );
const res = await bot.api.sendMessage(chatId, text, {}); const res = await bot.api.sendMessage(chatId, text, {
reply_to_message_id: opts?.replyToMessageId,
});
return res.message_id; return res.message_id;
} }
throw err; throw err;

View File

@@ -1417,7 +1417,7 @@ describe("web auto-reply", () => {
expect(callArg.ReplyToId).toBe("q1"); expect(callArg.ReplyToId).toBe("q1");
expect(callArg.ReplyToBody).toBe("original"); expect(callArg.ReplyToBody).toBe("original");
expect(callArg.ReplyToSender).toBe("+1999"); expect(callArg.ReplyToSender).toBe("+1999");
expect(callArg.Body).toContain("[Replying to +1999]"); expect(callArg.Body).toContain("[Replying to +1999 id:q1]");
expect(callArg.Body).toContain("original"); expect(callArg.Body).toContain("original");
}); });

View File

@@ -921,7 +921,8 @@ export async function monitorWebProvider(
const formatReplyContext = (msg: WebInboundMsg) => { const formatReplyContext = (msg: WebInboundMsg) => {
if (!msg.replyToBody) return null; if (!msg.replyToBody) return null;
const sender = msg.replyToSender ?? "unknown sender"; const sender = msg.replyToSender ?? "unknown sender";
return `[Replying to ${sender}]\n${msg.replyToBody}\n[/Replying]`; const idPart = msg.replyToId ? ` id:${msg.replyToId}` : "";
return `[Replying to ${sender}${idPart}]\n${msg.replyToBody}\n[/Replying]`;
}; };
const buildLine = (msg: WebInboundMsg) => { const buildLine = (msg: WebInboundMsg) => {