refactor: move text chunk limits to providers

2026-01-03 01:27:37 +01:00
parent 75a9cd83a0
commit f5189cc897
6 changed files with 71 additions and 58 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -55,7 +55,7 @@
 ### Fixes
 - Chat UI: keep the chat scrolled to the latest message after switching sessions.
 - Auto-reply: stream completed reply blocks as soon as they finish (configurable default + break); skip empty tool-only blocks unless verbose.
- Messages: make outbound text chunk limits configurable (defaults remain 4000/Discord 2000).
+- Providers: make outbound text chunk limits configurable per provider via `<provider>.textChunkLimit` (defaults remain 4000; Discord 2000).
 - CLI onboarding: persist gateway token in config so local CLI auth works; recommend auth Off unless you need multi-machine access.
 - Control UI: accept a `?token=` URL param to auto-fill Gateway auth; onboarding now opens the dashboard with token auth when configured.
 - Agent prompt: remove hardcoded user name in system prompt example.
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -82,7 +82,10 @@ Allowlist of E.164 phone numbers that may trigger WhatsApp auto-replies.

 ```json5
 {
-  whatsapp: { allowFrom: ["+15555550123", "+447700900123"] }
+  whatsapp: {
+    allowFrom: ["+15555550123", "+447700900123"],
+    textChunkLimit: 4000 // optional outbound chunk size (chars)
+  }
 }
 ```

@@ -169,6 +172,7 @@ Set `telegram.enabled: false` to disable automatic startup.
  telegram: {
    enabled: true,
    botToken: "your-bot-token",
+    textChunkLimit: 4000,                  // optional outbound chunk size (chars)
    replyToMode: "off",
    groups: {
      "*": { requireMention: true },
@@ -195,6 +199,7 @@ Configure the Discord bot by setting the bot token and optional gating:
  discord: {
    enabled: true,
    token: "your-bot-token",
+    textChunkLimit: 2000,                   // optional outbound chunk size (chars); values above 2000 are capped at 2000
    mediaMaxMb: 8,                          // clamp inbound media size
    enableReactions: true,                  // allow agent-triggered reactions
    replyToMode: "off",                     // off | first | all
@@ -232,6 +237,20 @@ Reply threading is controlled via `discord.replyToMode` (`off` | `first` | `all`
 Guild slugs are lowercase with spaces replaced by `-`; channel keys use the slugged channel name (no leading `#`). Prefer guild ids as keys to avoid rename ambiguity.
 Use `discord.guilds."*"` for default per-guild settings.

+### `signal` (signal-cli JSON-RPC)
+
+Clawdis can send and receive Signal messages via `signal-cli` (daemon or an existing HTTP URL).
+
+```json5
+{
+  signal: {
+    enabled: true,
+    textChunkLimit: 4000,                   // optional outbound chunk size (chars)
+    mediaMaxMb: 8
+  }
+}
+```
+
 ### `imessage` (imsg CLI)

 Clawdis spawns `imsg rpc` (JSON-RPC over stdio). No daemon or port required.
@@ -242,6 +261,7 @@ Clawdis spawns `imsg rpc` (JSON-RPC over stdio). No daemon or port required.
    enabled: true,
    cliPath: "imsg",
    dbPath: "~/Library/Messages/chat.db",
+    textChunkLimit: 4000,                   // optional outbound chunk size (chars)
    allowFrom: ["+15555550123", "user@example.com", "chat_id:123"],
    groups: {
      "*": { requireMention: true },
@@ -276,23 +296,14 @@ Default: `~/clawd`.
 ### `messages`

 Controls inbound/outbound prefixes and timestamps.
+Outbound text chunking is configured per provider via `*.textChunkLimit` (e.g. `whatsapp.textChunkLimit`, `telegram.textChunkLimit`).

 ```json5
 {
  messages: {
    messagePrefix: "[clawdis]",
    responsePrefix: "🦞",
-    timestampPrefix: "Europe/London",
-    // outbound chunk size (chars); defaults vary by surface (e.g. 4000, Discord 2000)
-    textChunkLimit: 4000,
-    // optional per-surface overrides
-    textChunkLimitBySurface: {
-      whatsapp: 4000,
-      telegram: 4000,
-      signal: 4000,
-      imessage: 4000,
-      discord: 2000
-    }
+    timestampPrefix: "Europe/London"
  }
 }
 ```
--- a/src/auto-reply/chunk.test.ts
+++ b/src/auto-reply/chunk.test.ts
@@ -55,20 +55,15 @@ describe("resolveTextChunkLimit", () => {
    expect(resolveTextChunkLimit(undefined, "discord")).toBe(2000);
  });

-  it("supports a global override", () => {
-    const cfg = { messages: { textChunkLimit: 1234 } };
-    expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(1234);
-    expect(resolveTextChunkLimit(cfg, "discord")).toBe(1234);
-  });
-
-  it("prefers per-surface overrides over global", () => {
-    const cfg = {
-      messages: {
-        textChunkLimit: 1234,
-        textChunkLimitBySurface: { discord: 111 },
-      },
-    };
-    expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
+  it("supports provider overrides", () => {
+    const cfg = { telegram: { textChunkLimit: 1234 } };
+    expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(4000);
    expect(resolveTextChunkLimit(cfg, "telegram")).toBe(1234);
  });
+
+  it("uses the matching provider override", () => {
+    const cfg = { discord: { textChunkLimit: 111 } };
+    expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
+    expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000);
+  });
 });
--- a/src/auto-reply/chunk.ts
+++ b/src/auto-reply/chunk.ts
@@ -22,19 +22,21 @@ const DEFAULT_CHUNK_LIMIT_BY_SURFACE: Record<TextChunkSurface, number> = {
 };

 export function resolveTextChunkLimit(
-  cfg: Pick<ClawdisConfig, "messages"> | undefined,
+  cfg: ClawdisConfig | undefined,
  surface?: TextChunkSurface,
 ): number {
-  const surfaceOverride = surface
-    ? cfg?.messages?.textChunkLimitBySurface?.[surface]
-    : undefined;
+  const surfaceOverride = (() => {
+    if (!surface) return undefined;
+    if (surface === "whatsapp") return cfg?.whatsapp?.textChunkLimit;
+    if (surface === "telegram") return cfg?.telegram?.textChunkLimit;
+    if (surface === "discord") return cfg?.discord?.textChunkLimit;
+    if (surface === "signal") return cfg?.signal?.textChunkLimit;
+    if (surface === "imessage") return cfg?.imessage?.textChunkLimit;
+    return undefined;
+  })();
  if (typeof surfaceOverride === "number" && surfaceOverride > 0) {
    return surfaceOverride;
  }
-  const globalOverride = cfg?.messages?.textChunkLimit;
-  if (typeof globalOverride === "number" && globalOverride > 0) {
-    return globalOverride;
-  }
  if (surface) return DEFAULT_CHUNK_LIMIT_BY_SURFACE[surface];
  return 4000;
 }
--- a/src/config/config.ts
+++ b/src/config/config.ts
@@ -62,6 +62,8 @@ export type WebConfig = {
 export type WhatsAppConfig = {
  /** Optional allowlist for WhatsApp direct chats (E.164). */
  allowFrom?: string[];
+  /** Outbound text chunk size (chars). Default: 4000. */
+  textChunkLimit?: number;
  groups?: Record<
    string,
    {
@@ -176,6 +178,8 @@ export type TelegramConfig = {
    }
  >;
  allowFrom?: Array<string | number>;
+  /** Outbound text chunk size (chars). Default: 4000. */
+  textChunkLimit?: number;
  mediaMaxMb?: number;
  proxy?: string;
  webhookUrl?: string;
@@ -221,6 +225,8 @@ export type DiscordConfig = {
  /** If false, do not start the Discord provider. Default: true. */
  enabled?: boolean;
  token?: string;
+  /** Outbound text chunk size (chars). Default: 2000. */
+  textChunkLimit?: number;
  mediaMaxMb?: number;
  historyLimit?: number;
  /** Allow agent-triggered Discord reactions (default: true). */
@@ -253,6 +259,8 @@ export type SignalConfig = {
  ignoreStories?: boolean;
  sendReadReceipts?: boolean;
  allowFrom?: Array<string | number>;
+  /** Outbound text chunk size (chars). Default: 4000. */
+  textChunkLimit?: number;
  mediaMaxMb?: number;
 };

@@ -273,6 +281,8 @@ export type IMessageConfig = {
  includeAttachments?: boolean;
  /** Max outbound media size in MB. */
  mediaMaxMb?: number;
+  /** Outbound text chunk size (chars). Default: 4000. */
+  textChunkLimit?: number;
  groups?: Record<
    string,
    {
@@ -314,15 +324,6 @@ export type MessagesConfig = {
  messagePrefix?: string; // Prefix added to all inbound messages (default: "[clawdis]" if no allowFrom, else "")
  responsePrefix?: string; // Prefix auto-added to all outbound replies (e.g., "🦞")
  timestampPrefix?: boolean | string; // true/false or IANA timezone string (default: true with UTC)
-  /** Outbound text chunk size (chars). Default varies by provider (e.g. 4000, Discord 2000). */
-  textChunkLimit?: number;
-  /** Optional per-surface chunk overrides. */
-  textChunkLimitBySurface?: Partial<
-    Record<
-      "whatsapp" | "telegram" | "discord" | "signal" | "imessage" | "webchat",
-      number
-    >
-  >;
 };

 export type BridgeBindMode = "auto" | "lan" | "tailnet" | "loopback";
@@ -717,17 +718,6 @@ const MessagesSchema = z
    messagePrefix: z.string().optional(),
    responsePrefix: z.string().optional(),
    timestampPrefix: z.union([z.boolean(), z.string()]).optional(),
-    textChunkLimit: z.number().int().positive().optional(),
-    textChunkLimitBySurface: z
-      .object({
-        whatsapp: z.number().int().positive().optional(),
-        telegram: z.number().int().positive().optional(),
-        discord: z.number().int().positive().optional(),
-        signal: z.number().int().positive().optional(),
-        imessage: z.number().int().positive().optional(),
-        webchat: z.number().int().positive().optional(),
-      })
-      .optional(),
  })
  .optional();

@@ -989,6 +979,7 @@ const ClawdisSchema = z.object({
  whatsapp: z
    .object({
      allowFrom: z.array(z.string()).optional(),
+      textChunkLimit: z.number().int().positive().optional(),
      groups: z
        .record(
          z.string(),
@@ -1018,6 +1009,7 @@ const ClawdisSchema = z.object({
        )
        .optional(),
      allowFrom: z.array(z.union([z.string(), z.number()])).optional(),
+      textChunkLimit: z.number().int().positive().optional(),
      mediaMaxMb: z.number().positive().optional(),
      proxy: z.string().optional(),
      webhookUrl: z.string().optional(),
@@ -1029,6 +1021,7 @@ const ClawdisSchema = z.object({
    .object({
      enabled: z.boolean().optional(),
      token: z.string().optional(),
+      textChunkLimit: z.number().int().positive().optional(),
      slashCommand: z
        .object({
          enabled: z.boolean().optional(),
@@ -1090,6 +1083,7 @@ const ClawdisSchema = z.object({
      ignoreStories: z.boolean().optional(),
      sendReadReceipts: z.boolean().optional(),
      allowFrom: z.array(z.union([z.string(), z.number()])).optional(),
+      textChunkLimit: z.number().int().positive().optional(),
      mediaMaxMb: z.number().positive().optional(),
    })
    .optional(),
@@ -1105,6 +1099,7 @@ const ClawdisSchema = z.object({
      allowFrom: z.array(z.union([z.string(), z.number()])).optional(),
      includeAttachments: z.boolean().optional(),
      mediaMaxMb: z.number().positive().optional(),
+      textChunkLimit: z.number().int().positive().optional(),
      groups: z
        .record(
          z.string(),
--- a/src/discord/monitor.ts
+++ b/src/discord/monitor.ts
@@ -9,7 +9,7 @@ import {
  Partials,
 } from "discord.js";

-import { chunkText } from "../auto-reply/chunk.js";
+import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
 import { formatAgentEnvelope } from "../auto-reply/envelope.js";
 import { getReplyFromConfig } from "../auto-reply/reply.js";
 import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
@@ -129,6 +129,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
  );
  const mediaMaxBytes =
    (opts.mediaMaxMb ?? cfg.discord?.mediaMaxMb ?? 8) * 1024 * 1024;
+  const textLimit = resolveTextChunkLimit(cfg, "discord");
  const historyLimit = Math.max(
    0,
    opts.historyLimit ?? cfg.discord?.historyLimit ?? 20,
@@ -433,6 +434,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
              token,
              runtime,
              replyToMode,
+              textLimit,
            });
            didSendReply = true;
          })
@@ -475,6 +477,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
        token,
        runtime,
        replyToMode,
+        textLimit,
      });
      didSendReply = true;
      if (isVerbose()) {
@@ -653,6 +656,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
        replies,
        interaction,
        ephemeral: slashCommand.ephemeral,
+        textLimit,
      });
    } catch (err) {
      runtime.error?.(danger(`slash handler failed: ${String(err)}`));
@@ -1049,14 +1053,17 @@ async function deliverReplies({
  token,
  runtime,
  replyToMode,
+  textLimit,
 }: {
  replies: ReplyPayload[];
  target: string;
  token: string;
  runtime: RuntimeEnv;
  replyToMode: ReplyToMode;
+  textLimit: number;
 }) {
  let hasReplied = false;
+  const chunkLimit = Math.min(textLimit, 2000);
  for (const payload of replies) {
    const mediaList =
      payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
@@ -1064,7 +1071,7 @@ async function deliverReplies({
    const replyToId = payload.replyToId;
    if (!text && mediaList.length === 0) continue;
    if (mediaList.length === 0) {
-      for (const chunk of chunkText(text, 2000)) {
+      for (const chunk of chunkText(text, chunkLimit)) {
        const replyTo = resolveDiscordReplyTarget({
          replyToMode,
          replyToId,
@@ -1106,12 +1113,15 @@ async function deliverSlashReplies({
  replies,
  interaction,
  ephemeral,
+  textLimit,
 }: {
  replies: ReplyPayload[];
  interaction: import("discord.js").ChatInputCommandInteraction;
  ephemeral: boolean;
+  textLimit: number;
 }) {
  const messages: string[] = [];
+  const chunkLimit = Math.min(textLimit, 2000);
  for (const payload of replies) {
    const textRaw = payload.text?.trim() ?? "";
    const text =
@@ -1125,7 +1135,7 @@ async function deliverSlashReplies({
      .filter(Boolean)
      .join("\n");
    if (!combined) continue;
-    for (const chunk of chunkText(combined, 2000)) {
+    for (const chunk of chunkText(combined, chunkLimit)) {
      messages.push(chunk);
    }
  }