refactor: move text chunk limits to providers

This commit is contained in:
Peter Steinberger
2026-01-03 01:27:37 +01:00
parent 75a9cd83a0
commit f5189cc897
6 changed files with 71 additions and 58 deletions

View File

@@ -55,7 +55,7 @@
### Fixes
- Chat UI: keep the chat scrolled to the latest message after switching sessions.
- Auto-reply: stream completed reply blocks as soon as they finish (configurable default + break); skip empty tool-only blocks unless verbose.
- Messages: make outbound text chunk limits configurable (defaults remain 4000/Discord 2000).
- Providers: make outbound text chunk limits configurable via `*.textChunkLimit` (defaults remain 4000/Discord 2000).
- CLI onboarding: persist gateway token in config so local CLI auth works; recommend auth Off unless you need multi-machine access.
- Control UI: accept a `?token=` URL param to auto-fill Gateway auth; onboarding now opens the dashboard with token auth when configured.
- Agent prompt: remove hardcoded user name in system prompt example.

View File

@@ -82,7 +82,10 @@ Allowlist of E.164 phone numbers that may trigger WhatsApp auto-replies.
```json5
{
whatsapp: { allowFrom: ["+15555550123", "+447700900123"] }
whatsapp: {
allowFrom: ["+15555550123", "+447700900123"],
textChunkLimit: 4000 // optional outbound chunk size (chars)
}
}
```
@@ -169,6 +172,7 @@ Set `telegram.enabled: false` to disable automatic startup.
telegram: {
enabled: true,
botToken: "your-bot-token",
textChunkLimit: 4000, // optional outbound chunk size (chars)
replyToMode: "off",
groups: {
"*": { requireMention: true },
@@ -195,6 +199,7 @@ Configure the Discord bot by setting the bot token and optional gating:
discord: {
enabled: true,
token: "your-bot-token",
textChunkLimit: 2000, // optional outbound chunk size (chars)
mediaMaxMb: 8, // clamp inbound media size
enableReactions: true, // allow agent-triggered reactions
replyToMode: "off", // off | first | all
@@ -232,6 +237,20 @@ Reply threading is controlled via `discord.replyToMode` (`off` | `first` | `all`
Guild slugs are lowercase with spaces replaced by `-`; channel keys use the slugged channel name (no leading `#`). Prefer guild IDs as keys to avoid rename ambiguity.
Use `discord.guilds."*"` for default per-guild settings.
### `signal` (signal-cli JSON-RPC)
Clawdis can send/receive Signal messages via `signal-cli` (daemon or existing HTTP URL).
```json5
{
signal: {
enabled: true,
textChunkLimit: 4000, // optional outbound chunk size (chars)
mediaMaxMb: 8
}
}
```
### `imessage` (imsg CLI)
Clawdis spawns `imsg rpc` (JSON-RPC over stdio). No daemon or port required.
@@ -242,6 +261,7 @@ Clawdis spawns `imsg rpc` (JSON-RPC over stdio). No daemon or port required.
enabled: true,
cliPath: "imsg",
dbPath: "~/Library/Messages/chat.db",
textChunkLimit: 4000, // optional outbound chunk size (chars)
allowFrom: ["+15555550123", "user@example.com", "chat_id:123"],
groups: {
"*": { requireMention: true },
@@ -276,23 +296,14 @@ Default: `~/clawd`.
### `messages`
Controls inbound/outbound prefixes and timestamps.
Outbound text chunking is configured per provider via `*.textChunkLimit` (e.g. `whatsapp.textChunkLimit`, `telegram.textChunkLimit`).
```json5
{
messages: {
messagePrefix: "[clawdis]",
responsePrefix: "🦞",
timestampPrefix: "Europe/London",
// outbound chunk size (chars); defaults vary by surface (e.g. 4000, Discord 2000)
textChunkLimit: 4000,
// optional per-surface overrides
textChunkLimitBySurface: {
whatsapp: 4000,
telegram: 4000,
signal: 4000,
imessage: 4000,
discord: 2000
}
timestampPrefix: "Europe/London"
}
}
```

View File

@@ -55,20 +55,15 @@ describe("resolveTextChunkLimit", () => {
expect(resolveTextChunkLimit(undefined, "discord")).toBe(2000);
});
it("supports a global override", () => {
const cfg = { messages: { textChunkLimit: 1234 } };
expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(1234);
expect(resolveTextChunkLimit(cfg, "discord")).toBe(1234);
});
it("prefers per-surface overrides over global", () => {
const cfg = {
messages: {
textChunkLimit: 1234,
textChunkLimitBySurface: { discord: 111 },
},
};
expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
it("supports provider overrides", () => {
const cfg = { telegram: { textChunkLimit: 1234 } };
expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(4000);
expect(resolveTextChunkLimit(cfg, "telegram")).toBe(1234);
});
it("uses the matching provider override", () => {
const cfg = { discord: { textChunkLimit: 111 } };
expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000);
});
});

View File

@@ -22,19 +22,21 @@ const DEFAULT_CHUNK_LIMIT_BY_SURFACE: Record<TextChunkSurface, number> = {
};
/**
 * Resolve the maximum size (in characters) of one outbound text chunk for a
 * given surface.
 *
 * A provider-level `textChunkLimit` (for example `cfg.discord.textChunkLimit`)
 * is used when it is a positive number; otherwise the built-in default for the
 * surface applies, and 4000 is returned when no surface is given.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so the
 * `messages`-based lookups appear next to the provider-based ones. They look
 * like the removed side of the change — confirm against the committed file.
 *
 * @param cfg - Loaded config, or `undefined` to rely on defaults only.
 * @param surface - Surface the text will be sent to; optional.
 * @returns The chunk limit to use, in characters.
 */
export function resolveTextChunkLimit(
cfg: Pick<ClawdisConfig, "messages"> | undefined,
cfg: ClawdisConfig | undefined,
surface?: TextChunkSurface,
): number {
// Lookup through `messages.textChunkLimitBySurface` (presumably the superseded form).
const surfaceOverride = surface
? cfg?.messages?.textChunkLimitBySurface?.[surface]
: undefined;
// Each known surface reads the limit from its own provider config section.
const surfaceOverride = (() => {
if (!surface) return undefined;
if (surface === "whatsapp") return cfg?.whatsapp?.textChunkLimit;
if (surface === "telegram") return cfg?.telegram?.textChunkLimit;
if (surface === "discord") return cfg?.discord?.textChunkLimit;
if (surface === "signal") return cfg?.signal?.textChunkLimit;
if (surface === "imessage") return cfg?.imessage?.textChunkLimit;
return undefined;
})();
// Only a positive numeric override is honored; anything else falls through.
if (typeof surfaceOverride === "number" && surfaceOverride > 0) {
return surfaceOverride;
}
// Global `messages.textChunkLimit` fallback (presumably the superseded form).
const globalOverride = cfg?.messages?.textChunkLimit;
if (typeof globalOverride === "number" && globalOverride > 0) {
return globalOverride;
}
// No usable override: use the built-in per-surface default, else 4000.
if (surface) return DEFAULT_CHUNK_LIMIT_BY_SURFACE[surface];
return 4000;
}

View File

@@ -62,6 +62,8 @@ export type WebConfig = {
export type WhatsAppConfig = {
/** Optional allowlist for WhatsApp direct chats (E.164). */
allowFrom?: string[];
/** Outbound text chunk size (chars). Default: 4000. */
textChunkLimit?: number;
groups?: Record<
string,
{
@@ -176,6 +178,8 @@ export type TelegramConfig = {
}
>;
allowFrom?: Array<string | number>;
/** Outbound text chunk size (chars). Default: 4000. */
textChunkLimit?: number;
mediaMaxMb?: number;
proxy?: string;
webhookUrl?: string;
@@ -221,6 +225,8 @@ export type DiscordConfig = {
/** If false, do not start the Discord provider. Default: true. */
enabled?: boolean;
token?: string;
/** Outbound text chunk size (chars). Default: 2000. */
textChunkLimit?: number;
mediaMaxMb?: number;
historyLimit?: number;
/** Allow agent-triggered Discord reactions (default: true). */
@@ -253,6 +259,8 @@ export type SignalConfig = {
ignoreStories?: boolean;
sendReadReceipts?: boolean;
allowFrom?: Array<string | number>;
/** Outbound text chunk size (chars). Default: 4000. */
textChunkLimit?: number;
mediaMaxMb?: number;
};
@@ -273,6 +281,8 @@ export type IMessageConfig = {
includeAttachments?: boolean;
/** Max outbound media size in MB. */
mediaMaxMb?: number;
/** Outbound text chunk size (chars). Default: 4000. */
textChunkLimit?: number;
groups?: Record<
string,
{
@@ -314,15 +324,6 @@ export type MessagesConfig = {
messagePrefix?: string; // Prefix added to all inbound messages (default: "[clawdis]" if no allowFrom, else "")
responsePrefix?: string; // Prefix auto-added to all outbound replies (e.g., "🦞")
timestampPrefix?: boolean | string; // true/false or IANA timezone string (default: true with UTC)
/** Outbound text chunk size (chars). Default varies by provider (e.g. 4000, Discord 2000). */
textChunkLimit?: number;
/** Optional per-surface chunk overrides. */
textChunkLimitBySurface?: Partial<
Record<
"whatsapp" | "telegram" | "discord" | "signal" | "imessage" | "webchat",
number
>
>;
};
export type BridgeBindMode = "auto" | "lan" | "tailnet" | "loopback";
@@ -717,17 +718,6 @@ const MessagesSchema = z
messagePrefix: z.string().optional(),
responsePrefix: z.string().optional(),
timestampPrefix: z.union([z.boolean(), z.string()]).optional(),
textChunkLimit: z.number().int().positive().optional(),
textChunkLimitBySurface: z
.object({
whatsapp: z.number().int().positive().optional(),
telegram: z.number().int().positive().optional(),
discord: z.number().int().positive().optional(),
signal: z.number().int().positive().optional(),
imessage: z.number().int().positive().optional(),
webchat: z.number().int().positive().optional(),
})
.optional(),
})
.optional();
@@ -989,6 +979,7 @@ const ClawdisSchema = z.object({
whatsapp: z
.object({
allowFrom: z.array(z.string()).optional(),
textChunkLimit: z.number().int().positive().optional(),
groups: z
.record(
z.string(),
@@ -1018,6 +1009,7 @@ const ClawdisSchema = z.object({
)
.optional(),
allowFrom: z.array(z.union([z.string(), z.number()])).optional(),
textChunkLimit: z.number().int().positive().optional(),
mediaMaxMb: z.number().positive().optional(),
proxy: z.string().optional(),
webhookUrl: z.string().optional(),
@@ -1029,6 +1021,7 @@ const ClawdisSchema = z.object({
.object({
enabled: z.boolean().optional(),
token: z.string().optional(),
textChunkLimit: z.number().int().positive().optional(),
slashCommand: z
.object({
enabled: z.boolean().optional(),
@@ -1090,6 +1083,7 @@ const ClawdisSchema = z.object({
ignoreStories: z.boolean().optional(),
sendReadReceipts: z.boolean().optional(),
allowFrom: z.array(z.union([z.string(), z.number()])).optional(),
textChunkLimit: z.number().int().positive().optional(),
mediaMaxMb: z.number().positive().optional(),
})
.optional(),
@@ -1105,6 +1099,7 @@ const ClawdisSchema = z.object({
allowFrom: z.array(z.union([z.string(), z.number()])).optional(),
includeAttachments: z.boolean().optional(),
mediaMaxMb: z.number().positive().optional(),
textChunkLimit: z.number().int().positive().optional(),
groups: z
.record(
z.string(),

View File

@@ -9,7 +9,7 @@ import {
Partials,
} from "discord.js";
import { chunkText } from "../auto-reply/chunk.js";
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
import { getReplyFromConfig } from "../auto-reply/reply.js";
import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
@@ -129,6 +129,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
);
const mediaMaxBytes =
(opts.mediaMaxMb ?? cfg.discord?.mediaMaxMb ?? 8) * 1024 * 1024;
const textLimit = resolveTextChunkLimit(cfg, "discord");
const historyLimit = Math.max(
0,
opts.historyLimit ?? cfg.discord?.historyLimit ?? 20,
@@ -433,6 +434,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
token,
runtime,
replyToMode,
textLimit,
});
didSendReply = true;
})
@@ -475,6 +477,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
token,
runtime,
replyToMode,
textLimit,
});
didSendReply = true;
if (isVerbose()) {
@@ -653,6 +656,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
replies,
interaction,
ephemeral: slashCommand.ephemeral,
textLimit,
});
} catch (err) {
runtime.error?.(danger(`slash handler failed: ${String(err)}`));
@@ -1049,14 +1053,17 @@ async function deliverReplies({
token,
runtime,
replyToMode,
textLimit,
}: {
replies: ReplyPayload[];
target: string;
token: string;
runtime: RuntimeEnv;
replyToMode: ReplyToMode;
textLimit: number;
}) {
let hasReplied = false;
const chunkLimit = Math.min(textLimit, 2000);
for (const payload of replies) {
const mediaList =
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
@@ -1064,7 +1071,7 @@ async function deliverReplies({
const replyToId = payload.replyToId;
if (!text && mediaList.length === 0) continue;
if (mediaList.length === 0) {
for (const chunk of chunkText(text, 2000)) {
for (const chunk of chunkText(text, chunkLimit)) {
const replyTo = resolveDiscordReplyTarget({
replyToMode,
replyToId,
@@ -1106,12 +1113,15 @@ async function deliverSlashReplies({
replies,
interaction,
ephemeral,
textLimit,
}: {
replies: ReplyPayload[];
interaction: import("discord.js").ChatInputCommandInteraction;
ephemeral: boolean;
textLimit: number;
}) {
const messages: string[] = [];
const chunkLimit = Math.min(textLimit, 2000);
for (const payload of replies) {
const textRaw = payload.text?.trim() ?? "";
const text =
@@ -1125,7 +1135,7 @@ async function deliverSlashReplies({
.filter(Boolean)
.join("\n");
if (!combined) continue;
for (const chunk of chunkText(combined, 2000)) {
for (const chunk of chunkText(combined, chunkLimit)) {
messages.push(chunk);
}
}