feat: add chunking mode option for BlueBubbles (#1645)

* feat: add chunking mode for outbound messages

- Introduced `chunkMode` option in various account configurations to allow splitting messages by "length" or "newline".
- Updated message processing to handle chunking based on the selected mode.
- Added tests for new chunking functionality, ensuring correct behavior for both modes.

* feat: enhance chunking mode documentation and configuration

- Added `chunkMode` option to the BlueBubbles account configuration, allowing users to choose between "length" and "newline" for message chunking.
- Updated documentation to clarify the behavior of the `chunkMode` setting.
- Adjusted account merging logic to incorporate the new `chunkMode` configuration.

* refactor: simplify chunk mode handling for BlueBubbles

- Removed `chunkMode` configuration from various account schemas and types, centralizing chunk mode logic to BlueBubbles only.
- Updated `processMessage` to default to "newline" for BlueBubbles chunking.
- Adjusted tests to reflect changes in chunk mode handling for BlueBubbles, ensuring proper functionality.

* fix: update default chunk mode to 'length' for BlueBubbles

- Changed the default value of `chunkMode` from 'newline' to 'length' in the BlueBubbles configuration and related processing functions.
- Updated documentation to reflect the new default behavior for chunking messages.
- Adjusted tests to ensure the correct default value is returned for BlueBubbles chunk mode.
This commit is contained in:
Tyler Yust
2026-01-24 16:47:10 -08:00
committed by GitHub
parent 6375ee836f
commit 92e794dc18
13 changed files with 247 additions and 8 deletions

View File

@@ -1,6 +1,13 @@
import { describe, expect, it } from "vitest";
import { chunkMarkdownText, chunkText, resolveTextChunkLimit } from "./chunk.js";
import {
chunkByNewline,
chunkMarkdownText,
chunkText,
chunkTextWithMode,
resolveChunkMode,
resolveTextChunkLimit,
} from "./chunk.js";
function expectFencesBalanced(chunks: string[]) {
for (const chunk of chunks) {
@@ -231,3 +238,95 @@ describe("chunkMarkdownText", () => {
expect(chunks.join("")).toBe(text);
});
});
// Specification for chunkByNewline: per-line splitting, blank-line removal,
// trimming, and the length-based fallback for lines above the limit.
describe("chunkByNewline", () => {
  // Limit large enough that none of the short sample lines exceed it.
  const roomyLimit = 1000;

  it("splits text on newlines", () => {
    const result = chunkByNewline("Line one\nLine two\nLine three", roomyLimit);
    expect(result).toEqual(["Line one", "Line two", "Line three"]);
  });

  it("filters empty lines", () => {
    const result = chunkByNewline("Line one\n\n\nLine two\n\nLine three", roomyLimit);
    expect(result).toEqual(["Line one", "Line two", "Line three"]);
  });

  it("trims whitespace from lines", () => {
    const result = chunkByNewline(" Line one \n Line two ", roomyLimit);
    expect(result).toEqual(["Line one", "Line two"]);
  });

  it("falls back to length-based for long lines", () => {
    const overlongLine = "a".repeat(50);
    const pieces = chunkByNewline(["Short line", overlongLine, "Another short"].join("\n"), 20);
    expect(pieces[0]).toBe("Short line");
    // The 50-character line is cut at the 20-character limit: 20 + 20 + 10.
    expect(pieces.slice(1, 4).map((piece) => piece.length)).toEqual([20, 20, 10]);
    expect(pieces[4]).toBe("Another short");
  });

  it("returns empty array for empty input", () => {
    const result = chunkByNewline("", 100);
    expect(result).toEqual([]);
  });

  it("returns empty array for whitespace-only input", () => {
    const result = chunkByNewline(" \n\n ", 100);
    expect(result).toEqual([]);
  });
});
// Specification for chunkTextWithMode: the same two-line input is either
// kept whole ("length") or split per line ("newline").
describe("chunkTextWithMode", () => {
  const twoLines = "Line one\nLine two";

  it("uses length-based chunking for length mode", () => {
    // The input is well under the 1000-character limit, so it stays in one chunk.
    expect(chunkTextWithMode(twoLines, 1000, "length")).toEqual(["Line one\nLine two"]);
  });

  it("uses newline-based chunking for newline mode", () => {
    expect(chunkTextWithMode(twoLines, 1000, "newline")).toEqual(["Line one", "Line two"]);
  });
});
// Specification for resolveChunkMode: "length" everywhere except where a
// BlueBubbles provider/account section explicitly selects "newline".
describe("resolveChunkMode", () => {
  // Provider-level BlueBubbles config selecting newline chunking.
  const newlineForBlueBubbles = {
    channels: { bluebubbles: { chunkMode: "newline" as const } },
  };

  it("returns length as default", () => {
    expect(resolveChunkMode(undefined, "telegram")).toBe("length");
    expect(resolveChunkMode({}, "discord")).toBe("length");
    expect(resolveChunkMode(undefined, "bluebubbles")).toBe("length");
  });

  it("returns length for internal channel", () => {
    expect(resolveChunkMode(newlineForBlueBubbles, "__internal__")).toBe("length");
  });

  it("supports provider-level overrides for bluebubbles", () => {
    expect(resolveChunkMode(newlineForBlueBubbles, "bluebubbles")).toBe("newline");
    expect(resolveChunkMode(newlineForBlueBubbles, "discord")).toBe("length");
  });

  it("supports account-level overrides for bluebubbles", () => {
    // Only the "primary" account overrides the provider-level "length" setting.
    const perAccountCfg = {
      channels: {
        bluebubbles: {
          chunkMode: "length" as const,
          accounts: {
            primary: { chunkMode: "newline" as const },
          },
        },
      },
    };
    const modeFor = (account: string) => resolveChunkMode(perAccountCfg, "bluebubbles", account);
    expect(modeFor("primary")).toBe("newline");
    expect(modeFor("other")).toBe("length");
  });

  it("ignores chunkMode for non-bluebubbles providers", () => {
    const telegramCfg = {
      channels: { ["telegram" as string]: { chunkMode: "newline" as const } },
    };
    expect(resolveChunkMode(telegramCfg, "telegram")).toBe("length");
  });
});

View File

@@ -10,11 +10,20 @@ import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";
export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL;
/**
* Chunking mode for outbound messages:
* - "length": Split only when exceeding textChunkLimit (default)
* - "newline": Split on every newline, with fallback to length-based for long lines
*/
export type ChunkMode = "length" | "newline";
// Default text chunk limit; the code that consumes it is outside this view.
const DEFAULT_CHUNK_LIMIT = 4000;
// Mode returned by resolveChunkMode whenever no applicable override is configured.
const DEFAULT_CHUNK_MODE: ChunkMode = "length";
// Chunking settings read from one provider's config section.
// NOTE(review): `accounts` is listed twice below; these look like the
// before/after lines of this change — confirm only the wider one is kept.
type ProviderChunkConfig = {
// Provider-wide text chunk limit.
textChunkLimit?: number;
accounts?: Record<string, { textChunkLimit?: number }>;
// Provider-wide chunk mode; used when no account entry supplies one.
chunkMode?: ChunkMode;
// Per-account overrides, keyed by account id.
accounts?: Record<string, { textChunkLimit?: number; chunkMode?: ChunkMode }>;
};
function resolveChunkLimitForProvider(
@@ -63,6 +72,79 @@ export function resolveTextChunkLimit(
return fallback;
}
/**
 * Picks the chunk mode configured in a single provider section.
 *
 * Lookup order: the account entry whose key equals the normalized account id,
 * then the first account entry whose key matches case-insensitively, then the
 * section-level `chunkMode`.
 *
 * @param cfgSection Provider config section, if any.
 * @param accountId Account id to look up; normalized via `normalizeAccountId`.
 * @returns The configured mode, or `undefined` when nothing is configured.
 */
function resolveChunkModeForProvider(
  cfgSection: ProviderChunkConfig | undefined,
  accountId?: string | null,
): ChunkMode | undefined {
  if (!cfgSection) return undefined;

  const wantedId = normalizeAccountId(accountId);
  const accountTable = cfgSection.accounts;
  if (!accountTable || typeof accountTable !== "object") {
    return cfgSection.chunkMode;
  }

  // Exact key match wins when it carries a mode.
  const exactMode = accountTable[wantedId]?.chunkMode;
  if (exactMode) return exactMode;

  // Otherwise consult only the first key that matches ignoring case.
  for (const candidateKey of Object.keys(accountTable)) {
    if (candidateKey.toLowerCase() !== wantedId.toLowerCase()) continue;
    const looseMode = accountTable[candidateKey]?.chunkMode;
    if (looseMode) return looseMode;
    break;
  }

  return cfgSection.chunkMode;
}
/**
 * Resolves the outbound chunk mode for a provider/account pair.
 *
 * Only the "bluebubbles" provider is consulted; a missing provider, the
 * internal message channel, and every other provider get the default mode.
 *
 * @param cfg Full config object, if any.
 * @param provider Provider whose mode is requested.
 * @param accountId Optional account id for account-level overrides.
 * @returns The configured mode, or the default when none is configured.
 */
export function resolveChunkMode(
  cfg: ClawdbotConfig | undefined,
  provider?: TextChunkProvider,
  accountId?: string | null,
): ChunkMode {
  const missingOrInternal = !provider || provider === INTERNAL_MESSAGE_CHANNEL;
  // Chunk mode is only supported for BlueBubbles.
  if (missingOrInternal || provider !== "bluebubbles") {
    return DEFAULT_CHUNK_MODE;
  }

  // Prefer the section under `channels`; otherwise look for a top-level
  // section keyed by the provider name.
  const channelSections = cfg?.channels as Record<string, unknown> | undefined;
  const rootSections = cfg as Record<string, unknown> | undefined;
  const section = (channelSections?.[provider] ?? rootSections?.[provider]) as
    | ProviderChunkConfig
    | undefined;

  return resolveChunkModeForProvider(section, accountId) ?? DEFAULT_CHUNK_MODE;
}
/**
 * Breaks text into one chunk per non-blank line.
 *
 * Each line is trimmed; lines that are empty after trimming are dropped.
 * A trimmed line longer than `maxLineLength` is handed to `chunkText` and
 * replaced by the chunks it returns.
 *
 * @param text Input text; an empty string yields an empty array.
 * @param maxLineLength Maximum length of a line before it is re-chunked.
 * @returns Chunks in their original line order.
 */
export function chunkByNewline(text: string, maxLineLength: number): string[] {
  if (!text) return [];

  return text
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((line) => line.length > 0)
    .flatMap((line) =>
      // Long line: fall back to length-based chunking.
      line.length <= maxLineLength ? [line] : chunkText(line, maxLineLength),
    );
}
/**
 * Chunks text with the strategy selected by `mode`.
 *
 * @param text Text to split.
 * @param limit Length limit passed through to the selected chunker.
 * @param mode "newline" delegates to `chunkByNewline`; anything else uses `chunkText`.
 * @returns The chunks produced by the selected chunker.
 */
export function chunkTextWithMode(text: string, limit: number, mode: ChunkMode): string[] {
  return mode === "newline" ? chunkByNewline(text, limit) : chunkText(text, limit);
}
export function chunkText(text: string, limit: number): string[] {
if (!text) return [];
if (limit <= 0) return [text];

View File

@@ -7,7 +7,7 @@ import {
INTERNAL_MESSAGE_CHANNEL,
listDeliverableMessageChannels,
} from "../../utils/message-channel.js";
import { resolveTextChunkLimit, type TextChunkProvider } from "../chunk.js";
import { resolveChunkMode, resolveTextChunkLimit, type TextChunkProvider } from "../chunk.js";
const DEFAULT_BLOCK_STREAM_MIN = 800;
const DEFAULT_BLOCK_STREAM_MAX = 1200;
@@ -68,6 +68,17 @@ export function resolveBlockStreamingChunking(
fallbackLimit: providerChunkLimit,
});
const chunkCfg = cfg?.agents?.defaults?.blockStreamingChunk;
// BlueBubbles-only: if chunkMode is "newline", use newline-based streaming
const channelChunkMode = resolveChunkMode(cfg, providerKey, accountId);
if (channelChunkMode === "newline") {
// For newline mode: use very low minChars to flush quickly on newlines
const minChars = Math.max(1, Math.floor(chunkCfg?.minChars ?? 1));
const maxRequested = Math.max(1, Math.floor(chunkCfg?.maxChars ?? textLimit));
const maxChars = Math.max(1, Math.min(maxRequested, textLimit));
return { minChars, maxChars, breakPreference: "newline" };
}
const maxRequested = Math.max(1, Math.floor(chunkCfg?.maxChars ?? DEFAULT_BLOCK_STREAM_MAX));
const maxChars = Math.max(1, Math.min(maxRequested, textLimit));
const minFallback = DEFAULT_BLOCK_STREAM_MIN;
@@ -91,6 +102,13 @@ export function resolveBlockStreamingCoalescing(
},
): BlockStreamingCoalescing | undefined {
const providerKey = normalizeChunkProvider(provider);
// BlueBubbles-only: when chunkMode is "newline", disable coalescing to send each line immediately
const channelChunkMode = resolveChunkMode(cfg, providerKey, accountId);
if (channelChunkMode === "newline") {
return undefined;
}
const providerId = providerKey ? normalizeChannelId(providerKey) : null;
const providerChunkLimit = providerId
? getChannelDock(providerId)?.outbound?.textChunkLimit