* refactor: introduce provider plugin registry * refactor: move provider CLI to plugins * docs: add provider plugin implementation notes * refactor: shift provider runtime logic into plugins * refactor: add plugin defaults and summaries * docs: update provider plugin notes * feat(commands): add /commands slash list * Auto-reply: tidy help message * Auto-reply: fix status command lint * Tests: align google shared expectations * Auto-reply: tidy help message * Auto-reply: fix status command lint * refactor: move provider routing into plugins * test: align agent routing expectations * docs: update provider plugin notes * refactor: route replies via provider plugins * docs: note route-reply plugin hooks * refactor: extend provider plugin contract * refactor: derive provider status from plugins * refactor: unify gateway provider control * refactor: use plugin metadata in auto-reply * fix: parenthesize cron target selection * refactor: derive gateway methods from plugins * refactor: generalize provider logout * refactor: route provider logout through plugins * refactor: move WhatsApp web login methods into plugin * refactor: generalize provider log prefixes * refactor: centralize default chat provider * refactor: derive provider lists from registry * refactor: move provider reload noops into plugins * refactor: resolve web login provider via alias * refactor: derive CLI provider options from plugins * refactor: derive prompt provider list from plugins * style: apply biome lint fixes * fix: resolve provider routing edge cases * docs: update provider plugin refactor notes * fix(gateway): harden agent provider routing * refactor: move provider routing into plugins * refactor: move provider CLI to plugins * refactor: derive provider lists from registry * fix: restore slash command parsing * refactor: align provider ids for schema * refactor: unify outbound target resolution * fix: keep outbound labels stable * feat: add msteams to cron surfaces * fix: clean up lint build 
issues * refactor: localize chat provider alias normalization * refactor: drive gateway provider lists from plugins * docs: update provider plugin notes * style: format message-provider * fix: avoid provider registry init cycles * style: sort message-provider imports * fix: relax provider alias map typing * refactor: move provider routing into plugins * refactor: add plugin pairing/config adapters * refactor: route pairing and provider removal via plugins * refactor: align auto-reply provider typing * test: stabilize telegram media mocks * docs: update provider plugin refactor notes * refactor: pluginize outbound targets * refactor: pluginize provider selection * refactor: generalize text chunk limits * docs: update provider plugin notes * refactor: generalize group session/config * fix: normalize provider id for room detection * fix: avoid provider init in system prompt * style: formatting cleanup * refactor: normalize agent delivery targets * test: update outbound delivery labels * chore: fix lint regressions * refactor: extend provider plugin adapters * refactor: move elevated/block streaming defaults to plugins * refactor: defer outbound send deps to plugins * docs: note plugin-driven streaming/elevated defaults * refactor: centralize webchat provider constant * refactor: add provider setup adapters * refactor: delegate provider add config to plugins * docs: document plugin-driven provider add * refactor: add plugin state/binding metadata * refactor: build agent provider status from plugins * docs: note plugin-driven agent bindings * refactor: centralize internal provider constant usage * fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing) * refactor: centralize default chat provider * refactor: centralize WhatsApp target normalization * refactor: move provider routing into plugins * refactor: normalize agent delivery targets * chore: fix lint regressions * fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing) * 
feat: expand provider plugin adapters * refactor: route auto-reply via provider plugins * fix: align WhatsApp target normalization * fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing) * refactor: centralize WhatsApp target normalization * feat: add /config chat config updates * docs: add /config get alias * feat(commands): add /commands slash list * refactor: centralize default chat provider * style: apply biome lint fixes * chore: fix lint regressions * fix: clean up whatsapp allowlist typing * style: format config command helpers * refactor: pluginize tool threading context * refactor: normalize session announce targets * docs: note new plugin threading and announce hooks * refactor: pluginize message actions * docs: update provider plugin actions notes * fix: align provider action adapters * refactor: centralize webchat checks * style: format message provider helpers * refactor: move provider onboarding into adapters * docs: note onboarding provider adapters * feat: add msteams onboarding adapter * style: organize onboarding imports * fix: normalize msteams allowFrom types * feat: add plugin text chunk limits * refactor: use plugin chunk limit fallbacks * feat: add provider mention stripping hooks * style: organize provider plugin type imports * refactor: generalize health snapshots * refactor: update macOS health snapshot handling * docs: refresh health snapshot notes * style: format health snapshot updates * refactor: drive security warnings via plugins * docs: note provider security adapter * style: format provider security adapters * refactor: centralize provider account defaults * refactor: type gateway client identity constants * chore: regen gateway protocol swift * fix: degrade health on failed provider probe * refactor: centralize pairing approve hint * docs: add plugin CLI command references * refactor: route auth and tool sends through plugins * docs: expand provider plugin hooks * refactor: document provider docking 
touchpoints * refactor: normalize internal provider defaults * refactor: streamline outbound delivery wiring * refactor: make provider onboarding plugin-owned * refactor: support provider-owned agent tools * refactor: move telegram draft chunking into telegram module * refactor: infer provider tool sends via extractToolSend * fix: repair plugin onboarding imports * refactor: de-dup outbound target normalization * style: tidy plugin and agent imports * refactor: data-drive provider selection line * fix: satisfy lint after provider plugin rebase * test: deflake gateway-cli coverage * style: format gateway-cli coverage test * refactor(provider-plugins): simplify provider ids * test(pairing-cli): avoid provider-specific ternary * style(macos): swiftformat HealthStore * refactor(sandbox): derive provider tool denylist * fix(sandbox): avoid plugin init in defaults * refactor(provider-plugins): centralize provider aliases * style(test): satisfy biome * refactor(protocol): v3 providers.status maps * refactor(ui): adapt to protocol v3 * refactor(macos): adapt to protocol v3 * test: update providers.status v3 fixtures * refactor(gateway): map provider runtime snapshot * test(gateway): update reload runtime snapshot * refactor(whatsapp): normalize heartbeat provider id * docs(refactor): update provider plugin notes * style: satisfy biome after rebase * fix: describe sandboxed elevated in prompt * feat(gateway): add agent image attachments + live probe * refactor: derive CLI provider options from plugins * fix(gateway): harden agent provider routing * fix(gateway): harden agent provider routing * refactor: align provider ids for schema * fix(protocol): keep agent provider string * fix(gateway): harden agent provider routing * fix(protocol): keep agent provider string * refactor: normalize agent delivery targets * refactor: support provider-owned agent tools * refactor(config): provider-keyed elevated allowFrom * style: satisfy biome * fix(gateway): appease provider narrowing * 
style: satisfy biome * refactor(reply): move group intro hints into plugin * fix(reply): avoid plugin registry init cycle * refactor(providers): add lightweight provider dock * refactor(gateway): use typed client id in connect * refactor(providers): document docks and avoid init cycles * refactor(providers): make media limit helper generic * fix(providers): break plugin registry import cycles * style: satisfy biome * refactor(status-all): build providers table from plugins * refactor(gateway): delegate web login to provider plugin * refactor(provider): drop web alias * refactor(provider): lazy-load monitors * style: satisfy lint/format * style: format status-all providers table * style: swiftformat gateway discovery model * test: make reload plan plugin-driven * fix: avoid token stringification in status-all * refactor: make provider IDs explicit in status * feat: warn on signal/imessage provider runtime errors * test: cover gateway provider runtime warnings in status * fix: add runtime kind to provider status issues * test: cover health degradation on probe failure * fix: keep routeReply lightweight * style: organize routeReply imports * refactor(web): extract auth-store helpers * refactor(whatsapp): lazy login imports * refactor(outbound): route replies via plugin outbound * docs: update provider plugin notes * style: format provider status issues * fix: make sandbox scope warning wrap-safe * refactor: load outbound adapters from provider plugins * docs: update provider plugin outbound notes * style(macos): fix swiftformat lint * docs: changelog for provider plugins * fix(macos): satisfy swiftformat * fix(macos): open settings via menu action * style: format after rebase * fix(macos): open Settings via menu action --------- Co-authored-by: LK <luke@kyohere.com> Co-authored-by: Luke K (pr-0f3t) <2609441+lc0rp@users.noreply.github.com> Co-authored-by: Xin <xin@imfing.com>
254 lines
7.7 KiB
TypeScript
254 lines
7.7 KiB
TypeScript
// Utilities for splitting outbound text into platform-sized chunks.
// Text is split only when it exceeds the limit. Breaks prefer the last
// newline, then the last other whitespace (both outside parentheses), and the
// Markdown variant closes and re-opens fenced code blocks it has to split.
|
|
|
|
import type { ClawdbotConfig } from "../config/config.js";
|
|
import {
|
|
findFenceSpanAt,
|
|
isSafeFenceBreak,
|
|
parseFenceSpans,
|
|
} from "../markdown/fences.js";
|
|
import type { ProviderId } from "../providers/plugins/types.js";
|
|
import { normalizeAccountId } from "../routing/session-key.js";
|
|
import { INTERNAL_MESSAGE_PROVIDER } from "../utils/message-provider.js";
|
|
|
|
/**
 * Provider value accepted by `resolveTextChunkLimit`: either a plugin
 * `ProviderId` or the `INTERNAL_MESSAGE_PROVIDER` constant.
 */
export type TextChunkProvider = ProviderId | typeof INTERNAL_MESSAGE_PROVIDER;
|
|
|
|
/**
 * Chunk limit that `resolveTextChunkLimit` returns when neither a positive
 * configured override nor a positive `fallbackLimit` is available.
 */
const DEFAULT_CHUNK_LIMIT = 4000;
|
|
|
|
/** A single optional `textChunkLimit` override. */
type ChunkLimitOverride = { textChunkLimit?: number };

/**
 * The slice of a provider's config section that chunk-limit resolution reads:
 * an optional provider-wide `textChunkLimit` plus optional per-account
 * overrides keyed by account id.
 */
type ProviderChunkConfig = ChunkLimitOverride & {
  accounts?: Record<string, ChunkLimitOverride>;
};
|
|
|
|
/**
 * Look up the configured `textChunkLimit` for one provider section.
 *
 * Lookup order:
 *   1. the account entry whose key equals the normalized account id,
 *   2. the first account entry whose key matches it case-insensitively,
 *   3. the provider-level `textChunkLimit`.
 * An account entry only wins when its `textChunkLimit` is a number.
 *
 * @param cfgSection Provider config section; `undefined` yields `undefined`.
 * @param accountId  Account id, passed through `normalizeAccountId` first.
 * @returns The configured limit, or `undefined` when nothing is configured.
 *          The value is returned as-is; range checks are left to the caller.
 */
function resolveChunkLimitForProvider(
  cfgSection: ProviderChunkConfig | undefined,
  accountId?: string | null,
): number | undefined {
  if (!cfgSection) return undefined;

  const wantedId = normalizeAccountId(accountId);
  const accounts = cfgSection.accounts;
  if (!accounts || typeof accounts !== "object") {
    return cfgSection.textChunkLimit;
  }

  // Exact key first.
  const exactLimit = accounts[wantedId]?.textChunkLimit;
  if (typeof exactLimit === "number") return exactLimit;

  // Then the first key that matches when case is ignored.
  const wantedLower = wantedId.toLowerCase();
  let foldedKey: string | undefined;
  for (const candidate of Object.keys(accounts)) {
    if (candidate.toLowerCase() === wantedLower) {
      foldedKey = candidate;
      break;
    }
  }
  const foldedLimit = foldedKey
    ? accounts[foldedKey]?.textChunkLimit
    : undefined;
  if (typeof foldedLimit === "number") return foldedLimit;

  return cfgSection.textChunkLimit;
}
|
|
|
|
/**
 * Resolve the maximum chunk length to use for outbound text.
 *
 * Precedence:
 *   1. a positive limit configured under `cfg[provider]` (account-level or
 *      provider-level, see `resolveChunkLimitForProvider`); skipped when no
 *      provider is given or the provider is `INTERNAL_MESSAGE_PROVIDER`,
 *   2. a positive `opts.fallbackLimit`,
 *   3. `DEFAULT_CHUNK_LIMIT`.
 *
 * @param cfg       Loaded config, indexed by provider id.
 * @param provider  Provider whose config section should be consulted.
 * @param accountId Account whose override should be preferred.
 * @param opts      Optional `fallbackLimit` used when nothing is configured.
 * @returns A positive chunk limit.
 */
export function resolveTextChunkLimit(
  cfg: ClawdbotConfig | undefined,
  provider?: TextChunkProvider,
  accountId?: string | null,
  opts?: { fallbackLimit?: number },
): number {
  if (provider && provider !== INTERNAL_MESSAGE_PROVIDER) {
    const section = (cfg as Record<string, unknown> | undefined)?.[
      provider
    ] as ProviderChunkConfig | undefined;
    const configured = resolveChunkLimitForProvider(section, accountId);
    if (typeof configured === "number" && configured > 0) {
      return configured;
    }
  }

  const requested = opts?.fallbackLimit;
  if (typeof requested === "number" && requested > 0) {
    return requested;
  }
  return DEFAULT_CHUNK_LIMIT;
}
|
|
|
|
/**
 * Split plain text into chunks no longer than `limit`, measured in
 * `string.length` units.
 *
 * Inside each window of `limit` characters the break point is, in order of
 * preference:
 *   1. the last newline outside parentheses,
 *   2. the last other whitespace outside parentheses,
 *   3. a hard break exactly at the limit.
 * Each chunk is right-trimmed, the separator that was broken on is dropped,
 * and the remainder is left-trimmed before the next pass.
 *
 * @param text  Text to split; an empty string yields `[]`.
 * @param limit Maximum chunk length. Fractions are floored. A limit that
 *              floors below 1 (zero, negatives, fractions under 1) disables
 *              chunking and returns `[text]`.
 * @returns The chunks in order; chunks that trim down to nothing are omitted.
 */
export function chunkText(text: string, limit: number): string[] {
  if (!text) return [];

  // Work with a whole number of characters. A fractional limit below 1 used
  // to slip past the `<= 0` guard, produce an empty window and a zero-length
  // advance, and spin forever. NaN also lands here and keeps returning
  // `[text]`, as before.
  const maxLen = Math.floor(limit);
  if (!(maxLen >= 1)) return [text];
  if (text.length <= maxLen) return [text];

  const chunks: string[] = [];
  let remaining = text;

  while (remaining.length > maxLen) {
    const window = remaining.slice(0, maxLen);

    // 1) Prefer a newline break inside the window (outside parentheses).
    const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);

    // 2) Otherwise prefer the last whitespace (word boundary) inside the window.
    let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace;

    // 3) Fallback: hard break exactly at the limit. Index 0 is rejected as a
    //    break point so every pass consumes at least one character.
    if (breakIdx <= 0) breakIdx = maxLen;

    const chunk = remaining.slice(0, breakIdx).trimEnd();
    if (chunk.length > 0) {
      chunks.push(chunk);
    }

    // If we broke on whitespace/newline, skip that separator; for hard breaks keep it.
    const brokeOnSeparator =
      breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
    const nextStart = Math.min(
      remaining.length,
      breakIdx + (brokeOnSeparator ? 1 : 0),
    );
    remaining = remaining.slice(nextStart).trimStart();
  }

  if (remaining.length) chunks.push(remaining);

  return chunks;
}
|
|
|
|
/**
 * Split Markdown into chunks of about `limit` characters while keeping fenced
 * code blocks balanced.
 *
 * Each pass picks a break via `pickSafeBreakIndex` (falling back to a hard
 * break at `limit`). When that break is not a safe fence break and
 * `findFenceSpanAt` reports a fence there, the break is moved to just after a
 * newline inside the same fence, leaving room for the closing fence line. The
 * chunk is then ended with the fence's `indent + marker`, and the next chunk
 * starts with the fence's opening line. When no fence is split, leading
 * newlines of the remainder are dropped.
 *
 * Fence detection comes from helpers imported from `../markdown/fences.js`,
 * which are not defined in this file.
 *
 * @param text  Markdown to split; an empty string yields `[]`.
 * @param limit Target chunk length; `limit <= 0` returns `[text]`.
 * @returns The chunks in order.
 */
export function chunkMarkdownText(text: string, limit: number): string[] {
  if (!text) return [];
  if (limit <= 0 || text.length <= limit) return [text];

  const pieces: string[] = [];
  let remaining = text;

  while (remaining.length > limit) {
    // Re-parsed every pass: `remaining` is rewritten at the end of each
    // iteration and may gain a re-opened fence line at its start.
    const spans = parseFenceSpans(remaining);

    let breakIdx = limit;
    const preferred = pickSafeBreakIndex(remaining.slice(0, limit), spans);
    if (preferred > 0) breakIdx = preferred;

    const enclosingFence = !isSafeFenceBreak(spans, breakIdx)
      ? findFenceSpanAt(spans, breakIdx)
      : undefined;

    let splitFence: typeof enclosingFence = undefined;
    if (enclosingFence) {
      const closer = `${enclosingFence.indent}${enclosingFence.marker}`;
      // Room left for content when a "\n" plus the closing line must follow.
      const roomWithNewline = limit - (closer.length + 1);

      if (roomWithNewline <= 0) {
        // The closing line cannot fit at all: hard break at the limit.
        breakIdx = limit;
      } else {
        // Never break before some content after the fence's opening line.
        const earliestBreak = Math.min(
          remaining.length,
          enclosingFence.start + enclosingFence.openLine.length + 2,
        );
        // Room left for content when the chunk already ends in a newline.
        const roomWithoutNewline = limit - closer.length;

        // Walk newlines backwards, looking for a line start that is still
        // inside the same fence and leaves room for the closing line.
        let foundLineStart = false;
        for (
          let nl = remaining.lastIndexOf(
            "\n",
            Math.max(0, roomWithoutNewline - 1),
          );
          nl !== -1;
          nl = remaining.lastIndexOf("\n", nl - 1)
        ) {
          const candidate = nl + 1;
          if (candidate < earliestBreak) break;
          const owner = findFenceSpanAt(spans, candidate);
          if (owner && owner.start === enclosingFence.start) {
            breakIdx = Math.max(1, candidate);
            foundLineStart = true;
            break;
          }
        }

        if (!foundLineStart) {
          breakIdx =
            earliestBreak > roomWithoutNewline
              ? limit
              : Math.max(earliestBreak, roomWithNewline);
        }
      }

      // Decide whether the final break still lands inside the same fence.
      // NOTE(review): this also runs after the hard-break fallbacks above, so
      // if the break at `limit` is still inside the fence the closing line is
      // appended anyway and the chunk can exceed `limit` — confirm intended.
      const fenceAtBreak = findFenceSpanAt(spans, breakIdx);
      splitFence =
        fenceAtBreak && fenceAtBreak.start === enclosingFence.start
          ? fenceAtBreak
          : undefined;
    }

    const head = remaining.slice(0, breakIdx);
    if (!head) break;

    // Drop the whitespace character we broke on, if any.
    const skipSeparator =
      breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
    const resumeAt = Math.min(
      remaining.length,
      skipSeparator ? breakIdx + 1 : breakIdx,
    );
    const tail = remaining.slice(resumeAt);

    if (splitFence) {
      // Close the fence in this chunk and re-open it in the next one.
      const closer = `${splitFence.indent}${splitFence.marker}`;
      const joiner = head.endsWith("\n") ? "" : "\n";
      pieces.push(`${head}${joiner}${closer}`);
      remaining = `${splitFence.openLine}\n${tail}`;
    } else {
      pieces.push(head);
      remaining = stripLeadingNewlines(tail);
    }
  }

  if (remaining.length) pieces.push(remaining);
  return pieces;
}
|
|
|
|
/**
 * Remove every `"\n"` at the very start of `value`.
 * Only line feeds are removed; other whitespace (spaces, `"\r"`) is kept.
 */
function stripLeadingNewlines(value: string): string {
  // No `m` flag: `^` anchors to the start of the string only.
  return value.replace(/^\n+/, "");
}
|
|
|
|
/**
 * Pick the preferred soft break inside `window`, considering only indices
 * that `isSafeFenceBreak` accepts for the given fence spans.
 *
 * @returns The last eligible newline index if it is positive, otherwise the
 *          last eligible whitespace index if it is positive, otherwise `-1`.
 */
function pickSafeBreakIndex(
  window: string,
  spans: ReturnType<typeof parseFenceSpans>,
): number {
  const fenceSafe = (index: number): boolean => isSafeFenceBreak(spans, index);
  const found = scanParenAwareBreakpoints(window, fenceSafe);

  // Newline wins over other whitespace; index 0 never counts as a break.
  for (const candidate of [found.lastNewline, found.lastWhitespace]) {
    if (candidate > 0) return candidate;
  }
  return -1;
}
|
|
|
|
/**
 * Scan `window` left to right and record the last newline and the last other
 * whitespace character that sit outside any open parenthesis.
 *
 * Indices rejected by `isAllowed` are skipped entirely: they are neither
 * break candidates nor counted as parentheses. A `")"` with no open `"("` is
 * ignored.
 *
 * @param window    Text to scan.
 * @param isAllowed Per-index filter; defaults to accepting every index.
 * @returns `lastNewline` and `lastWhitespace` indices, each `-1` if none.
 */
function scanParenAwareBreakpoints(
  window: string,
  isAllowed: (index: number) => boolean = () => true,
): { lastNewline: number; lastWhitespace: number } {
  const whitespace = /\s/;
  const found = { lastNewline: -1, lastWhitespace: -1 };
  let openParens = 0;

  for (let pos = 0; pos < window.length; pos += 1) {
    if (isAllowed(pos)) {
      const ch = window[pos];
      if (ch === "(") {
        openParens += 1;
      } else if (ch === ")" && openParens > 0) {
        openParens -= 1;
      } else if (openParens === 0) {
        if (ch === "\n") {
          found.lastNewline = pos;
        } else if (whitespace.test(ch)) {
          found.lastWhitespace = pos;
        }
      }
    }
  }

  return found;
}
|