Files
clawdbot/src/auto-reply/reply.ts
2026-01-04 05:47:37 +01:00

508 lines
15 KiB
TypeScript

import crypto from "node:crypto";
import { resolveModelRefFromString } from "../agents/model-selection.js";
import {
abortEmbeddedPiRun,
isEmbeddedPiRunActive,
isEmbeddedPiRunStreaming,
resolveEmbeddedSessionLane,
} from "../agents/pi-embedded.js";
import {
DEFAULT_AGENT_WORKSPACE_DIR,
ensureAgentWorkspace,
} from "../agents/workspace.js";
import { type ClawdisConfig, loadConfig } from "../config/config.js";
import { resolveSessionTranscriptPath } from "../config/sessions.js";
import { logVerbose } from "../globals.js";
import { clearCommandLane, getQueueSize } from "../process/command-queue.js";
import { defaultRuntime } from "../runtime.js";
import { getAbortMemory } from "./reply/abort.js";
import { runReplyAgent } from "./reply/agent-runner.js";
import { resolveBlockStreamingChunking } from "./reply/block-streaming.js";
import { applySessionHints } from "./reply/body.js";
import { buildCommandContext, handleCommands } from "./reply/commands.js";
import {
handleDirectiveOnly,
isDirectiveOnly,
parseInlineDirectives,
persistInlineDirectives,
resolveDefaultModel,
} from "./reply/directive-handling.js";
import {
buildGroupIntro,
defaultGroupActivation,
resolveGroupRequireMention,
} from "./reply/groups.js";
import {
createModelSelectionState,
resolveContextTokens,
} from "./reply/model-selection.js";
import { resolveQueueSettings } from "./reply/queue.js";
import { initSessionState } from "./reply/session.js";
import {
ensureSkillSnapshot,
prependSystemEvents,
} from "./reply/session-updates.js";
import { createTypingController } from "./reply/typing.js";
import type { MsgContext } from "./templating.js";
import {
normalizeThinkLevel,
type ThinkLevel,
type VerboseLevel,
} from "./thinking.js";
import { SILENT_REPLY_TOKEN } from "./tokens.js";
import { isAudio, transcribeInboundAudio } from "./transcription.js";
import type { GetReplyOptions, ReplyPayload } from "./types.js";
export {
extractThinkDirective,
extractVerboseDirective,
} from "./reply/directives.js";
export { extractQueueDirective } from "./reply/queue.js";
export { extractReplyToTag } from "./reply/reply-tags.js";
export type { GetReplyOptions, ReplyPayload } from "./types.js";
const BARE_SESSION_RESET_PROMPT =
"A new session was started via /new or /reset. Say hi briefly (1-2 sentences) and ask what the user wants to do next. Do not mention internal steps, files, tools, or reasoning.";
export async function getReplyFromConfig(
ctx: MsgContext,
opts?: GetReplyOptions,
configOverride?: ClawdisConfig,
): Promise<ReplyPayload | ReplyPayload[] | undefined> {
const cfg = configOverride ?? loadConfig();
const agentCfg = cfg.agent;
const sessionCfg = cfg.session;
const { defaultProvider, defaultModel, aliasIndex } = resolveDefaultModel({
cfg,
});
let provider = defaultProvider;
let model = defaultModel;
if (opts?.isHeartbeat) {
const heartbeatRaw = agentCfg?.heartbeat?.model?.trim() ?? "";
const heartbeatRef = heartbeatRaw
? resolveModelRefFromString({
raw: heartbeatRaw,
defaultProvider,
aliasIndex,
})
: null;
if (heartbeatRef) {
provider = heartbeatRef.ref.provider;
model = heartbeatRef.ref.model;
}
}
const workspaceDirRaw = cfg.agent?.workspace ?? DEFAULT_AGENT_WORKSPACE_DIR;
const workspace = await ensureAgentWorkspace({
dir: workspaceDirRaw,
ensureBootstrapFiles: true,
});
const workspaceDir = workspace.dir;
const timeoutSeconds = Math.max(agentCfg?.timeoutSeconds ?? 600, 1);
const timeoutMs = timeoutSeconds * 1000;
const configuredTypingSeconds =
agentCfg?.typingIntervalSeconds ?? sessionCfg?.typingIntervalSeconds;
const typingIntervalSeconds =
typeof configuredTypingSeconds === "number" ? configuredTypingSeconds : 6;
const typing = createTypingController({
onReplyStart: opts?.onReplyStart,
typingIntervalSeconds,
silentToken: SILENT_REPLY_TOKEN,
log: defaultRuntime.log,
});
let transcribedText: string | undefined;
if (cfg.routing?.transcribeAudio && isAudio(ctx.MediaType)) {
const transcribed = await transcribeInboundAudio(cfg, ctx, defaultRuntime);
if (transcribed?.text) {
transcribedText = transcribed.text;
ctx.Body = transcribed.text;
ctx.Transcript = transcribed.text;
logVerbose("Replaced Body with audio transcript for reply flow");
}
}
const sessionState = await initSessionState({ ctx, cfg });
let {
sessionCtx,
sessionEntry,
sessionStore,
sessionKey,
sessionId,
isNewSession,
systemSent,
abortedLastRun,
storePath,
sessionScope,
groupResolution,
isGroup,
triggerBodyNormalized,
} = sessionState;
const directives = parseInlineDirectives(
sessionCtx.BodyStripped ?? sessionCtx.Body ?? "",
);
sessionCtx.Body = directives.cleaned;
sessionCtx.BodyStripped = directives.cleaned;
const requireMention = resolveGroupRequireMention({
cfg,
ctx: sessionCtx,
groupResolution,
});
const defaultActivation = defaultGroupActivation(requireMention);
let resolvedThinkLevel =
(directives.thinkLevel as ThinkLevel | undefined) ??
(sessionEntry?.thinkingLevel as ThinkLevel | undefined) ??
(agentCfg?.thinkingDefault as ThinkLevel | undefined);
const resolvedVerboseLevel =
(directives.verboseLevel as VerboseLevel | undefined) ??
(sessionEntry?.verboseLevel as VerboseLevel | undefined) ??
(agentCfg?.verboseDefault as VerboseLevel | undefined);
const resolvedBlockStreaming =
agentCfg?.blockStreamingDefault === "off" ? "off" : "on";
const resolvedBlockStreamingBreak =
agentCfg?.blockStreamingBreak === "message_end"
? "message_end"
: "text_end";
const blockStreamingEnabled = resolvedBlockStreaming === "on";
const blockReplyChunking = blockStreamingEnabled
? resolveBlockStreamingChunking(cfg, sessionCtx.Surface)
: undefined;
const modelState = await createModelSelectionState({
cfg,
agentCfg,
sessionEntry,
sessionStore,
sessionKey,
storePath,
defaultProvider,
defaultModel,
provider,
model,
hasModelDirective: directives.hasModelDirective,
});
provider = modelState.provider;
model = modelState.model;
let contextTokens = resolveContextTokens({
agentCfg,
model,
});
const initialModelLabel = `${provider}/${model}`;
const formatModelSwitchEvent = (label: string, alias?: string) =>
alias
? `Model switched to ${alias} (${label}).`
: `Model switched to ${label}.`;
const isModelListAlias =
directives.hasModelDirective &&
directives.rawModelDirective?.trim().toLowerCase() === "status";
const effectiveModelDirective = isModelListAlias
? undefined
: directives.rawModelDirective;
if (
isDirectiveOnly({
directives,
cleanedBody: directives.cleaned,
ctx,
cfg,
isGroup,
})
) {
const directiveReply = await handleDirectiveOnly({
directives,
sessionEntry,
sessionStore,
sessionKey,
storePath,
defaultProvider,
defaultModel,
aliasIndex,
allowedModelKeys: modelState.allowedModelKeys,
allowedModelCatalog: modelState.allowedModelCatalog,
resetModelOverride: modelState.resetModelOverride,
provider,
model,
initialModelLabel,
formatModelSwitchEvent,
});
typing.cleanup();
return directiveReply;
}
const persisted = await persistInlineDirectives({
directives,
effectiveModelDirective,
sessionEntry,
sessionStore,
sessionKey,
storePath,
defaultProvider,
defaultModel,
aliasIndex,
allowedModelKeys: modelState.allowedModelKeys,
provider,
model,
initialModelLabel,
formatModelSwitchEvent,
agentCfg,
});
provider = persisted.provider;
model = persisted.model;
contextTokens = persisted.contextTokens;
const perMessageQueueMode =
directives.hasQueueDirective && !directives.queueReset
? directives.queueMode
: undefined;
const perMessageQueueOptions =
directives.hasQueueDirective && !directives.queueReset
? {
debounceMs: directives.debounceMs,
cap: directives.cap,
dropPolicy: directives.dropPolicy,
}
: undefined;
const command = buildCommandContext({
ctx,
cfg,
sessionKey,
isGroup,
triggerBodyNormalized,
});
const isEmptyConfig = Object.keys(cfg).length === 0;
if (
command.isWhatsAppSurface &&
isEmptyConfig &&
command.from &&
command.to &&
command.from !== command.to
) {
typing.cleanup();
return undefined;
}
if (!sessionEntry && command.abortKey) {
abortedLastRun = getAbortMemory(command.abortKey) ?? false;
}
const commandResult = await handleCommands({
ctx,
cfg,
command,
sessionEntry,
sessionStore,
sessionKey,
storePath,
sessionScope,
workspaceDir,
defaultGroupActivation: () => defaultActivation,
resolvedThinkLevel,
resolvedVerboseLevel: resolvedVerboseLevel ?? "off",
resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel,
provider,
model,
contextTokens,
isGroup,
});
if (!commandResult.shouldContinue) {
typing.cleanup();
return commandResult.reply;
}
const isFirstTurnInSession = isNewSession || !systemSent;
const isGroupChat = sessionCtx.ChatType === "group";
const wasMentioned = ctx.WasMentioned === true;
const shouldEagerType = !isGroupChat || wasMentioned;
const shouldInjectGroupIntro =
isGroupChat &&
(isFirstTurnInSession || sessionEntry?.groupActivationNeedsSystemIntro);
const groupIntro = shouldInjectGroupIntro
? buildGroupIntro({
sessionCtx,
sessionEntry,
defaultActivation,
silentToken: SILENT_REPLY_TOKEN,
})
: "";
const baseBody = sessionCtx.BodyStripped ?? sessionCtx.Body ?? "";
const rawBodyTrimmed = (ctx.Body ?? "").trim();
const baseBodyTrimmedRaw = baseBody.trim();
const isBareSessionReset =
isNewSession &&
baseBodyTrimmedRaw.length === 0 &&
rawBodyTrimmed.length > 0;
const baseBodyFinal = isBareSessionReset
? BARE_SESSION_RESET_PROMPT
: baseBody;
const baseBodyTrimmed = baseBodyFinal.trim();
if (!baseBodyTrimmed) {
await typing.onReplyStart();
logVerbose("Inbound body empty after normalization; skipping agent run");
typing.cleanup();
return {
text: "I didn't receive any text in your message. Please resend or add a caption.",
};
}
let prefixedBodyBase = await applySessionHints({
baseBody: baseBodyFinal,
abortedLastRun,
sessionEntry,
sessionStore,
sessionKey,
storePath,
abortKey: command.abortKey,
messageId: sessionCtx.MessageSid,
});
const isGroupSession =
sessionEntry?.chatType === "group" || sessionEntry?.chatType === "room";
const isMainSession =
!isGroupSession && sessionKey === (sessionCfg?.mainKey ?? "main");
prefixedBodyBase = await prependSystemEvents({
cfg,
isMainSession,
isNewSession,
prefixedBodyBase,
});
const skillResult = await ensureSkillSnapshot({
sessionEntry,
sessionStore,
sessionKey,
storePath,
sessionId,
isFirstTurnInSession,
workspaceDir,
cfg,
});
sessionEntry = skillResult.sessionEntry ?? sessionEntry;
systemSent = skillResult.systemSent;
const skillsSnapshot = skillResult.skillsSnapshot;
const prefixedBody = transcribedText
? [prefixedBodyBase, `Transcript:\n${transcribedText}`]
.filter(Boolean)
.join("\n\n")
: prefixedBodyBase;
const mediaNote = ctx.MediaPath?.length
? `[media attached: ${ctx.MediaPath}${ctx.MediaType ? ` (${ctx.MediaType})` : ""}${ctx.MediaUrl ? ` | ${ctx.MediaUrl}` : ""}]`
: undefined;
const mediaReplyHint = mediaNote
? "To send an image back, add a line like: MEDIA:https://example.com/image.jpg (no spaces). Keep caption in the text body."
: undefined;
let commandBody = mediaNote
? [mediaNote, mediaReplyHint, prefixedBody ?? ""]
.filter(Boolean)
.join("\n")
.trim()
: prefixedBody;
if (!resolvedThinkLevel && commandBody) {
const parts = commandBody.split(/\s+/);
const maybeLevel = normalizeThinkLevel(parts[0]);
if (maybeLevel) {
resolvedThinkLevel = maybeLevel;
commandBody = parts.slice(1).join(" ").trim();
}
}
if (!resolvedThinkLevel) {
resolvedThinkLevel = await modelState.resolveDefaultThinkingLevel();
}
const sessionIdFinal = sessionId ?? crypto.randomUUID();
const sessionFile = resolveSessionTranscriptPath(sessionIdFinal);
const queueBodyBase = transcribedText
? [baseBodyFinal, `Transcript:\n${transcribedText}`]
.filter(Boolean)
.join("\n\n")
: baseBodyFinal;
const queuedBody = mediaNote
? [mediaNote, mediaReplyHint, queueBodyBase]
.filter(Boolean)
.join("\n")
.trim()
: queueBodyBase;
const resolvedQueue = resolveQueueSettings({
cfg,
surface: sessionCtx.Surface,
sessionEntry,
inlineMode: perMessageQueueMode,
inlineOptions: perMessageQueueOptions,
});
const sessionLaneKey = resolveEmbeddedSessionLane(
sessionKey ?? sessionIdFinal,
);
const laneSize = getQueueSize(sessionLaneKey);
if (resolvedQueue.mode === "interrupt" && laneSize > 0) {
const cleared = clearCommandLane(sessionLaneKey);
const aborted = abortEmbeddedPiRun(sessionIdFinal);
logVerbose(
`Interrupting ${sessionLaneKey} (cleared ${cleared}, aborted=${aborted})`,
);
}
const queueKey = sessionKey ?? sessionIdFinal;
const isActive = isEmbeddedPiRunActive(sessionIdFinal);
const isStreaming = isEmbeddedPiRunStreaming(sessionIdFinal);
const shouldSteer =
resolvedQueue.mode === "steer" || resolvedQueue.mode === "steer-backlog";
const shouldFollowup =
resolvedQueue.mode === "followup" ||
resolvedQueue.mode === "collect" ||
resolvedQueue.mode === "steer-backlog";
const followupRun = {
prompt: queuedBody,
summaryLine: baseBodyTrimmedRaw,
enqueuedAt: Date.now(),
run: {
sessionId: sessionIdFinal,
sessionKey,
surface: sessionCtx.Surface?.trim().toLowerCase() || undefined,
sessionFile,
workspaceDir,
config: cfg,
skillsSnapshot,
provider,
model,
thinkLevel: resolvedThinkLevel,
verboseLevel: resolvedVerboseLevel,
timeoutMs,
blockReplyBreak: resolvedBlockStreamingBreak,
ownerNumbers:
command.ownerList.length > 0 ? command.ownerList : undefined,
extraSystemPrompt: groupIntro || undefined,
enforceFinalTag: provider === "ollama" ? true : undefined,
},
};
if (shouldEagerType) {
await typing.startTypingLoop();
}
return runReplyAgent({
commandBody,
followupRun,
queueKey,
resolvedQueue,
shouldSteer,
shouldFollowup,
isActive,
isStreaming,
opts,
typing,
sessionEntry,
sessionStore,
sessionKey,
storePath,
defaultModel,
agentCfgContextTokens: agentCfg?.contextTokens,
resolvedVerboseLevel: resolvedVerboseLevel ?? "off",
isNewSession,
blockStreamingEnabled,
blockReplyChunking,
resolvedBlockStreamingBreak,
sessionCtx,
shouldInjectGroupIntro,
});
}