clawdbot/src/auto-reply/reply/agent-runner.ts

import crypto from "node:crypto";
import fs from "node:fs";
import { runCliAgent } from "../../agents/cli-runner.js";
import { getCliSessionId, setCliSessionId } from "../../agents/cli-session.js";
import { lookupContextTokens } from "../../agents/context.js";
import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js";
import { resolveModelAuthMode } from "../../agents/model-auth.js";
import { runWithModelFallback } from "../../agents/model-fallback.js";
import { isCliProvider } from "../../agents/model-selection.js";
import {
queueEmbeddedPiMessage,
runEmbeddedPiAgent,
} from "../../agents/pi-embedded.js";
import {
isCompactionFailureError,
isContextOverflowError,
} from "../../agents/pi-embedded-helpers.js";
import {
resolveSandboxConfigForAgent,
resolveSandboxRuntimeStatus,
} from "../../agents/sandbox.js";
import { hasNonzeroUsage, type NormalizedUsage } from "../../agents/usage.js";
import type { ClawdbotConfig } from "../../config/config.js";
import {
loadSessionStore,
resolveAgentIdFromSessionKey,
resolveSessionTranscriptPath,
type SessionEntry,
saveSessionStore,
updateSessionStoreEntry,
} from "../../config/sessions.js";
import type { TypingMode } from "../../config/types.js";
import { logVerbose } from "../../globals.js";
import {
emitAgentEvent,
registerAgentRunContext,
} from "../../infra/agent-events.js";
import { isAudioFileName } from "../../media/mime.js";
import { getProviderDock } from "../../providers/dock.js";
import type { ProviderThreadingToolContext } from "../../providers/plugins/types.js";
import { normalizeProviderId } from "../../providers/registry.js";
import { defaultRuntime } from "../../runtime.js";
import { isReasoningTagProvider } from "../../utils/provider-utils.js";
import {
estimateUsageCost,
formatTokenCount,
formatUsd,
resolveModelCostConfig,
} from "../../utils/usage-format.js";
import { stripHeartbeatToken } from "../heartbeat.js";
import type { OriginatingChannelType, TemplateContext } from "../templating.js";
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import {
createAudioAsVoiceBuffer,
createBlockReplyPipeline,
} from "./block-reply-pipeline.js";
import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
import { createFollowupRunner } from "./followup-runner.js";
import {
resolveMemoryFlushContextWindowTokens,
resolveMemoryFlushSettings,
shouldRunMemoryFlush,
} from "./memory-flush.js";
import {
enqueueFollowupRun,
type FollowupRun,
type QueueSettings,
scheduleFollowupDrain,
} from "./queue.js";
import { parseReplyDirectives } from "./reply-directives.js";
import {
applyReplyTagsToPayload,
applyReplyThreading,
filterMessagingToolDuplicates,
isRenderablePayload,
shouldSuppressMessagingToolReplies,
} from "./reply-payloads.js";
import {
createReplyToModeFilterForChannel,
resolveReplyToMode,
} from "./reply-threading.js";
import { incrementCompactionCount } from "./session-updates.js";
import type { TypingController } from "./typing.js";
import { createTypingSignaler } from "./typing-mode.js";

const BUN_FETCH_SOCKET_ERROR_RE = /socket connection was closed unexpectedly/i;
const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;

/**
* Build provider-specific threading context for tool auto-injection.
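*
* Note: the returned fields are spread directly into the runEmbeddedPiAgent
* call arguments below, so the exact shape is whatever the provider dock's
* buildToolContext returns; when there is no dock or no threading support the
* result is an empty object and the spread is a no-op.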
*/
function buildThreadingToolContext(params: {
sessionCtx: TemplateContext;
config: ClawdbotConfig | undefined;
hasRepliedRef: { value: boolean } | undefined;
}): ProviderThreadingToolContext {
const { sessionCtx, config, hasRepliedRef } = params;
if (!config) return {};
const provider = normalizeProviderId(sessionCtx.Provider);
if (!provider) return {};
const dock = getProviderDock(provider);
if (!dock?.threading?.buildToolContext) return {};
return (
dock.threading.buildToolContext({
cfg: config,
accountId: sessionCtx.AccountId,
context: {
Provider: sessionCtx.Provider,
To: sessionCtx.To,
ReplyToId: sessionCtx.ReplyToId,
ThreadLabel: sessionCtx.ThreadLabel,
},
hasRepliedRef,
}) ?? {}
);
}
const isBunFetchSocketError = (message?: string) =>
Boolean(message && BUN_FETCH_SOCKET_ERROR_RE.test(message));
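// Rewrites an opaque Bun fetch socket failure into a user-facing warning; the
// original message (e.g. "socket connection was closed unexpectedly") is kept
// verbatim inside a fenced block under the explanatory line.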
const formatBunFetchSocketError = (message: string) => {
const trimmed = message.trim();
return [
"⚠️ LLM connection failed. This could be due to server issues, network problems, or context length exceeded (e.g., with local LLMs like LM Studio). Original error:",
"```",
trimmed || "Unknown error",
"```",
].join("\n");
};
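// Renders the per-response usage footer. Illustrative output, assuming
// formatTokenCount renders "12.3k"-style counts and formatUsd renders
// "$0.05"-style amounts:
//   "Usage: 12.3k in / 1.1k out · est $0.05"
// The " · est ..." suffix is omitted when no cost label is produced.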
const formatResponseUsageLine = (params: {
usage?: NormalizedUsage;
showCost: boolean;
costConfig?: {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
};
}): string | null => {
const usage = params.usage;
if (!usage) return null;
const input = usage.input;
const output = usage.output;
if (typeof input !== "number" && typeof output !== "number") return null;
const inputLabel = typeof input === "number" ? formatTokenCount(input) : "?";
const outputLabel =
typeof output === "number" ? formatTokenCount(output) : "?";
const cost =
params.showCost && typeof input === "number" && typeof output === "number"
? estimateUsageCost({
usage: {
input,
output,
cacheRead: usage.cacheRead,
cacheWrite: usage.cacheWrite,
},
cost: params.costConfig,
})
: undefined;
const costLabel = params.showCost ? formatUsd(cost) : undefined;
const suffix = costLabel ? ` · est ${costLabel}` : "";
return `Usage: ${inputLabel} in / ${outputLabel} out${suffix}`;
};
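// Appends the usage line to the last payload that has text, e.g.
// [{ text: "Done." }, { mediaUrl }] plus "Usage: 1k in / 200 out" becomes
// [{ text: "Done.\nUsage: 1k in / 200 out" }, { mediaUrl }]; if no payload
// has text, the line is added as its own trailing payload.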
const appendUsageLine = (
payloads: ReplyPayload[],
line: string,
): ReplyPayload[] => {
let index = -1;
for (let i = payloads.length - 1; i >= 0; i -= 1) {
if (payloads[i]?.text) {
index = i;
break;
}
}
if (index === -1) return [...payloads, { text: line }];
const existing = payloads[index];
const existingText = existing.text ?? "";
const separator = existingText.endsWith("\n") ? "" : "\n";
const next = {
...existing,
text: `${existingText}${separator}${line}`,
};
const updated = payloads.slice();
updated[index] = next;
return updated;
};
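
/**
* Run a single reply-agent turn for an inbound message.
*
* Covers steering/queueing against an already-active run, an optional
* memory-flush pre-pass, the main run with model fallback (embedded vs CLI
* providers), block-reply streaming, payload sanitization and reply
* threading, and session-store bookkeeping (usage, model, compaction count).
*/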
export async function runReplyAgent(params: {
commandBody: string;
followupRun: FollowupRun;
queueKey: string;
resolvedQueue: QueueSettings;
shouldSteer: boolean;
shouldFollowup: boolean;
isActive: boolean;
isStreaming: boolean;
opts?: GetReplyOptions;
typing: TypingController;
sessionEntry?: SessionEntry;
sessionStore?: Record<string, SessionEntry>;
sessionKey?: string;
storePath?: string;
defaultModel: string;
agentCfgContextTokens?: number;
resolvedVerboseLevel: VerboseLevel;
isNewSession: boolean;
blockStreamingEnabled: boolean;
blockReplyChunking?: {
minChars: number;
maxChars: number;
breakPreference: "paragraph" | "newline" | "sentence";
};
resolvedBlockStreamingBreak: "text_end" | "message_end";
sessionCtx: TemplateContext;
shouldInjectGroupIntro: boolean;
typingMode: TypingMode;
}): Promise<ReplyPayload | ReplyPayload[] | undefined> {
const {
commandBody,
followupRun,
queueKey,
resolvedQueue,
shouldSteer,
shouldFollowup,
isActive,
isStreaming,
opts,
typing,
sessionEntry,
sessionStore,
sessionKey,
storePath,
defaultModel,
agentCfgContextTokens,
resolvedVerboseLevel,
isNewSession,
blockStreamingEnabled,
blockReplyChunking,
resolvedBlockStreamingBreak,
sessionCtx,
shouldInjectGroupIntro,
typingMode,
} = params;
let activeSessionEntry = sessionEntry;
const activeSessionStore = sessionStore;
let activeIsNewSession = isNewSession;
const isHeartbeat = opts?.isHeartbeat === true;
const typingSignals = createTypingSignaler({
typing,
mode: typingMode,
isHeartbeat,
});
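// Verbose level is re-read from the session store on each tool result so a
// mid-run toggle takes effect immediately; on read failure we fall back to
// the level resolved at the start of the turn.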
const shouldEmitToolResult = () => {
if (!sessionKey || !storePath) {
return resolvedVerboseLevel === "on";
}
try {
const store = loadSessionStore(storePath);
const entry = store[sessionKey];
const current = normalizeVerboseLevel(entry?.verboseLevel);
if (current) return current === "on";
} catch {
// ignore store read failures
}
return resolvedVerboseLevel === "on";
};
const pendingToolTasks = new Set<Promise<void>>();
const blockReplyTimeoutMs =
opts?.blockReplyTimeoutMs ?? BLOCK_REPLY_SEND_TIMEOUT_MS;
const hasAudioMedia = (urls?: string[]): boolean =>
Boolean(urls?.some((u) => isAudioFileName(u)));
const isAudioPayload = (payload: ReplyPayload) =>
hasAudioMedia(
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : undefined),
);
const replyToChannel =
sessionCtx.OriginatingChannel ??
((sessionCtx.Surface ?? sessionCtx.Provider)?.toLowerCase() as
| OriginatingChannelType
| undefined);
const replyToMode = resolveReplyToMode(
followupRun.run.config,
replyToChannel,
sessionCtx.AccountId,
);
const applyReplyToMode = createReplyToModeFilterForChannel(
replyToMode,
replyToChannel,
);
const cfg = followupRun.run.config;
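// Block streaming setup: only when enabled and the caller passed onBlockReply.
// Coalescing settings are resolved per provider/account, and the pipeline
// sends coalesced block replies under a send timeout, buffering audio
// payloads via createAudioAsVoiceBuffer.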
const blockReplyCoalescing =
blockStreamingEnabled && opts?.onBlockReply
? resolveBlockStreamingCoalescing(
cfg,
sessionCtx.Provider,
sessionCtx.AccountId,
blockReplyChunking,
)
: undefined;
const blockReplyPipeline =
blockStreamingEnabled && opts?.onBlockReply
? createBlockReplyPipeline({
onBlockReply: opts.onBlockReply,
timeoutMs: blockReplyTimeoutMs,
coalescing: blockReplyCoalescing,
buffer: createAudioAsVoiceBuffer({ isAudioPayload }),
})
: null;
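// Queueing semantics: if a run is already streaming and steering is allowed,
// try to queue the prompt into the live embedded session; when that succeeds
// and no follow-up is requested, this turn ends here. Otherwise, if a run is
// still active, enqueue the turn as a follow-up and return without replying.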
if (shouldSteer && isStreaming) {
const steered = queueEmbeddedPiMessage(
followupRun.run.sessionId,
followupRun.prompt,
);
if (steered && !shouldFollowup) {
if (activeSessionEntry && activeSessionStore && sessionKey) {
activeSessionEntry.updatedAt = Date.now();
activeSessionStore[sessionKey] = activeSessionEntry;
if (storePath) {
await saveSessionStore(storePath, activeSessionStore);
}
}
typing.cleanup();
return undefined;
}
}
if (isActive && (shouldFollowup || resolvedQueue.mode === "steer")) {
enqueueFollowupRun(queueKey, followupRun, resolvedQueue);
if (activeSessionEntry && activeSessionStore && sessionKey) {
activeSessionEntry.updatedAt = Date.now();
activeSessionStore[sessionKey] = activeSessionEntry;
if (storePath) {
await saveSessionStore(storePath, activeSessionStore);
}
}
typing.cleanup();
return undefined;
}
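// Optional memory flush: a separate embedded-agent pass run before the main
// turn. Skipped for heartbeats, CLI providers, and read-only sandboxes, and
// only triggered when session token usage nears the context window per
// reserveTokensFloor / softThresholdTokens.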
const memoryFlushSettings = resolveMemoryFlushSettings(cfg);
const memoryFlushWritable = (() => {
if (!sessionKey) return true;
const runtime = resolveSandboxRuntimeStatus({ cfg, sessionKey });
if (!runtime.sandboxed) return true;
const sandboxCfg = resolveSandboxConfigForAgent(cfg, runtime.agentId);
return sandboxCfg.workspaceAccess === "rw";
})();
const shouldFlushMemory =
memoryFlushSettings &&
memoryFlushWritable &&
!isHeartbeat &&
!isCliProvider(followupRun.run.provider, cfg) &&
shouldRunMemoryFlush({
entry:
activeSessionEntry ??
(sessionKey ? activeSessionStore?.[sessionKey] : undefined),
contextWindowTokens: resolveMemoryFlushContextWindowTokens({
modelId: followupRun.run.model ?? defaultModel,
agentCfgContextTokens,
}),
reserveTokensFloor: memoryFlushSettings.reserveTokensFloor,
softThresholdTokens: memoryFlushSettings.softThresholdTokens,
});
if (shouldFlushMemory) {
const flushRunId = crypto.randomUUID();
if (sessionKey) {
registerAgentRunContext(flushRunId, {
sessionKey,
verboseLevel: resolvedVerboseLevel,
});
}
let memoryCompactionCompleted = false;
const flushSystemPrompt = [
followupRun.run.extraSystemPrompt,
memoryFlushSettings.systemPrompt,
]
.filter(Boolean)
.join("\n\n");
try {
await runWithModelFallback({
cfg: followupRun.run.config,
provider: followupRun.run.provider,
model: followupRun.run.model,
run: (provider, model) =>
runEmbeddedPiAgent({
sessionId: followupRun.run.sessionId,
sessionKey,
messageProvider:
sessionCtx.Provider?.trim().toLowerCase() || undefined,
agentAccountId: sessionCtx.AccountId,
// Provider threading context for tool auto-injection
...buildThreadingToolContext({
sessionCtx,
config: followupRun.run.config,
hasRepliedRef: opts?.hasRepliedRef,
}),
sessionFile: followupRun.run.sessionFile,
workspaceDir: followupRun.run.workspaceDir,
agentDir: followupRun.run.agentDir,
config: followupRun.run.config,
skillsSnapshot: followupRun.run.skillsSnapshot,
prompt: memoryFlushSettings.prompt,
extraSystemPrompt: flushSystemPrompt,
ownerNumbers: followupRun.run.ownerNumbers,
enforceFinalTag:
followupRun.run.enforceFinalTag ||
isReasoningTagProvider(provider),
provider,
model,
authProfileId: followupRun.run.authProfileId,
thinkLevel: followupRun.run.thinkLevel,
verboseLevel: followupRun.run.verboseLevel,
reasoningLevel: followupRun.run.reasoningLevel,
bashElevated: followupRun.run.bashElevated,
timeoutMs: followupRun.run.timeoutMs,
runId: flushRunId,
onAgentEvent: (evt) => {
if (evt.stream === "compaction") {
const phase =
typeof evt.data.phase === "string" ? evt.data.phase : "";
const willRetry = Boolean(evt.data.willRetry);
if (phase === "end" && !willRetry) {
memoryCompactionCompleted = true;
}
}
},
}),
});
let memoryFlushCompactionCount =
activeSessionEntry?.compactionCount ??
(sessionKey ? activeSessionStore?.[sessionKey]?.compactionCount : 0) ??
0;
if (memoryCompactionCompleted) {
const nextCount = await incrementCompactionCount({
sessionEntry: activeSessionEntry,
sessionStore: activeSessionStore,
sessionKey,
storePath,
});
if (typeof nextCount === "number") {
memoryFlushCompactionCount = nextCount;
}
}
if (storePath && sessionKey) {
try {
const updatedEntry = await updateSessionStoreEntry({
storePath,
sessionKey,
update: async () => ({
memoryFlushAt: Date.now(),
memoryFlushCompactionCount,
}),
});
if (updatedEntry) {
activeSessionEntry = updatedEntry;
}
} catch (err) {
logVerbose(`failed to persist memory flush metadata: ${String(err)}`);
}
}
} catch (err) {
logVerbose(`memory flush run failed: ${String(err)}`);
}
}
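// Build the follow-up runner for this session; finalizeWithFollowup wraps
// every return below so a queue drain is always scheduled and turns queued
// during this run get processed afterwards.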
const runFollowupTurn = createFollowupRunner({
opts,
typing,
typingMode,
sessionEntry: activeSessionEntry,
sessionStore: activeSessionStore,
sessionKey,
storePath,
defaultModel,
agentCfgContextTokens,
});
const finalizeWithFollowup = <T>(value: T): T => {
scheduleFollowupDrain(queueKey, runFollowupTurn);
return value;
};
let didLogHeartbeatStrip = false;
let autoCompactionCompleted = false;
let responseUsageLine: string | undefined;
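// Compaction-failure recovery: mint a new session id and transcript path,
// persist them, repoint followupRun at the fresh session, and let the run
// loop retry exactly once (guarded by didResetAfterCompactionFailure).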
const resetSessionAfterCompactionFailure = async (
reason: string,
): Promise<boolean> => {
if (!sessionKey || !activeSessionStore || !storePath) return false;
const nextSessionId = crypto.randomUUID();
const nextEntry: SessionEntry = {
...(activeSessionStore[sessionKey] ?? activeSessionEntry),
sessionId: nextSessionId,
updatedAt: Date.now(),
systemSent: false,
abortedLastRun: false,
};
const agentId = resolveAgentIdFromSessionKey(sessionKey);
const topicId =
typeof sessionCtx.MessageThreadId === "number"
? sessionCtx.MessageThreadId
: undefined;
const nextSessionFile = resolveSessionTranscriptPath(
nextSessionId,
agentId,
topicId,
);
nextEntry.sessionFile = nextSessionFile;
activeSessionStore[sessionKey] = nextEntry;
try {
await saveSessionStore(storePath, activeSessionStore);
} catch (err) {
defaultRuntime.error(
`Failed to persist session reset after compaction failure (${sessionKey}): ${String(err)}`,
);
}
followupRun.run.sessionId = nextSessionId;
followupRun.run.sessionFile = nextSessionFile;
activeSessionEntry = nextEntry;
activeIsNewSession = true;
defaultRuntime.error(
`Auto-compaction failed (${reason}). Restarting session ${sessionKey} -> ${nextSessionId} and retrying.`,
);
return true;
};
try {
const runId = crypto.randomUUID();
if (sessionKey) {
registerAgentRunContext(runId, {
sessionKey,
verboseLevel: resolvedVerboseLevel,
});
}
let runResult: Awaited<ReturnType<typeof runEmbeddedPiAgent>>;
let fallbackProvider = followupRun.run.provider;
let fallbackModel = followupRun.run.model;
let didResetAfterCompactionFailure = false;
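// Main run loop: runWithModelFallback walks the configured provider/model
// fallbacks. CLI providers go through runCliAgent with manually emitted
// lifecycle events; everything else goes through runEmbeddedPiAgent with
// partial/reasoning/block-reply/tool-result callbacks wired in. The loop only
// repeats after the one-time session reset on a compaction failure.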
while (true) {
try {
const allowPartialStream = !(
followupRun.run.reasoningLevel === "stream" && opts?.onReasoningStream
);
const normalizeStreamingText = (
payload: ReplyPayload,
): { text?: string; skip: boolean } => {
if (!allowPartialStream) return { skip: true };
let text = payload.text;
if (!isHeartbeat && text?.includes("HEARTBEAT_OK")) {
const stripped = stripHeartbeatToken(text, {
mode: "message",
});
if (stripped.didStrip && !didLogHeartbeatStrip) {
didLogHeartbeatStrip = true;
logVerbose("Stripped stray HEARTBEAT_OK token from reply");
}
if (stripped.shouldSkip && (payload.mediaUrls?.length ?? 0) === 0) {
return { skip: true };
}
text = stripped.text;
}
if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) {
return { skip: true };
}
return { text, skip: false };
};
const handlePartialForTyping = async (
payload: ReplyPayload,
): Promise<string | undefined> => {
const { text, skip } = normalizeStreamingText(payload);
if (skip || !text) return undefined;
await typingSignals.signalTextDelta(text);
return text;
};
const fallbackResult = await runWithModelFallback({
cfg: followupRun.run.config,
provider: followupRun.run.provider,
model: followupRun.run.model,
run: (provider, model) => {
if (isCliProvider(provider, followupRun.run.config)) {
const startedAt = Date.now();
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "start",
startedAt,
},
});
const cliSessionId = getCliSessionId(
activeSessionEntry,
provider,
);
return runCliAgent({
sessionId: followupRun.run.sessionId,
sessionKey,
sessionFile: followupRun.run.sessionFile,
workspaceDir: followupRun.run.workspaceDir,
config: followupRun.run.config,
prompt: commandBody,
provider,
model,
thinkLevel: followupRun.run.thinkLevel,
timeoutMs: followupRun.run.timeoutMs,
runId,
extraSystemPrompt: followupRun.run.extraSystemPrompt,
ownerNumbers: followupRun.run.ownerNumbers,
cliSessionId,
})
.then((result) => {
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "end",
startedAt,
endedAt: Date.now(),
},
});
return result;
})
.catch((err) => {
emitAgentEvent({
runId,
stream: "lifecycle",
data: {
phase: "error",
startedAt,
endedAt: Date.now(),
error: err instanceof Error ? err.message : String(err),
},
});
throw err;
});
}
return runEmbeddedPiAgent({
sessionId: followupRun.run.sessionId,
sessionKey,
messageProvider:
sessionCtx.Provider?.trim().toLowerCase() || undefined,
agentAccountId: sessionCtx.AccountId,
// Provider threading context for tool auto-injection
...buildThreadingToolContext({
sessionCtx,
config: followupRun.run.config,
hasRepliedRef: opts?.hasRepliedRef,
}),
sessionFile: followupRun.run.sessionFile,
workspaceDir: followupRun.run.workspaceDir,
agentDir: followupRun.run.agentDir,
config: followupRun.run.config,
skillsSnapshot: followupRun.run.skillsSnapshot,
prompt: commandBody,
extraSystemPrompt: followupRun.run.extraSystemPrompt,
ownerNumbers: followupRun.run.ownerNumbers,
enforceFinalTag:
followupRun.run.enforceFinalTag ||
isReasoningTagProvider(provider),
provider,
model,
authProfileId: followupRun.run.authProfileId,
thinkLevel: followupRun.run.thinkLevel,
verboseLevel: followupRun.run.verboseLevel,
reasoningLevel: followupRun.run.reasoningLevel,
bashElevated: followupRun.run.bashElevated,
timeoutMs: followupRun.run.timeoutMs,
runId,
blockReplyBreak: resolvedBlockStreamingBreak,
blockReplyChunking,
onPartialReply: allowPartialStream
? async (payload) => {
const textForTyping = await handlePartialForTyping(payload);
if (!opts?.onPartialReply || textForTyping === undefined)
return;
await opts.onPartialReply({
text: textForTyping,
mediaUrls: payload.mediaUrls,
});
}
: undefined,
onReasoningStream:
typingSignals.shouldStartOnReasoning || opts?.onReasoningStream
? async (payload) => {
await typingSignals.signalReasoningDelta();
await opts?.onReasoningStream?.({
text: payload.text,
mediaUrls: payload.mediaUrls,
});
}
: undefined,
onAgentEvent: (evt) => {
// Trigger typing when tools start executing
if (evt.stream === "tool") {
const phase =
typeof evt.data.phase === "string" ? evt.data.phase : "";
if (phase === "start") {
void typingSignals.signalToolStart();
}
}
// Track auto-compaction completion
if (evt.stream === "compaction") {
const phase =
typeof evt.data.phase === "string" ? evt.data.phase : "";
const willRetry = Boolean(evt.data.willRetry);
if (phase === "end" && !willRetry) {
autoCompactionCompleted = true;
}
}
},
onBlockReply:
blockStreamingEnabled && opts?.onBlockReply
? async (payload) => {
const { text, skip } = normalizeStreamingText(payload);
const hasPayloadMedia =
(payload.mediaUrls?.length ?? 0) > 0;
if (skip && !hasPayloadMedia) return;
const taggedPayload = applyReplyTagsToPayload(
{
text,
mediaUrls: payload.mediaUrls,
mediaUrl: payload.mediaUrls?.[0],
},
sessionCtx.MessageSid,
);
// Let through payloads with audioAsVoice flag even if empty (need to track it)
if (
!isRenderablePayload(taggedPayload) &&
!payload.audioAsVoice
)
return;
const parsed = parseReplyDirectives(
taggedPayload.text ?? "",
{
currentMessageId: sessionCtx.MessageSid,
silentToken: SILENT_REPLY_TOKEN,
},
);
const cleaned = parsed.text || undefined;
const hasRenderableMedia =
Boolean(taggedPayload.mediaUrl) ||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
// Skip empty payloads unless they have audioAsVoice flag (need to track it)
if (
!cleaned &&
!hasRenderableMedia &&
!payload.audioAsVoice &&
!parsed.audioAsVoice
)
return;
if (parsed.isSilent && !hasRenderableMedia) return;
const blockPayload: ReplyPayload = applyReplyToMode({
...taggedPayload,
text: cleaned,
audioAsVoice: Boolean(
parsed.audioAsVoice || payload.audioAsVoice,
),
replyToId: taggedPayload.replyToId ?? parsed.replyToId,
replyToTag:
taggedPayload.replyToTag || parsed.replyToTag,
replyToCurrent:
taggedPayload.replyToCurrent || parsed.replyToCurrent,
});
void typingSignals
.signalTextDelta(cleaned ?? taggedPayload.text)
.catch((err) => {
logVerbose(
`block reply typing signal failed: ${String(err)}`,
);
});
blockReplyPipeline?.enqueue(blockPayload);
}
: undefined,
onBlockReplyFlush:
blockStreamingEnabled && blockReplyPipeline
? async () => {
await blockReplyPipeline.flush({ force: true });
}
: undefined,
shouldEmitToolResult,
onToolResult: opts?.onToolResult
? (payload) => {
// `subscribeEmbeddedPiSession` may invoke tool callbacks without awaiting them.
// If a tool callback starts typing after the run finalized, we can end up with
// a typing loop that never sees a matching markRunComplete(). Track and drain.
const task = (async () => {
const { text, skip } = normalizeStreamingText(payload);
if (skip) return;
await typingSignals.signalTextDelta(text);
await opts.onToolResult?.({
text,
mediaUrls: payload.mediaUrls,
});
})()
.catch((err) => {
logVerbose(
`tool result delivery failed: ${String(err)}`,
);
})
.finally(() => {
pendingToolTasks.delete(task);
});
pendingToolTasks.add(task);
}
: undefined,
});
},
});
runResult = fallbackResult.result;
fallbackProvider = fallbackResult.provider;
fallbackModel = fallbackResult.model;
break;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
const isContextOverflow =
isContextOverflowError(message) ||
/context.*overflow|too large|context window/i.test(message);
const isCompactionFailure = isCompactionFailureError(message);
const isSessionCorruption =
/function call turn comes immediately after/i.test(message);
if (
isCompactionFailure &&
!didResetAfterCompactionFailure &&
(await resetSessionAfterCompactionFailure(message))
) {
didResetAfterCompactionFailure = true;
continue;
}
// Auto-recover from Gemini session corruption by resetting the session
if (
isSessionCorruption &&
sessionKey &&
activeSessionStore &&
storePath
) {
const corruptedSessionId = activeSessionEntry?.sessionId;
defaultRuntime.error(
`Session history corrupted (Gemini function call ordering). Resetting session: ${sessionKey}`,
);
try {
// Delete transcript file if it exists
if (corruptedSessionId) {
const transcriptPath =
resolveSessionTranscriptPath(corruptedSessionId);
try {
fs.unlinkSync(transcriptPath);
} catch {
// Ignore if file doesn't exist
}
}
// Remove session entry from store
delete activeSessionStore[sessionKey];
await saveSessionStore(storePath, activeSessionStore);
} catch (cleanupErr) {
defaultRuntime.error(
`Failed to reset corrupted session ${sessionKey}: ${String(cleanupErr)}`,
);
}
return finalizeWithFollowup({
text: "⚠️ Session history was corrupted. I've reset the conversation - please try again!",
});
}
defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
return finalizeWithFollowup({
text: isContextOverflow
? "⚠️ Context overflow - conversation too long. Starting fresh might help!"
: `⚠️ Agent failed before reply: ${message}. Check gateway logs for details.`,
});
}
}
if (
shouldInjectGroupIntro &&
activeSessionEntry &&
activeSessionStore &&
sessionKey &&
activeSessionEntry.groupActivationNeedsSystemIntro
) {
activeSessionEntry.groupActivationNeedsSystemIntro = false;
activeSessionEntry.updatedAt = Date.now();
activeSessionStore[sessionKey] = activeSessionEntry;
if (storePath) {
await saveSessionStore(storePath, activeSessionStore);
}
}
const payloadArray = runResult.payloads ?? [];
if (blockReplyPipeline) {
await blockReplyPipeline.flush({ force: true });
blockReplyPipeline.stop();
}
if (pendingToolTasks.size > 0) {
await Promise.allSettled(pendingToolTasks);
}
// Drain any late tool/block deliveries before deciding there's "nothing to send".
// Otherwise, a late typing trigger (e.g. from a tool callback) can outlive the run and
// keep the typing indicator stuck.
if (payloadArray.length === 0) return finalizeWithFollowup(undefined);
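// Sanitize final payloads (heartbeat runs pass through untouched): rewrite
// Bun fetch socket errors into the friendlier warning and strip stray
// HEARTBEAT_OK tokens, dropping payloads that end up empty unless they still
// carry media.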
const sanitizedPayloads = isHeartbeat
? payloadArray
: payloadArray.flatMap((payload) => {
let text = payload.text;
if (payload.isError && text && isBunFetchSocketError(text)) {
text = formatBunFetchSocketError(text);
}
if (!text || !text.includes("HEARTBEAT_OK"))
return [{ ...payload, text }];
const stripped = stripHeartbeatToken(text, { mode: "message" });
if (stripped.didStrip && !didLogHeartbeatStrip) {
didLogHeartbeatStrip = true;
logVerbose("Stripped stray HEARTBEAT_OK token from reply");
}
const hasMedia =
Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
if (stripped.shouldSkip && !hasMedia) return [];
return [{ ...payload, text: stripped.text }];
});
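// Apply reply threading, then parse inline directives out of each payload's
// text (reply targets, media URLs, audio-as-voice, silent token) and keep
// only payloads that are still renderable afterwards.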
const replyTaggedPayloads: ReplyPayload[] = applyReplyThreading({
payloads: sanitizedPayloads,
replyToMode,
replyToChannel,
currentMessageId: sessionCtx.MessageSid,
})
.map((payload) => {
const parsed = parseReplyDirectives(payload.text ?? "", {
currentMessageId: sessionCtx.MessageSid,
silentToken: SILENT_REPLY_TOKEN,
});
const mediaUrls = payload.mediaUrls ?? parsed.mediaUrls;
const mediaUrl = payload.mediaUrl ?? parsed.mediaUrl ?? mediaUrls?.[0];
return {
...payload,
text: parsed.text ? parsed.text : undefined,
mediaUrls,
mediaUrl,
replyToId: payload.replyToId ?? parsed.replyToId,
replyToTag: payload.replyToTag || parsed.replyToTag,
replyToCurrent: payload.replyToCurrent || parsed.replyToCurrent,
audioAsVoice: Boolean(payload.audioAsVoice || parsed.audioAsVoice),
};
})
.filter(isRenderablePayload);
// Drop final payloads only when block streaming succeeded end-to-end.
// If streaming aborted (e.g., timeout), fall back to final payloads.
const shouldDropFinalPayloads =
blockStreamingEnabled &&
Boolean(blockReplyPipeline?.didStream()) &&
!blockReplyPipeline?.isAborted();
const messagingToolSentTexts = runResult.messagingToolSentTexts ?? [];
const messagingToolSentTargets = runResult.messagingToolSentTargets ?? [];
const suppressMessagingToolReplies = shouldSuppressMessagingToolReplies({
messageProvider: followupRun.run.messageProvider,
messagingToolSentTargets,
originatingTo: sessionCtx.OriginatingTo ?? sessionCtx.To,
accountId: sessionCtx.AccountId,
});
const dedupedPayloads = filterMessagingToolDuplicates({
payloads: replyTaggedPayloads,
sentTexts: messagingToolSentTexts,
});
const filteredPayloads = shouldDropFinalPayloads
? []
: blockStreamingEnabled
? dedupedPayloads.filter(
(payload) => !blockReplyPipeline?.hasSentPayload(payload),
)
: dedupedPayloads;
const replyPayloads = suppressMessagingToolReplies ? [] : filteredPayloads;
if (replyPayloads.length === 0) return finalizeWithFollowup(undefined);
const shouldSignalTyping = replyPayloads.some((payload) => {
const trimmed = payload.text?.trim();
if (trimmed) return true;
if (payload.mediaUrl) return true;
if (payload.mediaUrls && payload.mediaUrls.length > 0) return true;
return false;
});
if (shouldSignalTyping) {
await typingSignals.signalRunStart();
}
const usage = runResult.meta.agentMeta?.usage;
const modelUsed =
runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const providerUsed =
runResult.meta.agentMeta?.provider ??
fallbackProvider ??
followupRun.run.provider;
const cliSessionId = isCliProvider(providerUsed, cfg)
? runResult.meta.agentMeta?.sessionId?.trim()
: undefined;
const contextTokensUsed =
agentCfgContextTokens ??
lookupContextTokens(modelUsed) ??
activeSessionEntry?.contextTokens ??
DEFAULT_CONTEXT_TOKENS;
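// Persist per-session accounting: token usage when non-zero, otherwise at
// least the provider/model/context-window used, plus the CLI session id
// mapping when the turn ran through a CLI provider.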
if (storePath && sessionKey) {
if (hasNonzeroUsage(usage)) {
try {
await updateSessionStoreEntry({
storePath,
sessionKey,
update: async (entry) => {
const input = usage.input ?? 0;
const output = usage.output ?? 0;
const promptTokens =
input + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
const patch: Partial<SessionEntry> = {
inputTokens: input,
outputTokens: output,
totalTokens:
promptTokens > 0 ? promptTokens : (usage.total ?? input),
modelProvider: providerUsed,
model: modelUsed,
contextTokens: contextTokensUsed ?? entry.contextTokens,
updatedAt: Date.now(),
};
if (cliSessionId) {
const nextEntry = { ...entry, ...patch };
setCliSessionId(nextEntry, providerUsed, cliSessionId);
return {
...patch,
cliSessionIds: nextEntry.cliSessionIds,
claudeCliSessionId: nextEntry.claudeCliSessionId,
};
}
return patch;
},
});
} catch (err) {
logVerbose(`failed to persist usage update: ${String(err)}`);
}
} else if (modelUsed || contextTokensUsed) {
try {
await updateSessionStoreEntry({
storePath,
sessionKey,
update: async (entry) => {
const patch: Partial<SessionEntry> = {
modelProvider: providerUsed ?? entry.modelProvider,
model: modelUsed ?? entry.model,
contextTokens: contextTokensUsed ?? entry.contextTokens,
updatedAt: Date.now(),
};
if (cliSessionId) {
const nextEntry = { ...entry, ...patch };
setCliSessionId(nextEntry, providerUsed, cliSessionId);
return {
...patch,
cliSessionIds: nextEntry.cliSessionIds,
claudeCliSessionId: nextEntry.claudeCliSessionId,
};
}
return patch;
},
});
} catch (err) {
logVerbose(`failed to persist model/context update: ${String(err)}`);
}
}
}
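// Optional usage footer: emitted only when the session has responseUsage set
// to "on"; the estimated cost is included only for api-key auth.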
const responseUsageEnabled =
(activeSessionEntry?.responseUsage ??
(sessionKey
? activeSessionStore?.[sessionKey]?.responseUsage
: undefined)) === "on";
if (responseUsageEnabled && hasNonzeroUsage(usage)) {
const authMode = resolveModelAuthMode(providerUsed, cfg);
const showCost = authMode === "api-key";
const costConfig = showCost
? resolveModelCostConfig({
provider: providerUsed,
model: modelUsed,
config: cfg,
})
: undefined;
const formatted = formatResponseUsageLine({
usage,
showCost,
costConfig,
});
if (formatted) responseUsageLine = formatted;
}
// Assemble final payloads: prepend a compaction notice and/or a new-session
// hint when verbose is on, then append the usage footer when enabled.
let finalPayloads = replyPayloads;
if (autoCompactionCompleted) {
const count = await incrementCompactionCount({
sessionEntry: activeSessionEntry,
sessionStore: activeSessionStore,
sessionKey,
storePath,
});
if (resolvedVerboseLevel === "on") {
const suffix = typeof count === "number" ? ` (count ${count})` : "";
finalPayloads = [
{ text: `🧹 Auto-compaction complete${suffix}.` },
...finalPayloads,
];
}
}
if (resolvedVerboseLevel === "on" && activeIsNewSession) {
finalPayloads = [
{ text: `🧭 New session: ${followupRun.run.sessionId}` },
...finalPayloads,
];
}
if (responseUsageLine) {
finalPayloads = appendUsageLine(finalPayloads, responseUsageLine);
}
return finalizeWithFollowup(
finalPayloads.length === 1 ? finalPayloads[0] : finalPayloads,
);
} finally {
blockReplyPipeline?.stop();
typing.markRunComplete();
}
}