import crypto from "node:crypto"; import fs from "node:fs"; import { resolveAgentModelFallbacksOverride } from "../../agents/agent-scope.js"; import { runCliAgent } from "../../agents/cli-runner.js"; import { getCliSessionId } from "../../agents/cli-session.js"; import { runWithModelFallback } from "../../agents/model-fallback.js"; import { isCliProvider } from "../../agents/model-selection.js"; import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js"; import { isCompactionFailureError, isContextOverflowError, sanitizeUserFacingText, } from "../../agents/pi-embedded-helpers.js"; import { resolveAgentIdFromSessionKey, resolveSessionTranscriptPath, type SessionEntry, updateSessionStore, } from "../../config/sessions.js"; import { logVerbose } from "../../globals.js"; import { emitAgentEvent, registerAgentRunContext } from "../../infra/agent-events.js"; import { defaultRuntime } from "../../runtime.js"; import { stripHeartbeatToken } from "../heartbeat.js"; import type { TemplateContext } from "../templating.js"; import type { VerboseLevel } from "../thinking.js"; import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js"; import type { GetReplyOptions, ReplyPayload } from "../types.js"; import { buildThreadingToolContext, resolveEnforceFinalTag } from "./agent-runner-utils.js"; import type { BlockReplyPipeline } from "./block-reply-pipeline.js"; import type { FollowupRun } from "./queue.js"; import { parseReplyDirectives } from "./reply-directives.js"; import { applyReplyTagsToPayload, isRenderablePayload } from "./reply-payloads.js"; import type { TypingSignaler } from "./typing-mode.js"; export type AgentRunLoopResult = | { kind: "success"; runResult: Awaited>; fallbackProvider?: string; fallbackModel?: string; didLogHeartbeatStrip: boolean; autoCompactionCompleted: boolean; } | { kind: "final"; payload: ReplyPayload }; export async function runAgentTurnWithFallback(params: { commandBody: string; followupRun: FollowupRun; sessionCtx: TemplateContext; opts?: GetReplyOptions; typingSignals: TypingSignaler; blockReplyPipeline: BlockReplyPipeline | null; blockStreamingEnabled: boolean; blockReplyChunking?: { minChars: number; maxChars: number; breakPreference: "paragraph" | "newline" | "sentence"; }; resolvedBlockStreamingBreak: "text_end" | "message_end"; applyReplyToMode: (payload: ReplyPayload) => ReplyPayload; shouldEmitToolResult: () => boolean; pendingToolTasks: Set>; resetSessionAfterCompactionFailure: (reason: string) => Promise; isHeartbeat: boolean; sessionKey?: string; getActiveSessionEntry: () => SessionEntry | undefined; activeSessionStore?: Record; storePath?: string; resolvedVerboseLevel: VerboseLevel; }): Promise { let didLogHeartbeatStrip = false; let autoCompactionCompleted = false; const runId = crypto.randomUUID(); if (params.sessionKey) { registerAgentRunContext(runId, { sessionKey: params.sessionKey, verboseLevel: params.resolvedVerboseLevel, }); } let runResult: Awaited>; let fallbackProvider = params.followupRun.run.provider; let fallbackModel = params.followupRun.run.model; let didResetAfterCompactionFailure = false; while (true) { try { const allowPartialStream = !( params.followupRun.run.reasoningLevel === "stream" && params.opts?.onReasoningStream ); const normalizeStreamingText = (payload: ReplyPayload): { text?: string; skip: boolean } => { if (!allowPartialStream) return { skip: true }; let text = payload.text; if (!params.isHeartbeat && text?.includes("HEARTBEAT_OK")) { const stripped = stripHeartbeatToken(text, { mode: "message", }); if (stripped.didStrip && !didLogHeartbeatStrip) { didLogHeartbeatStrip = true; logVerbose("Stripped stray HEARTBEAT_OK token from reply"); } if (stripped.shouldSkip && (payload.mediaUrls?.length ?? 0) === 0) { return { skip: true }; } text = stripped.text; } if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) { return { skip: true }; } if (!text) return { skip: true }; const sanitized = sanitizeUserFacingText(text); if (!sanitized.trim()) return { skip: true }; return { text: sanitized, skip: false }; }; const handlePartialForTyping = async (payload: ReplyPayload): Promise => { const { text, skip } = normalizeStreamingText(payload); if (skip || !text) return undefined; await params.typingSignals.signalTextDelta(text); return text; }; const blockReplyPipeline = params.blockReplyPipeline; const onToolResult = params.opts?.onToolResult; const fallbackResult = await runWithModelFallback({ cfg: params.followupRun.run.config, provider: params.followupRun.run.provider, model: params.followupRun.run.model, fallbacksOverride: resolveAgentModelFallbacksOverride( params.followupRun.run.config, resolveAgentIdFromSessionKey(params.followupRun.run.sessionKey), ), run: (provider, model) => { // Notify that model selection is complete (including after fallback). // This allows responsePrefix template interpolation with the actual model. params.opts?.onModelSelected?.({ provider, model, thinkLevel: params.followupRun.run.thinkLevel, }); if (isCliProvider(provider, params.followupRun.run.config)) { const startedAt = Date.now(); emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "start", startedAt, }, }); const cliSessionId = getCliSessionId(params.getActiveSessionEntry(), provider); return runCliAgent({ sessionId: params.followupRun.run.sessionId, sessionKey: params.sessionKey, sessionFile: params.followupRun.run.sessionFile, workspaceDir: params.followupRun.run.workspaceDir, config: params.followupRun.run.config, prompt: params.commandBody, provider, model, thinkLevel: params.followupRun.run.thinkLevel, timeoutMs: params.followupRun.run.timeoutMs, runId, extraSystemPrompt: params.followupRun.run.extraSystemPrompt, ownerNumbers: params.followupRun.run.ownerNumbers, cliSessionId, }) .then((result) => { emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "end", startedAt, endedAt: Date.now(), }, }); return result; }) .catch((err) => { emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "error", startedAt, endedAt: Date.now(), error: err instanceof Error ? err.message : String(err), }, }); throw err; }); } return runEmbeddedPiAgent({ sessionId: params.followupRun.run.sessionId, sessionKey: params.sessionKey, messageProvider: params.sessionCtx.Provider?.trim().toLowerCase() || undefined, agentAccountId: params.sessionCtx.AccountId, // Provider threading context for tool auto-injection ...buildThreadingToolContext({ sessionCtx: params.sessionCtx, config: params.followupRun.run.config, hasRepliedRef: params.opts?.hasRepliedRef, }), sessionFile: params.followupRun.run.sessionFile, workspaceDir: params.followupRun.run.workspaceDir, agentDir: params.followupRun.run.agentDir, config: params.followupRun.run.config, skillsSnapshot: params.followupRun.run.skillsSnapshot, prompt: params.commandBody, extraSystemPrompt: params.followupRun.run.extraSystemPrompt, ownerNumbers: params.followupRun.run.ownerNumbers, enforceFinalTag: resolveEnforceFinalTag(params.followupRun.run, provider), provider, model, authProfileId: params.followupRun.run.authProfileId, thinkLevel: params.followupRun.run.thinkLevel, verboseLevel: params.followupRun.run.verboseLevel, reasoningLevel: params.followupRun.run.reasoningLevel, bashElevated: params.followupRun.run.bashElevated, timeoutMs: params.followupRun.run.timeoutMs, runId, blockReplyBreak: params.resolvedBlockStreamingBreak, blockReplyChunking: params.blockReplyChunking, onPartialReply: allowPartialStream ? async (payload) => { const textForTyping = await handlePartialForTyping(payload); if (!params.opts?.onPartialReply || textForTyping === undefined) return; await params.opts.onPartialReply({ text: textForTyping, mediaUrls: payload.mediaUrls, }); } : undefined, onAssistantMessageStart: async () => { await params.typingSignals.signalMessageStart(); }, onReasoningStream: params.typingSignals.shouldStartOnReasoning || params.opts?.onReasoningStream ? async (payload) => { await params.typingSignals.signalReasoningDelta(); await params.opts?.onReasoningStream?.({ text: payload.text, mediaUrls: payload.mediaUrls, }); } : undefined, onAgentEvent: (evt) => { // Trigger typing when tools start executing if (evt.stream === "tool") { const phase = typeof evt.data.phase === "string" ? evt.data.phase : ""; if (phase === "start" || phase === "update") { void params.typingSignals.signalToolStart(); } } // Track auto-compaction completion if (evt.stream === "compaction") { const phase = typeof evt.data.phase === "string" ? evt.data.phase : ""; const willRetry = Boolean(evt.data.willRetry); if (phase === "end" && !willRetry) { autoCompactionCompleted = true; } } }, onBlockReply: params.blockStreamingEnabled && params.opts?.onBlockReply ? async (payload) => { const { text, skip } = normalizeStreamingText(payload); const hasPayloadMedia = (payload.mediaUrls?.length ?? 0) > 0; if (skip && !hasPayloadMedia) return; const taggedPayload = applyReplyTagsToPayload( { text, mediaUrls: payload.mediaUrls, mediaUrl: payload.mediaUrls?.[0], }, params.sessionCtx.MessageSid, ); // Let through payloads with audioAsVoice flag even if empty (need to track it) if (!isRenderablePayload(taggedPayload) && !payload.audioAsVoice) return; const parsed = parseReplyDirectives(taggedPayload.text ?? "", { currentMessageId: params.sessionCtx.MessageSid, silentToken: SILENT_REPLY_TOKEN, }); const cleaned = parsed.text || undefined; const hasRenderableMedia = Boolean(taggedPayload.mediaUrl) || (taggedPayload.mediaUrls?.length ?? 0) > 0; // Skip empty payloads unless they have audioAsVoice flag (need to track it) if ( !cleaned && !hasRenderableMedia && !payload.audioAsVoice && !parsed.audioAsVoice ) return; if (parsed.isSilent && !hasRenderableMedia) return; const blockPayload: ReplyPayload = params.applyReplyToMode({ ...taggedPayload, text: cleaned, audioAsVoice: Boolean(parsed.audioAsVoice || payload.audioAsVoice), replyToId: taggedPayload.replyToId ?? parsed.replyToId, replyToTag: taggedPayload.replyToTag || parsed.replyToTag, replyToCurrent: taggedPayload.replyToCurrent || parsed.replyToCurrent, }); void params.typingSignals .signalTextDelta(cleaned ?? taggedPayload.text) .catch((err) => { logVerbose(`block reply typing signal failed: ${String(err)}`); }); params.blockReplyPipeline?.enqueue(blockPayload); } : undefined, onBlockReplyFlush: params.blockStreamingEnabled && blockReplyPipeline ? async () => { await blockReplyPipeline.flush({ force: true }); } : undefined, shouldEmitToolResult: params.shouldEmitToolResult, onToolResult: onToolResult ? (payload) => { // `subscribeEmbeddedPiSession` may invoke tool callbacks without awaiting them. // If a tool callback starts typing after the run finalized, we can end up with // a typing loop that never sees a matching markRunComplete(). Track and drain. const task = (async () => { const { text, skip } = normalizeStreamingText(payload); if (skip) return; await params.typingSignals.signalTextDelta(text); await onToolResult({ text, mediaUrls: payload.mediaUrls, }); })() .catch((err) => { logVerbose(`tool result delivery failed: ${String(err)}`); }) .finally(() => { params.pendingToolTasks.delete(task); }); params.pendingToolTasks.add(task); } : undefined, }); }, }); runResult = fallbackResult.result; fallbackProvider = fallbackResult.provider; fallbackModel = fallbackResult.model; // Some embedded runs surface context overflow as an error payload instead of throwing. // Treat those as a session-level failure and auto-recover by starting a fresh session. const embeddedError = runResult.meta?.error; if ( embeddedError && isContextOverflowError(embeddedError.message) && !didResetAfterCompactionFailure && (await params.resetSessionAfterCompactionFailure(embeddedError.message)) ) { didResetAfterCompactionFailure = true; continue; } break; } catch (err) { const message = err instanceof Error ? err.message : String(err); const isContextOverflow = isContextOverflowError(message) || /context.*overflow|too large|context window/i.test(message); const isCompactionFailure = isCompactionFailureError(message); const isSessionCorruption = /function call turn comes immediately after/i.test(message); const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message); if ( isCompactionFailure && !didResetAfterCompactionFailure && (await params.resetSessionAfterCompactionFailure(message)) ) { didResetAfterCompactionFailure = true; continue; } // Auto-recover from Gemini session corruption by resetting the session if ( isSessionCorruption && params.sessionKey && params.activeSessionStore && params.storePath ) { const sessionKey = params.sessionKey; const corruptedSessionId = params.getActiveSessionEntry()?.sessionId; defaultRuntime.error( `Session history corrupted (Gemini function call ordering). Resetting session: ${params.sessionKey}`, ); try { // Delete transcript file if it exists if (corruptedSessionId) { const transcriptPath = resolveSessionTranscriptPath(corruptedSessionId); try { fs.unlinkSync(transcriptPath); } catch { // Ignore if file doesn't exist } } // Keep the in-memory snapshot consistent with the on-disk store reset. delete params.activeSessionStore[sessionKey]; // Remove session entry from store using a fresh, locked snapshot. await updateSessionStore(params.storePath, (store) => { delete store[sessionKey]; }); } catch (cleanupErr) { defaultRuntime.error( `Failed to reset corrupted session ${params.sessionKey}: ${String(cleanupErr)}`, ); } return { kind: "final", payload: { text: "⚠️ Session history was corrupted. I've reset the conversation - please try again!", }, }; } defaultRuntime.error(`Embedded agent failed before reply: ${message}`); return { kind: "final", payload: { text: isContextOverflow ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model." : isRoleOrderingError ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session." : `⚠️ Agent failed before reply: ${message}. Check gateway logs for details.`, }, }; } } return { kind: "success", runResult, fallbackProvider, fallbackModel, didLogHeartbeatStrip, autoCompactionCompleted, }; }