clawdbot/src/auto-reply/reply/agent-runner.ts

import crypto from "node:crypto";
import fs from "node:fs";
import { runClaudeCliAgent } from "../../agents/claude-cli-runner.js";
import { lookupContextTokens } from "../../agents/context.js";
import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js";
import { resolveModelAuthMode } from "../../agents/model-auth.js";
import { runWithModelFallback } from "../../agents/model-fallback.js";
import {
  queueEmbeddedPiMessage,
  runEmbeddedPiAgent,
} from "../../agents/pi-embedded.js";
import { hasNonzeroUsage, type NormalizedUsage } from "../../agents/usage.js";
import {
  loadSessionStore,
  resolveSessionTranscriptPath,
  type SessionEntry,
  saveSessionStore,
} from "../../config/sessions.js";
import type { TypingMode } from "../../config/types.js";
import { logVerbose } from "../../globals.js";
import {
  emitAgentEvent,
  registerAgentRunContext,
} from "../../infra/agent-events.js";
import { defaultRuntime } from "../../runtime.js";
import {
  estimateUsageCost,
  formatTokenCount,
  formatUsd,
  resolveModelCostConfig,
} from "../../utils/usage-format.js";
import { stripHeartbeatToken } from "../heartbeat.js";
import type { OriginatingChannelType, TemplateContext } from "../templating.js";
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
import { SILENT_REPLY_TOKEN } from "../tokens.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { extractAudioTag } from "./audio-tags.js";
import { createBlockReplyPipeline } from "./block-reply-pipeline.js";
import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
import { createFollowupRunner } from "./followup-runner.js";
import {
  enqueueFollowupRun,
  type FollowupRun,
  type QueueSettings,
  scheduleFollowupDrain,
} from "./queue.js";
import {
  applyReplyTagsToPayload,
  applyReplyThreading,
  filterMessagingToolDuplicates,
  isRenderablePayload,
  shouldSuppressMessagingToolReplies,
} from "./reply-payloads.js";
import {
  createReplyToModeFilterForChannel,
  resolveReplyToMode,
} from "./reply-threading.js";
import { incrementCompactionCount } from "./session-updates.js";
import type { TypingController } from "./typing.js";
import { createTypingSignaler } from "./typing-mode.js";

const BUN_FETCH_SOCKET_ERROR_RE = /socket connection was closed unexpectedly/i;
const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;

const isBunFetchSocketError = (message?: string) =>
  Boolean(message && BUN_FETCH_SOCKET_ERROR_RE.test(message));

const formatBunFetchSocketError = (message: string) => {
  const trimmed = message.trim();
  return [
    "⚠️ LLM connection failed. This could be due to server issues, network problems, or context length exceeded (e.g., with local LLMs like LM Studio). Original error:",
    "```",
    trimmed || "Unknown error",
    "```",
  ].join("\n");
};

const formatResponseUsageLine = (params: {
  usage?: NormalizedUsage;
  showCost: boolean;
  costConfig?: {
    input: number;
    output: number;
    cacheRead: number;
    cacheWrite: number;
  };
}): string | null => {
  const usage = params.usage;
  if (!usage) return null;
  const input = usage.input;
  const output = usage.output;
  if (typeof input !== "number" && typeof output !== "number") return null;
  const inputLabel = typeof input === "number" ? formatTokenCount(input) : "?";
  const outputLabel =
    typeof output === "number" ? formatTokenCount(output) : "?";
  const cost =
    params.showCost && typeof input === "number" && typeof output === "number"
      ? estimateUsageCost({
          usage: {
            input,
            output,
            cacheRead: usage.cacheRead,
            cacheWrite: usage.cacheWrite,
          },
          cost: params.costConfig,
        })
      : undefined;
  const costLabel = params.showCost ? formatUsd(cost) : undefined;
  const suffix = costLabel ? ` · est ${costLabel}` : "";
  return `Usage: ${inputLabel} in / ${outputLabel} out${suffix}`;
};

const appendUsageLine = (
  payloads: ReplyPayload[],
  line: string,
): ReplyPayload[] => {
  let index = -1;
  for (let i = payloads.length - 1; i >= 0; i -= 1) {
    if (payloads[i]?.text) {
      index = i;
      break;
    }
  }
  if (index === -1) return [...payloads, { text: line }];
  const existing = payloads[index];
  const existingText = existing.text ?? "";
  const separator = existingText.endsWith("\n") ? "" : "\n";
  const next = {
    ...existing,
    text: `${existingText}${separator}${line}`,
  };
  const updated = payloads.slice();
  updated[index] = next;
  return updated;
};

export async function runReplyAgent(params: {
  commandBody: string;
  followupRun: FollowupRun;
  queueKey: string;
  resolvedQueue: QueueSettings;
  shouldSteer: boolean;
  shouldFollowup: boolean;
  isActive: boolean;
  isStreaming: boolean;
  opts?: GetReplyOptions;
  typing: TypingController;
  sessionEntry?: SessionEntry;
  sessionStore?: Record<string, SessionEntry>;
  sessionKey?: string;
  storePath?: string;
  defaultModel: string;
  agentCfgContextTokens?: number;
  resolvedVerboseLevel: VerboseLevel;
  isNewSession: boolean;
  blockStreamingEnabled: boolean;
  blockReplyChunking?: {
    minChars: number;
    maxChars: number;
    breakPreference: "paragraph" | "newline" | "sentence";
  };
  resolvedBlockStreamingBreak: "text_end" | "message_end";
  sessionCtx: TemplateContext;
  shouldInjectGroupIntro: boolean;
  typingMode: TypingMode;
}): Promise<ReplyPayload | ReplyPayload[] | undefined> {
  const {
    commandBody,
    followupRun,
    queueKey,
    resolvedQueue,
    shouldSteer,
    shouldFollowup,
    isActive,
    isStreaming,
    opts,
    typing,
    sessionEntry,
    sessionStore,
    sessionKey,
    storePath,
    defaultModel,
    agentCfgContextTokens,
    resolvedVerboseLevel,
    isNewSession,
    blockStreamingEnabled,
    blockReplyChunking,
    resolvedBlockStreamingBreak,
    sessionCtx,
    shouldInjectGroupIntro,
    typingMode,
  } = params;

  const isHeartbeat = opts?.isHeartbeat === true;
  const typingSignals = createTypingSignaler({
    typing,
    mode: typingMode,
    isHeartbeat,
  });

  const shouldEmitToolResult = () => {
    if (!sessionKey || !storePath) {
      return resolvedVerboseLevel === "on";
    }
    try {
      const store = loadSessionStore(storePath);
      const entry = store[sessionKey];
      const current = normalizeVerboseLevel(entry?.verboseLevel);
      if (current) return current === "on";
    } catch {
      // ignore store read failures
    }
    return resolvedVerboseLevel === "on";
  };

  const pendingToolTasks = new Set<Promise<void>>();
  const blockReplyTimeoutMs =
    opts?.blockReplyTimeoutMs ?? BLOCK_REPLY_SEND_TIMEOUT_MS;
  const replyToChannel =
    sessionCtx.OriginatingChannel ??
    ((sessionCtx.Surface ?? sessionCtx.Provider)?.toLowerCase() as
      | OriginatingChannelType
      | undefined);
  const replyToMode = resolveReplyToMode(
    followupRun.run.config,
    replyToChannel,
  );
  const applyReplyToMode = createReplyToModeFilterForChannel(
    replyToMode,
    replyToChannel,
  );
  const cfg = followupRun.run.config;
  const blockReplyCoalescing =
    blockStreamingEnabled && opts?.onBlockReply
      ? resolveBlockStreamingCoalescing(
          cfg,
          sessionCtx.Provider,
          sessionCtx.AccountId,
          blockReplyChunking,
        )
      : undefined;
  const blockReplyPipeline =
    blockStreamingEnabled && opts?.onBlockReply
      ? createBlockReplyPipeline({
          onBlockReply: opts.onBlockReply,
          timeoutMs: blockReplyTimeoutMs,
          coalescing: blockReplyCoalescing,
        })
      : null;

  if (shouldSteer && isStreaming) {
    const steered = queueEmbeddedPiMessage(
      followupRun.run.sessionId,
      followupRun.prompt,
    );
    if (steered && !shouldFollowup) {
      if (sessionEntry && sessionStore && sessionKey) {
        sessionEntry.updatedAt = Date.now();
        sessionStore[sessionKey] = sessionEntry;
        if (storePath) {
          await saveSessionStore(storePath, sessionStore);
        }
      }
      typing.cleanup();
      return undefined;
    }
  }

  if (isActive && (shouldFollowup || resolvedQueue.mode === "steer")) {
    enqueueFollowupRun(queueKey, followupRun, resolvedQueue);
    if (sessionEntry && sessionStore && sessionKey) {
      sessionEntry.updatedAt = Date.now();
      sessionStore[sessionKey] = sessionEntry;
      if (storePath) {
        await saveSessionStore(storePath, sessionStore);
      }
    }
    typing.cleanup();
    return undefined;
  }

  const runFollowupTurn = createFollowupRunner({
    opts,
    typing,
    typingMode,
    sessionEntry,
    sessionStore,
    sessionKey,
    storePath,
    defaultModel,
    agentCfgContextTokens,
  });

  const finalizeWithFollowup = <T>(value: T): T => {
    scheduleFollowupDrain(queueKey, runFollowupTurn);
    return value;
  };

  let didLogHeartbeatStrip = false;
  let autoCompactionCompleted = false;
  let responseUsageLine: string | undefined;
  try {
    const runId = crypto.randomUUID();
    if (sessionKey) {
      registerAgentRunContext(runId, { sessionKey });
    }
    let runResult: Awaited<ReturnType<typeof runEmbeddedPiAgent>>;
    let fallbackProvider = followupRun.run.provider;
    let fallbackModel = followupRun.run.model;
    try {
      const allowPartialStream = !(
        followupRun.run.reasoningLevel === "stream" && opts?.onReasoningStream
      );
      const fallbackResult = await runWithModelFallback({
        cfg: followupRun.run.config,
        provider: followupRun.run.provider,
        model: followupRun.run.model,
        run: (provider, model) => {
          if (provider === "claude-cli") {
            const startedAt = Date.now();
            emitAgentEvent({
              runId,
              stream: "lifecycle",
              data: {
                phase: "start",
                startedAt,
              },
            });
            return runClaudeCliAgent({
              sessionId: followupRun.run.sessionId,
              sessionKey,
              sessionFile: followupRun.run.sessionFile,
              workspaceDir: followupRun.run.workspaceDir,
              config: followupRun.run.config,
              prompt: commandBody,
              provider,
              model,
              thinkLevel: followupRun.run.thinkLevel,
              timeoutMs: followupRun.run.timeoutMs,
              runId,
              extraSystemPrompt: followupRun.run.extraSystemPrompt,
              ownerNumbers: followupRun.run.ownerNumbers,
              claudeSessionId:
                sessionEntry?.claudeCliSessionId?.trim() || undefined,
            })
              .then((result) => {
                emitAgentEvent({
                  runId,
                  stream: "lifecycle",
                  data: {
                    phase: "end",
                    startedAt,
                    endedAt: Date.now(),
                  },
                });
                return result;
              })
              .catch((err) => {
                emitAgentEvent({
                  runId,
                  stream: "lifecycle",
                  data: {
                    phase: "error",
                    startedAt,
                    endedAt: Date.now(),
                    error: err instanceof Error ? err.message : String(err),
                  },
                });
                throw err;
              });
          }
          return runEmbeddedPiAgent({
            sessionId: followupRun.run.sessionId,
            sessionKey,
            messageProvider:
              sessionCtx.Provider?.trim().toLowerCase() || undefined,
            agentAccountId: sessionCtx.AccountId,
            sessionFile: followupRun.run.sessionFile,
            workspaceDir: followupRun.run.workspaceDir,
            agentDir: followupRun.run.agentDir,
            config: followupRun.run.config,
            skillsSnapshot: followupRun.run.skillsSnapshot,
            prompt: commandBody,
            extraSystemPrompt: followupRun.run.extraSystemPrompt,
            ownerNumbers: followupRun.run.ownerNumbers,
            enforceFinalTag: followupRun.run.enforceFinalTag,
            provider,
            model,
            authProfileId: followupRun.run.authProfileId,
            thinkLevel: followupRun.run.thinkLevel,
            verboseLevel: followupRun.run.verboseLevel,
            reasoningLevel: followupRun.run.reasoningLevel,
            bashElevated: followupRun.run.bashElevated,
            timeoutMs: followupRun.run.timeoutMs,
            runId,
            blockReplyBreak: resolvedBlockStreamingBreak,
            blockReplyChunking,
            onPartialReply:
              opts?.onPartialReply && allowPartialStream
                ? async (payload) => {
                    let text = payload.text;
                    if (!isHeartbeat && text?.includes("HEARTBEAT_OK")) {
                      const stripped = stripHeartbeatToken(text, {
                        mode: "message",
                      });
                      if (stripped.didStrip && !didLogHeartbeatStrip) {
                        didLogHeartbeatStrip = true;
                        logVerbose(
                          "Stripped stray HEARTBEAT_OK token from reply",
                        );
                      }
                      if (
                        stripped.shouldSkip &&
                        (payload.mediaUrls?.length ?? 0) === 0
                      ) {
                        return;
                      }
                      text = stripped.text;
                    }
                    await typingSignals.signalTextDelta(text);
                    await opts.onPartialReply?.({
                      text,
                      mediaUrls: payload.mediaUrls,
                    });
                  }
                : undefined,
            onReasoningStream:
              typingSignals.shouldStartOnReasoning || opts?.onReasoningStream
                ? async (payload) => {
                    await typingSignals.signalReasoningDelta();
                    await opts?.onReasoningStream?.({
                      text: payload.text,
                      mediaUrls: payload.mediaUrls,
                    });
                  }
                : undefined,
            onAgentEvent: (evt) => {
              // Trigger typing when tools start executing
              if (evt.stream === "tool") {
                const phase =
                  typeof evt.data.phase === "string" ? evt.data.phase : "";
                if (phase === "start") {
                  void typingSignals.signalToolStart();
                }
              }
              // Track auto-compaction completion
              if (evt.stream === "compaction") {
                const phase =
                  typeof evt.data.phase === "string" ? evt.data.phase : "";
                const willRetry = Boolean(evt.data.willRetry);
                if (phase === "end" && !willRetry) {
                  autoCompactionCompleted = true;
                }
              }
            },
            onBlockReply:
              blockStreamingEnabled && opts?.onBlockReply
                ? async (payload) => {
                    let text = payload.text;
                    if (!isHeartbeat && text?.includes("HEARTBEAT_OK")) {
                      const stripped = stripHeartbeatToken(text, {
                        mode: "message",
                      });
                      if (stripped.didStrip && !didLogHeartbeatStrip) {
                        didLogHeartbeatStrip = true;
                        logVerbose(
                          "Stripped stray HEARTBEAT_OK token from reply",
                        );
                      }
                      const hasMedia = (payload.mediaUrls?.length ?? 0) > 0;
                      if (stripped.shouldSkip && !hasMedia) return;
                      text = stripped.text;
                    }
                    const taggedPayload = applyReplyTagsToPayload(
                      {
                        text,
                        mediaUrls: payload.mediaUrls,
                        mediaUrl: payload.mediaUrls?.[0],
                      },
                      sessionCtx.MessageSid,
                    );
                    if (!isRenderablePayload(taggedPayload)) return;
                    const audioTagResult = extractAudioTag(taggedPayload.text);
                    const cleaned = audioTagResult.cleaned || undefined;
                    const hasMedia =
                      Boolean(taggedPayload.mediaUrl) ||
                      (taggedPayload.mediaUrls?.length ?? 0) > 0;
                    if (!cleaned && !hasMedia) return;
                    if (cleaned?.trim() === SILENT_REPLY_TOKEN && !hasMedia)
                      return;
                    const blockPayload: ReplyPayload = applyReplyToMode({
                      ...taggedPayload,
                      text: cleaned,
                      audioAsVoice: audioTagResult.audioAsVoice,
                    });
                    void typingSignals
                      .signalTextDelta(taggedPayload.text)
                      .catch((err) => {
                        logVerbose(
                          `block reply typing signal failed: ${String(err)}`,
                        );
                      });
                    blockReplyPipeline?.enqueue(blockPayload);
                  }
                : undefined,
            shouldEmitToolResult,
            onToolResult: opts?.onToolResult
              ? (payload) => {
                  // `subscribeEmbeddedPiSession` may invoke tool callbacks without awaiting them.
                  // If a tool callback starts typing after the run finalized, we can end up with
                  // a typing loop that never sees a matching markRunComplete(). Track and drain.
                  const task = (async () => {
                    let text = payload.text;
                    if (!isHeartbeat && text?.includes("HEARTBEAT_OK")) {
                      const stripped = stripHeartbeatToken(text, {
                        mode: "message",
                      });
                      if (stripped.didStrip && !didLogHeartbeatStrip) {
                        didLogHeartbeatStrip = true;
                        logVerbose(
                          "Stripped stray HEARTBEAT_OK token from reply",
                        );
                      }
                      if (
                        stripped.shouldSkip &&
                        (payload.mediaUrls?.length ?? 0) === 0
                      ) {
                        return;
                      }
                      text = stripped.text;
                    }
                    await typingSignals.signalTextDelta(text);
                    await opts.onToolResult?.({
                      text,
                      mediaUrls: payload.mediaUrls,
                    });
                  })()
                    .catch((err) => {
                      logVerbose(`tool result delivery failed: ${String(err)}`);
                    })
                    .finally(() => {
                      pendingToolTasks.delete(task);
                    });
                  pendingToolTasks.add(task);
                }
              : undefined,
          });
        },
      });
      runResult = fallbackResult.result;
      fallbackProvider = fallbackResult.provider;
      fallbackModel = fallbackResult.model;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      const isContextOverflow =
        /context.*overflow|too large|context window/i.test(message);
      const isSessionCorruption =
        /function call turn comes immediately after/i.test(message);

      // Auto-recover from Gemini session corruption by resetting the session
      if (isSessionCorruption && sessionKey && sessionStore && storePath) {
        const corruptedSessionId = sessionEntry?.sessionId;
        defaultRuntime.error(
          `Session history corrupted (Gemini function call ordering). Resetting session: ${sessionKey}`,
        );

        try {
          // Delete transcript file if it exists
          if (corruptedSessionId) {
            const transcriptPath =
              resolveSessionTranscriptPath(corruptedSessionId);
            try {
              fs.unlinkSync(transcriptPath);
            } catch {
              // Ignore if file doesn't exist
            }
          }

          // Remove session entry from store
          delete sessionStore[sessionKey];
          await saveSessionStore(storePath, sessionStore);
        } catch (cleanupErr) {
          defaultRuntime.error(
            `Failed to reset corrupted session ${sessionKey}: ${String(cleanupErr)}`,
          );
        }

        return finalizeWithFollowup({
          text: "⚠️ Session history was corrupted. I've reset the conversation - please try again!",
        });
      }

      defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
      return finalizeWithFollowup({
        text: isContextOverflow
          ? "⚠️ Context overflow - conversation too long. Starting fresh might help!"
          : `⚠️ Agent failed before reply: ${message}. Check gateway logs for details.`,
      });
    }

    if (
      shouldInjectGroupIntro &&
      sessionEntry &&
      sessionStore &&
      sessionKey &&
      sessionEntry.groupActivationNeedsSystemIntro
    ) {
      sessionEntry.groupActivationNeedsSystemIntro = false;
      sessionEntry.updatedAt = Date.now();
      sessionStore[sessionKey] = sessionEntry;
      if (storePath) {
        await saveSessionStore(storePath, sessionStore);
      }
    }

    const payloadArray = runResult.payloads ?? [];
    if (blockReplyPipeline) {
      await blockReplyPipeline.flush({ force: true });
      blockReplyPipeline.stop();
    }
    if (pendingToolTasks.size > 0) {
      await Promise.allSettled(pendingToolTasks);
    }
    // Drain any late tool/block deliveries before deciding there's "nothing to send".
    // Otherwise, a late typing trigger (e.g. from a tool callback) can outlive the run and
    // keep the typing indicator stuck.
    if (payloadArray.length === 0) return finalizeWithFollowup(undefined);

    const sanitizedPayloads = isHeartbeat
      ? payloadArray
      : payloadArray.flatMap((payload) => {
          let text = payload.text;

          if (payload.isError && text && isBunFetchSocketError(text)) {
            text = formatBunFetchSocketError(text);
          }

          if (!text || !text.includes("HEARTBEAT_OK"))
            return [{ ...payload, text }];
          const stripped = stripHeartbeatToken(text, { mode: "message" });
          if (stripped.didStrip && !didLogHeartbeatStrip) {
            didLogHeartbeatStrip = true;
            logVerbose("Stripped stray HEARTBEAT_OK token from reply");
          }
          const hasMedia =
            Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
          if (stripped.shouldSkip && !hasMedia) return [];
          return [{ ...payload, text: stripped.text }];
        });

    const replyTaggedPayloads: ReplyPayload[] = applyReplyThreading({
      payloads: sanitizedPayloads,
      replyToMode,
      replyToChannel,
      currentMessageId: sessionCtx.MessageSid,
    })
      .map((payload) => {
        const audioTagResult = extractAudioTag(payload.text);
        return {
          ...payload,
          text: audioTagResult.cleaned ? audioTagResult.cleaned : undefined,
          audioAsVoice: audioTagResult.audioAsVoice,
        };
      })
      .filter(isRenderablePayload);

    // Drop final payloads only when block streaming succeeded end-to-end.
    // If streaming aborted (e.g., timeout), fall back to final payloads.
    const shouldDropFinalPayloads =
      blockStreamingEnabled &&
      Boolean(blockReplyPipeline?.didStream()) &&
      !blockReplyPipeline?.isAborted();
    const messagingToolSentTexts = runResult.messagingToolSentTexts ?? [];
    const messagingToolSentTargets = runResult.messagingToolSentTargets ?? [];
    const suppressMessagingToolReplies = shouldSuppressMessagingToolReplies({
      messageProvider: followupRun.run.messageProvider,
      messagingToolSentTargets,
      originatingTo: sessionCtx.OriginatingTo ?? sessionCtx.To,
      accountId: sessionCtx.AccountId,
    });
    const dedupedPayloads = filterMessagingToolDuplicates({
      payloads: replyTaggedPayloads,
      sentTexts: messagingToolSentTexts,
    });
    const filteredPayloads = shouldDropFinalPayloads
      ? []
      : blockStreamingEnabled
        ? dedupedPayloads.filter(
            (payload) => !blockReplyPipeline?.hasSentPayload(payload),
          )
        : dedupedPayloads;
    const replyPayloads = suppressMessagingToolReplies ? [] : filteredPayloads;

    if (replyPayloads.length === 0) return finalizeWithFollowup(undefined);

    const shouldSignalTyping = replyPayloads.some((payload) => {
      const trimmed = payload.text?.trim();
      if (trimmed && trimmed !== SILENT_REPLY_TOKEN) return true;
      if (payload.mediaUrl) return true;
      if (payload.mediaUrls && payload.mediaUrls.length > 0) return true;
      return false;
    });
    if (shouldSignalTyping) {
      await typingSignals.signalRunStart();
    }

    const usage = runResult.meta.agentMeta?.usage;
    const modelUsed =
      runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
    const providerUsed =
      runResult.meta.agentMeta?.provider ??
      fallbackProvider ??
      followupRun.run.provider;
    const cliSessionId =
      providerUsed === "claude-cli"
        ? runResult.meta.agentMeta?.sessionId?.trim()
        : undefined;
    const contextTokensUsed =
      agentCfgContextTokens ??
      lookupContextTokens(modelUsed) ??
      sessionEntry?.contextTokens ??
      DEFAULT_CONTEXT_TOKENS;

    if (sessionStore && sessionKey) {
      if (hasNonzeroUsage(usage)) {
        const entry = sessionEntry ?? sessionStore[sessionKey];
        if (entry) {
          const input = usage.input ?? 0;
          const output = usage.output ?? 0;
          const promptTokens =
            input + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
          const nextEntry = {
            ...entry,
            inputTokens: input,
            outputTokens: output,
            totalTokens:
              promptTokens > 0 ? promptTokens : (usage.total ?? input),
            modelProvider: providerUsed,
            model: modelUsed,
            contextTokens: contextTokensUsed ?? entry.contextTokens,
            updatedAt: Date.now(),
          };
          if (cliSessionId) {
            nextEntry.claudeCliSessionId = cliSessionId;
          }
          sessionStore[sessionKey] = nextEntry;
          if (storePath) {
            await saveSessionStore(storePath, sessionStore);
          }
        }
      } else if (modelUsed || contextTokensUsed) {
        const entry = sessionEntry ?? sessionStore[sessionKey];
        if (entry) {
          sessionStore[sessionKey] = {
            ...entry,
            modelProvider: providerUsed ?? entry.modelProvider,
            model: modelUsed ?? entry.model,
            contextTokens: contextTokensUsed ?? entry.contextTokens,
            claudeCliSessionId: cliSessionId ?? entry.claudeCliSessionId,
          };
          if (storePath) {
            await saveSessionStore(storePath, sessionStore);
          }
        }
      }
    }

    const responseUsageEnabled =
      (sessionEntry?.responseUsage ??
        (sessionKey
          ? sessionStore?.[sessionKey]?.responseUsage
          : undefined)) === "on";
    if (responseUsageEnabled && hasNonzeroUsage(usage)) {
      const authMode = resolveModelAuthMode(providerUsed, cfg);
      const showCost = authMode === "api-key";
      const costConfig = showCost
        ? resolveModelCostConfig({
            provider: providerUsed,
            model: modelUsed,
            config: cfg,
          })
        : undefined;
      const formatted = formatResponseUsageLine({
        usage,
        showCost,
        costConfig,
      });
      if (formatted) responseUsageLine = formatted;
    }

    // If verbose is enabled and this is a new session, prepend a session hint.
    let finalPayloads = replyPayloads;
    if (autoCompactionCompleted) {
      const count = await incrementCompactionCount({
        sessionEntry,
        sessionStore,
        sessionKey,
        storePath,
      });
      if (resolvedVerboseLevel === "on") {
        const suffix = typeof count === "number" ? ` (count ${count})` : "";
        finalPayloads = [
          { text: `🧹 Auto-compaction complete${suffix}.` },
          ...finalPayloads,
        ];
      }
    }
    if (resolvedVerboseLevel === "on" && isNewSession) {
      finalPayloads = [
        { text: `🧭 New session: ${followupRun.run.sessionId}` },
        ...finalPayloads,
      ];
    }
    if (responseUsageLine) {
      finalPayloads = appendUsageLine(finalPayloads, responseUsageLine);
    }

    return finalizeWithFollowup(
      finalPayloads.length === 1 ? finalPayloads[0] : finalPayloads,
    );
  } finally {
    blockReplyPipeline?.stop();
    typing.markRunComplete();
  }
}