fix: harden session caching and topic transcripts

This commit is contained in:
Peter Steinberger
2026-01-07 22:38:41 +00:00
parent 8da4f259dd
commit 67d1f61872
13 changed files with 75 additions and 289 deletions

View File

@@ -70,6 +70,7 @@
- Telegram: support forum topics with topic-isolated sessions and message_thread_id routing. Thanks @HazAT, @nachoiacovino, @RandyVentures for PR #321/#333/#334.
- Telegram: add draft streaming via `sendMessageDraft` with `telegram.streamMode`, plus `/reasoning stream` for draft-only reasoning.
- Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377.
+- Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407.
- iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359.
- Messages: stop defaulting ack reactions to 👀 when identity emoji is missing.
- Auto-reply: require slash for control commands to avoid false triggers in normal text.

View File

@@ -16,7 +16,7 @@ All session state is **owned by the gateway** (the “master” Clawdbot). UI cl
## Where state lives
- On the **gateway host**:
- Store file: `~/.clawdbot/agents/<agentId>/sessions/sessions.json` (per agent).
-- Transcripts: `~/.clawdbot/agents/<agentId>/sessions/<SessionId>.jsonl` (one file per session id).
+- Transcripts: `~/.clawdbot/agents/<agentId>/sessions/<SessionId>.jsonl` (Telegram topic sessions use `.../<SessionId>-topic-<threadId>.jsonl`).
- The store is a map `sessionKey -> { sessionId, updatedAt, ... }`. Deleting entries is safe; they are recreated on demand.
- Group entries may include `displayName`, `provider`, `subject`, `room`, and `space` to label sessions in UIs.
- Clawdbot does **not** read legacy Pi/Tau session folders.
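
To make the layout above concrete, here is a small illustrative sketch in TypeScript. The session key format and the example values are invented; only the store location, the `sessionId`/`updatedAt`/label fields, and the transcript naming come from the notes above.

```ts
// Hypothetical sketch of the on-disk layout described above.
// The session key format and example values are invented for illustration.
import os from "node:os";
import path from "node:path";

type SessionEntryExample = {
  sessionId: string;
  updatedAt: number;
  displayName?: string;
  provider?: string;
  subject?: string;
  room?: string;
  space?: string;
};

const sessionsDir = path.join(os.homedir(), ".clawdbot", "agents", "main", "sessions");

// sessions.json: map of sessionKey -> entry (safe to delete; recreated on demand).
const store: Record<string, SessionEntryExample> = {
  "telegram:group:-1001234567890": {
    sessionId: "sess-1",
    updatedAt: Date.now(),
    displayName: "Ops chat",
  },
};

// Transcript paths derived from the session id:
const plainTranscript = path.join(sessionsDir, "sess-1.jsonl");
const topicTranscript = path.join(sessionsDir, "sess-1-topic-123.jsonl"); // Telegram forum topic
```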

View File

@@ -1,46 +0,0 @@
#!/bin/bash
# =============================================================================
# Config Lock: Makes clawdbot.json immutable to prevent any writes
# Usage: config-lock.sh [lock|unlock|status]
# =============================================================================
# Source unified environment
source "$(dirname "$0")/env.sh"
lock_config() {
chflags uchg "$CONFIG"
log "🔒 Config LOCKED - write access disabled."
}
unlock_config() {
chflags nouchg "$CONFIG"
log "🔓 Config UNLOCKED - write access enabled."
}
check_status() {
if config_is_locked; then
echo "🔒 Config is LOCKED (immutable)"
return 0
else
echo "🔓 Config is UNLOCKED (writable)"
return 1
fi
}
case "${1:-status}" in
lock)
lock_config
;;
unlock)
unlock_config
;;
status)
check_status
;;
*)
echo "Usage: $0 [lock|unlock|status]"
echo " lock - Make config immutable (no writes allowed)"
echo " unlock - Allow writes (for manual edits)"
echo " status - Show current lock status"
;;
esac

View File

@@ -1,55 +0,0 @@
#!/bin/bash
# =============================================================================
# Config Watchdog: Detects unauthorized changes to model config
# Restores if changed (backup protection if config unlocked)
# =============================================================================
# Source unified environment
source "$(dirname "$0")/env.sh"
EXPECTED_PRIMARY="antigravity/gemini-3-pro-low"
EXPECTED_FALLBACKS='["antigravity/claude-sonnet-4-5","antigravity/gemini-3-flash","antigravity/gemini-3-pro-high","antigravity/claude-opus-4-5","antigravity/claude-sonnet-4-5-thinking","antigravity/claude-opus-4-5-thinking"]'
log "Config watchdog check..."
# If config is locked, just verify and exit
if config_is_locked; then
log "✅ Config is LOCKED (immutable) - no changes possible."
exit 0
fi
# Config is unlocked - check for tampering
log "⚠️ Config is UNLOCKED - checking for unauthorized changes..."
CURRENT_PRIMARY=$(jq -r '.agent.model.primary' "$CONFIG" 2>/dev/null)
CURRENT_FALLBACKS=$(jq -c '.agent.model.fallbacks' "$CONFIG" 2>/dev/null)
CHANGED=false
if [ "$CURRENT_PRIMARY" != "$EXPECTED_PRIMARY" ]; then
log "⚠️ PRIMARY CHANGED: $CURRENT_PRIMARY$EXPECTED_PRIMARY"
CHANGED=true
fi
if [ "$CURRENT_FALLBACKS" != "$EXPECTED_FALLBACKS" ]; then
log "⚠️ FALLBACKS CHANGED!"
CHANGED=true
fi
if [ "$CHANGED" = true ]; then
log "🔧 RESTORING CONFIG..."
jq --arg primary "$EXPECTED_PRIMARY" \
--argjson fallbacks "$EXPECTED_FALLBACKS" \
'.agent.model.primary = $primary | .agent.model.fallbacks = $fallbacks' \
"$CONFIG" > "${CONFIG}.tmp" && mv "${CONFIG}.tmp" "$CONFIG"
if [ $? -eq 0 ]; then
log "✅ Config restored. Re-locking..."
"$SCRIPTS_DIR/config-lock.sh" lock
else
log "❌ Failed to restore config!"
fi
else
log "✅ Config OK - re-locking..."
"$SCRIPTS_DIR/config-lock.sh" lock
fi

View File

@@ -1,30 +0,0 @@
#!/bin/bash
# =============================================================================
# Unified environment for all clawdbot scripts
# Source this at the top of every script: source "$(dirname "$0")/env.sh"
# =============================================================================
# Comprehensive PATH for cron environment
export PATH="/usr/sbin:/usr/bin:/bin:/opt/homebrew/bin:$HOME/.bun/bin:/usr/local/bin:$PATH"
# Core directories
export CLAWDBOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 2>/dev/null && pwd)"
export SCRIPTS_DIR="$CLAWDBOT_DIR/scripts"
export CONFIG="$HOME/.clawdbot/clawdbot.json"
export LOG_DIR="$HOME/.clawdbot/logs"
# Gateway settings
export PORT=18789
# Ensure log directory exists
mkdir -p "$LOG_DIR" 2>/dev/null
# Helper: Check if config is locked
config_is_locked() {
ls -lO "$CONFIG" 2>/dev/null | grep -q "uchg"
}
# Helper: Log with timestamp
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}

View File

@@ -1,25 +0,0 @@
#!/bin/bash
# =============================================================================
# Keep-Alive: Ensures clawdbot gateway is always running
# Runs via cron every 2 minutes
# =============================================================================
# Source unified environment
source "$(dirname "$0")/env.sh"
log "Checking clawdbot status..."
# Check if gateway is running (port check)
if lsof -i :$PORT > /dev/null 2>&1; then
# Additional health check via HTTP
if curl -sf "http://127.0.0.1:$PORT/health" > /dev/null 2>&1; then
log "✅ Status: ONLINE (Port $PORT active, health OK)"
else
log "⚠️ Status: DEGRADED (Port $PORT active, but health check failed)"
fi
exit 0
else
log "❌ Status: OFFLINE (Port $PORT closed). Initiating restart..."
"$SCRIPTS_DIR/models.sh" restart
log "Restart command executed."
fi

View File

@@ -1,82 +0,0 @@
#!/bin/bash
# =============================================================================
# Models: Gateway management and model config display
# Usage: ./scripts/models.sh [edit|restart|show]
# =============================================================================
# Source unified environment
source "$(dirname "$0")/env.sh"
wait_for_port() {
local port=$1
for i in {1..10}; do
if ! lsof -i :$port > /dev/null 2>&1; then
return 0
fi
echo "Waiting for port $port to clear... ($i/10)"
sleep 1
done
return 1
}
restart_gateway() {
log "Restarting gateway..."
# Try graceful kill first
pkill -f "bun.*gateway --port $PORT" 2>/dev/null
pkill -f "node.*gateway.*$PORT" 2>/dev/null
pkill -f "tsx.*gateway.*$PORT" 2>/dev/null
if ! wait_for_port $PORT; then
log "Port $PORT still in use. Forcing cleanup..."
lsof -ti :$PORT | xargs kill -9 2>/dev/null
sleep 1
fi
# Start gateway in background
cd "$CLAWDBOT_DIR" && pnpm clawdbot gateway --port $PORT &
# Verify start
sleep 3
if lsof -i :$PORT > /dev/null 2>&1; then
log "✅ Gateway restarted successfully on port $PORT."
# Auto-lock config after successful restart
"$SCRIPTS_DIR/config-lock.sh" lock
return 0
else
log "❌ Gateway failed to start. Check logs."
return 1
fi
}
case "${1:-show}" in
edit)
# Unlock config for editing
if config_is_locked; then
"$SCRIPTS_DIR/config-lock.sh" unlock
fi
${EDITOR:-nano} "$CONFIG"
echo "Config saved."
restart_gateway
;;
restart)
restart_gateway
;;
show)
echo "=== Model Priority ==="
echo "Primary: $(jq -r '.agent.model.primary' "$CONFIG")"
echo ""
echo "Fallbacks:"
jq -r '.agent.model.fallbacks[]' "$CONFIG" | nl
echo ""
echo "Config Lock: $(config_is_locked && echo '🔒 LOCKED' || echo '🔓 UNLOCKED')"
;;
*)
echo "Usage: $0 [edit|restart|show]"
echo " show - Display current model priority (default)"
echo " edit - Edit config and restart gateway"
echo " restart - Just restart gateway"
;;
esac

View File

@@ -278,14 +278,12 @@ export function pickFallbackThinkingLevel(params: {
* Gemini requires strict alternating user→assistant→tool→user pattern.
* This function:
* 1. Detects consecutive messages from the same role
-* 2. Merges consecutive assistant/tool messages together
+* 2. Merges consecutive assistant messages together
* 3. Preserves metadata (usage, stopReason, etc.)
*
* This prevents the "function call turn comes immediately after a user turn or after a function response turn" error.
*/
-export function validateGeminiTurns(
-messages: AgentMessage[],
-): AgentMessage[] {
+export function validateGeminiTurns(messages: AgentMessage[]): AgentMessage[] {
if (!Array.isArray(messages) || messages.length === 0) {
return messages;
}
@@ -299,9 +297,7 @@ export function validateGeminiTurns(
continue;
}
-const msgRole = (msg as { role?: unknown }).role as
-| string
-| undefined;
+const msgRole = (msg as { role?: unknown }).role as string | undefined;
if (!msgRole) {
result.push(msg);
continue;
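
For orientation, a rough sketch of the merging pass this docstring describes. It is deliberately simplified: the real `validateGeminiTurns` operates on `AgentMessage` objects and also carries over usage/stopReason metadata, which the stand-in type below omits.

```ts
// Simplified sketch only: fold consecutive assistant messages into one so the
// history alternates user → assistant → tool → user, as Gemini expects.
// SimpleMessage is a stand-in for the real AgentMessage type.
type SimpleMessage = { role?: string; content?: string };

function mergeConsecutiveAssistantMessages(messages: SimpleMessage[]): SimpleMessage[] {
  const result: SimpleMessage[] = [];
  for (const msg of messages) {
    const prev = result[result.length - 1];
    if (msg.role === "assistant" && prev?.role === "assistant") {
      // Same role twice in a row: merge content instead of emitting a second turn.
      prev.content = [prev.content, msg.content].filter(Boolean).join("\n");
      continue;
    }
    result.push({ ...msg });
  }
  return result;
}
```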

View File

@@ -334,7 +334,6 @@ const EMBEDDED_RUN_WAITERS = new Map<string, Set<EmbeddedRunWaiter>>();
type SessionManagerCacheEntry = {
sessionFile: string;
loadedAt: number;
-lastAccessAt: number;
};
const SESSION_MANAGER_CACHE = new Map<string, SessionManagerCacheEntry>();
@@ -362,7 +361,6 @@ function trackSessionManagerAccess(sessionFile: string): void {
SESSION_MANAGER_CACHE.set(sessionFile, {
sessionFile,
loadedAt: now,
-lastAccessAt: now,
});
}
@@ -380,9 +378,14 @@ async function prewarmSessionFile(sessionFile: string): Promise<void> {
if (isSessionManagerCached(sessionFile)) return;
try {
-// Touch the file to bring it into OS page cache
-// This is much faster than letting SessionManager.open() do it cold
-await fs.stat(sessionFile);
+// Read a small chunk to encourage OS page cache warmup.
+const handle = await fs.open(sessionFile, "r");
+try {
+const buffer = Buffer.alloc(4096);
+await handle.read(buffer, 0, buffer.length, 0);
+} finally {
+await handle.close();
+}
trackSessionManagerAccess(sessionFile);
} catch {
// File doesn't exist yet, SessionManager will create it

View File

@@ -1,12 +1,12 @@
-import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import fs from "node:fs";
-import path from "node:path";
import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
-loadSessionStore,
-saveSessionStore,
clearSessionStoreCacheForTest,
+loadSessionStore,
type SessionEntry,
+saveSessionStore,
} from "./sessions.js";
describe("Session Store Cache", () => {
@@ -52,7 +52,7 @@ describe("Session Store Cache", () => {
expect(loaded).toEqual(testStore);
});
-it("should cache session store on first load", async () => {
+it("should cache session store on first load when file is unchanged", async () => {
const testStore: Record<string, SessionEntry> = {
"session:1": {
sessionId: "id-1",
@@ -63,26 +63,20 @@ describe("Session Store Cache", () => {
await saveSessionStore(storePath, testStore);
+const readSpy = vi.spyOn(fs, "readFileSync");
// First load - from disk
const loaded1 = loadSessionStore(storePath);
expect(loaded1).toEqual(testStore);
-// Modify file on disk
-const modifiedStore: Record<string, SessionEntry> = {
-"session:2": {
-sessionId: "id-2",
-updatedAt: Date.now(),
-displayName: "Test Session 2",
-},
-};
-fs.writeFileSync(storePath, JSON.stringify(modifiedStore, null, 2));
-// Second load - should still return cached data (not the modified file)
+// Second load - should return cached data (no extra disk read)
const loaded2 = loadSessionStore(storePath);
-expect(loaded2).toEqual(testStore); // Should be original, not modified
+expect(loaded2).toEqual(testStore);
+expect(readSpy).toHaveBeenCalledTimes(1);
+readSpy.mockRestore();
});
it("should cache multiple calls to the same store path", async () => { it("should refresh cache when store file changes on disk", async () => {
const testStore: Record<string, SessionEntry> = { const testStore: Record<string, SessionEntry> = {
"session:1": { "session:1": {
sessionId: "id-1", sessionId: "id-1",
@@ -98,12 +92,16 @@ describe("Session Store Cache", () => {
expect(loaded1).toEqual(testStore); expect(loaded1).toEqual(testStore);
// Modify file on disk while cache is valid // Modify file on disk while cache is valid
fs.writeFileSync(storePath, JSON.stringify({ "session:99": { sessionId: "id-99", updatedAt: Date.now() } }, null, 2)); const modifiedStore: Record<string, SessionEntry> = {
"session:99": { sessionId: "id-99", updatedAt: Date.now() },
};
fs.writeFileSync(storePath, JSON.stringify(modifiedStore, null, 2));
const bump = new Date(Date.now() + 2000);
fs.utimesSync(storePath, bump, bump);
// Second load - should still return original cached data // Second load - should return the updated store
const loaded2 = loadSessionStore(storePath); const loaded2 = loadSessionStore(storePath);
expect(loaded2).toEqual(testStore); expect(loaded2).toEqual(modifiedStore);
expect(loaded2).not.toHaveProperty("session:99");
}); });
it("should invalidate cache on write", async () => { it("should invalidate cache on write", async () => {

View File

@@ -8,6 +8,7 @@ import {
deriveSessionKey,
loadSessionStore,
resolveSessionKey,
+resolveSessionTranscriptPath,
resolveSessionTranscriptsDir,
updateLastRoute,
} from "./sessions.js";
@@ -147,4 +148,21 @@ describe("sessions", () => {
);
expect(dir).toBe("/legacy/state/agents/main/sessions");
});
+it("includes topic ids in session transcript filenames", () => {
+const prev = process.env.CLAWDBOT_STATE_DIR;
+process.env.CLAWDBOT_STATE_DIR = "/custom/state";
+try {
+const sessionFile = resolveSessionTranscriptPath("sess-1", "main", 123);
+expect(sessionFile).toBe(
+"/custom/state/agents/main/sessions/sess-1-topic-123.jsonl",
+);
+} finally {
+if (prev === undefined) {
+delete process.env.CLAWDBOT_STATE_DIR;
+} else {
+process.env.CLAWDBOT_STATE_DIR = prev;
+}
+}
+});
});

View File

@@ -24,6 +24,7 @@ type SessionStoreCacheEntry = {
store: Record<string, SessionEntry>;
loadedAt: number;
storePath: string;
+mtimeMs?: number;
};
const SESSION_STORE_CACHE = new Map<string, SessionStoreCacheEntry>();
@@ -52,6 +53,14 @@ function isSessionStoreCacheValid(entry: SessionStoreCacheEntry): boolean {
return now - entry.loadedAt <= ttl;
}
+function getSessionStoreMtimeMs(storePath: string): number | undefined {
+try {
+return fs.statSync(storePath).mtimeMs;
+} catch {
+return undefined;
+}
+}
function invalidateSessionStoreCache(storePath: string): void {
SESSION_STORE_CACHE.delete(storePath);
}
@@ -180,19 +189,22 @@ export function resolveSessionTranscriptPath(
agentId?: string,
topicId?: number,
): string {
-const fileName = topicId !== undefined ? `${sessionId}-topic-${topicId}.jsonl` : `${sessionId}.jsonl`;
+const fileName =
+topicId !== undefined
+? `${sessionId}-topic-${topicId}.jsonl`
+: `${sessionId}.jsonl`;
return path.join(resolveAgentSessionsDir(agentId), fileName);
}
export function resolveSessionFilePath(
sessionId: string,
entry?: SessionEntry,
-opts?: { agentId?: string },
+opts?: { agentId?: string; topicId?: number },
): string {
const candidate = entry?.sessionFile?.trim();
return candidate
? candidate
-: resolveSessionTranscriptPath(sessionId, opts?.agentId);
+: resolveSessionTranscriptPath(sessionId, opts?.agentId, opts?.topicId);
}
export function resolveStorePath(store?: string, opts?: { agentId?: string }) {
@@ -390,19 +402,25 @@ export function loadSessionStore(
if (isSessionStoreCacheEnabled()) {
const cached = SESSION_STORE_CACHE.get(storePath);
if (cached && isSessionStoreCacheValid(cached)) {
-// Return a shallow copy to prevent external mutations affecting cache
-return { ...cached.store };
+const currentMtimeMs = getSessionStoreMtimeMs(storePath);
+if (currentMtimeMs === cached.mtimeMs) {
+// Return a shallow copy to prevent external mutations affecting cache
+return { ...cached.store };
+}
+invalidateSessionStoreCache(storePath);
}
}
// Cache miss or disabled - load from disk
let store: Record<string, SessionEntry> = {};
+let mtimeMs = getSessionStoreMtimeMs(storePath);
try {
const raw = fs.readFileSync(storePath, "utf-8");
const parsed = JSON5.parse(raw);
if (parsed && typeof parsed === "object") {
store = parsed as Record<string, SessionEntry>;
}
+mtimeMs = getSessionStoreMtimeMs(storePath) ?? mtimeMs;
} catch {
// ignore missing/invalid store; we'll recreate it
}
@@ -413,6 +431,7 @@
store: { ...store }, // Store a copy to prevent external mutations
loadedAt: Date.now(),
storePath,
+mtimeMs,
});
}
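
Stepping back, the hunks above implement an mtime-guarded TTL cache. Below is a generic, self-contained sketch of that pattern (names simplified; not the literal Clawdbot code).

```ts
import fs from "node:fs";

// Generic sketch: a cached value is served only while the TTL holds AND the
// file's mtime still matches what was recorded when the value was cached.
type CacheEntry<T> = { value: T; loadedAt: number; mtimeMs?: number };

function mtimeOf(file: string): number | undefined {
  try {
    return fs.statSync(file).mtimeMs;
  } catch {
    return undefined; // missing file: treated as "unknown"
  }
}

function getFresh<T>(
  cache: Map<string, CacheEntry<T>>,
  file: string,
  ttlMs: number,
): T | undefined {
  const entry = cache.get(file);
  if (!entry) return undefined;
  if (Date.now() - entry.loadedAt > ttlMs) return undefined; // TTL expired
  if (mtimeOf(file) !== entry.mtimeMs) {
    cache.delete(file); // file changed on disk since it was cached
    return undefined;
  }
  return entry.value;
}

function putFresh<T>(cache: Map<string, CacheEntry<T>>, file: string, value: T): void {
  cache.set(file, { value, loadedAt: Date.now(), mtimeMs: mtimeOf(file) });
}
```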

View File

@@ -31,15 +31,6 @@ export type SessionSendPolicyConfig = {
rules?: SessionSendPolicyRule[];
};
-export type SessionCacheConfig = {
-/** Enable session store caching (default: true). Set to false to disable. */
-enabled?: boolean;
-/** Session store cache TTL in milliseconds (default: 45000 = 45s). Set to 0 to disable. */
-storeTtlMs?: number;
-/** SessionManager cache TTL in milliseconds (default: 45000 = 45s). Set to 0 to disable. */
-managerTtlMs?: number;
-};
export type SessionConfig = {
scope?: SessionScope;
resetTriggers?: string[];
@@ -50,8 +41,6 @@ export type SessionConfig = {
typingMode?: TypingMode;
mainKey?: string;
sendPolicy?: SessionSendPolicyConfig;
-/** Session caching configuration. */
-cache?: SessionCacheConfig;
agentToAgent?: {
/** Max ping-pong turns between requester/target (0–5). Default: 5. */
maxPingPongTurns?: number;