From 565944ec7129fdc77ef1b2349cf829a5959931ce Mon Sep 17 00:00:00 2001 From: Tobias Bischoff <711564+tobiasbischoff@users.noreply.github.com> Date: Thu, 22 Jan 2026 10:04:56 +0100 Subject: [PATCH] fix(auth): skip auth profiles in cooldown during selection and rotation Auth profiles in cooldown (due to rate limiting) were being attempted, causing unnecessary retries and delays. This fix ensures: 1. Initial profile selection skips profiles in cooldown 2. Profile rotation (after failures) skips cooldown profiles 3. Clear error message when all profiles are unavailable Tests added: - Skips profiles in cooldown during initial selection - Skips profiles in cooldown when rotating after failure Fixes #1316 --- CHANGELOG.md | 81 ++++------ ...ded-pi-agent.auth-profile-rotation.test.ts | 139 ++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 20 ++- .../bot-message-context.sender-prefix.test.ts | 92 ++++++++++++ 4 files changed, 277 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66d8bf81f..284029c25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,9 @@ Docs: https://docs.clawd.bot -## 2026.1.22 (unreleased) +## 2026.1.22 ### Changes -- Highlight: Mattermost plugin channel support with pairing + allowlist gating. (#1428) Thanks @damoahdominic. -- Highlight: OpenProse plugin skill pack with `/prose` slash command, plugin-shipped skills, and docs. https://docs.clawd.bot/prose -- TUI: run local shell commands with `!` after per-session consent, and warn when local exec stays disabled. (#1463) Thanks @vignesh07. - Highlight: Lobster optional plugin tool for typed workflows + approval gates. https://docs.clawd.bot/tools/lobster - Lobster: allow workflow file args via `argsJson` in the plugin tool. https://docs.clawd.bot/tools/lobster - Agents: add identity avatar config support and Control UI avatar rendering. (#1329, #1424) Thanks @dlauer. @@ -20,7 +17,6 @@ Docs: https://docs.clawd.bot - Signal: add typing indicators and DM read receipts via signal-cli. - MSTeams: add file uploads, adaptive cards, and attachment handling improvements. (#1410) Thanks @Evizero. - CLI: add `clawdbot update wizard` for interactive channel selection and restart prompts. https://docs.clawd.bot/cli/update -- macOS: add attach-only debug toggle + `--attach-only`/`--no-launchd` flag to skip launchd installs. ### Breaking - **BREAKING:** Envelope and system event timestamps now default to host-local time (was UTC) so agents don’t have to constantly convert. @@ -36,6 +32,8 @@ Docs: https://docs.clawd.bot - Doctor: honor CLAWDBOT_GATEWAY_TOKEN for auth checks and security audit token reuse. (#1448) Thanks @azade-c. - Agents: make tool summaries more readable and only show optional params when set. - Agents: centralize transcript sanitization in the runner; keep tags and error turns intact. +- Auth: skip auth profiles in cooldown during initial selection and rotation. (#1316) Thanks @odrobnik. +- Agents/TUI: honor user-pinned auth profiles during cooldown and preserve search picker ranking. (#1432) Thanks @tobiasbischoff. - Mattermost (plugin): enforce pairing/allowlist gating, keep @username targets, and clarify plugin-only docs. (#1428) Thanks @damoahdominic. - Docs: fix gog auth services example to include docs scope. (#1454) Thanks @zerone0x. - macOS: prefer linked channels in gateway summary to avoid false “not linked” status. @@ -49,63 +47,38 @@ Docs: https://docs.clawd.bot ## 2026.1.21 -### Highlights -- Lobster optional plugin tool for typed workflows + approval gates. https://docs.clawd.bot/tools/lobster -- Custom assistant identity + avatars in the Control UI. https://docs.clawd.bot/cli/agents https://docs.clawd.bot/web/control-ui -- Cache optimizations: cache-ttl pruning + defaults reduce token spend on cold requests. https://docs.clawd.bot/concepts/session-pruning -- Exec approvals + elevated ask/full modes. https://docs.clawd.bot/tools/exec-approvals https://docs.clawd.bot/tools/elevated -- Signal typing/read receipts + MSTeams attachments. https://docs.clawd.bot/channels/signal https://docs.clawd.bot/channels/msteams -- `/models` UX refresh + `clawdbot update wizard`. https://docs.clawd.bot/cli/models https://docs.clawd.bot/cli/update - ### Changes -- Highlight: Lobster optional plugin tool for typed workflows + approval gates. https://docs.clawd.bot/tools/lobster (#1152) Thanks @vignesh07. -- Agents/UI: add identity avatar config support and Control UI avatar rendering. (#1329, #1424) Thanks @dlauer. https://docs.clawd.bot/gateway/configuration https://docs.clawd.bot/cli/agents -- Control UI: add custom assistant identity support and per-session identity display. (#1420) Thanks @robbyczgw-cla. https://docs.clawd.bot/web/control-ui -- CLI: add `clawdbot update wizard` with interactive channel selection + restart prompts, plus preflight checks before rebasing. https://docs.clawd.bot/cli/update -- Models/Commands: add `/models`, improve `/model` listing UX, and expand `clawdbot models` paging. (#1398) Thanks @vignesh07. https://docs.clawd.bot/cli/models -- CLI: move gateway service commands under `clawdbot gateway`, flatten node service commands under `clawdbot node`, and add `gateway probe` for reachability. https://docs.clawd.bot/cli/gateway https://docs.clawd.bot/cli/node -- Exec: add elevated ask/full modes, tighten allowlist gating, and render approvals tables on write. https://docs.clawd.bot/tools/elevated https://docs.clawd.bot/tools/exec-approvals -- Exec approvals: default to local host, add gateway/node targeting + target details, support wildcard agent allowlists, and tighten allowlist parsing/safe bins. https://docs.clawd.bot/cli/approvals https://docs.clawd.bot/tools/exec-approvals -- Heartbeat: allow explicit session keys and active hours. (#1256) Thanks @zknicker. https://docs.clawd.bot/gateway/heartbeat -- Sessions: add per-channel idle durations via `sessions.channelIdleMinutes`. (#1353) Thanks @cash-echo-bot. -- Nodes: run exec-style, expose PATH in status/describe, and bootstrap PATH for node-host execution. https://docs.clawd.bot/cli/node -- Cache: add `cache.ttlPrune` mode and auth-aware defaults for cache TTL behavior. -- Queue: add per-channel debounce overrides for auto-reply. https://docs.clawd.bot/concepts/queue -- Discord: add wildcard channel config support. (#1334) Thanks @pvoo. https://docs.clawd.bot/channels/discord -- Signal: add typing indicators and DM read receipts via signal-cli. https://docs.clawd.bot/channels/signal -- MSTeams: add file uploads, adaptive cards, and attachment handling improvements. (#1410) Thanks @Evizero. https://docs.clawd.bot/channels/msteams -- Onboarding: remove the run setup-token auth option (paste setup-token or reuse CLI creds instead). -- macOS: refresh Settings (location access in Permissions, connection mode in menu, remove CLI install UI). -- Diagnostics: add cache trace config for debugging. (#1370) Thanks @parubets. -- Docs: Lobster guides + org URL updates, /model allowlist troubleshooting, Gmail message search examples, gateway.mode troubleshooting, prompt injection guidance, npm prefix/node CLI notes, control UI dev gatewayUrl note, tool_use FAQ, showcase video, and sharp/node-gyp workaround. (#1427, #1220, #1405) Thanks @vignesh07, @mbelinky. +- Heartbeat: allow running heartbeats in an explicit session key. (#1256) Thanks @zknicker. +- CLI: default exec approvals to the local host, add gateway/node targeting flags, and show target details in allowlist output. +- CLI: exec approvals mutations render tables instead of raw JSON. +- Exec approvals: support wildcard agent allowlists (`*`) across all agents. +- Exec approvals: allowlist matches resolved binary paths only, add safe stdin-only bins, and tighten allowlist shell parsing. +- Nodes: expose node PATH in status/describe and bootstrap PATH for node-host execution. +- CLI: flatten node service commands under `clawdbot node` and remove `service node` docs. +- CLI: move gateway service commands under `clawdbot gateway` and add `gateway probe` for reachability. +- Sessions: add per-channel reset overrides via `session.resetByChannel`. (#1353) Thanks @cash-echo-bot. ### Breaking - **BREAKING:** Control UI now rejects insecure HTTP without device identity by default. Use HTTPS (Tailscale Serve) or set `gateway.controlUi.allowInsecureAuth: true` to allow token-only auth. https://docs.clawd.bot/web/control-ui#insecure-http -- **BREAKING:** Envelope and system event timestamps now default to host-local time (was UTC) so agents don’t have to constantly convert. ### Fixes -- Streaming/Typing/Media: keep reply tags across streamed chunks, start typing indicators at run start, and accept MEDIA paths with spaces/tilde while preferring the message tool hint for image replies. -- Agents/Providers: drop unsigned thinking blocks for Claude models (Google Antigravity) and enforce alphanumeric tool call ids for strict providers (Mistral/OpenRouter). (#1372) Thanks @zerone0x. -- Exec approvals: treat main as the default agent, align node/gateway allowlist prechecks, validate resolved paths, avoid allowlist resolve races, and avoid null optional params. (#1417, #1414, #1425) Thanks @czekaj. -- Exec/Windows: resolve Windows exec paths with extensions and handle safe-bin exe names. - Nodes/macOS: prompt on allowlist miss for node exec approvals, persist allowlist decisions, and flatten node invoke errors. (#1394) Thanks @ngutman. -- Gateway: prevent multiple gateways from sharing the same config/state (singleton lock), keep auto bind loopback-first with explicit tailnet binding, and improve SSH auth handling. (#1380) -- Control UI: remove the chat stop button, keep the composer aligned to the bottom edge, stabilize session previews, and refresh the debug panel on route-driven tab changes. (#1373) Thanks @yazinsai. -- UI/config: export `SECTION_META` for config form modules. (#1418) Thanks @MaudeBot. -- macOS: keep chat pinned during streaming replies, include Textual resources, respect wildcard exec approvals, allow SSH agent auth, and default distribution builds to universal binaries. (#1279, #1362, #1384, #1396) Thanks @ameno-, @JustYannicc. -- BlueBubbles: resolve short message IDs safely, expose full IDs in templates, and harden short-id fetch wrappers. (#1369, #1387) Thanks @tyler6204. -- Models/Configure: inherit session model overrides in threads/topics, map OpenCode Zen models to the correct APIs, narrow Anthropic OAuth allowlist handling, seed allowlist fallbacks, list the full catalog when no allowlist is set, and limit `/model` list output. (#1376, #1416) -- Memory: prevent CLI hangs by deferring vector probes, add sqlite-vec/embedding timeouts, and make session memory indexing async. -- Cron: cap reminder context history to 10 messages and honor `contextMessages`. (#1103) Thanks @mkbehr. -- Cache: restore the 1h cache TTL option and reset the pruning window. -- Zalo Personal: tolerate ANSI/log-prefixed JSON output from `zca`. (#1379) Thanks @ptn1411. -- Browser: suppress Chrome restore prompts for managed profiles. (#1419) Thanks @jamesgroat. -- Infra: preserve fetch helper methods/preconnect when wrapping abort signals and normalize Telegram fetch aborts. -- Config/Doctor: avoid stack traces for invalid configs, log the config path, avoid WhatsApp config resurrection, and warn when `gateway.mode` is unset. (#900) -- CLI: read Codex CLI account_id for workspace billing. (#1422) Thanks @aj47. -- Logs/Status: align rolling log filenames with local time and report sandboxed runtime in `clawdbot status`. (#1343) +- Gateway: keep auto bind loopback-first and add explicit tailnet binding to avoid Tailscale taking over local UI. (#1380) +- Agents: enforce 9-char alphanumeric tool call ids for Mistral providers. (#1372) Thanks @zerone0x. - Embedded runner: persist injected history images so attachments aren’t reloaded each turn. (#1374) Thanks @Nicell. -- Nodes/Subagents: include agent/node/gateway context in tool failure logs and ensure subagent list uses the command session. +- Nodes tool: include agent/node/gateway context in tool failure logs to speed approval debugging. +- macOS: exec approvals now respect wildcard agent allowlists (`*`). +- macOS: allow SSH agent auth when no identity file is set. (#1384) Thanks @ameno-. +- Gateway: prevent multiple gateways from sharing the same config/state at once (singleton lock). +- UI: remove the chat stop button and keep the composer aligned to the bottom edge. +- Typing: start instant typing indicators at run start so DMs and mentions show immediately. +- Configure: restrict the model allowlist picker to OAuth-compatible Anthropic models and preselect Opus 4.5. +- Configure: seed model fallbacks from the allowlist selection when multiple models are chosen. +- Model picker: list the full catalog when no model allowlist is configured. +- Discord: honor wildcard channel configs via shared match helpers. (#1334) Thanks @pvoo. +- BlueBubbles: resolve short message IDs safely and expose full IDs in templates. (#1387) Thanks @tyler6204. +- Infra: preserve fetch helper methods when wrapping abort signals. (#1387) +- macOS: default distribution packaging to universal binaries. (#1396) Thanks @JustYannicc. ## 2026.1.20 diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts index b931230af..27bd96419 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts @@ -248,4 +248,143 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { await fs.rm(workspaceDir, { recursive: true, force: true }); } }); + + it("skips profiles in cooldown during initial selection", async () => { + vi.useFakeTimers(); + try { + const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-")); + const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-")); + const now = Date.now(); + vi.setSystemTime(now); + + try { + const authPath = path.join(agentDir, "auth-profiles.json"); + const payload = { + version: 1, + profiles: { + "openai:p1": { type: "api_key", provider: "openai", key: "sk-one" }, + "openai:p2": { type: "api_key", provider: "openai", key: "sk-two" }, + }, + usageStats: { + "openai:p1": { lastUsed: 1, cooldownUntil: now + 60 * 60 * 1000 }, // p1 in cooldown for 1 hour + "openai:p2": { lastUsed: 2 }, + }, + }; + await fs.writeFile(authPath, JSON.stringify(payload)); + + runEmbeddedAttemptMock.mockResolvedValueOnce( + makeAttempt({ + assistantTexts: ["ok"], + lastAssistant: buildAssistant({ + stopReason: "stop", + content: [{ type: "text", text: "ok" }], + }), + }), + ); + + await runEmbeddedPiAgent({ + sessionId: "session:test", + sessionKey: "agent:test:skip-cooldown", + sessionFile: path.join(workspaceDir, "session.jsonl"), + workspaceDir, + agentDir, + config: makeConfig(), + prompt: "hello", + provider: "openai", + model: "mock-1", + authProfileId: undefined, + authProfileIdSource: "auto", + timeoutMs: 5_000, + runId: "run:skip-cooldown", + }); + + expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(1); + + const stored = JSON.parse( + await fs.readFile(path.join(agentDir, "auth-profiles.json"), "utf-8"), + ) as { usageStats?: Record }; + expect(stored.usageStats?.["openai:p1"]?.cooldownUntil).toBe(now + 60 * 60 * 1000); + expect(typeof stored.usageStats?.["openai:p2"]?.lastUsed).toBe("number"); + } finally { + await fs.rm(agentDir, { recursive: true, force: true }); + await fs.rm(workspaceDir, { recursive: true, force: true }); + } + } finally { + vi.useRealTimers(); + } + }); + + it("skips profiles in cooldown when rotating after failure", async () => { + vi.useFakeTimers(); + try { + const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-")); + const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-")); + const now = Date.now(); + vi.setSystemTime(now); + + try { + const authPath = path.join(agentDir, "auth-profiles.json"); + const payload = { + version: 1, + profiles: { + "openai:p1": { type: "api_key", provider: "openai", key: "sk-one" }, + "openai:p2": { type: "api_key", provider: "openai", key: "sk-two" }, + }, + usageStats: { + "openai:p1": { lastUsed: 1 }, + "openai:p2": { cooldownUntil: now + 60 * 60 * 1000 }, // p2 in cooldown + }, + }; + await fs.writeFile(authPath, JSON.stringify(payload)); + + runEmbeddedAttemptMock + .mockResolvedValueOnce( + makeAttempt({ + assistantTexts: [], + lastAssistant: buildAssistant({ + stopReason: "error", + errorMessage: "rate limit", + }), + }), + ) + .mockResolvedValueOnce( + makeAttempt({ + assistantTexts: ["ok"], + lastAssistant: buildAssistant({ + stopReason: "stop", + content: [{ type: "text", text: "ok" }], + }), + }), + ); + + await runEmbeddedPiAgent({ + sessionId: "session:test", + sessionKey: "agent:test:rotate-skip-cooldown", + sessionFile: path.join(workspaceDir, "session.jsonl"), + workspaceDir, + agentDir, + config: makeConfig(), + prompt: "hello", + provider: "openai", + model: "mock-1", + authProfileId: "openai:p1", + authProfileIdSource: "auto", + timeoutMs: 5_000, + runId: "run:rotate-skip-cooldown", + }); + + expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2); + + const stored = JSON.parse( + await fs.readFile(path.join(agentDir, "auth-profiles.json"), "utf-8"), + ) as { usageStats?: Record }; + expect(typeof stored.usageStats?.["openai:p1"]?.lastUsed).toBe("number"); + } finally { + await fs.rm(agentDir, { recursive: true, force: true }); + await fs.rm(workspaceDir, { recursive: true, force: true }); + } + } finally { + vi.useRealTimers(); + } + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 12bfa218a..a6947c025 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -5,6 +5,7 @@ import { resolveUserPath } from "../../utils.js"; import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js"; import { resolveClawdbotAgentDir } from "../agent-paths.js"; import { + isProfileInCooldown, markAuthProfileFailure, markAuthProfileGood, markAuthProfileUsed, @@ -187,6 +188,10 @@ export async function runEmbeddedPiAgent( let nextIndex = profileIndex + 1; while (nextIndex < profileCandidates.length) { const candidate = profileCandidates[nextIndex]; + if (candidate && isProfileInCooldown(authStore, candidate)) { + nextIndex += 1; + continue; + } try { await applyApiKeyInfo(candidate); profileIndex = nextIndex; @@ -202,7 +207,20 @@ export async function runEmbeddedPiAgent( }; try { - await applyApiKeyInfo(profileCandidates[profileIndex]); + while (profileIndex < profileCandidates.length) { + const candidate = profileCandidates[profileIndex]; + if (candidate && isProfileInCooldown(authStore, candidate)) { + profileIndex += 1; + continue; + } + await applyApiKeyInfo(profileCandidates[profileIndex]); + break; + } + if (profileIndex >= profileCandidates.length) { + throw new Error( + `No available auth profile for ${provider} (all in cooldown or unavailable).`, + ); + } } catch (err) { if (profileCandidates[profileIndex] === lockedProfileId) throw err; const advanced = await advanceAuthProfile(); diff --git a/src/telegram/bot-message-context.sender-prefix.test.ts b/src/telegram/bot-message-context.sender-prefix.test.ts index 12d7b09e5..c7f0e5de9 100644 --- a/src/telegram/bot-message-context.sender-prefix.test.ts +++ b/src/telegram/bot-message-context.sender-prefix.test.ts @@ -49,4 +49,96 @@ describe("buildTelegramMessageContext sender prefix", () => { const body = ctx?.ctxPayload?.Body ?? ""; expect(body).toContain("Alice (42): hello"); }); + + it("sets MessageSid from message_id", async () => { + const ctx = await buildTelegramMessageContext({ + primaryCtx: { + message: { + message_id: 12345, + chat: { id: -99, type: "supergroup", title: "Dev Chat" }, + date: 1700000000, + text: "hello", + from: { id: 42, first_name: "Alice" }, + }, + me: { id: 7, username: "bot" }, + } as never, + allMedia: [], + storeAllowFrom: [], + options: {}, + bot: { + api: { + sendChatAction: vi.fn(), + setMessageReaction: vi.fn(), + }, + } as never, + cfg: { + agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/clawd" } }, + channels: { telegram: {} }, + messages: { groupChat: { mentionPatterns: [] } }, + } as never, + account: { accountId: "default" } as never, + historyLimit: 0, + groupHistories: new Map(), + dmPolicy: "open", + allowFrom: [], + groupAllowFrom: [], + ackReactionScope: "off", + logger: { info: vi.fn() }, + resolveGroupActivation: () => undefined, + resolveGroupRequireMention: () => false, + resolveTelegramGroupConfig: () => ({ + groupConfig: { requireMention: false }, + topicConfig: undefined, + }), + }); + + expect(ctx).not.toBeNull(); + expect(ctx?.ctxPayload?.MessageSid).toBe("12345"); + }); + + it("respects messageIdOverride option", async () => { + const ctx = await buildTelegramMessageContext({ + primaryCtx: { + message: { + message_id: 12345, + chat: { id: -99, type: "supergroup", title: "Dev Chat" }, + date: 1700000000, + text: "hello", + from: { id: 42, first_name: "Alice" }, + }, + me: { id: 7, username: "bot" }, + } as never, + allMedia: [], + storeAllowFrom: [], + options: { messageIdOverride: "67890" }, + bot: { + api: { + sendChatAction: vi.fn(), + setMessageReaction: vi.fn(), + }, + } as never, + cfg: { + agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/clawd" } }, + channels: { telegram: {} }, + messages: { groupChat: { mentionPatterns: [] } }, + } as never, + account: { accountId: "default" } as never, + historyLimit: 0, + groupHistories: new Map(), + dmPolicy: "open", + allowFrom: [], + groupAllowFrom: [], + ackReactionScope: "off", + logger: { info: vi.fn() }, + resolveGroupActivation: () => undefined, + resolveGroupRequireMention: () => false, + resolveTelegramGroupConfig: () => ({ + groupConfig: { requireMention: false }, + topicConfig: undefined, + }), + }); + + expect(ctx).not.toBeNull(); + expect(ctx?.ctxPayload?.MessageSid).toBe("67890"); + }); });