feat: audit fixes and documentation improvements (#1762)

* feat: audit fixes and documentation improvements - Refactored model selection to drop legacy fallback and add warning - Improved heartbeat content validation - Added Skill Creation guide - Updated CONTRIBUTING.md with roadmap * style: fix formatting in model-selection.ts * style: fix formatting and improve model selection logic with tests
2026-01-25 14:54:48 +02:00
parent 026def686e
commit 7253bf398d
5 changed files with 187 additions and 237 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -40,3 +40,13 @@ Please include in your PR:
 - [ ] Confirm you understand what the code does

 AI PRs are first-class citizens here. We just want transparency so reviewers know what to look for.
+
+## Current Focus & Roadmap 🗺
+
+We are currently prioritizing:
+- **Stability**: Fixing edge cases in channel connections (WhatsApp/Telegram).
+- **UX**: Improving the onboarding wizard and error messages.
+- **Skills**: Expanding the library of bundled skills and improving the Skill Creation developer experience.
+- **Performance**: Optimizing token usage and compaction logic.
+
+Check the [GitHub Issues](https://github.com/clawdbot/clawdbot/issues) for "good first issue" labels!
--- a/docs/tools/creating-skills.md
+++ b/docs/tools/creating-skills.md
@@ -0,0 +1,41 @@
+# Creating Custom Skills 🛠
+
+Clawdbot is designed to be easily extensible. "Skills" are the primary way to add new capabilities to your assistant.
+
+## What is a Skill?
+A skill is a directory containing a `SKILL.md` file (which provides instructions and tool definitions to the LLM) and optionally some scripts or resources.
+
+## Step-by-Step: Your First Skill
+
+### 1. Create the Directory
+Skills live in your workspace, usually `~/clawd/skills/`. Create a new folder for your skill:
+```bash
+mkdir -p ~/clawd/skills/hello-world
+```
+
+### 2. Define the `SKILL.md`
+Create a `SKILL.md` file in that directory. This file uses YAML frontmatter for metadata and Markdown for instructions.
+
+```markdown
+---
+name: hello_world
+description: A simple skill that says hello.
+---
+
+# Hello World Skill
+When the user asks for a greeting, use the `echo` tool to say "Hello from your custom skill!".
+```
+
+### 3. Add Tools (Optional)
+You can define custom tools in the frontmatter or instruct the agent to use existing system tools (like `bash` or `browser`).
+
+### 4. Refresh Clawdbot
+Ask your agent to "refresh skills" or restart the gateway. Clawdbot will discover the new directory and index the `SKILL.md`.
+
+## Best Practices
+- **Be Concise**: Instruct the model on *what* to do, not how to be an AI.
+- **Safety First**: If your skill uses `bash`, ensure the prompts don't allow arbitrary command injection from untrusted user input.
+- **Test Locally**: Use `clawdbot agent --message "use my new skill"` to test.
+
+## Shared Skills
+You can also browse and contribute skills to [ClawdHub](https://clawdhub.com).
--- a/src/agents/model-selection.test.ts
+++ b/src/agents/model-selection.test.ts
@@ -1,252 +1,139 @@
-import { describe, expect, it } from "vitest";
-
-import type { ClawdbotConfig } from "../config/config.js";
-import { DEFAULT_PROVIDER } from "./defaults.js";
+import { describe, it, expect, vi } from "vitest";
 import {
-  buildAllowedModelSet,
-  modelKey,
  parseModelRef,
-  resolveAllowedModelRef,
-  resolveHooksGmailModel,
+  resolveModelRefFromString,
+  resolveConfiguredModelRef,
+  buildModelAliasIndex,
+  normalizeProviderId,
+  modelKey,
 } from "./model-selection.js";
+import type { ClawdbotConfig } from "../config/config.js";

-const catalog = [
-  {
-    provider: "openai",
-    id: "gpt-4",
-    name: "GPT-4",
-  },
-];
-
-describe("buildAllowedModelSet", () => {
-  it("always allows the configured default model", () => {
-    const cfg = {
-      agents: {
-        defaults: {
-          models: {
-            "openai/gpt-4": { alias: "gpt4" },
-          },
-        },
-      },
-    } as ClawdbotConfig;
-
-    const allowed = buildAllowedModelSet({
-      cfg,
-      catalog,
-      defaultProvider: "claude-cli",
-      defaultModel: "opus-4.5",
-    });
-
-    expect(allowed.allowAny).toBe(false);
-    expect(allowed.allowedKeys.has(modelKey("openai", "gpt-4"))).toBe(true);
-    expect(allowed.allowedKeys.has(modelKey("claude-cli", "opus-4.5"))).toBe(true);
-  });
-
-  it("includes the default model when no allowlist is set", () => {
-    const cfg = {
-      agents: { defaults: {} },
-    } as ClawdbotConfig;
-
-    const allowed = buildAllowedModelSet({
-      cfg,
-      catalog,
-      defaultProvider: "claude-cli",
-      defaultModel: "opus-4.5",
-    });
-
-    expect(allowed.allowAny).toBe(true);
-    expect(allowed.allowedKeys.has(modelKey("openai", "gpt-4"))).toBe(true);
-    expect(allowed.allowedKeys.has(modelKey("claude-cli", "opus-4.5"))).toBe(true);
-  });
-
-  it("allows explicit custom providers from models.providers", () => {
-    const cfg = {
-      agents: {
-        defaults: {
-          models: {
-            "moonshot/kimi-k2-0905-preview": { alias: "kimi" },
-          },
-        },
-      },
-      models: {
-        mode: "merge",
-        providers: {
-          moonshot: {
-            baseUrl: "https://api.moonshot.ai/v1",
-            apiKey: "x",
-            api: "openai-completions",
-            models: [{ id: "kimi-k2-0905-preview", name: "Kimi" }],
-          },
-        },
-      },
-    } as ClawdbotConfig;
-
-    const allowed = buildAllowedModelSet({
-      cfg,
-      catalog: [],
-      defaultProvider: "anthropic",
-      defaultModel: "claude-opus-4-5",
-    });
-
-    expect(allowed.allowAny).toBe(false);
-    expect(allowed.allowedKeys.has(modelKey("moonshot", "kimi-k2-0905-preview"))).toBe(true);
-  });
-});
-
-describe("parseModelRef", () => {
-  it("normalizes anthropic/opus-4.5 to claude-opus-4-5", () => {
-    const ref = parseModelRef("anthropic/opus-4.5", "anthropic");
-    expect(ref).toEqual({
-      provider: "anthropic",
-      model: "claude-opus-4-5",
+describe("model-selection", () => {
+  describe("normalizeProviderId", () => {
+    it("should normalize provider names", () => {
+      expect(normalizeProviderId("Anthropic")).toBe("anthropic");
+      expect(normalizeProviderId("Z.ai")).toBe("zai");
+      expect(normalizeProviderId("z-ai")).toBe("zai");
+      expect(normalizeProviderId("OpenCode-Zen")).toBe("opencode");
+      expect(normalizeProviderId("qwen")).toBe("qwen-portal");
    });
  });

-  it("normalizes google gemini 3 models to preview ids", () => {
-    expect(parseModelRef("google/gemini-3-pro", "anthropic")).toEqual({
-      provider: "google",
-      model: "gemini-3-pro-preview",
-    });
-    expect(parseModelRef("google/gemini-3-flash", "anthropic")).toEqual({
-      provider: "google",
-      model: "gemini-3-flash-preview",
-    });
-  });
-
-  it("normalizes default-provider google models", () => {
-    expect(parseModelRef("gemini-3-pro", "google")).toEqual({
-      provider: "google",
-      model: "gemini-3-pro-preview",
-    });
-  });
-});
-
-describe("resolveHooksGmailModel", () => {
-  it("returns null when hooks.gmail.model is not set", () => {
-    const cfg = {} satisfies ClawdbotConfig;
-    const result = resolveHooksGmailModel({
-      cfg,
-      defaultProvider: DEFAULT_PROVIDER,
-    });
-    expect(result).toBeNull();
-  });
-
-  it("returns null when hooks.gmail.model is empty", () => {
-    const cfg = {
-      hooks: { gmail: { model: "" } },
-    } satisfies ClawdbotConfig;
-    const result = resolveHooksGmailModel({
-      cfg,
-      defaultProvider: DEFAULT_PROVIDER,
-    });
-    expect(result).toBeNull();
-  });
-
-  it("parses provider/model from hooks.gmail.model", () => {
-    const cfg = {
-      hooks: { gmail: { model: "openrouter/meta-llama/llama-3.3-70b:free" } },
-    } satisfies ClawdbotConfig;
-    const result = resolveHooksGmailModel({
-      cfg,
-      defaultProvider: DEFAULT_PROVIDER,
-    });
-    expect(result).toEqual({
-      provider: "openrouter",
-      model: "meta-llama/llama-3.3-70b:free",
-    });
-  });
-
-  it("resolves alias from agent.models", () => {
-    const cfg = {
-      agents: {
-        defaults: {
-          models: {
-            "anthropic/claude-sonnet-4-1": { alias: "Sonnet" },
-          },
-        },
-      },
-      hooks: { gmail: { model: "Sonnet" } },
-    } satisfies ClawdbotConfig;
-    const result = resolveHooksGmailModel({
-      cfg,
-      defaultProvider: DEFAULT_PROVIDER,
-    });
-    expect(result).toEqual({
-      provider: "anthropic",
-      model: "claude-sonnet-4-1",
-    });
-  });
-
-  it("uses default provider when model omits provider", () => {
-    const cfg = {
-      hooks: { gmail: { model: "claude-haiku-3-5" } },
-    } satisfies ClawdbotConfig;
-    const result = resolveHooksGmailModel({
-      cfg,
-      defaultProvider: "anthropic",
-    });
-    expect(result).toEqual({
-      provider: "anthropic",
-      model: "claude-haiku-3-5",
-    });
-  });
-});
-
-describe("resolveAllowedModelRef", () => {
-  it("resolves aliases when allowed", () => {
-    const cfg = {
-      agents: {
-        defaults: {
-          models: {
-            "anthropic/claude-sonnet-4-1": { alias: "Sonnet" },
-          },
-        },
-      },
-    } satisfies ClawdbotConfig;
-    const resolved = resolveAllowedModelRef({
-      cfg,
-      catalog: [
-        {
-          provider: "anthropic",
-          id: "claude-sonnet-4-1",
-          name: "Sonnet",
-        },
-      ],
-      raw: "Sonnet",
-      defaultProvider: "anthropic",
-      defaultModel: "claude-opus-4-5",
-    });
-    expect("error" in resolved).toBe(false);
-    if ("ref" in resolved) {
-      expect(resolved.ref).toEqual({
+  describe("parseModelRef", () => {
+    it("should parse full model refs", () => {
+      expect(parseModelRef("anthropic/claude-3-5-sonnet", "openai")).toEqual({
        provider: "anthropic",
-        model: "claude-sonnet-4-1",
+        model: "claude-3-5-sonnet",
      });
-    }
+    });
+
+    it("should use default provider if none specified", () => {
+      expect(parseModelRef("claude-3-5-sonnet", "anthropic")).toEqual({
+        provider: "anthropic",
+        model: "claude-3-5-sonnet",
+      });
+    });
+
+    it("should return null for empty strings", () => {
+      expect(parseModelRef("", "anthropic")).toBeNull();
+      expect(parseModelRef("  ", "anthropic")).toBeNull();
+    });
+
+    it("should handle invalid slash usage", () => {
+      expect(parseModelRef("/", "anthropic")).toBeNull();
+      expect(parseModelRef("anthropic/", "anthropic")).toBeNull();
+      expect(parseModelRef("/model", "anthropic")).toBeNull();
+    });
  });

-  it("rejects disallowed models", () => {
-    const cfg = {
-      agents: {
-        defaults: {
-          models: {
-            "openai/gpt-4": { alias: "GPT4" },
+  describe("buildModelAliasIndex", () => {
+    it("should build alias index from config", () => {
+      const cfg: Partial<ClawdbotConfig> = {
+        agents: {
+          defaults: {
+            models: {
+              "anthropic/claude-3-5-sonnet": { alias: "fast" },
+              "openai/gpt-4o": { alias: "smart" },
+            },
          },
        },
-      },
-    } satisfies ClawdbotConfig;
-    const resolved = resolveAllowedModelRef({
-      cfg,
-      catalog: [
-        { provider: "openai", id: "gpt-4", name: "GPT-4" },
-        { provider: "anthropic", id: "claude-sonnet-4-1", name: "Sonnet" },
-      ],
-      raw: "anthropic/claude-sonnet-4-1",
-      defaultProvider: "openai",
-      defaultModel: "gpt-4",
+      };
+
+      const index = buildModelAliasIndex({
+        cfg: cfg as ClawdbotConfig,
+        defaultProvider: "anthropic",
+      });
+
+      expect(index.byAlias.get("fast")?.ref).toEqual({
+        provider: "anthropic",
+        model: "claude-3-5-sonnet",
+      });
+      expect(index.byAlias.get("smart")?.ref).toEqual({ provider: "openai", model: "gpt-4o" });
+      expect(index.byKey.get(modelKey("anthropic", "claude-3-5-sonnet"))).toEqual(["fast"]);
    });
-    expect(resolved).toEqual({
-      error: "model not allowed: anthropic/claude-sonnet-4-1",
+  });
+
+  describe("resolveModelRefFromString", () => {
+    it("should resolve from string with alias", () => {
+      const index = {
+        byAlias: new Map([
+          ["fast", { alias: "fast", ref: { provider: "anthropic", model: "sonnet" } }],
+        ]),
+        byKey: new Map(),
+      };
+
+      const resolved = resolveModelRefFromString({
+        raw: "fast",
+        defaultProvider: "openai",
+        aliasIndex: index,
+      });
+
+      expect(resolved?.ref).toEqual({ provider: "anthropic", model: "sonnet" });
+      expect(resolved?.alias).toBe("fast");
+    });
+
+    it("should resolve direct ref if no alias match", () => {
+      const resolved = resolveModelRefFromString({
+        raw: "openai/gpt-4",
+        defaultProvider: "anthropic",
+      });
+      expect(resolved?.ref).toEqual({ provider: "openai", model: "gpt-4" });
+    });
+  });
+
+  describe("resolveConfiguredModelRef", () => {
+    it("should fall back to anthropic and warn if provider is missing for non-alias", () => {
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+      const cfg: Partial<ClawdbotConfig> = {
+        agents: {
+          defaults: {
+            model: "claude-3-5-sonnet",
+          },
+        },
+      };
+
+      const result = resolveConfiguredModelRef({
+        cfg: cfg as ClawdbotConfig,
+        defaultProvider: "google",
+        defaultModel: "gemini-pro",
+      });
+
+      expect(result).toEqual({ provider: "anthropic", model: "claude-3-5-sonnet" });
+      expect(warnSpy).toHaveBeenCalledWith(
+        expect.stringContaining('Falling back to "anthropic/claude-3-5-sonnet"'),
+      );
+      warnSpy.mockRestore();
+    });
+
+    it("should use default provider/model if config is empty", () => {
+      const cfg: Partial<ClawdbotConfig> = {};
+      const result = resolveConfiguredModelRef({
+        cfg: cfg as ClawdbotConfig,
+        defaultProvider: "openai",
+        defaultModel: "gpt-4",
+      });
+      expect(result).toEqual({ provider: "openai", model: "gpt-4" });
    });
  });
 });
--- a/src/agents/model-selection.ts
+++ b/src/agents/model-selection.ts
@@ -131,14 +131,24 @@ export function resolveConfiguredModelRef(params: {
      cfg: params.cfg,
      defaultProvider: params.defaultProvider,
    });
+    if (!trimmed.includes("/")) {
+      const aliasKey = normalizeAliasKey(trimmed);
+      const aliasMatch = aliasIndex.byAlias.get(aliasKey);
+      if (aliasMatch) return aliasMatch.ref;
+
+      // Default to anthropic if no provider is specified, but warn as this is deprecated.
+      console.warn(
+        `[clawdbot] Model "${trimmed}" specified without provider. Falling back to "anthropic/${trimmed}". Please use "anthropic/${trimmed}" in your config.`,
+      );
+      return { provider: "anthropic", model: trimmed };
+    }
+
    const resolved = resolveModelRefFromString({
      raw: trimmed,
      defaultProvider: params.defaultProvider,
      aliasIndex,
    });
    if (resolved) return resolved.ref;
-    // TODO(steipete): drop this fallback once provider-less agents.defaults.model is fully deprecated.
-    return { provider: "anthropic", model: trimmed };
  }
  return { provider: params.defaultProvider, model: params.defaultModel };
 }
--- a/src/auto-reply/heartbeat.ts
+++ b/src/auto-reply/heartbeat.ts
@@ -32,6 +32,8 @@ export function isHeartbeatContentEffectivelyEmpty(content: string | undefined |
    // This intentionally does NOT skip lines like "#TODO" or "#hashtag" which might be content
    // (Those aren't valid markdown headers - ATX headers require space after #)
    if (/^#+(\s|$)/.test(trimmed)) continue;
+    // Skip empty markdown list items like "- [ ]" or "* [ ]" or just "- "
+    if (/^[-*+]\s*(\[[\sXx]?\]\s*)?$/.test(trimmed)) continue;
    // Found a non-empty, non-comment line - there's actionable content
    return false;
  }