feat: audit fixes and documentation improvements (#1762)

* feat: audit fixes and documentation improvements
  - Refactored model selection to drop legacy fallback and add warning
  - Improved heartbeat content validation
  - Added Skill Creation guide
  - Updated CONTRIBUTING.md with roadmap
* style: fix formatting in model-selection.ts
* style: fix formatting and improve model selection logic with tests
2026-01-25 14:54:48 +02:00
parent 026def686e
commit 7253bf398d
5 changed files with 187 additions and 237 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -40,3 +40,13 @@ Please include in your PR:
 - [ ] Confirm you understand what the code does
 AI PRs are first-class citizens here. We just want transparency so reviewers know what to look for.
 ## Current Focus & Roadmap 🗺
 We are currently prioritizing:
 - **Stability**: Fixing edge cases in channel connections (WhatsApp/Telegram).
 - **UX**: Improving the onboarding wizard and error messages.
 - **Skills**: Expanding the library of bundled skills and improving the Skill Creation developer experience.
 - **Performance**: Optimizing token usage and compaction logic.
 Check [GitHub Issues](https://github.com/clawdbot/clawdbot/issues) for issues labeled "good first issue"!
--- a/docs/tools/creating-skills.md
+++ b/docs/tools/creating-skills.md
@@ -0,0 +1,41 @@
 # Creating Custom Skills 🛠
 Clawdbot is designed to be easily extensible. "Skills" are the primary way to add new capabilities to your assistant.
 ## What is a Skill?
 A skill is a directory containing a `SKILL.md` file (which provides instructions and tool definitions to the LLM) and optionally some scripts or resources.
 ## Step-by-Step: Your First Skill
 ### 1. Create the Directory
 Skills live in your workspace, usually `~/clawd/skills/`. Create a new folder for your skill:
 ```bash
 mkdir -p ~/clawd/skills/hello-world
 ```
 ### 2. Define the `SKILL.md`
 Create a `SKILL.md` file in that directory. This file uses YAML frontmatter for metadata and Markdown for instructions.
 ```markdown
 ---
 name: hello-world
 description: A simple skill that says hello.
 ---
 # Hello World Skill
 When the user asks for a greeting, use the `echo` tool to say "Hello from your custom skill!".
 ```
 ### 3. Add Tools (Optional)
 You can define custom tools in the frontmatter or instruct the agent to use existing system tools (like `bash` or `browser`).
 ### 4. Refresh Clawdbot
 Ask your agent to "refresh skills" or restart the gateway. Clawdbot will discover the new directory and index the `SKILL.md`.
 ## Best Practices
 - **Be Concise**: Instruct the model on *what* to do, not how to be an AI.
 - **Safety First**: If your skill uses `bash`, ensure the prompts don't allow arbitrary command injection from untrusted user input.
 - **Test Locally**: Use `clawdbot agent --message "use my new skill"` to test.
 ## Shared Skills
 You can also browse shared skills on [ClawdHub](https://clawdhub.com) and contribute your own.
--- a/src/agents/model-selection.test.ts
+++ b/src/agents/model-selection.test.ts
@@ -1,252 +1,139 @@
-import { describe, expect, it } from "vitest";
+import { describe, it, expect, vi } from "vitest";
 import type { ClawdbotConfig } from "../config/config.js";
 import { DEFAULT_PROVIDER } from "./defaults.js";
 import {
  buildAllowedModelSet,
  modelKey,
  parseModelRef,
-  resolveAllowedModelRef,
+  resolveModelRefFromString,
-  resolveHooksGmailModel,
+  resolveConfiguredModelRef,
  buildModelAliasIndex,
  normalizeProviderId,
  modelKey,
 } from "./model-selection.js";
 import type { ClawdbotConfig } from "../config/config.js";
-const catalog = [
+describe("model-selection", () => {
-  {
+  describe("normalizeProviderId", () => {
-    provider: "openai",
+    it("should normalize provider names", () => {
-    id: "gpt-4",
+      expect(normalizeProviderId("Anthropic")).toBe("anthropic");
-    name: "GPT-4",
+      expect(normalizeProviderId("Z.ai")).toBe("zai");
-  },
+      expect(normalizeProviderId("z-ai")).toBe("zai");
-];
+      expect(normalizeProviderId("OpenCode-Zen")).toBe("opencode");
-
+      expect(normalizeProviderId("qwen")).toBe("qwen-portal");
 describe("buildAllowedModelSet", () => {
  it("always allows the configured default model", () => {
    const cfg = {
      agents: {
        defaults: {
          models: {
            "openai/gpt-4": { alias: "gpt4" },
          },
        },
      },
    } as ClawdbotConfig;
    const allowed = buildAllowedModelSet({
      cfg,
      catalog,
      defaultProvider: "claude-cli",
      defaultModel: "opus-4.5",
    });
    expect(allowed.allowAny).toBe(false);
    expect(allowed.allowedKeys.has(modelKey("openai", "gpt-4"))).toBe(true);
    expect(allowed.allowedKeys.has(modelKey("claude-cli", "opus-4.5"))).toBe(true);
  });
  it("includes the default model when no allowlist is set", () => {
    const cfg = {
      agents: { defaults: {} },
    } as ClawdbotConfig;
    const allowed = buildAllowedModelSet({
      cfg,
      catalog,
      defaultProvider: "claude-cli",
      defaultModel: "opus-4.5",
    });
    expect(allowed.allowAny).toBe(true);
    expect(allowed.allowedKeys.has(modelKey("openai", "gpt-4"))).toBe(true);
    expect(allowed.allowedKeys.has(modelKey("claude-cli", "opus-4.5"))).toBe(true);
  });
  it("allows explicit custom providers from models.providers", () => {
    const cfg = {
      agents: {
        defaults: {
          models: {
            "moonshot/kimi-k2-0905-preview": { alias: "kimi" },
          },
        },
      },
      models: {
        mode: "merge",
        providers: {
          moonshot: {
            baseUrl: "https://api.moonshot.ai/v1",
            apiKey: "x",
            api: "openai-completions",
            models: [{ id: "kimi-k2-0905-preview", name: "Kimi" }],
          },
        },
      },
    } as ClawdbotConfig;
    const allowed = buildAllowedModelSet({
      cfg,
      catalog: [],
      defaultProvider: "anthropic",
      defaultModel: "claude-opus-4-5",
    });
    expect(allowed.allowAny).toBe(false);
    expect(allowed.allowedKeys.has(modelKey("moonshot", "kimi-k2-0905-preview"))).toBe(true);
  });
 });
 describe("parseModelRef", () => {
  it("normalizes anthropic/opus-4.5 to claude-opus-4-5", () => {
    const ref = parseModelRef("anthropic/opus-4.5", "anthropic");
    expect(ref).toEqual({
      provider: "anthropic",
      model: "claude-opus-4-5",
    });
  });
-  it("normalizes google gemini 3 models to preview ids", () => {
+  describe("parseModelRef", () => {
-    expect(parseModelRef("google/gemini-3-pro", "anthropic")).toEqual({
+    it("should parse full model refs", () => {
-      provider: "google",
+      expect(parseModelRef("anthropic/claude-3-5-sonnet", "openai")).toEqual({
      model: "gemini-3-pro-preview",
    });
    expect(parseModelRef("google/gemini-3-flash", "anthropic")).toEqual({
      provider: "google",
      model: "gemini-3-flash-preview",
    });
  });
  it("normalizes default-provider google models", () => {
    expect(parseModelRef("gemini-3-pro", "google")).toEqual({
      provider: "google",
      model: "gemini-3-pro-preview",
    });
  });
 });
 describe("resolveHooksGmailModel", () => {
  it("returns null when hooks.gmail.model is not set", () => {
    const cfg = {} satisfies ClawdbotConfig;
    const result = resolveHooksGmailModel({
      cfg,
      defaultProvider: DEFAULT_PROVIDER,
    });
    expect(result).toBeNull();
  });
  it("returns null when hooks.gmail.model is empty", () => {
    const cfg = {
      hooks: { gmail: { model: "" } },
    } satisfies ClawdbotConfig;
    const result = resolveHooksGmailModel({
      cfg,
      defaultProvider: DEFAULT_PROVIDER,
    });
    expect(result).toBeNull();
  });
  it("parses provider/model from hooks.gmail.model", () => {
    const cfg = {
      hooks: { gmail: { model: "openrouter/meta-llama/llama-3.3-70b:free" } },
    } satisfies ClawdbotConfig;
    const result = resolveHooksGmailModel({
      cfg,
      defaultProvider: DEFAULT_PROVIDER,
    });
    expect(result).toEqual({
      provider: "openrouter",
      model: "meta-llama/llama-3.3-70b:free",
    });
  });
  it("resolves alias from agent.models", () => {
    const cfg = {
      agents: {
        defaults: {
          models: {
            "anthropic/claude-sonnet-4-1": { alias: "Sonnet" },
          },
        },
      },
      hooks: { gmail: { model: "Sonnet" } },
    } satisfies ClawdbotConfig;
    const result = resolveHooksGmailModel({
      cfg,
      defaultProvider: DEFAULT_PROVIDER,
    });
    expect(result).toEqual({
      provider: "anthropic",
      model: "claude-sonnet-4-1",
    });
  });
  it("uses default provider when model omits provider", () => {
    const cfg = {
      hooks: { gmail: { model: "claude-haiku-3-5" } },
    } satisfies ClawdbotConfig;
    const result = resolveHooksGmailModel({
      cfg,
      defaultProvider: "anthropic",
    });
    expect(result).toEqual({
      provider: "anthropic",
      model: "claude-haiku-3-5",
    });
  });
 });
 describe("resolveAllowedModelRef", () => {
  it("resolves aliases when allowed", () => {
    const cfg = {
      agents: {
        defaults: {
          models: {
            "anthropic/claude-sonnet-4-1": { alias: "Sonnet" },
          },
        },
      },
    } satisfies ClawdbotConfig;
    const resolved = resolveAllowedModelRef({
      cfg,
      catalog: [
        {
          provider: "anthropic",
          id: "claude-sonnet-4-1",
          name: "Sonnet",
        },
      ],
      raw: "Sonnet",
      defaultProvider: "anthropic",
      defaultModel: "claude-opus-4-5",
    });
    expect("error" in resolved).toBe(false);
    if ("ref" in resolved) {
      expect(resolved.ref).toEqual({
        provider: "anthropic",
-        model: "claude-sonnet-4-1",
+        model: "claude-3-5-sonnet",
      });
-    }
+    });
    it("should use default provider if none specified", () => {
      expect(parseModelRef("claude-3-5-sonnet", "anthropic")).toEqual({
        provider: "anthropic",
        model: "claude-3-5-sonnet",
      });
    });
    it("should return null for empty strings", () => {
      expect(parseModelRef("", "anthropic")).toBeNull();
      expect(parseModelRef("  ", "anthropic")).toBeNull();
    });
    it("should handle invalid slash usage", () => {
      expect(parseModelRef("/", "anthropic")).toBeNull();
      expect(parseModelRef("anthropic/", "anthropic")).toBeNull();
      expect(parseModelRef("/model", "anthropic")).toBeNull();
    });
  });
-  it("rejects disallowed models", () => {
+  describe("buildModelAliasIndex", () => {
-    const cfg = {
+    it("should build alias index from config", () => {
-      agents: {
+      const cfg: Partial<ClawdbotConfig> = {
-        defaults: {
+        agents: {
-          models: {
+          defaults: {
-            "openai/gpt-4": { alias: "GPT4" },
+            models: {
              "anthropic/claude-3-5-sonnet": { alias: "fast" },
              "openai/gpt-4o": { alias: "smart" },
            },
          },
        },
-      },
+      };
-    } satisfies ClawdbotConfig;
+
-    const resolved = resolveAllowedModelRef({
+      const index = buildModelAliasIndex({
-      cfg,
+        cfg: cfg as ClawdbotConfig,
-      catalog: [
+        defaultProvider: "anthropic",
-        { provider: "openai", id: "gpt-4", name: "GPT-4" },
+      });
-        { provider: "anthropic", id: "claude-sonnet-4-1", name: "Sonnet" },
+
-      ],
+      expect(index.byAlias.get("fast")?.ref).toEqual({
-      raw: "anthropic/claude-sonnet-4-1",
+        provider: "anthropic",
-      defaultProvider: "openai",
+        model: "claude-3-5-sonnet",
-      defaultModel: "gpt-4",
+      });
      expect(index.byAlias.get("smart")?.ref).toEqual({ provider: "openai", model: "gpt-4o" });
      expect(index.byKey.get(modelKey("anthropic", "claude-3-5-sonnet"))).toEqual(["fast"]);
    });
-    expect(resolved).toEqual({
+  });
-      error: "model not allowed: anthropic/claude-sonnet-4-1",
+
  describe("resolveModelRefFromString", () => {
    it("should resolve from string with alias", () => {
      const index = {
        byAlias: new Map([
          ["fast", { alias: "fast", ref: { provider: "anthropic", model: "sonnet" } }],
        ]),
        byKey: new Map(),
      };
      const resolved = resolveModelRefFromString({
        raw: "fast",
        defaultProvider: "openai",
        aliasIndex: index,
      });
      expect(resolved?.ref).toEqual({ provider: "anthropic", model: "sonnet" });
      expect(resolved?.alias).toBe("fast");
    });
    it("should resolve direct ref if no alias match", () => {
      const resolved = resolveModelRefFromString({
        raw: "openai/gpt-4",
        defaultProvider: "anthropic",
      });
      expect(resolved?.ref).toEqual({ provider: "openai", model: "gpt-4" });
    });
  });
  describe("resolveConfiguredModelRef", () => {
    it("should fall back to anthropic and warn if provider is missing for non-alias", () => {
      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
      const cfg: Partial<ClawdbotConfig> = {
        agents: {
          defaults: {
            model: "claude-3-5-sonnet",
          },
        },
      };
      const result = resolveConfiguredModelRef({
        cfg: cfg as ClawdbotConfig,
        defaultProvider: "google",
        defaultModel: "gemini-pro",
      });
      expect(result).toEqual({ provider: "anthropic", model: "claude-3-5-sonnet" });
      expect(warnSpy).toHaveBeenCalledWith(
        expect.stringContaining('Falling back to "anthropic/claude-3-5-sonnet"'),
      );
      warnSpy.mockRestore();
    });
    it("should use default provider/model if config is empty", () => {
      const cfg: Partial<ClawdbotConfig> = {};
      const result = resolveConfiguredModelRef({
        cfg: cfg as ClawdbotConfig,
        defaultProvider: "openai",
        defaultModel: "gpt-4",
      });
      expect(result).toEqual({ provider: "openai", model: "gpt-4" });
    });
  });
 });
--- a/src/agents/model-selection.ts
+++ b/src/agents/model-selection.ts
@@ -131,14 +131,24 @@ export function resolveConfiguredModelRef(params: {
      cfg: params.cfg,
      defaultProvider: params.defaultProvider,
    });
    if (!trimmed.includes("/")) {
      const aliasKey = normalizeAliasKey(trimmed);
      const aliasMatch = aliasIndex.byAlias.get(aliasKey);
      if (aliasMatch) return aliasMatch.ref;
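      // No alias matched and no provider was given: fall back to the "anthropic" provider
      // (params.defaultProvider is not used here) and warn, because provider-less model ids are deprecated.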
      console.warn(
        `[clawdbot] Model "${trimmed}" specified without provider. Falling back to "anthropic/${trimmed}". Please use "anthropic/${trimmed}" in your config.`,
      );
      return { provider: "anthropic", model: trimmed };
    }
    const resolved = resolveModelRefFromString({
      raw: trimmed,
      defaultProvider: params.defaultProvider,
      aliasIndex,
    });
    if (resolved) return resolved.ref;
    // TODO(steipete): drop this fallback once provider-less agents.defaults.model is fully deprecated.
    return { provider: "anthropic", model: trimmed };
  }
  return { provider: params.defaultProvider, model: params.defaultModel };
 }
--- a/src/auto-reply/heartbeat.ts
+++ b/src/auto-reply/heartbeat.ts
@@ -32,6 +32,8 @@ export function isHeartbeatContentEffectivelyEmpty(content: string | undefined |
    // This intentionally does NOT skip lines like "#TODO" or "#hashtag" which might be content
    // (Those aren't valid markdown headers - ATX headers require space after #)
    if (/^#+(\s|$)/.test(trimmed)) continue;
    // Skip list items with no text: a bare bullet ("-", "*", "+") or a bullet followed only by
    // a checkbox, checked or not ("- [ ]", "* [x]", "+ []").
    if (/^[-*+]\s*(\[[\sXx]?\]\s*)?$/.test(trimmed)) continue;
    // Found a non-empty, non-comment line - there's actionable content
    return false;
  }