fix: tighten small-model audit guardrails

2026-01-20 23:45:50 +00:00
parent 4fad74738a
commit 51dfd6efdb
6 changed files with 229 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ Docs: https://docs.clawd.bot
 - Matrix: migrate to matrix-bot-sdk with E2EE support, location handling, and group allowlist upgrades. (#1298) — thanks @sibbl.
 - Plugins/UI: let channel plugin metadata drive UI labels/icons and cron channel options. (#1306) — thanks @steipete.
 - Zalouser: add channel dock metadata, config schema, setup wiring, probe, and status issues. (#1219) — thanks @suminhthanh.
 - Security: warn when <=300B models run without sandboxing or with web/browser tools enabled.
 ### Fixes
 - Discovery: shorten Bonjour DNS-SD service type to `_clawdbot-gw._tcp` and update discovery clients/docs.
 - Agents: preserve subagent announce thread/topic routing + queued replies across channels. (#1241) — thanks @gnarco.
--- a/docs/cli/security.md
+++ b/docs/cli/security.md
@@ -21,3 +21,4 @@ clawdbot security audit --fix
 ```
 The audit warns when multiple DM senders share the main session and recommends `session.dmScope="per-channel-peer"` for shared inboxes.
 It also warns when small models (<=300B) are used without sandboxing for all sessions or with web/browser tools enabled.
--- a/docs/gateway/security.md
+++ b/docs/gateway/security.md
@@ -177,6 +177,7 @@ Recommendations:
 - **Use the latest generation, best-tier model** for any bot that can run tools or touch files/networks.
 - **Avoid weaker tiers** (for example, Sonnet or Haiku) for tool-enabled agents or untrusted inboxes.
 - If you must use a smaller model, **reduce blast radius** (read-only tools, strong sandboxing, minimal filesystem access, strict allowlists).
 - When running small models, **enable sandboxing for all sessions** and **disable web_search/web_fetch/browser** unless inputs are tightly controlled.
 ## Reasoning & verbose output in groups
--- a/src/security/audit-extra.ts
+++ b/src/security/audit-extra.ts
@@ -9,6 +9,15 @@ import { resolveNativeSkillsEnabled } from "../config/commands.js";
 import { resolveOAuthDir } from "../config/paths.js";
 import { formatCliCommand } from "../cli/command-format.js";
 import { resolveDefaultAgentId } from "../agents/agent-scope.js";
 import type { AgentToolsConfig } from "../config/types.tools.js";
 import { resolveBrowserConfig } from "../browser/config.js";
 import { isToolAllowedByPolicies } from "../agents/pi-tools.policy.js";
 import { resolveToolProfilePolicy } from "../agents/tool-policy.js";
 import {
  resolveSandboxConfigForAgent,
  resolveSandboxToolPolicyForAgent,
 } from "../agents/sandbox.js";
 import type { SandboxToolPolicy } from "../agents/sandbox/types.js";
 import { INCLUDE_KEY, MAX_INCLUDE_DEPTH } from "../config/includes.js";
 import { normalizeAgentId } from "../routing/session-key.js";
 import {
@@ -29,6 +38,8 @@ export type SecurityAuditFinding = {
  remediation?: string;
 };
 /**
  * Inclusive upper bound, in billions of parameters, for a model to be treated as
  * "small" by the small-model risk audit. Sizes are inferred from the model id.
  */
 const SMALL_MODEL_PARAM_B_MAX = 300;
 function expandTilde(p: string, env: NodeJS.ProcessEnv): string | null {
  if (!p.startsWith("~")) return p;
  const home = typeof env.HOME === "string" && env.HOME.trim() ? env.HOME.trim() : null;
@@ -266,6 +277,20 @@ const WEAK_TIER_MODEL_PATTERNS: Array<{ id: string; re: RegExp; label: string }>
  { id: "anthropic.haiku", re: /\bhaiku\b/i, label: "Haiku tier (smaller model)" },
 ];
 /**
  * Infers a parameter count (in billions) from a model id or name,
  * e.g. `"ollama/mistral-8b"` yields `8`.
  *
  * A size token is `<digits>[.<digits>]b`, optionally prefixed by a single letter
  * (as in `a3b`), and delimited on both sides by a non-alphanumeric character or a
  * string boundary. When several tokens are present, the largest value wins.
  *
  * @param text Model id or display name; matching is case-insensitive.
  * @returns The largest positive size found, or `null` when no size token is present.
  */
 function inferParamBFromIdOrName(text: string): number | null {
  const raw = text.toLowerCase();
  // The trailing delimiter is a lookahead so it is NOT consumed by the match.
  // Otherwise back-to-back tokens that share one separator ("a35b-480b") would
  // lose the second token, because its leading delimiter was already eaten.
  const sizeToken = /(?:^|[^a-z0-9])[a-z]?(\d+(?:\.\d+)?)b(?=[^a-z0-9]|$)/g;
  let best: number | null = null;
  for (const match of raw.matchAll(sizeToken)) {
    const numRaw = match[1];
    if (!numRaw) continue;
    const value = Number(numRaw);
    // Ignore nonsensical sizes such as "0b".
    if (!Number.isFinite(value) || value <= 0) continue;
    if (best === null || value > best) best = value;
  }
  return best;
 }
 /** Reports whether a model id contains a `gpt-` token starting at a word boundary (case-insensitive). */
 function isGptModel(id: string): boolean {
  const gptToken = /\bgpt-/i;
  return gptToken.test(id);
 }
@@ -363,6 +388,155 @@ export function collectModelHygieneFindings(cfg: ClawdbotConfig): SecurityAuditF
  return findings;
 }
 /**
  * Extracts the `<id>` segment from a source path shaped like `agents.list.<id>.…`.
  *
  * @param source Dotted path describing where a model entry was found.
  * @returns The id segment (which may be empty), or `null` when the path does not
  *   start with `agents.list.<id>.`.
  */
 function extractAgentIdFromSource(source: string): string | null {
  const hit = /^agents\.list\.([^.]*)\./.exec(source);
  if (hit === null) return null;
  return hit[1] ?? null;
 }
 /**
  * Reduces a tools config to just its allow/deny lists.
  *
  * @param config Object that may carry `allow` and/or `deny` arrays.
  * @returns `{ allow, deny }` (non-array values become `undefined`), or `null` when
  *   the config is missing or neither field is an array.
  */
 function pickToolPolicy(config?: { allow?: string[]; deny?: string[] }): SandboxToolPolicy | null {
  if (!config) return null;
  const { allow: rawAllow, deny: rawDeny } = config;
  const allow = Array.isArray(rawAllow) ? rawAllow : undefined;
  const deny = Array.isArray(rawDeny) ? rawDeny : undefined;
  return allow || deny ? { allow, deny } : null;
 }
 /**
  * Builds the ordered list of tool policies that apply to one agent.
  *
  * Order: tool-profile policy (agent profile falling back to the global one),
  * global allow/deny, agent allow/deny, and — only when `sandboxMode` is `"all"` —
  * the sandbox tool policy resolved for the agent. Missing layers are omitted.
  *
  * @param params.cfg Full configuration.
  * @param params.agentTools Per-agent tools config, if any.
  * @param params.sandboxMode Sandbox mode in effect for the agent.
  * @param params.agentId Agent id, or null/undefined when not agent-specific.
  * @returns The applicable policies in the order described above.
  */
 function resolveToolPolicies(params: {
  cfg: ClawdbotConfig;
  agentTools?: AgentToolsConfig;
  sandboxMode?: "off" | "non-main" | "all";
  agentId?: string | null;
 }): SandboxToolPolicy[] {
  const { cfg, agentTools, sandboxMode, agentId } = params;
  const collected: SandboxToolPolicy[] = [];
  // Appends a layer only when it actually resolved to a policy.
  const keep = (policy: SandboxToolPolicy | null | undefined): void => {
    if (policy) collected.push(policy);
  };

  keep(resolveToolProfilePolicy(agentTools?.profile ?? cfg.tools?.profile));
  keep(pickToolPolicy(cfg.tools ?? undefined));
  keep(pickToolPolicy(agentTools));

  if (sandboxMode === "all") {
    // The sandbox policy is added unconditionally in this mode.
    collected.push(resolveSandboxToolPolicyForAgent(cfg, agentId ?? undefined));
  }
  return collected;
 }
 /**
  * Reports whether any web-search API key is available, either in
  * `tools.web.search` config (`apiKey` or `perplexity.apiKey`) or in the
  * `BRAVE_API_KEY`, `PERPLEXITY_API_KEY` or `OPENROUTER_API_KEY` environment variables.
  *
  * @param cfg Full configuration.
  * @param env Environment variables to inspect.
  * @returns `true` when at least one of those values is truthy.
  */
 function hasWebSearchKey(cfg: ClawdbotConfig, env: NodeJS.ProcessEnv): boolean {
  const searchCfg = cfg.tools?.web?.search;
  const keyCandidates = [
    searchCfg?.apiKey,
    searchCfg?.perplexity?.apiKey,
    env.BRAVE_API_KEY,
    env.PERPLEXITY_API_KEY,
    env.OPENROUTER_API_KEY,
  ];
  return keyCandidates.some((candidate) => Boolean(candidate));
 }
 /**
  * Decides whether web search counts as enabled: an explicit boolean
  * `tools.web.search.enabled` wins; otherwise it is enabled exactly when a
  * search API key is available.
  */
 function isWebSearchEnabled(cfg: ClawdbotConfig, env: NodeJS.ProcessEnv): boolean {
  const explicit = cfg.tools?.web?.search?.enabled;
  return typeof explicit === "boolean" ? explicit : hasWebSearchKey(cfg, env);
 }
 /** Web fetch counts as enabled unless `tools.web.fetch.enabled` is explicitly `false`. */
 function isWebFetchEnabled(cfg: ClawdbotConfig): boolean {
  return cfg.tools?.web?.fetch?.enabled !== false;
 }
 /**
  * Reports whether the browser tool is enabled according to the resolved browser
  * config. If resolving the config throws, the browser is treated as enabled.
  */
 function isBrowserEnabled(cfg: ClawdbotConfig): boolean {
  let enabled = true;
  try {
    enabled = resolveBrowserConfig(cfg.browser).enabled;
  } catch {
    // Keep the default: an unresolvable config is treated as "enabled".
  }
  return enabled;
 }
 /**
  * Audits configured models whose inferred size is at most
  * `SMALL_MODEL_PARAM_B_MAX` billion parameters.
  *
  * Entries whose source mentions `imageModel` are ignored. For each remaining small
  * model the sandbox mode and the web tools (`web_search`, `web_fetch`, `browser`)
  * that are both enabled and allowed by the applicable tool policies are resolved.
  * A model is "ok" only when the sandbox mode is `"all"` and no web tool is exposed.
  *
  * @param params.cfg Full configuration.
  * @param params.env Environment variables (used for web-search key detection).
  * @returns An empty list when no small model is found; otherwise a single
  *   `models.small_params` finding — `critical` if any model is unsafe, else `info`.
  */
 export function collectSmallModelRiskFindings(params: {
  cfg: ClawdbotConfig;
  env: NodeJS.ProcessEnv;
 }): SecurityAuditFinding[] {
  const { cfg, env } = params;

  const textModels = collectModels(cfg).filter((model) => !model.source.includes("imageModel"));
  if (textModels.length === 0) return [];

  // Keep only models with a recognizable size at or below the threshold.
  const smallModels = textModels.flatMap((model) => {
    const paramB = inferParamBFromIdOrName(model.id);
    if (!paramB || paramB > SMALL_MODEL_PARAM_B_MAX) return [];
    return [{ ...model, paramB }];
  });
  if (smallModels.length === 0) return [];

  // Each gate pairs a tool name with its config-level on/off check, in report order.
  const webToolGates: Array<{ tool: string; isEnabled: () => boolean }> = [
    { tool: "web_search", isEnabled: () => isWebSearchEnabled(cfg, env) },
    { tool: "web_fetch", isEnabled: () => isWebFetchEnabled(cfg) },
    { tool: "browser", isEnabled: () => isBrowserEnabled(cfg) },
  ];

  const allExposed = new Set<string>();
  let anyUnsafe = false;

  const reportLines = smallModels.map((model) => {
    const agentId = extractAgentIdFromSource(model.source);
    const sandboxMode = resolveSandboxConfigForAgent(cfg, agentId ?? undefined).mode;
    const agentTools = agentId
      ? cfg.agents?.list?.find((agent) => agent?.id === agentId)?.tools
      : undefined;
    const policies = resolveToolPolicies({ cfg, agentTools, sandboxMode, agentId });

    // A tool is exposed when it is switched on AND every policy layer allows it.
    const exposed = webToolGates
      .filter((gate) => gate.isEnabled() && isToolAllowedByPolicies(gate.tool, policies))
      .map((gate) => gate.tool);
    exposed.forEach((tool) => allExposed.add(tool));

    const isSafe = sandboxMode === "all" && exposed.length === 0;
    if (!isSafe) anyUnsafe = true;

    const status = isSafe ? "ok" : "unsafe";
    const sandboxPart = `sandbox=${sandboxMode}`;
    const webPart = exposed.length > 0 ? ` web=[${exposed.join(", ")}]` : " web=[off]";
    return `- ${model.id} (${model.paramB}B) @ ${model.source} (${status}; ${sandboxPart};${webPart})`;
  });

  const exposedTools = [...allExposed];
  const exposureSummary =
    exposedTools.length > 0
      ? `Uncontrolled input tools allowed: ${exposedTools.join(", ")}.`
      : "No web/browser tools detected for these models.";

  const detail = [
    `Small models (<=${SMALL_MODEL_PARAM_B_MAX}B params) detected:`,
    ...reportLines,
    exposureSummary,
    "Small models are not recommended for untrusted inputs.",
  ].join("\n");

  return [
    {
      checkId: "models.small_params",
      severity: anyUnsafe ? "critical" : "info",
      title: "Small models require sandboxing and web tools disabled",
      detail,
      remediation:
        'If you must use small models, enable sandboxing for all sessions (agents.defaults.sandbox.mode="all") and disable web_search/web_fetch/browser (tools.deny=["group:web","browser"]).',
    },
  ];
 }
 export async function collectPluginsTrustFindings(params: {
  cfg: ClawdbotConfig;
  stateDir: string;
--- a/src/security/audit.test.ts
+++ b/src/security/audit.test.ts
@@ -71,6 +71,56 @@ describe("security audit", () => {
    );
  });
  it("warns when small models are paired with web/browser tools", async () => {
    // Small model, default sandbox settings, every web-facing tool switched on.
    const smallModelWithWebTools: ClawdbotConfig = {
      agents: { defaults: { model: { primary: "ollama/mistral-8b" } } },
      tools: { web: { search: { enabled: true }, fetch: { enabled: true } } },
      browser: { enabled: true },
    };

    const { findings } = await runSecurityAudit({
      config: smallModelWithWebTools,
      includeFilesystem: false,
      includeChannelSecurity: false,
    });
    const finding = findings.find((candidate) => candidate.checkId === "models.small_params");

    expect(finding?.severity).toBe("critical");
    // The detail must name the model and each exposed tool.
    for (const expected of ["mistral-8b", "web_search", "web_fetch", "browser"]) {
      expect(finding?.detail).toContain(expected);
    }
  });
  it("treats small models as safe when sandbox is on and web tools are disabled", async () => {
    // Small model, sandbox forced for all sessions, every web-facing tool switched off.
    const lockedDownSmallModel: ClawdbotConfig = {
      agents: { defaults: { model: { primary: "ollama/mistral-8b" }, sandbox: { mode: "all" } } },
      tools: { web: { search: { enabled: false }, fetch: { enabled: false } } },
      browser: { enabled: false },
    };

    const { findings } = await runSecurityAudit({
      config: lockedDownSmallModel,
      includeFilesystem: false,
      includeChannelSecurity: false,
    });
    const finding = findings.find((candidate) => candidate.checkId === "models.small_params");

    expect(finding?.severity).toBe("info");
    for (const expected of ["mistral-8b", "sandbox=all"]) {
      expect(finding?.detail).toContain(expected);
    }
  });
  it("flags tools.elevated allowFrom wildcard as critical", async () => {
    const cfg: ClawdbotConfig = {
      tools: {
--- a/src/security/audit.ts
+++ b/src/security/audit.ts
@@ -14,6 +14,7 @@ import {
  collectHooksHardeningFindings,
  collectIncludeFilePermFindings,
  collectModelHygieneFindings,
  collectSmallModelRiskFindings,
  collectPluginsTrustFindings,
  collectSecretsInConfigFindings,
  collectStateDeepFilesystemFindings,
@@ -805,6 +806,7 @@ export async function runSecurityAudit(opts: SecurityAuditOptions): Promise<Secu
  findings.push(...collectHooksHardeningFindings(cfg));
  findings.push(...collectSecretsInConfigFindings(cfg));
  findings.push(...collectModelHygieneFindings(cfg));
  findings.push(...collectSmallModelRiskFindings({ cfg, env }));
  findings.push(...collectExposureMatrixFindings(cfg));
  const configSnapshot =