fix: tighten small-model audit guardrails

This commit is contained in:
Peter Steinberger
2026-01-20 23:45:50 +00:00
parent 4fad74738a
commit 51dfd6efdb
6 changed files with 229 additions and 0 deletions

View File

@@ -17,6 +17,7 @@ Docs: https://docs.clawd.bot
- Matrix: migrate to matrix-bot-sdk with E2EE support, location handling, and group allowlist upgrades. (#1298) — thanks @sibbl.
- Plugins/UI: let channel plugin metadata drive UI labels/icons and cron channel options. (#1306) — thanks @steipete.
- Zalouser: add channel dock metadata, config schema, setup wiring, probe, and status issues. (#1219) — thanks @suminhthanh.
- Security: warn when <=300B-parameter models run without full sandboxing or with web/browser tools enabled.
### Fixes
- Discovery: shorten Bonjour DNS-SD service type to `_clawdbot-gw._tcp` and update discovery clients/docs.
- Agents: preserve subagent announce thread/topic routing + queued replies across channels. (#1241) — thanks @gnarco.

View File

@@ -21,3 +21,4 @@ clawdbot security audit --fix
```
The audit warns when multiple DM senders share the main session and recommends `session.dmScope="per-channel-peer"` for shared inboxes.
It also warns when small models (<=300B parameters) run without full sandboxing or with web/browser tools enabled.

View File

@@ -177,6 +177,7 @@ Recommendations:
- **Use the latest generation, best-tier model** for any bot that can run tools or touch files/networks.
- **Avoid weaker tiers** (for example, Sonnet or Haiku) for tool-enabled agents or untrusted inboxes.
- If you must use a smaller model, **reduce blast radius** (read-only tools, strong sandboxing, minimal filesystem access, strict allowlists).
- When running small models, **enable sandboxing for all sessions** and **disable web_search/web_fetch/browser** unless inputs are tightly controlled.
## Reasoning & verbose output in groups

View File

@@ -9,6 +9,15 @@ import { resolveNativeSkillsEnabled } from "../config/commands.js";
import { resolveOAuthDir } from "../config/paths.js";
import { formatCliCommand } from "../cli/command-format.js";
import { resolveDefaultAgentId } from "../agents/agent-scope.js";
import type { AgentToolsConfig } from "../config/types.tools.js";
import { resolveBrowserConfig } from "../browser/config.js";
import { isToolAllowedByPolicies } from "../agents/pi-tools.policy.js";
import { resolveToolProfilePolicy } from "../agents/tool-policy.js";
import {
resolveSandboxConfigForAgent,
resolveSandboxToolPolicyForAgent,
} from "../agents/sandbox.js";
import type { SandboxToolPolicy } from "../agents/sandbox/types.js";
import { INCLUDE_KEY, MAX_INCLUDE_DEPTH } from "../config/includes.js";
import { normalizeAgentId } from "../routing/session-key.js";
import {
@@ -29,6 +38,8 @@ export type SecurityAuditFinding = {
remediation?: string;
};
const SMALL_MODEL_PARAM_B_MAX = 300;
function expandTilde(p: string, env: NodeJS.ProcessEnv): string | null {
if (!p.startsWith("~")) return p;
const home = typeof env.HOME === "string" && env.HOME.trim() ? env.HOME.trim() : null;
@@ -266,6 +277,20 @@ const WEAK_TIER_MODEL_PATTERNS: Array<{ id: string; re: RegExp; label: string }>
{ id: "anthropic.haiku", re: /\bhaiku\b/i, label: "Haiku tier (smaller model)" },
];
/**
 * Infer a model's parameter count (in billions) from its id or display name.
 *
 * Scans for tokens like "8b", "70b", or "3.5b" (optionally preceded by a
 * single letter) delimited by non-alphanumeric characters, and returns the
 * largest value found, or null when no size token is present.
 */
function inferParamBFromIdOrName(text: string): number | null {
  const raw = text.toLowerCase();
  // The trailing boundary is a lookahead so it is NOT consumed by the match.
  // The previous consuming group `(?:[^a-z0-9]|$)` swallowed the delimiter,
  // which made the scan skip a size token that immediately followed another
  // match's boundary (e.g. "7b 8b" only yielded 7).
  const matches = raw.matchAll(/(?:^|[^a-z0-9])[a-z]?(\d+(?:\.\d+)?)b(?=[^a-z0-9]|$)/g);
  let best: number | null = null;
  for (const match of matches) {
    const numRaw = match[1];
    if (!numRaw) continue;
    const value = Number(numRaw);
    if (!Number.isFinite(value) || value <= 0) continue;
    // Keep the maximum when several size tokens appear in one name.
    if (best === null || value > best) best = value;
  }
  return best;
}
/** True when the model id names an OpenAI GPT-family model ("gpt-" token). */
function isGptModel(id: string): boolean {
  const gptPattern = /\bgpt-/i;
  return gptPattern.test(id);
}
@@ -363,6 +388,155 @@ export function collectModelHygieneFindings(cfg: ClawdbotConfig): SecurityAuditF
return findings;
}
/**
 * Pull the agent id out of a config source path such as
 * "agents.list.<id>.model...". Returns null for any path that is not an
 * agents.list entry.
 * NOTE(review): presumably `source` uses the agent id (not an array index)
 * as the path segment — verify against collectModels.
 */
function extractAgentIdFromSource(source: string): string | null {
  const agentPath = /^agents\.list\.([^.]*)\./.exec(source);
  if (!agentPath) return null;
  return agentPath[1] ?? null;
}
/**
 * Build a SandboxToolPolicy from a raw allow/deny config. Returns null when
 * the config is absent or supplies neither list; non-array values are
 * treated as missing.
 */
function pickToolPolicy(config?: { allow?: string[]; deny?: string[] }): SandboxToolPolicy | null {
  if (!config) return null;
  let allow: string[] | undefined;
  let deny: string[] | undefined;
  if (Array.isArray(config.allow)) allow = config.allow;
  if (Array.isArray(config.deny)) deny = config.deny;
  // An empty array still counts as an explicit policy.
  return allow || deny ? { allow, deny } : null;
}
/**
 * Collect every tool policy layer that applies to an agent, in evaluation
 * order: tool profile, global tools config, agent-level tools config, and —
 * only when every session is sandboxed (mode "all") — the sandbox policy.
 */
function resolveToolPolicies(params: {
  cfg: ClawdbotConfig;
  agentTools?: AgentToolsConfig;
  sandboxMode?: "off" | "non-main" | "all";
  agentId?: string | null;
}): SandboxToolPolicy[] {
  const { cfg, agentTools, sandboxMode, agentId } = params;
  const collected: SandboxToolPolicy[] = [];
  // Agent-level profile takes precedence over the global one when both are set.
  const profilePolicy = resolveToolProfilePolicy(agentTools?.profile ?? cfg.tools?.profile);
  const globalPolicy = pickToolPolicy(cfg.tools ?? undefined);
  const agentPolicy = pickToolPolicy(agentTools);
  for (const policy of [profilePolicy, globalPolicy, agentPolicy]) {
    if (policy) collected.push(policy);
  }
  if (sandboxMode === "all") {
    collected.push(resolveSandboxToolPolicyForAgent(cfg, agentId ?? undefined));
  }
  return collected;
}
/**
 * Whether any web-search API key is available, either from the config
 * (generic or Perplexity key) or from well-known environment variables.
 */
function hasWebSearchKey(cfg: ClawdbotConfig, env: NodeJS.ProcessEnv): boolean {
  const search = cfg.tools?.web?.search;
  const candidates = [
    search?.apiKey,
    search?.perplexity?.apiKey,
    env.BRAVE_API_KEY,
    env.PERPLEXITY_API_KEY,
    env.OPENROUTER_API_KEY,
  ];
  return candidates.some((key) => Boolean(key));
}
/**
 * Web search is on when explicitly enabled and off when explicitly disabled;
 * with no explicit setting it falls back to key availability.
 */
function isWebSearchEnabled(cfg: ClawdbotConfig, env: NodeJS.ProcessEnv): boolean {
  switch (cfg.tools?.web?.search?.enabled) {
    case true:
      return true;
    case false:
      return false;
    default:
      return hasWebSearchKey(cfg, env);
  }
}
/** Web fetch defaults to on; only an explicit `enabled: false` disables it. */
function isWebFetchEnabled(cfg: ClawdbotConfig): boolean {
  return cfg.tools?.web?.fetch?.enabled !== false;
}
/**
 * Whether the browser tool is enabled. If the browser config cannot be
 * resolved, err on the side of caution and report it as enabled so the
 * audit treats the tool as exposed.
 */
function isBrowserEnabled(cfg: ClawdbotConfig): boolean {
  let enabled = true;
  try {
    enabled = resolveBrowserConfig(cfg.browser).enabled;
  } catch {
    // Fail open for audit purposes: unknown config counts as exposed.
  }
  return enabled;
}
/**
 * Audit check: flag "small" models (<= SMALL_MODEL_PARAM_B_MAX billion
 * parameters, inferred from the model id) that are not fully locked down.
 *
 * A small model counts as safe only when its agent runs with sandbox mode
 * "all" AND none of the web-facing tools (web_search, web_fetch, browser)
 * are reachable. One finding is emitted covering all small models:
 * severity "critical" if any model is unsafe, otherwise "info".
 */
export function collectSmallModelRiskFindings(params: {
  cfg: ClawdbotConfig;
  env: NodeJS.ProcessEnv;
}): SecurityAuditFinding[] {
  const findings: SecurityAuditFinding[] = [];
  // Image models are excluded from this check.
  const models = collectModels(params.cfg).filter((entry) => !entry.source.includes("imageModel"));
  if (models.length === 0) return findings;
  // Keep only models whose id implies <= SMALL_MODEL_PARAM_B_MAX billion params.
  const smallModels = models
    .map((entry) => {
      const paramB = inferParamBFromIdOrName(entry.id);
      if (!paramB || paramB > SMALL_MODEL_PARAM_B_MAX) return null;
      return { ...entry, paramB };
    })
    .filter((entry): entry is { id: string; source: string; paramB: number } => Boolean(entry));
  if (smallModels.length === 0) return findings;
  let hasUnsafe = false;
  const modelLines: string[] = [];
  // Union of web tools exposed across all small models, for the summary line.
  const exposureSet = new Set<string>();
  for (const entry of smallModels) {
    // Agent-scoped settings apply when the model came from agents.list.<id>.*.
    const agentId = extractAgentIdFromSource(entry.source);
    const sandboxMode = resolveSandboxConfigForAgent(params.cfg, agentId ?? undefined).mode;
    const agentTools =
      agentId && params.cfg.agents?.list
        ? params.cfg.agents.list.find((agent) => agent?.id === agentId)?.tools
        : undefined;
    const policies = resolveToolPolicies({
      cfg: params.cfg,
      agentTools,
      sandboxMode,
      agentId,
    });
    // A tool is exposed only when it is enabled AND allowed by every policy layer.
    const exposed: string[] = [];
    if (isWebSearchEnabled(params.cfg, params.env)) {
      if (isToolAllowedByPolicies("web_search", policies)) exposed.push("web_search");
    }
    if (isWebFetchEnabled(params.cfg)) {
      if (isToolAllowedByPolicies("web_fetch", policies)) exposed.push("web_fetch");
    }
    if (isBrowserEnabled(params.cfg)) {
      if (isToolAllowedByPolicies("browser", policies)) exposed.push("browser");
    }
    for (const tool of exposed) exposureSet.add(tool);
    const sandboxLabel = sandboxMode === "all" ? "sandbox=all" : `sandbox=${sandboxMode}`;
    const exposureLabel = exposed.length > 0 ? ` web=[${exposed.join(", ")}]` : " web=[off]";
    // Safe = fully sandboxed with no web-facing tools reachable.
    const safe = sandboxMode === "all" && exposed.length === 0;
    if (!safe) hasUnsafe = true;
    const statusLabel = safe ? "ok" : "unsafe";
    modelLines.push(
      `- ${entry.id} (${entry.paramB}B) @ ${entry.source} (${statusLabel}; ${sandboxLabel};${exposureLabel})`,
    );
  }
  const exposureList = Array.from(exposureSet);
  const exposureDetail =
    exposureList.length > 0
      ? `Uncontrolled input tools allowed: ${exposureList.join(", ")}.`
      : "No web/browser tools detected for these models.";
  // Single aggregate finding; severity reflects the worst model seen.
  findings.push({
    checkId: "models.small_params",
    severity: hasUnsafe ? "critical" : "info",
    title: "Small models require sandboxing and web tools disabled",
    detail:
      `Small models (<=${SMALL_MODEL_PARAM_B_MAX}B params) detected:\n` +
      modelLines.join("\n") +
      `\n` +
      exposureDetail +
      `\n` +
      "Small models are not recommended for untrusted inputs.",
    remediation:
      'If you must use small models, enable sandboxing for all sessions (agents.defaults.sandbox.mode="all") and disable web_search/web_fetch/browser (tools.deny=["group:web","browser"]).',
  });
  return findings;
}
export async function collectPluginsTrustFindings(params: {
cfg: ClawdbotConfig;
stateDir: string;

View File

@@ -71,6 +71,56 @@ describe("security audit", () => {
);
});
// An 8B model with search, fetch, and browser all enabled and no sandbox:
// the audit must raise a critical finding that lists every exposed tool.
it("warns when small models are paired with web/browser tools", async () => {
  const cfg: ClawdbotConfig = {
    agents: { defaults: { model: { primary: "ollama/mistral-8b" } } },
    tools: {
      web: {
        search: { enabled: true },
        fetch: { enabled: true },
      },
    },
    browser: { enabled: true },
  };
  const res = await runSecurityAudit({
    config: cfg,
    includeFilesystem: false,
    includeChannelSecurity: false,
  });
  const finding = res.findings.find((f) => f.checkId === "models.small_params");
  expect(finding?.severity).toBe("critical");
  expect(finding?.detail).toContain("mistral-8b");
  // All three web-facing tools should be reported as exposed.
  expect(finding?.detail).toContain("web_search");
  expect(finding?.detail).toContain("web_fetch");
  expect(finding?.detail).toContain("browser");
});
// Same 8B model, but sandbox mode "all" and every web tool disabled:
// the finding is still emitted, downgraded to informational.
it("treats small models as safe when sandbox is on and web tools are disabled", async () => {
  const cfg: ClawdbotConfig = {
    agents: { defaults: { model: { primary: "ollama/mistral-8b" }, sandbox: { mode: "all" } } },
    tools: {
      web: {
        search: { enabled: false },
        fetch: { enabled: false },
      },
    },
    browser: { enabled: false },
  };
  const res = await runSecurityAudit({
    config: cfg,
    includeFilesystem: false,
    includeChannelSecurity: false,
  });
  const finding = res.findings.find((f) => f.checkId === "models.small_params");
  expect(finding?.severity).toBe("info");
  expect(finding?.detail).toContain("mistral-8b");
  expect(finding?.detail).toContain("sandbox=all");
});
it("flags tools.elevated allowFrom wildcard as critical", async () => {
const cfg: ClawdbotConfig = {
tools: {

View File

@@ -14,6 +14,7 @@ import {
collectHooksHardeningFindings,
collectIncludeFilePermFindings,
collectModelHygieneFindings,
collectSmallModelRiskFindings,
collectPluginsTrustFindings,
collectSecretsInConfigFindings,
collectStateDeepFilesystemFindings,
@@ -805,6 +806,7 @@ export async function runSecurityAudit(opts: SecurityAuditOptions): Promise<Secu
findings.push(...collectHooksHardeningFindings(cfg));
findings.push(...collectSecretsInConfigFindings(cfg));
findings.push(...collectModelHygieneFindings(cfg));
findings.push(...collectSmallModelRiskFindings({ cfg, env }));
findings.push(...collectExposureMatrixFindings(cfg));
const configSnapshot =