fix: tighten small-model audit guardrails

This commit is contained in:
Peter Steinberger
2026-01-20 23:45:50 +00:00
parent 4fad74738a
commit 51dfd6efdb
6 changed files with 229 additions and 0 deletions

View File

@@ -17,6 +17,7 @@ Docs: https://docs.clawd.bot
- Matrix: migrate to matrix-bot-sdk with E2EE support, location handling, and group allowlist upgrades. (#1298) — thanks @sibbl.
- Plugins/UI: let channel plugin metadata drive UI labels/icons and cron channel options. (#1306) — thanks @steipete.
- Zalouser: add channel dock metadata, config schema, setup wiring, probe, and status issues. (#1219) — thanks @suminhthanh.
- Security: warn when <=300B-parameter models run without full sandboxing or with web/browser tools enabled.
### Fixes
- Discovery: shorten Bonjour DNS-SD service type to `_clawdbot-gw._tcp` and update discovery clients/docs.
- Agents: preserve subagent announce thread/topic routing + queued replies across channels. (#1241) — thanks @gnarco.

View File

@@ -21,3 +21,4 @@ clawdbot security audit --fix
```
The audit warns when multiple DM senders share the main session and recommends `session.dmScope="per-channel-peer"` for shared inboxes.
It also warns when small models (<=300B parameters) run without full sandboxing or with web/browser tools enabled.

View File

@@ -177,6 +177,7 @@ Recommendations:
- **Use the latest generation, best-tier model** for any bot that can run tools or touch files/networks.
- **Avoid weaker tiers** (for example, Sonnet or Haiku) for tool-enabled agents or untrusted inboxes.
- If you must use a smaller model, **reduce blast radius** (read-only tools, strong sandboxing, minimal filesystem access, strict allowlists).
- When running small models, **enable sandboxing for all sessions** and **disable web_search/web_fetch/browser** unless inputs are tightly controlled.
## Reasoning & verbose output in groups

View File

@@ -9,6 +9,15 @@ import { resolveNativeSkillsEnabled } from "../config/commands.js";
import { resolveOAuthDir } from "../config/paths.js";
import { formatCliCommand } from "../cli/command-format.js";
import { resolveDefaultAgentId } from "../agents/agent-scope.js";
import type { AgentToolsConfig } from "../config/types.tools.js";
import { resolveBrowserConfig } from "../browser/config.js";
import { isToolAllowedByPolicies } from "../agents/pi-tools.policy.js";
import { resolveToolProfilePolicy } from "../agents/tool-policy.js";
import {
resolveSandboxConfigForAgent,
resolveSandboxToolPolicyForAgent,
} from "../agents/sandbox.js";
import type { SandboxToolPolicy } from "../agents/sandbox/types.js";
import { INCLUDE_KEY, MAX_INCLUDE_DEPTH } from "../config/includes.js";
import { normalizeAgentId } from "../routing/session-key.js";
import {
@@ -29,6 +38,8 @@ export type SecurityAuditFinding = {
remediation?: string;
};
const SMALL_MODEL_PARAM_B_MAX = 300;
function expandTilde(p: string, env: NodeJS.ProcessEnv): string | null {
if (!p.startsWith("~")) return p;
const home = typeof env.HOME === "string" && env.HOME.trim() ? env.HOME.trim() : null;
@@ -266,6 +277,20 @@ const WEAK_TIER_MODEL_PATTERNS: Array<{ id: string; re: RegExp; label: string }>
{ id: "anthropic.haiku", re: /\bhaiku\b/i, label: "Haiku tier (smaller model)" },
];
/**
 * Infer a model's parameter count (in billions) from its id or display name.
 *
 * Scans for tokens like "8b", "70b", or "3.5b" (optionally preceded by a
 * single letter) delimited by non-alphanumeric characters, and returns the
 * largest value found, or null when no size token is present.
 */
function inferParamBFromIdOrName(text: string): number | null {
  const raw = text.toLowerCase();
  // The trailing boundary is a lookahead so it is NOT consumed by the match.
  // The previous consuming group `(?:[^a-z0-9]|$)` swallowed the delimiter,
  // which made the scan skip a size token that immediately followed another
  // match's boundary (e.g. "7b 8b" only yielded 7).
  const matches = raw.matchAll(/(?:^|[^a-z0-9])[a-z]?(\d+(?:\.\d+)?)b(?=[^a-z0-9]|$)/g);
  let best: number | null = null;
  for (const match of matches) {
    const numRaw = match[1];
    if (!numRaw) continue;
    const value = Number(numRaw);
    if (!Number.isFinite(value) || value <= 0) continue;
    // Keep the maximum when several size tokens appear in one name.
    if (best === null || value > best) best = value;
  }
  return best;
}
/** True when the model id names an OpenAI GPT-family model ("gpt-" token). */
function isGptModel(id: string): boolean {
  const gptPattern = /\bgpt-/i;
  return gptPattern.test(id);
}
@@ -363,6 +388,155 @@ export function collectModelHygieneFindings(cfg: ClawdbotConfig): SecurityAuditF
return findings;
}
/**
 * Pull the agent id out of a config source path such as
 * "agents.list.<id>.model...". Returns null for any path that is not an
 * agents.list entry.
 * NOTE(review): presumably `source` uses the agent id (not an array index)
 * as the path segment — verify against collectModels.
 */
function extractAgentIdFromSource(source: string): string | null {
  const agentPath = /^agents\.list\.([^.]*)\./.exec(source);
  if (!agentPath) return null;
  return agentPath[1] ?? null;
}
/**
 * Build a SandboxToolPolicy from a raw allow/deny config. Returns null when
 * the config is absent or supplies neither list; non-array values are
 * treated as missing.
 */
function pickToolPolicy(config?: { allow?: string[]; deny?: string[] }): SandboxToolPolicy | null {
  if (!config) return null;
  let allow: string[] | undefined;
  let deny: string[] | undefined;
  if (Array.isArray(config.allow)) allow = config.allow;
  if (Array.isArray(config.deny)) deny = config.deny;
  // An empty array still counts as an explicit policy.
  return allow || deny ? { allow, deny } : null;
}
/**
 * Collect every tool policy layer that applies to an agent, in evaluation
 * order: tool profile, global tools config, agent-level tools config, and —
 * only when every session is sandboxed (mode "all") — the sandbox policy.
 */
function resolveToolPolicies(params: {
  cfg: ClawdbotConfig;
  agentTools?: AgentToolsConfig;
  sandboxMode?: "off" | "non-main" | "all";
  agentId?: string | null;
}): SandboxToolPolicy[] {
  const { cfg, agentTools, sandboxMode, agentId } = params;
  const collected: SandboxToolPolicy[] = [];
  // Agent-level profile takes precedence over the global one when both are set.
  const profilePolicy = resolveToolProfilePolicy(agentTools?.profile ?? cfg.tools?.profile);
  const globalPolicy = pickToolPolicy(cfg.tools ?? undefined);
  const agentPolicy = pickToolPolicy(agentTools);
  for (const policy of [profilePolicy, globalPolicy, agentPolicy]) {
    if (policy) collected.push(policy);
  }
  if (sandboxMode === "all") {
    collected.push(resolveSandboxToolPolicyForAgent(cfg, agentId ?? undefined));
  }
  return collected;
}
/**
 * Whether any web-search API key is available, either from the config
 * (generic or Perplexity key) or from well-known environment variables.
 */
function hasWebSearchKey(cfg: ClawdbotConfig, env: NodeJS.ProcessEnv): boolean {
  const search = cfg.tools?.web?.search;
  const candidates = [
    search?.apiKey,
    search?.perplexity?.apiKey,
    env.BRAVE_API_KEY,
    env.PERPLEXITY_API_KEY,
    env.OPENROUTER_API_KEY,
  ];
  return candidates.some((key) => Boolean(key));
}
/**
 * Web search is on when explicitly enabled and off when explicitly disabled;
 * with no explicit setting it falls back to key availability.
 */
function isWebSearchEnabled(cfg: ClawdbotConfig, env: NodeJS.ProcessEnv): boolean {
  switch (cfg.tools?.web?.search?.enabled) {
    case true:
      return true;
    case false:
      return false;
    default:
      return hasWebSearchKey(cfg, env);
  }
}
/** Web fetch defaults to on; only an explicit `enabled: false` disables it. */
function isWebFetchEnabled(cfg: ClawdbotConfig): boolean {
  return cfg.tools?.web?.fetch?.enabled !== false;
}
/**
 * Whether the browser tool is enabled. If the browser config cannot be
 * resolved, err on the side of caution and report it as enabled so the
 * audit treats the tool as exposed.
 */
function isBrowserEnabled(cfg: ClawdbotConfig): boolean {
  let enabled = true;
  try {
    enabled = resolveBrowserConfig(cfg.browser).enabled;
  } catch {
    // Fail open for audit purposes: unknown config counts as exposed.
  }
  return enabled;
}
/**
 * Audit check: flag "small" models (<= SMALL_MODEL_PARAM_B_MAX billion
 * parameters, inferred from the model id) that are not fully locked down.
 *
 * A small model counts as safe only when its agent runs with sandbox mode
 * "all" AND none of the web-facing tools (web_search, web_fetch, browser)
 * are reachable. One finding is emitted covering all small models:
 * severity "critical" if any model is unsafe, otherwise "info".
 */
export function collectSmallModelRiskFindings(params: {
  cfg: ClawdbotConfig;
  env: NodeJS.ProcessEnv;
}): SecurityAuditFinding[] {
  const findings: SecurityAuditFinding[] = [];
  // Image models are excluded from this check.
  const models = collectModels(params.cfg).filter((entry) => !entry.source.includes("imageModel"));
  if (models.length === 0) return findings;
  // Keep only models whose id implies <= SMALL_MODEL_PARAM_B_MAX billion params.
  const smallModels = models
    .map((entry) => {
      const paramB = inferParamBFromIdOrName(entry.id);
      if (!paramB || paramB > SMALL_MODEL_PARAM_B_MAX) return null;
      return { ...entry, paramB };
    })
    .filter((entry): entry is { id: string; source: string; paramB: number } => Boolean(entry));
  if (smallModels.length === 0) return findings;
  let hasUnsafe = false;
  const modelLines: string[] = [];
  // Union of web tools exposed across all small models, for the summary line.
  const exposureSet = new Set<string>();
  for (const entry of smallModels) {
    // Agent-scoped settings apply when the model came from agents.list.<id>.*.
    const agentId = extractAgentIdFromSource(entry.source);
    const sandboxMode = resolveSandboxConfigForAgent(params.cfg, agentId ?? undefined).mode;
    const agentTools =
      agentId && params.cfg.agents?.list
        ? params.cfg.agents.list.find((agent) => agent?.id === agentId)?.tools
        : undefined;
    const policies = resolveToolPolicies({
      cfg: params.cfg,
      agentTools,
      sandboxMode,
      agentId,
    });
    // A tool is exposed only when it is enabled AND allowed by every policy layer.
    const exposed: string[] = [];
    if (isWebSearchEnabled(params.cfg, params.env)) {
      if (isToolAllowedByPolicies("web_search", policies)) exposed.push("web_search");
    }
    if (isWebFetchEnabled(params.cfg)) {
      if (isToolAllowedByPolicies("web_fetch", policies)) exposed.push("web_fetch");
    }
    if (isBrowserEnabled(params.cfg)) {
      if (isToolAllowedByPolicies("browser", policies)) exposed.push("browser");
    }
    for (const tool of exposed) exposureSet.add(tool);
    const sandboxLabel = sandboxMode === "all" ? "sandbox=all" : `sandbox=${sandboxMode}`;
    const exposureLabel = exposed.length > 0 ? ` web=[${exposed.join(", ")}]` : " web=[off]";
    // Safe = fully sandboxed with no web-facing tools reachable.
    const safe = sandboxMode === "all" && exposed.length === 0;
    if (!safe) hasUnsafe = true;
    const statusLabel = safe ? "ok" : "unsafe";
    modelLines.push(
      `- ${entry.id} (${entry.paramB}B) @ ${entry.source} (${statusLabel}; ${sandboxLabel};${exposureLabel})`,
    );
  }
  const exposureList = Array.from(exposureSet);
  const exposureDetail =
    exposureList.length > 0
      ? `Uncontrolled input tools allowed: ${exposureList.join(", ")}.`
      : "No web/browser tools detected for these models.";
  // Single aggregate finding; severity reflects the worst model seen.
  findings.push({
    checkId: "models.small_params",
    severity: hasUnsafe ? "critical" : "info",
    title: "Small models require sandboxing and web tools disabled",
    detail:
      `Small models (<=${SMALL_MODEL_PARAM_B_MAX}B params) detected:\n` +
      modelLines.join("\n") +
      `\n` +
      exposureDetail +
      `\n` +
      "Small models are not recommended for untrusted inputs.",
    remediation:
      'If you must use small models, enable sandboxing for all sessions (agents.defaults.sandbox.mode="all") and disable web_search/web_fetch/browser (tools.deny=["group:web","browser"]).',
  });
  return findings;
}
export async function collectPluginsTrustFindings(params: {
cfg: ClawdbotConfig;
stateDir: string;

View File

@@ -71,6 +71,56 @@ describe("security audit", () => {
);
});
// An 8B model with search, fetch, and browser all enabled and no sandbox:
// the audit must raise a critical finding that lists every exposed tool.
it("warns when small models are paired with web/browser tools", async () => {
  const cfg: ClawdbotConfig = {
    agents: { defaults: { model: { primary: "ollama/mistral-8b" } } },
    tools: {
      web: {
        search: { enabled: true },
        fetch: { enabled: true },
      },
    },
    browser: { enabled: true },
  };
  const res = await runSecurityAudit({
    config: cfg,
    includeFilesystem: false,
    includeChannelSecurity: false,
  });
  const finding = res.findings.find((f) => f.checkId === "models.small_params");
  expect(finding?.severity).toBe("critical");
  expect(finding?.detail).toContain("mistral-8b");
  // All three web-facing tools should be reported as exposed.
  expect(finding?.detail).toContain("web_search");
  expect(finding?.detail).toContain("web_fetch");
  expect(finding?.detail).toContain("browser");
});
// Same 8B model, but sandbox mode "all" and every web tool disabled:
// the finding is still emitted, downgraded to informational.
it("treats small models as safe when sandbox is on and web tools are disabled", async () => {
  const cfg: ClawdbotConfig = {
    agents: { defaults: { model: { primary: "ollama/mistral-8b" }, sandbox: { mode: "all" } } },
    tools: {
      web: {
        search: { enabled: false },
        fetch: { enabled: false },
      },
    },
    browser: { enabled: false },
  };
  const res = await runSecurityAudit({
    config: cfg,
    includeFilesystem: false,
    includeChannelSecurity: false,
  });
  const finding = res.findings.find((f) => f.checkId === "models.small_params");
  expect(finding?.severity).toBe("info");
  expect(finding?.detail).toContain("mistral-8b");
  expect(finding?.detail).toContain("sandbox=all");
});
it("flags tools.elevated allowFrom wildcard as critical", async () => {
const cfg: ClawdbotConfig = {
tools: {

View File

@@ -14,6 +14,7 @@ import {
collectHooksHardeningFindings,
collectIncludeFilePermFindings,
collectModelHygieneFindings,
collectSmallModelRiskFindings,
collectPluginsTrustFindings,
collectSecretsInConfigFindings,
collectStateDeepFilesystemFindings,
@@ -805,6 +806,7 @@ export async function runSecurityAudit(opts: SecurityAuditOptions): Promise<Secu
findings.push(...collectHooksHardeningFindings(cfg));
findings.push(...collectSecretsInConfigFindings(cfg));
findings.push(...collectModelHygieneFindings(cfg));
findings.push(...collectSmallModelRiskFindings({ cfg, env }));
findings.push(...collectExposureMatrixFindings(cfg));
const configSnapshot =