feat: warn on weak model tiers
This commit is contained in:
@@ -13,6 +13,7 @@
|
|||||||
- TUI: show provider/model labels for the active session and default model.
|
- TUI: show provider/model labels for the active session and default model.
|
||||||
- Heartbeat: add per-agent heartbeat configuration and multi-agent docs example.
|
- Heartbeat: add per-agent heartbeat configuration and multi-agent docs example.
|
||||||
- UI: show gateway auth guidance + doc link on unauthorized Control UI connections.
|
- UI: show gateway auth guidance + doc link on unauthorized Control UI connections.
|
||||||
|
- Security: warn on weak model tiers (Haiku, below GPT-5, below Claude 4.5) in `clawdbot security audit`.
|
||||||
- Fix: list model picker entries as provider/model pairs for explicit selection. (#970) — thanks @mcinteerj.
|
- Fix: list model picker entries as provider/model pairs for explicit selection. (#970) — thanks @mcinteerj.
|
||||||
- Fix: align OpenAI image-gen defaults with DALL-E 3 standard quality and document output formats. (#880) — thanks @mkbehr.
|
- Fix: align OpenAI image-gen defaults with DALL-E 3 standard quality and document output formats. (#880) — thanks @mkbehr.
|
||||||
- Fix: persist `gateway.mode=local` after selecting Local run mode in `clawdbot configure`, even if no other sections are chosen.
|
- Fix: persist `gateway.mode=local` after selecting Local run mode in `clawdbot configure`, even if no other sections are chosen.
|
||||||
|
|||||||
@@ -161,6 +161,15 @@ Even with strong system prompts, **prompt injection is not solved**. What helps
|
|||||||
- Run sensitive tool execution in a sandbox; keep secrets out of the agent’s reachable filesystem.
|
- Run sensitive tool execution in a sandbox; keep secrets out of the agent’s reachable filesystem.
|
||||||
- **Model choice matters:** older/legacy models can be less robust against prompt injection and tool misuse. Prefer modern, instruction-hardened models for any bot with tools. We recommend Anthropic Opus 4.5 because it’s quite good at recognizing prompt injections (see [“A step forward on safety”](https://www.anthropic.com/news/claude-opus-4-5)).
|
- **Model choice matters:** older/legacy models can be less robust against prompt injection and tool misuse. Prefer modern, instruction-hardened models for any bot with tools. We recommend Anthropic Opus 4.5 because it’s quite good at recognizing prompt injections (see [“A step forward on safety”](https://www.anthropic.com/news/claude-opus-4-5)).
|
||||||
|
|
||||||
|
### Model strength (security note)
|
||||||
|
|
||||||
|
Prompt injection resistance is **not** uniform across model tiers. Smaller/cheaper models are generally more susceptible to tool misuse and instruction hijacking, especially under adversarial prompts.
|
||||||
|
|
||||||
|
Recommendations:
|
||||||
|
- **Use the latest-generation, best-tier model** for any bot that can run tools or touch files/networks.
|
||||||
|
- **Avoid weaker tiers** (for example, Sonnet or Haiku) for tool-enabled agents or untrusted inboxes.
|
||||||
|
- If you must use a smaller model, **reduce blast radius** (read-only tools, strong sandboxing, minimal filesystem access, strict allowlists).
|
||||||
|
|
||||||
## Reasoning & verbose output in groups
|
## Reasoning & verbose output in groups
|
||||||
|
|
||||||
`/reasoning` and `/verbose` can expose internal reasoning or tool output that
|
`/reasoning` and `/verbose` can expose internal reasoning or tool output that
|
||||||
|
|||||||
@@ -260,11 +260,57 @@ const LEGACY_MODEL_PATTERNS: Array<{ id: string; re: RegExp; label: string }> =
|
|||||||
{ id: "openai.gpt4_legacy", re: /\bgpt-4-(0314|0613)\b/i, label: "Legacy GPT-4 snapshots" },
|
{ id: "openai.gpt4_legacy", re: /\bgpt-4-(0314|0613)\b/i, label: "Legacy GPT-4 snapshots" },
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/**
 * Name-based patterns for model tiers that are treated as weak.
 * Each entry pairs a stable `id` with the regular expression tested against a
 * model id and the human-readable `label` used as the reason in a finding.
 */
const WEAK_TIER_MODEL_PATTERNS: { id: string; re: RegExp; label: string }[] = [
  {
    id: "anthropic.haiku",
    re: /\bhaiku\b/i,
    label: "Haiku tier (smaller model)",
  },
];
|
||||||
|
|
||||||
|
/**
 * Reports whether a model id contains a `gpt-` token that starts at a word
 * boundary (case-insensitive), e.g. `openai/gpt-4o`.
 */
function isGptModel(id: string): boolean {
  const gptToken = /\bgpt-/i;
  return id.search(gptToken) !== -1;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a model id names a GPT model of major version 5 or later.
 *
 * The major version is the one- or two-digit number directly after `gpt-`
 * (`gpt-5`, `gpt-5.1`, `gpt-5-mini`, `gpt-6`, ...). Ids without such a number
 * (for example `gpt-oss-120b`) are not considered GPT-5 or higher.
 *
 * @param id Model id, optionally prefixed with a provider (`openai/gpt-5`).
 * @returns `true` when the parsed major version is at least 5.
 */
function isGpt5OrHigher(id: string): boolean {
  // Parse the version numerically so future majors (gpt-6, gpt-10) are not
  // misreported as "below GPT-5"; `(?!\d)` rejects longer digit runs.
  const match = /\bgpt-(\d{1,2})(?!\d)/i.exec(id);
  if (match === null) return false;
  const major = Number(match[1]);
  // Azure deployments spell GPT-3.5 as "gpt-35"; that is not major version 35.
  if (major === 35) return false;
  return major >= 5;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a model id contains a `claude-` token that starts at a word
 * boundary (case-insensitive), e.g. `anthropic/claude-opus-4-5`.
 */
function isClaudeModel(id: string): boolean {
  const claudeToken = /\bclaude-/i;
  return id.match(claudeToken) !== null;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a model id names a Claude model of version 4.5 or later.
 *
 * The version is the first one- or two-digit number after `claude-`, with an
 * optional minor part joined by `-` or `.` (`claude-opus-4-5`,
 * `claude-sonnet-4.5`, `claude-3-5-sonnet`). Longer digit runs such as the
 * date suffix in `claude-sonnet-4-20250514` are not read as a minor version,
 * so that id is treated as 4.0.
 *
 * @param id Model id, optionally prefixed with a provider (`anthropic/...`).
 * @returns `true` when the parsed version is at least 4.5; `false` when no
 *   version can be parsed.
 */
function isClaude45OrHigher(id: string): boolean {
  // Compare numerically so later releases (4.6, 5, ...) are not misreported as
  // "below Claude 4.5". `[^\s/\d]*` skips the family name (opus-, sonnet-, ...)
  // and `(?!\d)` keeps date stamps from being parsed as version parts.
  const match = /\bclaude-[^\s/\d]*(\d{1,2})(?!\d)(?:[.-](\d{1,2})(?!\d))?/i.exec(id);
  if (match === null) return false;
  const major = Number(match[1]);
  const minor = match[2] === undefined ? 0 : Number(match[2]);
  return major > 4 || (major === 4 && minor >= 5);
}
|
||||||
|
|
||||||
export function collectModelHygieneFindings(cfg: ClawdbotConfig): SecurityAuditFinding[] {
|
export function collectModelHygieneFindings(cfg: ClawdbotConfig): SecurityAuditFinding[] {
|
||||||
const findings: SecurityAuditFinding[] = [];
|
const findings: SecurityAuditFinding[] = [];
|
||||||
const models = collectModels(cfg);
|
const models = collectModels(cfg);
|
||||||
if (models.length === 0) return findings;
|
if (models.length === 0) return findings;
|
||||||
|
|
||||||
|
const weakMatches = new Map<string, { model: string; source: string; reasons: string[] }>();
|
||||||
|
const addWeakMatch = (model: string, source: string, reason: string) => {
|
||||||
|
const key = `${model}@@${source}`;
|
||||||
|
const existing = weakMatches.get(key);
|
||||||
|
if (!existing) {
|
||||||
|
weakMatches.set(key, { model, source, reasons: [reason] });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!existing.reasons.includes(reason)) existing.reasons.push(reason);
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const entry of models) {
|
||||||
|
for (const pat of WEAK_TIER_MODEL_PATTERNS) {
|
||||||
|
if (pat.re.test(entry.id)) {
|
||||||
|
addWeakMatch(entry.id, entry.source, pat.label);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isGptModel(entry.id) && !isGpt5OrHigher(entry.id)) {
|
||||||
|
addWeakMatch(entry.id, entry.source, "Below GPT-5 family");
|
||||||
|
}
|
||||||
|
if (isClaudeModel(entry.id) && !isClaude45OrHigher(entry.id)) {
|
||||||
|
addWeakMatch(entry.id, entry.source, "Below Claude 4.5");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const matches: Array<{ model: string; source: string; reason: string }> = [];
|
const matches: Array<{ model: string; source: string; reason: string }> = [];
|
||||||
for (const entry of models) {
|
for (const entry of models) {
|
||||||
for (const pat of LEGACY_MODEL_PATTERNS) {
|
for (const pat of LEGACY_MODEL_PATTERNS) {
|
||||||
@@ -293,6 +339,25 @@ export function collectModelHygieneFindings(cfg: ClawdbotConfig): SecurityAuditF
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (weakMatches.size > 0) {
|
||||||
|
const lines = Array.from(weakMatches.values())
|
||||||
|
.slice(0, 12)
|
||||||
|
.map((m) => `- ${m.model} (${m.reasons.join("; ")}) @ ${m.source}`)
|
||||||
|
.join("\n");
|
||||||
|
const more = weakMatches.size > 12 ? `\n…${weakMatches.size - 12} more` : "";
|
||||||
|
findings.push({
|
||||||
|
checkId: "models.weak_tier",
|
||||||
|
severity: "warn",
|
||||||
|
title: "Some configured models are below recommended tiers",
|
||||||
|
detail:
|
||||||
|
"Smaller/older models are generally more susceptible to prompt injection and tool misuse.\n" +
|
||||||
|
lines +
|
||||||
|
more,
|
||||||
|
remediation:
|
||||||
|
"Use the latest, top-tier model for any bot with tools or untrusted inboxes. Avoid Haiku tiers; prefer GPT-5+ and Claude 4.5+.",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
return findings;
|
return findings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -269,6 +269,24 @@ describe("security audit", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("warns on weak model tiers", async () => {
|
||||||
|
const cfg: ClawdbotConfig = {
|
||||||
|
agents: { defaults: { model: { primary: "anthropic/claude-haiku-4-5" } } },
|
||||||
|
};
|
||||||
|
|
||||||
|
const res = await runSecurityAudit({
|
||||||
|
config: cfg,
|
||||||
|
includeFilesystem: false,
|
||||||
|
includeChannelSecurity: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(res.findings).toEqual(
|
||||||
|
expect.arrayContaining([
|
||||||
|
expect.objectContaining({ checkId: "models.weak_tier", severity: "warn" }),
|
||||||
|
]),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it("warns when hooks token looks short", async () => {
|
it("warns when hooks token looks short", async () => {
|
||||||
const cfg: ClawdbotConfig = {
|
const cfg: ClawdbotConfig = {
|
||||||
hooks: { enabled: true, token: "short" },
|
hooks: { enabled: true, token: "short" },
|
||||||
|
|||||||
Reference in New Issue
Block a user