fix: default low thinking for reasoning models

This commit is contained in:
Peter Steinberger
2026-01-03 12:18:50 +00:00
parent 6e16c0699a
commit b6301c719b
14 changed files with 308 additions and 20 deletions

View File

@@ -14,6 +14,7 @@
- Telegram: chunk block-stream replies to avoid “message is too long” errors (#124) — thanks @mukhtharcm.
- Agent tools: scope the Discord tool to Discord surface runs.
- Agent tools: format verbose tool summaries without brackets, with unique emojis and `tool: detail` style.
- Thinking: default to low for reasoning-capable models when no /think or config default is set.
### Docs
- Skills: add Sheets/Docs examples to gog skill (#128) — thanks @mbelinky.

View File

@@ -18,7 +18,7 @@ read_when:
1. Inline directive on the message (applies only to that message).
2. Session override (set by sending a directive-only message).
3. Global default (`agent.thinkingDefault` in config).
4. Fallback: off.
4. Fallback: low for reasoning-capable models; off otherwise.
## Setting a session default
- Send a message that is **only** the directive (whitespace allowed), e.g. `/think:medium` or `/t high`.

View File

@@ -7,6 +7,7 @@ export type ModelCatalogEntry = {
name: string;
provider: string;
contextWindow?: number;
reasoning?: boolean;
};
type DiscoveredModel = {
@@ -14,6 +15,7 @@ type DiscoveredModel = {
name?: string;
provider: string;
contextWindow?: number;
reasoning?: boolean;
};
let modelCatalogPromise: Promise<ModelCatalogEntry[]> | null = null;
@@ -56,7 +58,9 @@ export async function loadModelCatalog(params?: {
typeof entry?.contextWindow === "number" && entry.contextWindow > 0
? entry.contextWindow
: undefined;
models.push({ id, name, provider, contextWindow });
const reasoning =
typeof entry?.reasoning === "boolean" ? entry.reasoning : undefined;
models.push({ id, name, provider, contextWindow, reasoning });
}
} catch {
// Leave models empty on discovery errors.

View File

@@ -6,6 +6,8 @@ export type ModelRef = {
model: string;
};
export type ThinkLevel = "off" | "minimal" | "low" | "medium" | "high";
export type ModelAliasIndex = {
byAlias: Map<string, { alias: string; ref: ModelRef }>;
byKey: Map<string, string[]>;
@@ -152,3 +154,19 @@ export function buildAllowedModelSet(params: {
return { allowAny: false, allowedCatalog, allowedKeys };
}
/**
 * Picks the default thinking level for a provider/model pair.
 *
 * Precedence: an explicit `agent.thinkingDefault` from config always wins;
 * otherwise models the catalog flags as reasoning-capable default to "low",
 * and everything else (including models missing from the catalog) to "off".
 */
export function resolveThinkingDefault(params: {
  cfg: ClawdisConfig;
  provider: string;
  model: string;
  catalog?: ModelCatalogEntry[];
}): ThinkLevel {
  const fromConfig = params.cfg.agent?.thinkingDefault;
  if (fromConfig) {
    return fromConfig;
  }
  // First catalog entry matching both provider and model id decides.
  for (const entry of params.catalog ?? []) {
    if (entry.provider !== params.provider || entry.id !== params.model) {
      continue;
    }
    return entry.reasoning ? "low" : "off";
  }
  return "off";
}

View File

@@ -700,4 +700,48 @@ describe("directive parsing", () => {
expect(call?.model).toBe("gpt-4.1-mini");
});
});
// Regression test: with no /think directive and no agent.thinkingDefault in
// config, a model the catalog flags `reasoning: true` should run with
// thinkLevel "low" by default.
it("defaults thinking to low for reasoning-capable models", async () => {
  await withTempHome(async (home) => {
    const storePath = path.join(home, "sessions.json");
    // Stub the embedded agent run so we can inspect the args it receives.
    vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
      payloads: [{ text: "done" }],
      meta: {
        durationMs: 5,
        agentMeta: { sessionId: "s", provider: "p", model: "m" },
      },
    });
    // Catalog marks the configured model as reasoning-capable.
    vi.mocked(loadModelCatalog).mockResolvedValueOnce([
      {
        id: "claude-opus-4-5",
        name: "Opus 4.5",
        provider: "anthropic",
        reasoning: true,
      },
    ]);
    await getReplyFromConfig(
      {
        Body: "hello",
        From: "+1004",
        To: "+2000",
      },
      {},
      {
        // Note: no agent.thinkingDefault here — exercises the fallback path.
        agent: {
          model: "anthropic/claude-opus-4-5",
          workspace: path.join(home, "clawd"),
        },
        whatsapp: {
          allowFrom: ["*"],
        },
        session: { store: storePath },
      },
    );
    expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
    const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
    // The resolved default must surface as thinkLevel on the agent call.
    expect(call?.thinkLevel).toBe("low");
  });
});
});

View File

@@ -13,6 +13,7 @@ import {
modelKey,
resolveConfiguredModelRef,
resolveModelRefFromString,
resolveThinkingDefault,
} from "../agents/model-selection.js";
import {
abortEmbeddedPiRun,
@@ -1094,13 +1095,14 @@ export async function getReplyFromConfig(
hasModelDirective || hasAllowlist || hasStoredOverride;
let allowedModelKeys = new Set<string>();
let allowedModelCatalog: Awaited<ReturnType<typeof loadModelCatalog>> = [];
let modelCatalog: Awaited<ReturnType<typeof loadModelCatalog>> | null = null;
let resetModelOverride = false;
if (needsModelCatalog) {
const catalog = await loadModelCatalog({ config: cfg });
modelCatalog = await loadModelCatalog({ config: cfg });
const allowed = buildAllowedModelSet({
cfg,
catalog,
catalog: modelCatalog,
defaultProvider,
});
allowedModelCatalog = allowed.allowedCatalog;
@@ -1134,6 +1136,22 @@ export async function getReplyFromConfig(
model = storedModelOverride;
}
}
// Lazily computes — and memoizes — the fallback thinking level for this run.
// Memoization is safe for every ThinkLevel value ("off" included) because all
// levels are non-empty, truthy strings.
let defaultThinkingLevel: ThinkLevel | undefined;
const resolveDefaultThinkingLevel = async () => {
  if (defaultThinkingLevel) return defaultThinkingLevel;
  // Prefer a catalog already fetched for the model allowlist; fall back to
  // loading it on demand so the common no-allowlist path pays the cost only
  // when a default actually needs resolving.
  let catalogForThinking = modelCatalog ?? allowedModelCatalog;
  if (!catalogForThinking || catalogForThinking.length === 0) {
    modelCatalog = await loadModelCatalog({ config: cfg });
    catalogForThinking = modelCatalog;
  }
  // resolveThinkingDefault honors cfg.agent.thinkingDefault first, then
  // returns "low" for reasoning-capable catalog entries, else "off".
  defaultThinkingLevel = resolveThinkingDefault({
    cfg,
    provider,
    model,
    catalog: catalogForThinking,
  });
  return defaultThinkingLevel;
};
contextTokens =
agentCfg?.contextTokens ??
lookupContextTokens(model) ??
@@ -1589,7 +1607,8 @@ export async function getReplyFromConfig(
sessionScope,
storePath,
groupActivation,
resolvedThink: resolvedThinkLevel,
resolvedThink:
resolvedThinkLevel ?? (await resolveDefaultThinkingLevel()),
resolvedVerbose: resolvedVerboseLevel,
webLinked,
webAuthAgeMs,
@@ -1820,6 +1839,9 @@ export async function getReplyFromConfig(
commandBody = parts.slice(1).join(" ").trim();
}
}
if (!resolvedThinkLevel) {
resolvedThinkLevel = await resolveDefaultThinkingLevel();
}
const sessionIdFinal = sessionId ?? crypto.randomUUID();
const sessionFile = resolveSessionTranscriptPath(sessionIdFinal);

View File

@@ -17,8 +17,12 @@ vi.mock("../agents/pi-embedded.js", () => ({
resolveEmbeddedSessionLane: (key: string) =>
`session:${key.trim() || "main"}`,
}));
vi.mock("../agents/model-catalog.js", () => ({
loadModelCatalog: vi.fn(),
}));
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import type { ClawdisConfig } from "../config/config.js";
import * as configModule from "../config/config.js";
import type { RuntimeEnv } from "../runtime.js";
@@ -74,6 +78,7 @@ beforeEach(() => {
agentMeta: { sessionId: "s", provider: "p", model: "m" },
},
});
vi.mocked(loadModelCatalog).mockResolvedValue([]);
});
describe("agentCommand", () => {
@@ -162,6 +167,26 @@ describe("agentCommand", () => {
});
});
// Regression test: the CLI agent command should also fall back to "low"
// thinking for catalog entries flagged `reasoning: true` when no explicit
// level is set anywhere.
it("defaults thinking to low for reasoning-capable models", async () => {
  await withTempHome(async (home) => {
    const store = path.join(home, "sessions.json");
    mockConfig(home, store);
    // Catalog marks the model as reasoning-capable for this one call.
    vi.mocked(loadModelCatalog).mockResolvedValueOnce([
      {
        id: "claude-opus-4-5",
        name: "Opus 4.5",
        provider: "anthropic",
        reasoning: true,
      },
    ]);
    await agentCommand({ message: "hi", to: "+1555" }, runtime);
    // Inspect the most recent embedded-agent invocation.
    const callArgs = vi.mocked(runEmbeddedPiAgent).mock.calls.at(-1)?.[0];
    expect(callArgs?.thinkLevel).toBe("low");
  });
});
it("prints JSON payload when requested", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({

View File

@@ -10,6 +10,7 @@ import {
buildAllowedModelSet,
modelKey,
resolveConfiguredModelRef,
resolveThinkingDefault,
} from "../agents/model-selection.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { buildWorkspaceSkillSnapshot } from "../agents/skills.js";
@@ -211,7 +212,7 @@ export async function agentCommand(
registerAgentRunContext(sessionId, { sessionKey });
}
const resolvedThinkLevel =
let resolvedThinkLevel =
thinkOnce ??
thinkOverride ??
persistedThinking ??
@@ -275,15 +276,18 @@ export async function agentCommand(
);
const needsModelCatalog = hasAllowlist || hasStoredOverride;
let allowedModelKeys = new Set<string>();
let allowedModelCatalog: Awaited<ReturnType<typeof loadModelCatalog>> = [];
let modelCatalog: Awaited<ReturnType<typeof loadModelCatalog>> | null = null;
if (needsModelCatalog) {
const catalog = await loadModelCatalog({ config: cfg });
modelCatalog = await loadModelCatalog({ config: cfg });
const allowed = buildAllowedModelSet({
cfg,
catalog,
catalog: modelCatalog,
defaultProvider,
});
allowedModelKeys = allowed.allowedKeys;
allowedModelCatalog = allowed.allowedCatalog;
}
if (sessionEntry && sessionStore && sessionKey && hasStoredOverride) {
@@ -312,6 +316,20 @@ export async function agentCommand(
model = storedModelOverride;
}
}
if (!resolvedThinkLevel) {
let catalogForThinking = modelCatalog ?? allowedModelCatalog;
if (!catalogForThinking || catalogForThinking.length === 0) {
modelCatalog = await loadModelCatalog({ config: cfg });
catalogForThinking = modelCatalog;
}
resolvedThinkLevel = resolveThinkingDefault({
cfg,
provider,
model,
catalog: catalogForThinking,
});
}
const sessionFile = resolveSessionTranscriptPath(sessionId);
const startedAt = Date.now();

View File

@@ -14,8 +14,12 @@ vi.mock("../agents/pi-embedded.js", () => ({
resolveEmbeddedSessionLane: (key: string) =>
`session:${key.trim() || "main"}`,
}));
vi.mock("../agents/model-catalog.js", () => ({
loadModelCatalog: vi.fn(),
}));
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import { runCronIsolatedAgentTurn } from "./isolated-agent.js";
async function withTempHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
@@ -87,6 +91,7 @@ function makeJob(payload: CronJob["payload"]): CronJob {
describe("runCronIsolatedAgentTurn", () => {
beforeEach(() => {
vi.mocked(runEmbeddedPiAgent).mockReset();
vi.mocked(loadModelCatalog).mockResolvedValue([]);
});
it("uses last non-empty agent text as summary", async () => {
@@ -121,6 +126,46 @@ describe("runCronIsolatedAgentTurn", () => {
});
});
// Regression test: cron-isolated agent turns with no job-level or override
// thinking should default to "low" when the catalog marks the model as
// reasoning-capable.
it("defaults thinking to low for reasoning-capable models", async () => {
  await withTempHome(async (home) => {
    const storePath = await writeSessionStore(home);
    // All delivery channels stubbed; deliver: false below means none fire.
    const deps: CliDeps = {
      sendMessageWhatsApp: vi.fn(),
      sendMessageTelegram: vi.fn(),
      sendMessageDiscord: vi.fn(),
      sendMessageSignal: vi.fn(),
      sendMessageIMessage: vi.fn(),
    };
    vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
      payloads: [{ text: "done" }],
      meta: {
        durationMs: 5,
        agentMeta: { sessionId: "s", provider: "p", model: "m" },
      },
    });
    // Catalog marks the model as reasoning-capable for this one call.
    vi.mocked(loadModelCatalog).mockResolvedValueOnce([
      {
        id: "claude-opus-4-5",
        name: "Opus 4.5",
        provider: "anthropic",
        reasoning: true,
      },
    ]);
    await runCronIsolatedAgentTurn({
      cfg: makeCfg(home, storePath),
      deps,
      job: makeJob({ kind: "agentTurn", message: "do it", deliver: false }),
      message: "do it",
      sessionKey: "cron:job-1",
      lane: "cron",
    });
    // Inspect the most recent embedded-agent invocation.
    const callArgs = vi.mocked(runEmbeddedPiAgent).mock.calls.at(-1)?.[0];
    expect(callArgs?.thinkLevel).toBe("low");
  });
});
it("truncates long summaries", async () => {
await withTempHome(async (home) => {
const storePath = await writeSessionStore(home);

View File

@@ -5,7 +5,11 @@ import {
DEFAULT_MODEL,
DEFAULT_PROVIDER,
} from "../agents/defaults.js";
import { resolveConfiguredModelRef } from "../agents/model-selection.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import {
resolveConfiguredModelRef,
resolveThinkingDefault,
} from "../agents/model-selection.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { buildWorkspaceSkillSnapshot } from "../agents/skills.js";
import {
@@ -189,7 +193,16 @@ export async function runCronIsolatedAgentTurn(params: {
? params.job.payload.thinking
: undefined) ?? undefined,
);
const thinkLevel = jobThink ?? thinkOverride;
let thinkLevel = jobThink ?? thinkOverride;
if (!thinkLevel) {
const catalog = await loadModelCatalog({ config: params.cfg });
thinkLevel = resolveThinkingDefault({
cfg: params.cfg,
provider,
model,
catalog,
});
}
const timeoutSecondsRaw =
params.job.payload.kind === "agentTurn" && params.job.payload.timeoutSeconds

View File

@@ -380,6 +380,7 @@ export const ModelChoiceSchema = Type.Object(
name: NonEmptyString,
provider: NonEmptyString,
contextWindow: Type.Optional(Type.Integer({ minimum: 1 })),
reasoning: Type.Optional(Type.Boolean()),
},
{ additionalProperties: false },
);

View File

@@ -82,6 +82,7 @@ const piSdkMock = vi.hoisted(() => ({
name?: string;
provider: string;
contextWindow?: number;
reasoning?: boolean;
}>,
}));
const cronIsolatedRun = vi.hoisted(() =>
@@ -2807,6 +2808,57 @@ describe("gateway server", () => {
await server.close();
});
// Regression test: chat.history should report thinkingLevel "low" for a
// session whose model is reasoning-capable, when the session entry carries
// no stored thinkingLevel and config sets no agent.thinkingDefault.
test("chat.history defaults thinking to low for reasoning-capable models", async () => {
  piSdkMock.enabled = true;
  // Gateway model catalog: mark the model as reasoning-capable.
  piSdkMock.models = [
    {
      id: "claude-opus-4-5",
      name: "Opus 4.5",
      provider: "anthropic",
      reasoning: true,
    },
  ];
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-gw-"));
  testSessionStorePath = path.join(dir, "sessions.json");
  // Session store entry without a thinkingLevel — forces the fallback path.
  await fs.writeFile(
    testSessionStorePath,
    JSON.stringify(
      {
        main: {
          sessionId: "sess-main",
          updatedAt: Date.now(),
        },
      },
      null,
      2,
    ),
    "utf-8",
  );
  // Minimal one-message transcript so chat.history has content to return.
  await fs.writeFile(
    path.join(dir, "sess-main.jsonl"),
    JSON.stringify({
      message: {
        role: "user",
        content: [{ type: "text", text: "hello" }],
        timestamp: Date.now(),
      },
    }),
    "utf-8",
  );
  const { server, ws } = await startServerWithClient();
  await connectOk(ws);
  const res = await rpcReq<{ thinkingLevel?: string }>(ws, "chat.history", {
    sessionKey: "main",
  });
  expect(res.ok).toBe(true);
  expect(res.payload?.thinkingLevel).toBe("low");
  ws.close();
  await server.close();
});
test("chat.history caps payload bytes", { timeout: 15_000 }, async () => {
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-gw-"));
testSessionStorePath = path.join(dir, "sessions.json");

View File

@@ -26,6 +26,7 @@ import {
modelKey,
resolveConfiguredModelRef,
resolveModelRefFromString,
resolveThinkingDefault,
} from "../agents/model-selection.js";
import { installSkill } from "../agents/skills-install.js";
import { buildWorkspaceSkillStatus } from "../agents/skills-status.js";
@@ -925,6 +926,25 @@ function getSessionDefaults(cfg: ClawdisConfig): GatewaySessionsDefaults {
};
}
/**
 * Resolves the provider/model pair a session should use: the configured
 * default model ref, unless the session entry carries a stored model
 * override (optionally paired with a provider override).
 */
function resolveSessionModelRef(
  cfg: ClawdisConfig,
  entry?: SessionEntry,
): { provider: string; model: string } {
  const base = resolveConfiguredModelRef({
    cfg,
    defaultProvider: DEFAULT_PROVIDER,
    defaultModel: DEFAULT_MODEL,
  });
  const overrideModel = entry?.modelOverride?.trim();
  if (!overrideModel) {
    return { provider: base.provider, model: base.model };
  }
  // A stored model override wins; the provider override only applies when
  // non-empty after trimming.
  const overrideProvider = entry?.providerOverride?.trim();
  return {
    provider: overrideProvider || base.provider,
    model: overrideModel,
  };
}
function listSessionsFromStore(params: {
cfg: ClawdisConfig;
storePath: string;
@@ -3283,7 +3303,7 @@ export async function startGatewayServer(
sessionKey: string;
limit?: number;
};
const { storePath, entry } = loadSessionEntry(sessionKey);
const { cfg, storePath, entry } = loadSessionEntry(sessionKey);
const sessionId = entry?.sessionId;
const rawMessages =
sessionId && storePath
@@ -3296,10 +3316,22 @@ export async function startGatewayServer(
sliced,
MAX_CHAT_HISTORY_MESSAGES_BYTES,
).items;
const thinkingLevel =
entry?.thinkingLevel ??
loadConfig().agent?.thinkingDefault ??
"off";
let thinkingLevel = entry?.thinkingLevel;
if (!thinkingLevel) {
const configured = cfg.agent?.thinkingDefault;
if (configured) {
thinkingLevel = configured;
} else {
const { provider, model } = resolveSessionModelRef(cfg, entry);
const catalog = await loadGatewayModelCatalog();
thinkingLevel = resolveThinkingDefault({
cfg,
provider,
model,
catalog,
});
}
}
return {
ok: true,
payloadJSON: JSON.stringify({
@@ -4668,7 +4700,7 @@ export async function startGatewayServer(
sessionKey: string;
limit?: number;
};
const { storePath, entry } = loadSessionEntry(sessionKey);
const { cfg, storePath, entry } = loadSessionEntry(sessionKey);
const sessionId = entry?.sessionId;
const rawMessages =
sessionId && storePath
@@ -4687,10 +4719,22 @@ export async function startGatewayServer(
sliced,
MAX_CHAT_HISTORY_MESSAGES_BYTES,
).items;
const thinkingLevel =
entry?.thinkingLevel ??
loadConfig().agent?.thinkingDefault ??
"off";
let thinkingLevel = entry?.thinkingLevel;
if (!thinkingLevel) {
const configured = cfg.agent?.thinkingDefault;
if (configured) {
thinkingLevel = configured;
} else {
const { provider, model } = resolveSessionModelRef(cfg, entry);
const catalog = await loadGatewayModelCatalog();
thinkingLevel = resolveThinkingDefault({
cfg,
provider,
model,
catalog,
});
}
}
respond(true, {
sessionKey,
sessionId,

View File

@@ -52,6 +52,7 @@ export type GatewayModelChoice = {
name: string;
provider: string;
contextWindow?: number;
reasoning?: boolean;
};
export class GatewayChatClient {