diff --git a/CHANGELOG.md b/CHANGELOG.md index 07a77e411..913ded0c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - Telegram: chunk block-stream replies to avoid “message is too long” errors (#124) — thanks @mukhtharcm. - Agent tools: scope the Discord tool to Discord surface runs. - Agent tools: format verbose tool summaries without brackets, with unique emojis and `tool: detail` style. +- Thinking: default to low for reasoning-capable models when no /think or config default is set. ### Docs - Skills: add Sheets/Docs examples to gog skill (#128) — thanks @mbelinky. diff --git a/docs/thinking.md b/docs/thinking.md index 0946b89f6..0a2a196fe 100644 --- a/docs/thinking.md +++ b/docs/thinking.md @@ -18,7 +18,7 @@ read_when: 1. Inline directive on the message (applies only to that message). 2. Session override (set by sending a directive-only message). 3. Global default (`agent.thinkingDefault` in config). -4. Fallback: off. +4. Fallback: low for reasoning-capable models; off otherwise. ## Setting a session default - Send a message that is **only** the directive (whitespace allowed), e.g. `/think:medium` or `/t high`. diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts index f4aeb9dc2..6964fbbb8 100644 --- a/src/agents/model-catalog.ts +++ b/src/agents/model-catalog.ts @@ -7,6 +7,7 @@ export type ModelCatalogEntry = { name: string; provider: string; contextWindow?: number; + reasoning?: boolean; }; type DiscoveredModel = { @@ -14,6 +15,7 @@ type DiscoveredModel = { name?: string; provider: string; contextWindow?: number; + reasoning?: boolean; }; let modelCatalogPromise: Promise | null = null; @@ -56,7 +58,9 @@ export async function loadModelCatalog(params?: { typeof entry?.contextWindow === "number" && entry.contextWindow > 0 ? entry.contextWindow : undefined; - models.push({ id, name, provider, contextWindow }); + const reasoning = + typeof entry?.reasoning === "boolean" ? entry.reasoning : undefined; + models.push({ id, name, provider, contextWindow, reasoning }); } } catch { // Leave models empty on discovery errors. diff --git a/src/agents/model-selection.ts b/src/agents/model-selection.ts index e2568b11b..13b7b3a85 100644 --- a/src/agents/model-selection.ts +++ b/src/agents/model-selection.ts @@ -6,6 +6,8 @@ export type ModelRef = { model: string; }; +export type ThinkLevel = "off" | "minimal" | "low" | "medium" | "high"; + export type ModelAliasIndex = { byAlias: Map; byKey: Map; @@ -152,3 +154,19 @@ export function buildAllowedModelSet(params: { return { allowAny: false, allowedCatalog, allowedKeys }; } + +export function resolveThinkingDefault(params: { + cfg: ClawdisConfig; + provider: string; + model: string; + catalog?: ModelCatalogEntry[]; +}): ThinkLevel { + const configured = params.cfg.agent?.thinkingDefault; + if (configured) return configured; + const candidate = params.catalog?.find( + (entry) => + entry.provider === params.provider && entry.id === params.model, + ); + if (candidate?.reasoning) return "low"; + return "off"; +} diff --git a/src/auto-reply/reply.directive.test.ts b/src/auto-reply/reply.directive.test.ts index 0a3eb9ee2..1a2130503 100644 --- a/src/auto-reply/reply.directive.test.ts +++ b/src/auto-reply/reply.directive.test.ts @@ -700,4 +700,48 @@ describe("directive parsing", () => { expect(call?.model).toBe("gpt-4.1-mini"); }); }); + + it("defaults thinking to low for reasoning-capable models", async () => { + await withTempHome(async (home) => { + const storePath = path.join(home, "sessions.json"); + vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ + payloads: [{ text: "done" }], + meta: { + durationMs: 5, + agentMeta: { sessionId: "s", provider: "p", model: "m" }, + }, + }); + vi.mocked(loadModelCatalog).mockResolvedValueOnce([ + { + id: "claude-opus-4-5", + name: "Opus 4.5", + provider: "anthropic", + reasoning: true, + }, + ]); + + await getReplyFromConfig( + { + Body: "hello", + From: "+1004", + To: "+2000", + }, + {}, + { + agent: { + model: "anthropic/claude-opus-4-5", + workspace: path.join(home, "clawd"), + }, + whatsapp: { + allowFrom: ["*"], + }, + session: { store: storePath }, + }, + ); + + expect(runEmbeddedPiAgent).toHaveBeenCalledOnce(); + const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]; + expect(call?.thinkLevel).toBe("low"); + }); + }); }); diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 8b0caa4e1..619590584 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -13,6 +13,7 @@ import { modelKey, resolveConfiguredModelRef, resolveModelRefFromString, + resolveThinkingDefault, } from "../agents/model-selection.js"; import { abortEmbeddedPiRun, @@ -1094,13 +1095,14 @@ export async function getReplyFromConfig( hasModelDirective || hasAllowlist || hasStoredOverride; let allowedModelKeys = new Set(); let allowedModelCatalog: Awaited> = []; + let modelCatalog: Awaited> | null = null; let resetModelOverride = false; if (needsModelCatalog) { - const catalog = await loadModelCatalog({ config: cfg }); + modelCatalog = await loadModelCatalog({ config: cfg }); const allowed = buildAllowedModelSet({ cfg, - catalog, + catalog: modelCatalog, defaultProvider, }); allowedModelCatalog = allowed.allowedCatalog; @@ -1134,6 +1136,22 @@ export async function getReplyFromConfig( model = storedModelOverride; } } + let defaultThinkingLevel: ThinkLevel | undefined; + const resolveDefaultThinkingLevel = async () => { + if (defaultThinkingLevel) return defaultThinkingLevel; + let catalogForThinking = modelCatalog ?? allowedModelCatalog; + if (!catalogForThinking || catalogForThinking.length === 0) { + modelCatalog = await loadModelCatalog({ config: cfg }); + catalogForThinking = modelCatalog; + } + defaultThinkingLevel = resolveThinkingDefault({ + cfg, + provider, + model, + catalog: catalogForThinking, + }); + return defaultThinkingLevel; + }; contextTokens = agentCfg?.contextTokens ?? lookupContextTokens(model) ?? @@ -1589,7 +1607,8 @@ export async function getReplyFromConfig( sessionScope, storePath, groupActivation, - resolvedThink: resolvedThinkLevel, + resolvedThink: + resolvedThinkLevel ?? (await resolveDefaultThinkingLevel()), resolvedVerbose: resolvedVerboseLevel, webLinked, webAuthAgeMs, @@ -1820,6 +1839,9 @@ export async function getReplyFromConfig( commandBody = parts.slice(1).join(" ").trim(); } } + if (!resolvedThinkLevel) { + resolvedThinkLevel = await resolveDefaultThinkingLevel(); + } const sessionIdFinal = sessionId ?? crypto.randomUUID(); const sessionFile = resolveSessionTranscriptPath(sessionIdFinal); diff --git a/src/commands/agent.test.ts b/src/commands/agent.test.ts index d36bf01ca..d91970f19 100644 --- a/src/commands/agent.test.ts +++ b/src/commands/agent.test.ts @@ -17,8 +17,12 @@ vi.mock("../agents/pi-embedded.js", () => ({ resolveEmbeddedSessionLane: (key: string) => `session:${key.trim() || "main"}`, })); +vi.mock("../agents/model-catalog.js", () => ({ + loadModelCatalog: vi.fn(), +})); import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; +import { loadModelCatalog } from "../agents/model-catalog.js"; import type { ClawdisConfig } from "../config/config.js"; import * as configModule from "../config/config.js"; import type { RuntimeEnv } from "../runtime.js"; @@ -74,6 +78,7 @@ beforeEach(() => { agentMeta: { sessionId: "s", provider: "p", model: "m" }, }, }); + vi.mocked(loadModelCatalog).mockResolvedValue([]); }); describe("agentCommand", () => { @@ -162,6 +167,26 @@ describe("agentCommand", () => { }); }); + it("defaults thinking to low for reasoning-capable models", async () => { + await withTempHome(async (home) => { + const store = path.join(home, "sessions.json"); + mockConfig(home, store); + vi.mocked(loadModelCatalog).mockResolvedValueOnce([ + { + id: "claude-opus-4-5", + name: "Opus 4.5", + provider: "anthropic", + reasoning: true, + }, + ]); + + await agentCommand({ message: "hi", to: "+1555" }, runtime); + + const callArgs = vi.mocked(runEmbeddedPiAgent).mock.calls.at(-1)?.[0]; + expect(callArgs?.thinkLevel).toBe("low"); + }); + }); + it("prints JSON payload when requested", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ diff --git a/src/commands/agent.ts b/src/commands/agent.ts index fcb693992..e05e45a64 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -10,6 +10,7 @@ import { buildAllowedModelSet, modelKey, resolveConfiguredModelRef, + resolveThinkingDefault, } from "../agents/model-selection.js"; import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; import { buildWorkspaceSkillSnapshot } from "../agents/skills.js"; @@ -211,7 +212,7 @@ export async function agentCommand( registerAgentRunContext(sessionId, { sessionKey }); } - const resolvedThinkLevel = + let resolvedThinkLevel = thinkOnce ?? thinkOverride ?? persistedThinking ?? @@ -275,15 +276,18 @@ export async function agentCommand( ); const needsModelCatalog = hasAllowlist || hasStoredOverride; let allowedModelKeys = new Set(); + let allowedModelCatalog: Awaited> = []; + let modelCatalog: Awaited> | null = null; if (needsModelCatalog) { - const catalog = await loadModelCatalog({ config: cfg }); + modelCatalog = await loadModelCatalog({ config: cfg }); const allowed = buildAllowedModelSet({ cfg, - catalog, + catalog: modelCatalog, defaultProvider, }); allowedModelKeys = allowed.allowedKeys; + allowedModelCatalog = allowed.allowedCatalog; } if (sessionEntry && sessionStore && sessionKey && hasStoredOverride) { @@ -312,6 +316,20 @@ export async function agentCommand( model = storedModelOverride; } } + + if (!resolvedThinkLevel) { + let catalogForThinking = modelCatalog ?? allowedModelCatalog; + if (!catalogForThinking || catalogForThinking.length === 0) { + modelCatalog = await loadModelCatalog({ config: cfg }); + catalogForThinking = modelCatalog; + } + resolvedThinkLevel = resolveThinkingDefault({ + cfg, + provider, + model, + catalog: catalogForThinking, + }); + } const sessionFile = resolveSessionTranscriptPath(sessionId); const startedAt = Date.now(); diff --git a/src/cron/isolated-agent.test.ts b/src/cron/isolated-agent.test.ts index 10bb46c74..685776ba3 100644 --- a/src/cron/isolated-agent.test.ts +++ b/src/cron/isolated-agent.test.ts @@ -14,8 +14,12 @@ vi.mock("../agents/pi-embedded.js", () => ({ resolveEmbeddedSessionLane: (key: string) => `session:${key.trim() || "main"}`, })); +vi.mock("../agents/model-catalog.js", () => ({ + loadModelCatalog: vi.fn(), +})); import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; +import { loadModelCatalog } from "../agents/model-catalog.js"; import { runCronIsolatedAgentTurn } from "./isolated-agent.js"; async function withTempHome(fn: (home: string) => Promise): Promise { @@ -87,6 +91,7 @@ function makeJob(payload: CronJob["payload"]): CronJob { describe("runCronIsolatedAgentTurn", () => { beforeEach(() => { vi.mocked(runEmbeddedPiAgent).mockReset(); + vi.mocked(loadModelCatalog).mockResolvedValue([]); }); it("uses last non-empty agent text as summary", async () => { @@ -121,6 +126,46 @@ describe("runCronIsolatedAgentTurn", () => { }); }); + it("defaults thinking to low for reasoning-capable models", async () => { + await withTempHome(async (home) => { + const storePath = await writeSessionStore(home); + const deps: CliDeps = { + sendMessageWhatsApp: vi.fn(), + sendMessageTelegram: vi.fn(), + sendMessageDiscord: vi.fn(), + sendMessageSignal: vi.fn(), + sendMessageIMessage: vi.fn(), + }; + vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ + payloads: [{ text: "done" }], + meta: { + durationMs: 5, + agentMeta: { sessionId: "s", provider: "p", model: "m" }, + }, + }); + vi.mocked(loadModelCatalog).mockResolvedValueOnce([ + { + id: "claude-opus-4-5", + name: "Opus 4.5", + provider: "anthropic", + reasoning: true, + }, + ]); + + await runCronIsolatedAgentTurn({ + cfg: makeCfg(home, storePath), + deps, + job: makeJob({ kind: "agentTurn", message: "do it", deliver: false }), + message: "do it", + sessionKey: "cron:job-1", + lane: "cron", + }); + + const callArgs = vi.mocked(runEmbeddedPiAgent).mock.calls.at(-1)?.[0]; + expect(callArgs?.thinkLevel).toBe("low"); + }); + }); + it("truncates long summaries", async () => { await withTempHome(async (home) => { const storePath = await writeSessionStore(home); diff --git a/src/cron/isolated-agent.ts b/src/cron/isolated-agent.ts index 2eb59c440..7cdd331f3 100644 --- a/src/cron/isolated-agent.ts +++ b/src/cron/isolated-agent.ts @@ -5,7 +5,11 @@ import { DEFAULT_MODEL, DEFAULT_PROVIDER, } from "../agents/defaults.js"; -import { resolveConfiguredModelRef } from "../agents/model-selection.js"; +import { loadModelCatalog } from "../agents/model-catalog.js"; +import { + resolveConfiguredModelRef, + resolveThinkingDefault, +} from "../agents/model-selection.js"; import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; import { buildWorkspaceSkillSnapshot } from "../agents/skills.js"; import { @@ -189,7 +193,16 @@ export async function runCronIsolatedAgentTurn(params: { ? params.job.payload.thinking : undefined) ?? undefined, ); - const thinkLevel = jobThink ?? thinkOverride; + let thinkLevel = jobThink ?? thinkOverride; + if (!thinkLevel) { + const catalog = await loadModelCatalog({ config: params.cfg }); + thinkLevel = resolveThinkingDefault({ + cfg: params.cfg, + provider, + model, + catalog, + }); + } const timeoutSecondsRaw = params.job.payload.kind === "agentTurn" && params.job.payload.timeoutSeconds diff --git a/src/gateway/protocol/schema.ts b/src/gateway/protocol/schema.ts index 104d1cecc..5f92796f1 100644 --- a/src/gateway/protocol/schema.ts +++ b/src/gateway/protocol/schema.ts @@ -380,6 +380,7 @@ export const ModelChoiceSchema = Type.Object( name: NonEmptyString, provider: NonEmptyString, contextWindow: Type.Optional(Type.Integer({ minimum: 1 })), + reasoning: Type.Optional(Type.Boolean()), }, { additionalProperties: false }, ); diff --git a/src/gateway/server.test.ts b/src/gateway/server.test.ts index 8bfe26d78..3f2d35bd4 100644 --- a/src/gateway/server.test.ts +++ b/src/gateway/server.test.ts @@ -82,6 +82,7 @@ const piSdkMock = vi.hoisted(() => ({ name?: string; provider: string; contextWindow?: number; + reasoning?: boolean; }>, })); const cronIsolatedRun = vi.hoisted(() => @@ -2807,6 +2808,57 @@ describe("gateway server", () => { await server.close(); }); + test("chat.history defaults thinking to low for reasoning-capable models", async () => { + piSdkMock.enabled = true; + piSdkMock.models = [ + { + id: "claude-opus-4-5", + name: "Opus 4.5", + provider: "anthropic", + reasoning: true, + }, + ]; + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-gw-")); + testSessionStorePath = path.join(dir, "sessions.json"); + await fs.writeFile( + testSessionStorePath, + JSON.stringify( + { + main: { + sessionId: "sess-main", + updatedAt: Date.now(), + }, + }, + null, + 2, + ), + "utf-8", + ); + await fs.writeFile( + path.join(dir, "sess-main.jsonl"), + JSON.stringify({ + message: { + role: "user", + content: [{ type: "text", text: "hello" }], + timestamp: Date.now(), + }, + }), + "utf-8", + ); + + const { server, ws } = await startServerWithClient(); + await connectOk(ws); + + const res = await rpcReq<{ thinkingLevel?: string }>(ws, "chat.history", { + sessionKey: "main", + }); + expect(res.ok).toBe(true); + expect(res.payload?.thinkingLevel).toBe("low"); + + ws.close(); + await server.close(); + }); + test("chat.history caps payload bytes", { timeout: 15_000 }, async () => { const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdis-gw-")); testSessionStorePath = path.join(dir, "sessions.json"); diff --git a/src/gateway/server.ts b/src/gateway/server.ts index c920d2c03..119b0e9b1 100644 --- a/src/gateway/server.ts +++ b/src/gateway/server.ts @@ -26,6 +26,7 @@ import { modelKey, resolveConfiguredModelRef, resolveModelRefFromString, + resolveThinkingDefault, } from "../agents/model-selection.js"; import { installSkill } from "../agents/skills-install.js"; import { buildWorkspaceSkillStatus } from "../agents/skills-status.js"; @@ -925,6 +926,25 @@ function getSessionDefaults(cfg: ClawdisConfig): GatewaySessionsDefaults { }; } +function resolveSessionModelRef( + cfg: ClawdisConfig, + entry?: SessionEntry, +): { provider: string; model: string } { + const resolved = resolveConfiguredModelRef({ + cfg, + defaultProvider: DEFAULT_PROVIDER, + defaultModel: DEFAULT_MODEL, + }); + let provider = resolved.provider; + let model = resolved.model; + const storedModelOverride = entry?.modelOverride?.trim(); + if (storedModelOverride) { + provider = entry?.providerOverride?.trim() || provider; + model = storedModelOverride; + } + return { provider, model }; +} + function listSessionsFromStore(params: { cfg: ClawdisConfig; storePath: string; @@ -3283,7 +3303,7 @@ export async function startGatewayServer( sessionKey: string; limit?: number; }; - const { storePath, entry } = loadSessionEntry(sessionKey); + const { cfg, storePath, entry } = loadSessionEntry(sessionKey); const sessionId = entry?.sessionId; const rawMessages = sessionId && storePath @@ -3296,10 +3316,22 @@ export async function startGatewayServer( sliced, MAX_CHAT_HISTORY_MESSAGES_BYTES, ).items; - const thinkingLevel = - entry?.thinkingLevel ?? - loadConfig().agent?.thinkingDefault ?? - "off"; + let thinkingLevel = entry?.thinkingLevel; + if (!thinkingLevel) { + const configured = cfg.agent?.thinkingDefault; + if (configured) { + thinkingLevel = configured; + } else { + const { provider, model } = resolveSessionModelRef(cfg, entry); + const catalog = await loadGatewayModelCatalog(); + thinkingLevel = resolveThinkingDefault({ + cfg, + provider, + model, + catalog, + }); + } + } return { ok: true, payloadJSON: JSON.stringify({ @@ -4668,7 +4700,7 @@ export async function startGatewayServer( sessionKey: string; limit?: number; }; - const { storePath, entry } = loadSessionEntry(sessionKey); + const { cfg, storePath, entry } = loadSessionEntry(sessionKey); const sessionId = entry?.sessionId; const rawMessages = sessionId && storePath @@ -4687,10 +4719,22 @@ export async function startGatewayServer( sliced, MAX_CHAT_HISTORY_MESSAGES_BYTES, ).items; - const thinkingLevel = - entry?.thinkingLevel ?? - loadConfig().agent?.thinkingDefault ?? - "off"; + let thinkingLevel = entry?.thinkingLevel; + if (!thinkingLevel) { + const configured = cfg.agent?.thinkingDefault; + if (configured) { + thinkingLevel = configured; + } else { + const { provider, model } = resolveSessionModelRef(cfg, entry); + const catalog = await loadGatewayModelCatalog(); + thinkingLevel = resolveThinkingDefault({ + cfg, + provider, + model, + catalog, + }); + } + } respond(true, { sessionKey, sessionId, diff --git a/src/tui/gateway-chat.ts b/src/tui/gateway-chat.ts index 4d81f5d69..1483d2320 100644 --- a/src/tui/gateway-chat.ts +++ b/src/tui/gateway-chat.ts @@ -52,6 +52,7 @@ export type GatewayModelChoice = { name: string; provider: string; contextWindow?: number; + reasoning?: boolean; }; export class GatewayChatClient {