fix: default low thinking for reasoning models

This commit is contained in:
Peter Steinberger
2026-01-03 12:18:50 +00:00
parent 6e16c0699a
commit b6301c719b
14 changed files with 308 additions and 20 deletions

View File

@@ -14,8 +14,12 @@ vi.mock("../agents/pi-embedded.js", () => ({
resolveEmbeddedSessionLane: (key: string) =>
`session:${key.trim() || "main"}`,
}));
// Replace the model-catalog module with a bare mock so each test can decide
// what `loadModelCatalog` resolves to.
vi.mock("../agents/model-catalog.js", () => {
  return { loadModelCatalog: vi.fn() };
});
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import { runCronIsolatedAgentTurn } from "./isolated-agent.js";
async function withTempHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
@@ -87,6 +91,7 @@ function makeJob(payload: CronJob["payload"]): CronJob {
describe("runCronIsolatedAgentTurn", () => {
// Give every test a clean slate for both mocked agent modules.
beforeEach(() => {
  vi.mocked(runEmbeddedPiAgent).mockReset();
  // Reset first: `mockResolvedValue` alone only swaps the default
  // implementation and leaves any queued `mockResolvedValueOnce` values in
  // place, so an unconsumed one-shot catalog could leak into the next test.
  vi.mocked(loadModelCatalog).mockReset();
  // Default to an empty catalog; individual tests override this as needed.
  vi.mocked(loadModelCatalog).mockResolvedValue([]);
});
it("uses last non-empty agent text as summary", async () => {
@@ -121,6 +126,46 @@ describe("runCronIsolatedAgentTurn", () => {
});
});
// The job payload below carries no thinking level and the mocked catalog
// reports a reasoning-capable model; the embedded agent run is then expected
// to be invoked with thinkLevel "low".
it("defaults thinking to low for reasoning-capable models", async () => {
  await withTempHome(async (homeDir) => {
    const sessionStorePath = await writeSessionStore(homeDir);

    // Outbound senders are stubbed; this test only inspects the agent call.
    const outboundStubs: CliDeps = {
      sendMessageWhatsApp: vi.fn(),
      sendMessageTelegram: vi.fn(),
      sendMessageDiscord: vi.fn(),
      sendMessageSignal: vi.fn(),
      sendMessageIMessage: vi.fn(),
    };

    // One-shot catalog containing a single model flagged with reasoning.
    const reasoningModel = {
      id: "claude-opus-4-5",
      name: "Opus 4.5",
      provider: "anthropic",
      reasoning: true,
    };
    vi.mocked(loadModelCatalog).mockResolvedValueOnce([reasoningModel]);

    // Canned agent result so the turn completes without a real agent run.
    vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
      payloads: [{ text: "done" }],
      meta: {
        durationMs: 5,
        agentMeta: { sessionId: "s", provider: "p", model: "m" },
      },
    });

    const cfg = makeCfg(homeDir, sessionStorePath);
    const job = makeJob({ kind: "agentTurn", message: "do it", deliver: false });
    await runCronIsolatedAgentTurn({
      cfg,
      deps: outboundStubs,
      job,
      message: "do it",
      sessionKey: "cron:job-1",
      lane: "cron",
    });

    // Inspect the first argument of the most recent embedded-agent call.
    const lastInvocation = vi.mocked(runEmbeddedPiAgent).mock.calls.at(-1);
    const agentParams = lastInvocation?.[0];
    expect(agentParams?.thinkLevel).toBe("low");
  });
});
it("truncates long summaries", async () => {
await withTempHome(async (home) => {
const storePath = await writeSessionStore(home);

View File

@@ -5,7 +5,11 @@ import {
DEFAULT_MODEL,
DEFAULT_PROVIDER,
} from "../agents/defaults.js";
import { resolveConfiguredModelRef } from "../agents/model-selection.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import {
resolveConfiguredModelRef,
resolveThinkingDefault,
} from "../agents/model-selection.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { buildWorkspaceSkillSnapshot } from "../agents/skills.js";
import {
@@ -189,7 +193,16 @@ export async function runCronIsolatedAgentTurn(params: {
? params.job.payload.thinking
: undefined) ?? undefined,
);
const thinkLevel = jobThink ?? thinkOverride;
let thinkLevel = jobThink ?? thinkOverride;
if (!thinkLevel) {
const catalog = await loadModelCatalog({ config: params.cfg });
thinkLevel = resolveThinkingDefault({
cfg: params.cfg,
provider,
model,
catalog,
});
}
const timeoutSecondsRaw =
params.job.payload.kind === "agentTurn" && params.job.payload.timeoutSeconds