Files
clawdbot/src/auto-reply/reply.directive.directive-behavior.accepts-thinking-xhigh-codex-models.e2e.test.ts
2026-01-23 18:34:33 +00:00

331 lines
10 KiB
TypeScript

import fs from "node:fs/promises";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { loadSessionStore } from "../config/sessions.js";
import { getReplyFromConfig } from "./reply.js";
const MAIN_SESSION_KEY = "agent:main:main";
async function writeSkill(params: { workspaceDir: string; name: string; description: string }) {
const { workspaceDir, name, description } = params;
const skillDir = path.join(workspaceDir, "skills", name);
await fs.mkdir(skillDir, { recursive: true });
await fs.writeFile(
path.join(skillDir, "SKILL.md"),
`---\nname: ${name}\ndescription: ${description}\n---\n\n# ${name}\n`,
"utf-8",
);
}
vi.mock("../agents/pi-embedded.js", () => ({
abortEmbeddedPiRun: vi.fn().mockReturnValue(false),
runEmbeddedPiAgent: vi.fn(),
queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
resolveEmbeddedSessionLane: (key: string) => `session:${key.trim() || "main"}`,
isEmbeddedPiRunActive: vi.fn().mockReturnValue(false),
isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false),
}));
vi.mock("../agents/model-catalog.js", () => ({
loadModelCatalog: vi.fn(),
}));
async function withTempHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
return withTempHomeBase(
async (home) => {
return await fn(home);
},
{
env: {
CLAWDBOT_AGENT_DIR: (home) => path.join(home, ".clawdbot", "agent"),
PI_CODING_AGENT_DIR: (home) => path.join(home, ".clawdbot", "agent"),
},
prefix: "clawdbot-reply-",
},
);
}
function _assertModelSelection(
storePath: string,
selection: { model?: string; provider?: string } = {},
) {
const store = loadSessionStore(storePath);
const entry = store[MAIN_SESSION_KEY];
expect(entry).toBeDefined();
expect(entry?.modelOverride).toBe(selection.model);
expect(entry?.providerOverride).toBe(selection.provider);
}
describe("directive behavior", () => {
beforeEach(() => {
vi.mocked(runEmbeddedPiAgent).mockReset();
vi.mocked(loadModelCatalog).mockResolvedValue([
{ id: "claude-opus-4-5", name: "Opus 4.5", provider: "anthropic" },
{ id: "claude-sonnet-4-1", name: "Sonnet 4.1", provider: "anthropic" },
{ id: "gpt-4.1-mini", name: "GPT-4.1 Mini", provider: "openai" },
]);
});
afterEach(() => {
vi.restoreAllMocks();
});
it("accepts /thinking xhigh for codex models", async () => {
await withTempHome(async (home) => {
const storePath = path.join(home, "sessions.json");
const res = await getReplyFromConfig(
{
Body: "/thinking xhigh",
From: "+1004",
To: "+2000",
CommandAuthorized: true,
},
{},
{
agents: {
defaults: {
model: "openai-codex/gpt-5.2-codex",
workspace: path.join(home, "clawd"),
},
},
channels: { whatsapp: { allowFrom: ["*"] } },
session: { store: storePath },
},
);
const texts = (Array.isArray(res) ? res : [res]).map((entry) => entry?.text).filter(Boolean);
expect(texts).toContain("Thinking level set to xhigh.");
});
});
it("accepts /thinking xhigh for openai gpt-5.2", async () => {
await withTempHome(async (home) => {
const storePath = path.join(home, "sessions.json");
const res = await getReplyFromConfig(
{
Body: "/thinking xhigh",
From: "+1004",
To: "+2000",
CommandAuthorized: true,
},
{},
{
agents: {
defaults: {
model: "openai/gpt-5.2",
workspace: path.join(home, "clawd"),
},
},
channels: { whatsapp: { allowFrom: ["*"] } },
session: { store: storePath },
},
);
const texts = (Array.isArray(res) ? res : [res]).map((entry) => entry?.text).filter(Boolean);
expect(texts).toContain("Thinking level set to xhigh.");
});
});
it("rejects /thinking xhigh for non-codex models", async () => {
await withTempHome(async (home) => {
const storePath = path.join(home, "sessions.json");
const res = await getReplyFromConfig(
{
Body: "/thinking xhigh",
From: "+1004",
To: "+2000",
CommandAuthorized: true,
},
{},
{
agents: {
defaults: {
model: "openai/gpt-4.1-mini",
workspace: path.join(home, "clawd"),
},
},
channels: { whatsapp: { allowFrom: ["*"] } },
session: { store: storePath },
},
);
const texts = (Array.isArray(res) ? res : [res]).map((entry) => entry?.text).filter(Boolean);
expect(texts).toContain(
'Thinking level "xhigh" is only supported for openai/gpt-5.2, openai-codex/gpt-5.2-codex or openai-codex/gpt-5.1-codex.',
);
});
});
it("keeps reserved command aliases from matching after trimming", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockReset();
const res = await getReplyFromConfig(
{
Body: "/help",
From: "+1222",
To: "+1222",
CommandAuthorized: true,
},
{},
{
agents: {
defaults: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
models: {
"anthropic/claude-opus-4-5": { alias: " help " },
},
},
},
channels: { whatsapp: { allowFrom: ["*"] } },
session: { store: path.join(home, "sessions.json") },
},
);
const text = Array.isArray(res) ? res[0]?.text : res?.text;
expect(text).toContain("Help");
expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
});
});
it("treats skill commands as reserved for model aliases", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockReset();
const workspace = path.join(home, "clawd");
await writeSkill({
workspaceDir: workspace,
name: "demo-skill",
description: "Demo skill",
});
await getReplyFromConfig(
{
Body: "/demo_skill",
From: "+1222",
To: "+1222",
CommandAuthorized: true,
},
{},
{
agents: {
defaults: {
model: "anthropic/claude-opus-4-5",
workspace,
models: {
"anthropic/claude-opus-4-5": { alias: "demo_skill" },
},
},
},
channels: { whatsapp: { allowFrom: ["*"] } },
session: { store: path.join(home, "sessions.json") },
},
);
expect(runEmbeddedPiAgent).toHaveBeenCalled();
const prompt = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]?.prompt ?? "";
expect(prompt).toContain('Use the "demo-skill" skill');
});
});
it("errors on invalid queue options", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockReset();
const res = await getReplyFromConfig(
{
Body: "/queue collect debounce:bogus cap:zero drop:maybe",
From: "+1222",
To: "+1222",
CommandAuthorized: true,
},
{},
{
agents: {
defaults: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
},
},
channels: { whatsapp: { allowFrom: ["*"] } },
session: { store: path.join(home, "sessions.json") },
},
);
const text = Array.isArray(res) ? res[0]?.text : res?.text;
expect(text).toContain("Invalid debounce");
expect(text).toContain("Invalid cap");
expect(text).toContain("Invalid drop policy");
expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
});
});
it("shows current queue settings when /queue has no arguments", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockReset();
const res = await getReplyFromConfig(
{
Body: "/queue",
From: "+1222",
To: "+1222",
Provider: "whatsapp",
CommandAuthorized: true,
},
{},
{
agents: {
defaults: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
},
},
messages: {
queue: {
mode: "collect",
debounceMs: 1500,
cap: 9,
drop: "summarize",
},
},
channels: { whatsapp: { allowFrom: ["*"] } },
session: { store: path.join(home, "sessions.json") },
},
);
const text = Array.isArray(res) ? res[0]?.text : res?.text;
expect(text).toContain(
"Current queue settings: mode=collect, debounce=1500ms, cap=9, drop=summarize.",
);
expect(text).toContain(
"Options: modes steer, followup, collect, steer+backlog, interrupt; debounce:<ms|s|m>, cap:<n>, drop:old|new|summarize.",
);
expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
});
});
it("shows current think level when /think has no argument", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockReset();
const res = await getReplyFromConfig(
{ Body: "/think", From: "+1222", To: "+1222", CommandAuthorized: true },
{},
{
agents: {
defaults: {
model: "anthropic/claude-opus-4-5",
workspace: path.join(home, "clawd"),
thinkingDefault: "high",
},
},
session: { store: path.join(home, "sessions.json") },
},
);
const text = Array.isArray(res) ? res[0]?.text : res?.text;
expect(text).toContain("Current thinking level: high");
expect(text).toContain("Options: off, minimal, low, medium, high.");
expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
});
});
});