fix: enforce reasoning tags on fallback providers (#810) (thanks @mcinteerj)
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
- Tools: allow Claude/Gemini tool param aliases (`file_path`, `old_string`, `new_string`) while enforcing required params at runtime. (#793 — thanks @hsrvc)
|
||||
- Gemini: downgrade tool-call history missing `thought_signature` to avoid INVALID_ARGUMENT errors. (#793 — thanks @hsrvc)
|
||||
- Messaging: enforce context isolation for message tool sends across providers (normalized targets + tests). (#793 — thanks @hsrvc)
|
||||
- Auto-reply: re-evaluate reasoning tag enforcement on fallback providers to prevent leaked reasoning. (#810 — thanks @mcinteerj)
|
||||
|
||||
## 2026.1.12-3
|
||||
|
||||
|
||||
178
src/auto-reply/reply/agent-runner.reasoning-tags.test.ts
Normal file
178
src/auto-reply/reply/agent-runner.reasoning-tags.test.ts
Normal file
@@ -0,0 +1,178 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import type { SessionEntry } from "../../config/sessions.js";
|
||||
import type { TemplateContext } from "../templating.js";
|
||||
import { DEFAULT_MEMORY_FLUSH_PROMPT } from "./memory-flush.js";
|
||||
import type { FollowupRun, QueueSettings } from "./queue.js";
|
||||
import { createMockTypingController } from "./test-helpers.js";
|
||||
|
||||
// Shared spies that the module mocks below delegate to. Each test scripts
// these to control what the embedded agent and the fallback wrapper return.
const runEmbeddedPiAgentMock = vi.fn();
const runWithModelFallbackMock = vi.fn();

// Replace the model-fallback module so tests decide which provider/model the
// `run` callback is invoked with.
vi.mock("../../agents/model-fallback.js", () => {
  type FallbackParams = {
    provider: string;
    model: string;
    run: (provider: string, model: string) => Promise<unknown>;
  };

  return {
    // Delegate at call time so the implementation installed by the current
    // test (after mockReset) is the one that runs.
    runWithModelFallback: (params: FallbackParams) =>
      runWithModelFallbackMock(params),
  };
});

// Replace the embedded agent: queueing always reports `false`, and runs are
// forwarded to the spy so tests can inspect the params they received.
vi.mock("../../agents/pi-embedded.js", () => {
  const queueEmbeddedPiMessage = vi.fn().mockReturnValue(false);

  return {
    queueEmbeddedPiMessage,
    runEmbeddedPiAgent: (params: unknown) => runEmbeddedPiAgentMock(params),
  };
});

// Keep the real queue module but stub out the two follow-up scheduling entry
// points with bare spies.
vi.mock("./queue.js", async () => {
  const realQueue =
    await vi.importActual<typeof import("./queue.js")>("./queue.js");
  const followupStubs = {
    enqueueFollowupRun: vi.fn(),
    scheduleFollowupDrain: vi.fn(),
  };

  return { ...realQueue, ...followupStubs };
});
|
||||
|
||||
import { runReplyAgent } from "./agent-runner.js";
|
||||
|
||||
/**
 * The subset of the params handed to the mocked `runEmbeddedPiAgent` that
 * the tests in this file read.
 */
type EmbeddedPiAgentParams = {
  /**
   * Prompt text of the run; the tests compare it against
   * `DEFAULT_MEMORY_FLUSH_PROMPT` to pick out the memory-flush call.
   */
  prompt?: string;
  /** Flag the tests expect to be `true` when the run lands on the fallback provider. */
  enforceFinalTag?: boolean;
};
|
||||
|
||||
/**
 * Invokes `runReplyAgent` with a minimal hand-built fixture and returns its
 * result (the promise/value produced by `runReplyAgent`).
 *
 * The fixture objects are cast through `unknown`, so they only carry the
 * fields listed here rather than complete project types.
 *
 * @param params Optional overrides.
 * @param params.sessionEntry Session entry forwarded to `runReplyAgent` as-is.
 * @param params.sessionKey Session key used for both the follow-up run and the
 *   top-level call; defaults to `"main"`.
 * @param params.agentCfgContextTokens Forwarded to `runReplyAgent` as-is.
 */
function createRun(params?: {
  sessionEntry?: SessionEntry;
  sessionKey?: string;
  agentCfgContextTokens?: number;
}) {
  const typingController = createMockTypingController();
  const activeSessionKey = params?.sessionKey ?? "main";

  const templateCtx = {
    Provider: "whatsapp",
    OriginatingTo: "+15550001111",
    AccountId: "primary",
    MessageSid: "msg",
  } as unknown as TemplateContext;

  const queueSettings = { mode: "interrupt" } as unknown as QueueSettings;

  // Elevated bash is fully disabled for this fixture.
  const bashElevated = {
    enabled: false,
    allowed: false,
    defaultLevel: "off",
  };

  // The primary provider is anthropic; the tests simulate falling back to a
  // different provider through the mocked fallback wrapper.
  const runConfig = {
    agentId: "main",
    agentDir: "/tmp/agent",
    sessionId: "session",
    sessionKey: activeSessionKey,
    messageProvider: "whatsapp",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    config: {},
    skillsSnapshot: {},
    provider: "anthropic",
    model: "claude",
    thinkLevel: "low",
    verboseLevel: "off",
    elevatedLevel: "off",
    bashElevated,
    timeoutMs: 1_000,
    blockReplyBreak: "message_end",
  };

  const queuedRun = {
    prompt: "hello",
    summaryLine: "hello",
    enqueuedAt: Date.now(),
    run: runConfig,
  } as unknown as FollowupRun;

  return runReplyAgent({
    commandBody: "hello",
    followupRun: queuedRun,
    queueKey: "main",
    resolvedQueue: queueSettings,
    shouldSteer: false,
    shouldFollowup: false,
    isActive: false,
    isStreaming: false,
    typing: typingController,
    sessionCtx: templateCtx,
    sessionEntry: params?.sessionEntry,
    sessionKey: activeSessionKey,
    defaultModel: "anthropic/claude-opus-4-5",
    agentCfgContextTokens: params?.agentCfgContextTokens,
    resolvedVerboseLevel: "off",
    isNewSession: false,
    blockStreamingEnabled: false,
    resolvedBlockStreamingBreak: "message_end",
    shouldInjectGroupIntro: false,
    typingMode: "instant",
  });
}
|
||||
|
||||
// Verifies that `enforceFinalTag` is passed to the embedded agent when the
// mocked fallback wrapper runs the request on a different provider than the
// fixture's primary one.
describe("runReplyAgent fallback reasoning tags", () => {
  type FallbackRun = (provider: string, model: string) => Promise<unknown>;

  const FALLBACK_PROVIDER = "google-antigravity";
  const FALLBACK_MODEL = "gemini-3";

  // Stand-in for the fallback wrapper: runs the callback on the fallback
  // provider/model and reports that pair back alongside the result.
  const runOnFallback = async ({ run }: { run: FallbackRun }) => ({
    result: await run(FALLBACK_PROVIDER, FALLBACK_MODEL),
    provider: FALLBACK_PROVIDER,
    model: FALLBACK_MODEL,
  });

  beforeEach(() => {
    // Drop recorded calls and scripted implementations from the previous test.
    runEmbeddedPiAgentMock.mockReset();
    runWithModelFallbackMock.mockReset();
  });

  it("enforces <final> when the fallback provider requires reasoning tags", async () => {
    runEmbeddedPiAgentMock.mockResolvedValueOnce({
      payloads: [{ text: "ok" }],
      meta: {},
    });
    runWithModelFallbackMock.mockImplementationOnce(runOnFallback);

    await createRun();

    const [firstCall] = runEmbeddedPiAgentMock.mock.calls;
    const agentParams = firstCall?.[0] as EmbeddedPiAgentParams | undefined;
    expect(agentParams?.enforceFinalTag).toBe(true);
  });

  it("enforces <final> during memory flush on fallback providers", async () => {
    // The memory-flush run yields no payloads; any other run replies "ok".
    runEmbeddedPiAgentMock.mockImplementation(
      async ({ prompt }: EmbeddedPiAgentParams) => ({
        payloads:
          prompt === DEFAULT_MEMORY_FLUSH_PROMPT ? [] : [{ text: "ok" }],
        meta: {},
      }),
    );
    runWithModelFallbackMock.mockImplementation(runOnFallback);

    // NOTE(review): the very large totalTokens is presumably what triggers the
    // memory-flush path — confirm against the flush threshold logic.
    const sessionEntry = {
      sessionId: "session",
      updatedAt: Date.now(),
      totalTokens: 1_000_000,
      compactionCount: 0,
    };
    await createRun({ sessionEntry });

    // Locate the embedded-agent call made with the memory-flush prompt.
    const flushParams = runEmbeddedPiAgentMock.mock.calls
      .map(([arg]) => arg as EmbeddedPiAgentParams | undefined)
      .find((arg) => arg?.prompt === DEFAULT_MEMORY_FLUSH_PROMPT);

    expect(flushParams?.enforceFinalTag).toBe(true);
  });
});
|
||||
@@ -40,13 +40,13 @@ import { getProviderDock } from "../../providers/dock.js";
|
||||
import type { ProviderThreadingToolContext } from "../../providers/plugins/types.js";
|
||||
import { normalizeProviderId } from "../../providers/registry.js";
|
||||
import { defaultRuntime } from "../../runtime.js";
|
||||
import { isReasoningTagProvider } from "../../utils/provider-utils.js";
|
||||
import {
|
||||
estimateUsageCost,
|
||||
formatTokenCount,
|
||||
formatUsd,
|
||||
resolveModelCostConfig,
|
||||
} from "../../utils/usage-format.js";
|
||||
import { isReasoningTagProvider } from "../../utils/provider-utils.js";
|
||||
import { stripHeartbeatToken } from "../heartbeat.js";
|
||||
import type { OriginatingChannelType, TemplateContext } from "../templating.js";
|
||||
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
|
||||
|
||||
Reference in New Issue
Block a user