fix: OpenAI tool schema compatibility
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
- macOS: treat location permission as always-only to avoid iOS-only enums. (#165) — thanks @Nachx639
|
||||
- macOS: make generated gateway protocol models `Sendable` for Swift 6 strict concurrency. (#195) — thanks @andranik-sahakyan
|
||||
- WhatsApp: suppress typing indicator during heartbeat background tasks. (#190) — thanks @mcinteerj
|
||||
- Agent tools: OpenAI-compatible tool JSON Schemas (fix `browser`, normalize union schemas).
|
||||
- Onboarding: when running from source, auto-build missing Control UI assets (`pnpm ui:build`).
|
||||
- Discord/Slack: route reaction + system notifications to the correct session (no main-session bleed).
|
||||
- Agent tools: honor `agent.tools` allow/deny policy even when sandbox is off.
|
||||
|
||||
@@ -364,6 +364,8 @@ export async function runEmbeddedPiAgent(params: {
|
||||
await loadWorkspaceBootstrapFiles(resolvedWorkspace);
|
||||
const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
|
||||
const promptSkills = resolvePromptSkills(skillsSnapshot, skillEntries);
|
||||
// Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`).
|
||||
// `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged.
|
||||
const tools = createClawdbotCodingTools({
|
||||
bash: {
|
||||
...params.config?.agent?.bash,
|
||||
|
||||
@@ -5,8 +5,16 @@ import path from "node:path";
|
||||
import sharp from "sharp";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createClawdbotCodingTools } from "./pi-tools.js";
|
||||
import { createBrowserTool } from "./tools/browser-tool.js";
|
||||
|
||||
describe("createClawdbotCodingTools", () => {
|
||||
it("keeps browser tool schema OpenAI-compatible without normalization", () => {
|
||||
const browser = createBrowserTool();
|
||||
const schema = browser.parameters as { type?: unknown; anyOf?: unknown };
|
||||
expect(schema.type).toBe("object");
|
||||
expect(schema.anyOf).toBeUndefined();
|
||||
});
|
||||
|
||||
it("merges properties for union tool schemas", () => {
|
||||
const tools = createClawdbotCodingTools();
|
||||
const browser = tools.find((tool) => tool.name === "browser");
|
||||
@@ -23,54 +31,47 @@ describe("createClawdbotCodingTools", () => {
|
||||
expect(parameters.required ?? []).toContain("action");
|
||||
});
|
||||
|
||||
it("preserves union action values in merged schema", () => {
|
||||
it("preserves action enums in normalized schemas", () => {
|
||||
const tools = createClawdbotCodingTools();
|
||||
const toolNames = ["browser", "canvas", "nodes", "cron", "gateway"];
|
||||
|
||||
const collectActionValues = (
|
||||
schema: unknown,
|
||||
values: Set<string>,
|
||||
): void => {
|
||||
if (!schema || typeof schema !== "object") return;
|
||||
const record = schema as Record<string, unknown>;
|
||||
if (typeof record.const === "string") values.add(record.const);
|
||||
if (Array.isArray(record.enum)) {
|
||||
for (const value of record.enum) {
|
||||
if (typeof value === "string") values.add(value);
|
||||
}
|
||||
}
|
||||
if (Array.isArray(record.anyOf)) {
|
||||
for (const variant of record.anyOf) {
|
||||
collectActionValues(variant, values);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for (const name of toolNames) {
|
||||
const tool = tools.find((candidate) => candidate.name === name);
|
||||
expect(tool).toBeDefined();
|
||||
const parameters = tool?.parameters as {
|
||||
anyOf?: Array<{ properties?: Record<string, unknown> }>;
|
||||
properties?: Record<string, unknown>;
|
||||
};
|
||||
if (!Array.isArray(parameters.anyOf) || parameters.anyOf.length === 0) {
|
||||
continue;
|
||||
}
|
||||
const actionValues = new Set<string>();
|
||||
for (const variant of parameters.anyOf ?? []) {
|
||||
const action = variant?.properties?.action as
|
||||
| { const?: unknown; enum?: unknown[] }
|
||||
| undefined;
|
||||
if (typeof action?.const === "string") actionValues.add(action.const);
|
||||
if (Array.isArray(action?.enum)) {
|
||||
for (const value of action.enum) {
|
||||
if (typeof value === "string") actionValues.add(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (actionValues.size <= 1) {
|
||||
continue;
|
||||
}
|
||||
const mergedAction = parameters.properties?.action as
|
||||
const action = parameters.properties?.action as
|
||||
| { const?: unknown; enum?: unknown[] }
|
||||
| undefined;
|
||||
const mergedValues = new Set<string>();
|
||||
if (typeof mergedAction?.const === "string") {
|
||||
mergedValues.add(mergedAction.const);
|
||||
}
|
||||
if (Array.isArray(mergedAction?.enum)) {
|
||||
for (const value of mergedAction.enum) {
|
||||
if (typeof value === "string") mergedValues.add(value);
|
||||
}
|
||||
}
|
||||
const values = new Set<string>();
|
||||
collectActionValues(action, values);
|
||||
|
||||
expect(actionValues.size).toBeGreaterThan(1);
|
||||
expect(mergedValues.size).toBe(actionValues.size);
|
||||
for (const value of actionValues) {
|
||||
expect(mergedValues.has(value)).toBe(true);
|
||||
}
|
||||
const min =
|
||||
name === "gateway"
|
||||
? 1
|
||||
: // Most tools expose multiple actions; keep this signal so schemas stay useful to models.
|
||||
2;
|
||||
expect(values.size).toBeGreaterThanOrEqual(min);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -80,6 +81,25 @@ describe("createClawdbotCodingTools", () => {
|
||||
expect(tools.some((tool) => tool.name === "process")).toBe(true);
|
||||
});
|
||||
|
||||
it("provides top-level object schemas for all tools", () => {
|
||||
const tools = createClawdbotCodingTools();
|
||||
const offenders = tools
|
||||
.map((tool) => {
|
||||
const schema =
|
||||
tool.parameters && typeof tool.parameters === "object"
|
||||
? (tool.parameters as Record<string, unknown>)
|
||||
: null;
|
||||
return {
|
||||
name: tool.name,
|
||||
type: schema?.type,
|
||||
keys: schema ? Object.keys(schema).sort() : null,
|
||||
};
|
||||
})
|
||||
.filter((entry) => entry.type !== "object");
|
||||
|
||||
expect(offenders).toEqual([]);
|
||||
});
|
||||
|
||||
it("scopes discord tool to discord surface", () => {
|
||||
const other = createClawdbotCodingTools({ surface: "whatsapp" });
|
||||
expect(other.some((tool) => tool.name === "discord")).toBe(false);
|
||||
|
||||
@@ -218,6 +218,13 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
|
||||
: undefined;
|
||||
if (!schema) return tool;
|
||||
|
||||
// Provider quirks:
|
||||
// - Gemini rejects several JSON Schema keywords, so we scrub those.
|
||||
// - OpenAI rejects function tool schemas unless the *top-level* is `type: "object"`.
|
||||
// (TypeBox root unions compile to `{ anyOf: [...] }` without `type`).
|
||||
//
|
||||
// Normalize once here so callers can always pass `tools` through unchanged.
|
||||
|
||||
// If schema already has type + properties (no top-level anyOf to merge),
|
||||
// still clean it for Gemini compatibility
|
||||
if (
|
||||
@@ -231,12 +238,33 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
|
||||
};
|
||||
}
|
||||
|
||||
if (!Array.isArray(schema.anyOf)) return tool;
|
||||
// Some tool schemas (esp. unions) may omit `type` at the top-level. If we see
|
||||
// object-ish fields, force `type: "object"` so OpenAI accepts the schema.
|
||||
if (
|
||||
!("type" in schema) &&
|
||||
(typeof schema.properties === "object" ||
|
||||
Array.isArray(schema.required)) &&
|
||||
!Array.isArray(schema.anyOf) &&
|
||||
!Array.isArray(schema.oneOf)
|
||||
) {
|
||||
return {
|
||||
...tool,
|
||||
parameters: cleanSchemaForGemini({ ...schema, type: "object" }),
|
||||
};
|
||||
}
|
||||
|
||||
const variantKey = Array.isArray(schema.anyOf)
|
||||
? "anyOf"
|
||||
: Array.isArray(schema.oneOf)
|
||||
? "oneOf"
|
||||
: null;
|
||||
if (!variantKey) return tool;
|
||||
const variants = schema[variantKey] as unknown[];
|
||||
const mergedProperties: Record<string, unknown> = {};
|
||||
const requiredCounts = new Map<string, number>();
|
||||
let objectVariants = 0;
|
||||
|
||||
for (const entry of schema.anyOf) {
|
||||
for (const entry of variants) {
|
||||
if (!entry || typeof entry !== "object") continue;
|
||||
const props = (entry as { properties?: unknown }).properties;
|
||||
if (!props || typeof props !== "object") continue;
|
||||
@@ -277,9 +305,16 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
|
||||
const nextSchema: Record<string, unknown> = { ...schema };
|
||||
return {
|
||||
...tool,
|
||||
// Flatten union schemas into a single object schema:
|
||||
// - Gemini doesn't allow top-level `type` together with `anyOf`.
|
||||
// - OpenAI rejects schemas without top-level `type: "object"`.
|
||||
// Merging properties preserves useful enums like `action` while keeping schemas portable.
|
||||
parameters: cleanSchemaForGemini({
|
||||
...nextSchema,
|
||||
type: nextSchema.type ?? "object",
|
||||
type: "object",
|
||||
...(typeof nextSchema.title === "string" ? { title: nextSchema.title } : {}),
|
||||
...(typeof nextSchema.description === "string"
|
||||
? { description: nextSchema.description }
|
||||
: {}),
|
||||
properties:
|
||||
Object.keys(mergedProperties).length > 0
|
||||
? mergedProperties
|
||||
@@ -518,5 +553,7 @@ export function createClawdbotCodingTools(options?: {
|
||||
const sandboxed = sandbox
|
||||
? filterToolsByPolicy(globallyFiltered, sandbox.tools)
|
||||
: globallyFiltered;
|
||||
// Always normalize tool JSON Schemas before handing them to pi-agent/pi-ai.
|
||||
// Without this, some providers (notably OpenAI) will reject root-level union schemas.
|
||||
return sandboxed.map(normalizeToolParameters);
|
||||
}
|
||||
|
||||
@@ -97,99 +97,44 @@ const BrowserActSchema = Type.Union([
|
||||
}),
|
||||
]);
|
||||
|
||||
const BrowserToolSchema = Type.Union([
|
||||
Type.Object({
|
||||
action: Type.Literal("status"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("start"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("stop"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("tabs"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("open"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
targetUrl: Type.String(),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("focus"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
targetId: Type.String(),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("close"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("snapshot"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
format: Type.Optional(
|
||||
Type.Union([Type.Literal("aria"), Type.Literal("ai")]),
|
||||
),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
limit: Type.Optional(Type.Number()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("screenshot"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
fullPage: Type.Optional(Type.Boolean()),
|
||||
ref: Type.Optional(Type.String()),
|
||||
element: Type.Optional(Type.String()),
|
||||
type: Type.Optional(
|
||||
Type.Union([Type.Literal("png"), Type.Literal("jpeg")]),
|
||||
),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("navigate"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
targetUrl: Type.String(),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("console"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
level: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("pdf"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("upload"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
paths: Type.Array(Type.String()),
|
||||
ref: Type.Optional(Type.String()),
|
||||
inputRef: Type.Optional(Type.String()),
|
||||
element: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
timeoutMs: Type.Optional(Type.Number()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("dialog"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
accept: Type.Boolean(),
|
||||
promptText: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
timeoutMs: Type.Optional(Type.Number()),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("act"),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
request: BrowserActSchema,
|
||||
}),
|
||||
]);
|
||||
// IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`.
|
||||
// A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`),
|
||||
// which OpenAI rejects ("Invalid schema ... type: None"). Keep this schema an object.
|
||||
const BrowserToolSchema = Type.Object({
|
||||
action: Type.Union([
|
||||
Type.Literal("status"),
|
||||
Type.Literal("start"),
|
||||
Type.Literal("stop"),
|
||||
Type.Literal("tabs"),
|
||||
Type.Literal("open"),
|
||||
Type.Literal("focus"),
|
||||
Type.Literal("close"),
|
||||
Type.Literal("snapshot"),
|
||||
Type.Literal("screenshot"),
|
||||
Type.Literal("navigate"),
|
||||
Type.Literal("console"),
|
||||
Type.Literal("pdf"),
|
||||
Type.Literal("upload"),
|
||||
Type.Literal("dialog"),
|
||||
Type.Literal("act"),
|
||||
]),
|
||||
controlUrl: Type.Optional(Type.String()),
|
||||
targetUrl: Type.Optional(Type.String()),
|
||||
targetId: Type.Optional(Type.String()),
|
||||
limit: Type.Optional(Type.Number()),
|
||||
format: Type.Optional(Type.Union([Type.Literal("aria"), Type.Literal("ai")])),
|
||||
fullPage: Type.Optional(Type.Boolean()),
|
||||
ref: Type.Optional(Type.String()),
|
||||
element: Type.Optional(Type.String()),
|
||||
type: Type.Optional(Type.Union([Type.Literal("png"), Type.Literal("jpeg")])),
|
||||
level: Type.Optional(Type.String()),
|
||||
paths: Type.Optional(Type.Array(Type.String())),
|
||||
inputRef: Type.Optional(Type.String()),
|
||||
timeoutMs: Type.Optional(Type.Number()),
|
||||
accept: Type.Optional(Type.Boolean()),
|
||||
promptText: Type.Optional(Type.String()),
|
||||
request: Type.Optional(BrowserActSchema),
|
||||
});
|
||||
|
||||
function resolveBrowserBaseUrl(controlUrl?: string) {
|
||||
const cfg = loadConfig();
|
||||
|
||||
Reference in New Issue
Block a user