fix: OpenAI tool schema compatibility
This commit is contained in:
@@ -14,6 +14,7 @@
|
|||||||
- macOS: treat location permission as always-only to avoid iOS-only enums. (#165) — thanks @Nachx639
|
- macOS: treat location permission as always-only to avoid iOS-only enums. (#165) — thanks @Nachx639
|
||||||
- macOS: make generated gateway protocol models `Sendable` for Swift 6 strict concurrency. (#195) — thanks @andranik-sahakyan
|
- macOS: make generated gateway protocol models `Sendable` for Swift 6 strict concurrency. (#195) — thanks @andranik-sahakyan
|
||||||
- WhatsApp: suppress typing indicator during heartbeat background tasks. (#190) — thanks @mcinteerj
|
- WhatsApp: suppress typing indicator during heartbeat background tasks. (#190) — thanks @mcinteerj
|
||||||
|
- Agent tools: OpenAI-compatible tool JSON Schemas (fix `browser`, normalize union schemas).
|
||||||
- Onboarding: when running from source, auto-build missing Control UI assets (`pnpm ui:build`).
|
- Onboarding: when running from source, auto-build missing Control UI assets (`pnpm ui:build`).
|
||||||
- Discord/Slack: route reaction + system notifications to the correct session (no main-session bleed).
|
- Discord/Slack: route reaction + system notifications to the correct session (no main-session bleed).
|
||||||
- Agent tools: honor `agent.tools` allow/deny policy even when sandbox is off.
|
- Agent tools: honor `agent.tools` allow/deny policy even when sandbox is off.
|
||||||
|
|||||||
@@ -364,6 +364,8 @@ export async function runEmbeddedPiAgent(params: {
|
|||||||
await loadWorkspaceBootstrapFiles(resolvedWorkspace);
|
await loadWorkspaceBootstrapFiles(resolvedWorkspace);
|
||||||
const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
|
const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
|
||||||
const promptSkills = resolvePromptSkills(skillsSnapshot, skillEntries);
|
const promptSkills = resolvePromptSkills(skillsSnapshot, skillEntries);
|
||||||
|
// Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`).
|
||||||
|
// `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged.
|
||||||
const tools = createClawdbotCodingTools({
|
const tools = createClawdbotCodingTools({
|
||||||
bash: {
|
bash: {
|
||||||
...params.config?.agent?.bash,
|
...params.config?.agent?.bash,
|
||||||
|
|||||||
@@ -5,8 +5,16 @@ import path from "node:path";
|
|||||||
import sharp from "sharp";
|
import sharp from "sharp";
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import { createClawdbotCodingTools } from "./pi-tools.js";
|
import { createClawdbotCodingTools } from "./pi-tools.js";
|
||||||
|
import { createBrowserTool } from "./tools/browser-tool.js";
|
||||||
|
|
||||||
describe("createClawdbotCodingTools", () => {
|
describe("createClawdbotCodingTools", () => {
|
||||||
|
it("keeps browser tool schema OpenAI-compatible without normalization", () => {
|
||||||
|
const browser = createBrowserTool();
|
||||||
|
const schema = browser.parameters as { type?: unknown; anyOf?: unknown };
|
||||||
|
expect(schema.type).toBe("object");
|
||||||
|
expect(schema.anyOf).toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
it("merges properties for union tool schemas", () => {
|
it("merges properties for union tool schemas", () => {
|
||||||
const tools = createClawdbotCodingTools();
|
const tools = createClawdbotCodingTools();
|
||||||
const browser = tools.find((tool) => tool.name === "browser");
|
const browser = tools.find((tool) => tool.name === "browser");
|
||||||
@@ -23,54 +31,47 @@ describe("createClawdbotCodingTools", () => {
|
|||||||
expect(parameters.required ?? []).toContain("action");
|
expect(parameters.required ?? []).toContain("action");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("preserves union action values in merged schema", () => {
|
it("preserves action enums in normalized schemas", () => {
|
||||||
const tools = createClawdbotCodingTools();
|
const tools = createClawdbotCodingTools();
|
||||||
const toolNames = ["browser", "canvas", "nodes", "cron", "gateway"];
|
const toolNames = ["browser", "canvas", "nodes", "cron", "gateway"];
|
||||||
|
|
||||||
|
const collectActionValues = (
|
||||||
|
schema: unknown,
|
||||||
|
values: Set<string>,
|
||||||
|
): void => {
|
||||||
|
if (!schema || typeof schema !== "object") return;
|
||||||
|
const record = schema as Record<string, unknown>;
|
||||||
|
if (typeof record.const === "string") values.add(record.const);
|
||||||
|
if (Array.isArray(record.enum)) {
|
||||||
|
for (const value of record.enum) {
|
||||||
|
if (typeof value === "string") values.add(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (Array.isArray(record.anyOf)) {
|
||||||
|
for (const variant of record.anyOf) {
|
||||||
|
collectActionValues(variant, values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
for (const name of toolNames) {
|
for (const name of toolNames) {
|
||||||
const tool = tools.find((candidate) => candidate.name === name);
|
const tool = tools.find((candidate) => candidate.name === name);
|
||||||
expect(tool).toBeDefined();
|
expect(tool).toBeDefined();
|
||||||
const parameters = tool?.parameters as {
|
const parameters = tool?.parameters as {
|
||||||
anyOf?: Array<{ properties?: Record<string, unknown> }>;
|
|
||||||
properties?: Record<string, unknown>;
|
properties?: Record<string, unknown>;
|
||||||
};
|
};
|
||||||
if (!Array.isArray(parameters.anyOf) || parameters.anyOf.length === 0) {
|
const action = parameters.properties?.action as
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const actionValues = new Set<string>();
|
|
||||||
for (const variant of parameters.anyOf ?? []) {
|
|
||||||
const action = variant?.properties?.action as
|
|
||||||
| { const?: unknown; enum?: unknown[] }
|
| { const?: unknown; enum?: unknown[] }
|
||||||
| undefined;
|
| undefined;
|
||||||
if (typeof action?.const === "string") actionValues.add(action.const);
|
const values = new Set<string>();
|
||||||
if (Array.isArray(action?.enum)) {
|
collectActionValues(action, values);
|
||||||
for (const value of action.enum) {
|
|
||||||
if (typeof value === "string") actionValues.add(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (actionValues.size <= 1) {
|
const min =
|
||||||
continue;
|
name === "gateway"
|
||||||
}
|
? 1
|
||||||
const mergedAction = parameters.properties?.action as
|
: // Most tools expose multiple actions; keep this signal so schemas stay useful to models.
|
||||||
| { const?: unknown; enum?: unknown[] }
|
2;
|
||||||
| undefined;
|
expect(values.size).toBeGreaterThanOrEqual(min);
|
||||||
const mergedValues = new Set<string>();
|
|
||||||
if (typeof mergedAction?.const === "string") {
|
|
||||||
mergedValues.add(mergedAction.const);
|
|
||||||
}
|
|
||||||
if (Array.isArray(mergedAction?.enum)) {
|
|
||||||
for (const value of mergedAction.enum) {
|
|
||||||
if (typeof value === "string") mergedValues.add(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(actionValues.size).toBeGreaterThan(1);
|
|
||||||
expect(mergedValues.size).toBe(actionValues.size);
|
|
||||||
for (const value of actionValues) {
|
|
||||||
expect(mergedValues.has(value)).toBe(true);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -80,6 +81,25 @@ describe("createClawdbotCodingTools", () => {
|
|||||||
expect(tools.some((tool) => tool.name === "process")).toBe(true);
|
expect(tools.some((tool) => tool.name === "process")).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("provides top-level object schemas for all tools", () => {
|
||||||
|
const tools = createClawdbotCodingTools();
|
||||||
|
const offenders = tools
|
||||||
|
.map((tool) => {
|
||||||
|
const schema =
|
||||||
|
tool.parameters && typeof tool.parameters === "object"
|
||||||
|
? (tool.parameters as Record<string, unknown>)
|
||||||
|
: null;
|
||||||
|
return {
|
||||||
|
name: tool.name,
|
||||||
|
type: schema?.type,
|
||||||
|
keys: schema ? Object.keys(schema).sort() : null,
|
||||||
|
};
|
||||||
|
})
|
||||||
|
.filter((entry) => entry.type !== "object");
|
||||||
|
|
||||||
|
expect(offenders).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
it("scopes discord tool to discord surface", () => {
|
it("scopes discord tool to discord surface", () => {
|
||||||
const other = createClawdbotCodingTools({ surface: "whatsapp" });
|
const other = createClawdbotCodingTools({ surface: "whatsapp" });
|
||||||
expect(other.some((tool) => tool.name === "discord")).toBe(false);
|
expect(other.some((tool) => tool.name === "discord")).toBe(false);
|
||||||
|
|||||||
@@ -218,6 +218,13 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
|
|||||||
: undefined;
|
: undefined;
|
||||||
if (!schema) return tool;
|
if (!schema) return tool;
|
||||||
|
|
||||||
|
// Provider quirks:
|
||||||
|
// - Gemini rejects several JSON Schema keywords, so we scrub those.
|
||||||
|
// - OpenAI rejects function tool schemas unless the *top-level* is `type: "object"`.
|
||||||
|
// (TypeBox root unions compile to `{ anyOf: [...] }` without `type`).
|
||||||
|
//
|
||||||
|
// Normalize once here so callers can always pass `tools` through unchanged.
|
||||||
|
|
||||||
// If schema already has type + properties (no top-level anyOf to merge),
|
// If schema already has type + properties (no top-level anyOf to merge),
|
||||||
// still clean it for Gemini compatibility
|
// still clean it for Gemini compatibility
|
||||||
if (
|
if (
|
||||||
@@ -231,12 +238,33 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Array.isArray(schema.anyOf)) return tool;
|
// Some tool schemas (esp. unions) may omit `type` at the top-level. If we see
|
||||||
|
// object-ish fields, force `type: "object"` so OpenAI accepts the schema.
|
||||||
|
if (
|
||||||
|
!("type" in schema) &&
|
||||||
|
(typeof schema.properties === "object" ||
|
||||||
|
Array.isArray(schema.required)) &&
|
||||||
|
!Array.isArray(schema.anyOf) &&
|
||||||
|
!Array.isArray(schema.oneOf)
|
||||||
|
) {
|
||||||
|
return {
|
||||||
|
...tool,
|
||||||
|
parameters: cleanSchemaForGemini({ ...schema, type: "object" }),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const variantKey = Array.isArray(schema.anyOf)
|
||||||
|
? "anyOf"
|
||||||
|
: Array.isArray(schema.oneOf)
|
||||||
|
? "oneOf"
|
||||||
|
: null;
|
||||||
|
if (!variantKey) return tool;
|
||||||
|
const variants = schema[variantKey] as unknown[];
|
||||||
const mergedProperties: Record<string, unknown> = {};
|
const mergedProperties: Record<string, unknown> = {};
|
||||||
const requiredCounts = new Map<string, number>();
|
const requiredCounts = new Map<string, number>();
|
||||||
let objectVariants = 0;
|
let objectVariants = 0;
|
||||||
|
|
||||||
for (const entry of schema.anyOf) {
|
for (const entry of variants) {
|
||||||
if (!entry || typeof entry !== "object") continue;
|
if (!entry || typeof entry !== "object") continue;
|
||||||
const props = (entry as { properties?: unknown }).properties;
|
const props = (entry as { properties?: unknown }).properties;
|
||||||
if (!props || typeof props !== "object") continue;
|
if (!props || typeof props !== "object") continue;
|
||||||
@@ -277,9 +305,16 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
|
|||||||
const nextSchema: Record<string, unknown> = { ...schema };
|
const nextSchema: Record<string, unknown> = { ...schema };
|
||||||
return {
|
return {
|
||||||
...tool,
|
...tool,
|
||||||
|
// Flatten union schemas into a single object schema:
|
||||||
|
// - Gemini doesn't allow top-level `type` together with `anyOf`.
|
||||||
|
// - OpenAI rejects schemas without top-level `type: "object"`.
|
||||||
|
// Merging properties preserves useful enums like `action` while keeping schemas portable.
|
||||||
parameters: cleanSchemaForGemini({
|
parameters: cleanSchemaForGemini({
|
||||||
...nextSchema,
|
type: "object",
|
||||||
type: nextSchema.type ?? "object",
|
...(typeof nextSchema.title === "string" ? { title: nextSchema.title } : {}),
|
||||||
|
...(typeof nextSchema.description === "string"
|
||||||
|
? { description: nextSchema.description }
|
||||||
|
: {}),
|
||||||
properties:
|
properties:
|
||||||
Object.keys(mergedProperties).length > 0
|
Object.keys(mergedProperties).length > 0
|
||||||
? mergedProperties
|
? mergedProperties
|
||||||
@@ -518,5 +553,7 @@ export function createClawdbotCodingTools(options?: {
|
|||||||
const sandboxed = sandbox
|
const sandboxed = sandbox
|
||||||
? filterToolsByPolicy(globallyFiltered, sandbox.tools)
|
? filterToolsByPolicy(globallyFiltered, sandbox.tools)
|
||||||
: globallyFiltered;
|
: globallyFiltered;
|
||||||
|
// Always normalize tool JSON Schemas before handing them to pi-agent/pi-ai.
|
||||||
|
// Without this, some providers (notably OpenAI) will reject root-level union schemas.
|
||||||
return sandboxed.map(normalizeToolParameters);
|
return sandboxed.map(normalizeToolParameters);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,99 +97,44 @@ const BrowserActSchema = Type.Union([
|
|||||||
}),
|
}),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const BrowserToolSchema = Type.Union([
|
// IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`.
|
||||||
Type.Object({
|
// A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`),
|
||||||
action: Type.Literal("status"),
|
// which OpenAI rejects ("Invalid schema ... type: None"). Keep this schema an object.
|
||||||
|
const BrowserToolSchema = Type.Object({
|
||||||
|
action: Type.Union([
|
||||||
|
Type.Literal("status"),
|
||||||
|
Type.Literal("start"),
|
||||||
|
Type.Literal("stop"),
|
||||||
|
Type.Literal("tabs"),
|
||||||
|
Type.Literal("open"),
|
||||||
|
Type.Literal("focus"),
|
||||||
|
Type.Literal("close"),
|
||||||
|
Type.Literal("snapshot"),
|
||||||
|
Type.Literal("screenshot"),
|
||||||
|
Type.Literal("navigate"),
|
||||||
|
Type.Literal("console"),
|
||||||
|
Type.Literal("pdf"),
|
||||||
|
Type.Literal("upload"),
|
||||||
|
Type.Literal("dialog"),
|
||||||
|
Type.Literal("act"),
|
||||||
|
]),
|
||||||
controlUrl: Type.Optional(Type.String()),
|
controlUrl: Type.Optional(Type.String()),
|
||||||
}),
|
targetUrl: Type.Optional(Type.String()),
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("start"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("stop"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("tabs"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("open"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
targetUrl: Type.String(),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("focus"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
targetId: Type.String(),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("close"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
targetId: Type.Optional(Type.String()),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("snapshot"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
format: Type.Optional(
|
|
||||||
Type.Union([Type.Literal("aria"), Type.Literal("ai")]),
|
|
||||||
),
|
|
||||||
targetId: Type.Optional(Type.String()),
|
targetId: Type.Optional(Type.String()),
|
||||||
limit: Type.Optional(Type.Number()),
|
limit: Type.Optional(Type.Number()),
|
||||||
}),
|
format: Type.Optional(Type.Union([Type.Literal("aria"), Type.Literal("ai")])),
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("screenshot"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
targetId: Type.Optional(Type.String()),
|
|
||||||
fullPage: Type.Optional(Type.Boolean()),
|
fullPage: Type.Optional(Type.Boolean()),
|
||||||
ref: Type.Optional(Type.String()),
|
ref: Type.Optional(Type.String()),
|
||||||
element: Type.Optional(Type.String()),
|
element: Type.Optional(Type.String()),
|
||||||
type: Type.Optional(
|
type: Type.Optional(Type.Union([Type.Literal("png"), Type.Literal("jpeg")])),
|
||||||
Type.Union([Type.Literal("png"), Type.Literal("jpeg")]),
|
|
||||||
),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("navigate"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
targetUrl: Type.String(),
|
|
||||||
targetId: Type.Optional(Type.String()),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("console"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
level: Type.Optional(Type.String()),
|
level: Type.Optional(Type.String()),
|
||||||
targetId: Type.Optional(Type.String()),
|
paths: Type.Optional(Type.Array(Type.String())),
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("pdf"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
targetId: Type.Optional(Type.String()),
|
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("upload"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
paths: Type.Array(Type.String()),
|
|
||||||
ref: Type.Optional(Type.String()),
|
|
||||||
inputRef: Type.Optional(Type.String()),
|
inputRef: Type.Optional(Type.String()),
|
||||||
element: Type.Optional(Type.String()),
|
|
||||||
targetId: Type.Optional(Type.String()),
|
|
||||||
timeoutMs: Type.Optional(Type.Number()),
|
timeoutMs: Type.Optional(Type.Number()),
|
||||||
}),
|
accept: Type.Optional(Type.Boolean()),
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("dialog"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
accept: Type.Boolean(),
|
|
||||||
promptText: Type.Optional(Type.String()),
|
promptText: Type.Optional(Type.String()),
|
||||||
targetId: Type.Optional(Type.String()),
|
request: Type.Optional(BrowserActSchema),
|
||||||
timeoutMs: Type.Optional(Type.Number()),
|
});
|
||||||
}),
|
|
||||||
Type.Object({
|
|
||||||
action: Type.Literal("act"),
|
|
||||||
controlUrl: Type.Optional(Type.String()),
|
|
||||||
request: BrowserActSchema,
|
|
||||||
}),
|
|
||||||
]);
|
|
||||||
|
|
||||||
function resolveBrowserBaseUrl(controlUrl?: string) {
|
function resolveBrowserBaseUrl(controlUrl?: string) {
|
||||||
const cfg = loadConfig();
|
const cfg = loadConfig();
|
||||||
|
|||||||
Reference in New Issue
Block a user