fix: OpenAI tool schema compatibility

This commit is contained in:
Peter Steinberger
2026-01-05 00:15:42 +01:00
parent c3c9dee65e
commit 435edaf997
5 changed files with 138 additions and 133 deletions

View File

@@ -14,6 +14,7 @@
- macOS: treat location permission as always-only to avoid iOS-only enums. (#165) — thanks @Nachx639
- macOS: make generated gateway protocol models `Sendable` for Swift 6 strict concurrency. (#195) — thanks @andranik-sahakyan
- WhatsApp: suppress typing indicator during heartbeat background tasks. (#190) — thanks @mcinteerj
- Agent tools: OpenAI-compatible tool JSON Schemas (fix `browser`, normalize union schemas).
- Onboarding: when running from source, auto-build missing Control UI assets (`pnpm ui:build`).
- Discord/Slack: route reaction + system notifications to the correct session (no main-session bleed).
- Agent tools: honor `agent.tools` allow/deny policy even when sandbox is off.

View File

@@ -364,6 +364,8 @@ export async function runEmbeddedPiAgent(params: {
await loadWorkspaceBootstrapFiles(resolvedWorkspace);
const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
const promptSkills = resolvePromptSkills(skillsSnapshot, skillEntries);
// Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`).
// `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged.
const tools = createClawdbotCodingTools({
bash: {
...params.config?.agent?.bash,

View File

@@ -5,8 +5,16 @@ import path from "node:path";
import sharp from "sharp";
import { describe, expect, it } from "vitest";
import { createClawdbotCodingTools } from "./pi-tools.js";
import { createBrowserTool } from "./tools/browser-tool.js";
describe("createClawdbotCodingTools", () => {
it("keeps browser tool schema OpenAI-compatible without normalization", () => {
const browser = createBrowserTool();
const schema = browser.parameters as { type?: unknown; anyOf?: unknown };
expect(schema.type).toBe("object");
expect(schema.anyOf).toBeUndefined();
});
it("merges properties for union tool schemas", () => {
const tools = createClawdbotCodingTools();
const browser = tools.find((tool) => tool.name === "browser");
@@ -23,54 +31,47 @@ describe("createClawdbotCodingTools", () => {
expect(parameters.required ?? []).toContain("action");
});
it("preserves union action values in merged schema", () => {
it("preserves action enums in normalized schemas", () => {
const tools = createClawdbotCodingTools();
const toolNames = ["browser", "canvas", "nodes", "cron", "gateway"];
const collectActionValues = (
schema: unknown,
values: Set<string>,
): void => {
if (!schema || typeof schema !== "object") return;
const record = schema as Record<string, unknown>;
if (typeof record.const === "string") values.add(record.const);
if (Array.isArray(record.enum)) {
for (const value of record.enum) {
if (typeof value === "string") values.add(value);
}
}
if (Array.isArray(record.anyOf)) {
for (const variant of record.anyOf) {
collectActionValues(variant, values);
}
}
};
for (const name of toolNames) {
const tool = tools.find((candidate) => candidate.name === name);
expect(tool).toBeDefined();
const parameters = tool?.parameters as {
anyOf?: Array<{ properties?: Record<string, unknown> }>;
properties?: Record<string, unknown>;
};
if (!Array.isArray(parameters.anyOf) || parameters.anyOf.length === 0) {
continue;
}
const actionValues = new Set<string>();
for (const variant of parameters.anyOf ?? []) {
const action = variant?.properties?.action as
| { const?: unknown; enum?: unknown[] }
| undefined;
if (typeof action?.const === "string") actionValues.add(action.const);
if (Array.isArray(action?.enum)) {
for (const value of action.enum) {
if (typeof value === "string") actionValues.add(value);
}
}
}
if (actionValues.size <= 1) {
continue;
}
const mergedAction = parameters.properties?.action as
const action = parameters.properties?.action as
| { const?: unknown; enum?: unknown[] }
| undefined;
const mergedValues = new Set<string>();
if (typeof mergedAction?.const === "string") {
mergedValues.add(mergedAction.const);
}
if (Array.isArray(mergedAction?.enum)) {
for (const value of mergedAction.enum) {
if (typeof value === "string") mergedValues.add(value);
}
}
const values = new Set<string>();
collectActionValues(action, values);
expect(actionValues.size).toBeGreaterThan(1);
expect(mergedValues.size).toBe(actionValues.size);
for (const value of actionValues) {
expect(mergedValues.has(value)).toBe(true);
}
const min =
name === "gateway"
? 1
: // Most tools expose multiple actions; keep this signal so schemas stay useful to models.
2;
expect(values.size).toBeGreaterThanOrEqual(min);
}
});
@@ -80,6 +81,25 @@ describe("createClawdbotCodingTools", () => {
expect(tools.some((tool) => tool.name === "process")).toBe(true);
});
it("provides top-level object schemas for all tools", () => {
const tools = createClawdbotCodingTools();
const offenders = tools
.map((tool) => {
const schema =
tool.parameters && typeof tool.parameters === "object"
? (tool.parameters as Record<string, unknown>)
: null;
return {
name: tool.name,
type: schema?.type,
keys: schema ? Object.keys(schema).sort() : null,
};
})
.filter((entry) => entry.type !== "object");
expect(offenders).toEqual([]);
});
it("scopes discord tool to discord surface", () => {
const other = createClawdbotCodingTools({ surface: "whatsapp" });
expect(other.some((tool) => tool.name === "discord")).toBe(false);

View File

@@ -218,6 +218,13 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
: undefined;
if (!schema) return tool;
// Provider quirks:
// - Gemini rejects several JSON Schema keywords, so we scrub those.
// - OpenAI rejects function tool schemas unless the *top-level* is `type: "object"`.
// (TypeBox root unions compile to `{ anyOf: [...] }` without `type`).
//
// Normalize once here so callers can always pass `tools` through unchanged.
// If schema already has type + properties (no top-level anyOf to merge),
// still clean it for Gemini compatibility
if (
@@ -231,12 +238,33 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
};
}
if (!Array.isArray(schema.anyOf)) return tool;
// Some tool schemas (esp. unions) may omit `type` at the top-level. If we see
// object-ish fields, force `type: "object"` so OpenAI accepts the schema.
if (
!("type" in schema) &&
(typeof schema.properties === "object" ||
Array.isArray(schema.required)) &&
!Array.isArray(schema.anyOf) &&
!Array.isArray(schema.oneOf)
) {
return {
...tool,
parameters: cleanSchemaForGemini({ ...schema, type: "object" }),
};
}
const variantKey = Array.isArray(schema.anyOf)
? "anyOf"
: Array.isArray(schema.oneOf)
? "oneOf"
: null;
if (!variantKey) return tool;
const variants = schema[variantKey] as unknown[];
const mergedProperties: Record<string, unknown> = {};
const requiredCounts = new Map<string, number>();
let objectVariants = 0;
for (const entry of schema.anyOf) {
for (const entry of variants) {
if (!entry || typeof entry !== "object") continue;
const props = (entry as { properties?: unknown }).properties;
if (!props || typeof props !== "object") continue;
@@ -277,9 +305,16 @@ function normalizeToolParameters(tool: AnyAgentTool): AnyAgentTool {
const nextSchema: Record<string, unknown> = { ...schema };
return {
...tool,
// Flatten union schemas into a single object schema:
// - Gemini doesn't allow top-level `type` together with `anyOf`.
// - OpenAI rejects schemas without top-level `type: "object"`.
// Merging properties preserves useful enums like `action` while keeping schemas portable.
parameters: cleanSchemaForGemini({
...nextSchema,
type: nextSchema.type ?? "object",
type: "object",
...(typeof nextSchema.title === "string" ? { title: nextSchema.title } : {}),
...(typeof nextSchema.description === "string"
? { description: nextSchema.description }
: {}),
properties:
Object.keys(mergedProperties).length > 0
? mergedProperties
@@ -518,5 +553,7 @@ export function createClawdbotCodingTools(options?: {
const sandboxed = sandbox
? filterToolsByPolicy(globallyFiltered, sandbox.tools)
: globallyFiltered;
// Always normalize tool JSON Schemas before handing them to pi-agent/pi-ai.
// Without this, some providers (notably OpenAI) will reject root-level union schemas.
return sandboxed.map(normalizeToolParameters);
}

View File

@@ -97,99 +97,44 @@ const BrowserActSchema = Type.Union([
}),
]);
const BrowserToolSchema = Type.Union([
Type.Object({
action: Type.Literal("status"),
controlUrl: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("start"),
controlUrl: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("stop"),
controlUrl: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("tabs"),
controlUrl: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("open"),
controlUrl: Type.Optional(Type.String()),
targetUrl: Type.String(),
}),
Type.Object({
action: Type.Literal("focus"),
controlUrl: Type.Optional(Type.String()),
targetId: Type.String(),
}),
Type.Object({
action: Type.Literal("close"),
controlUrl: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("snapshot"),
controlUrl: Type.Optional(Type.String()),
format: Type.Optional(
Type.Union([Type.Literal("aria"), Type.Literal("ai")]),
),
targetId: Type.Optional(Type.String()),
limit: Type.Optional(Type.Number()),
}),
Type.Object({
action: Type.Literal("screenshot"),
controlUrl: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
fullPage: Type.Optional(Type.Boolean()),
ref: Type.Optional(Type.String()),
element: Type.Optional(Type.String()),
type: Type.Optional(
Type.Union([Type.Literal("png"), Type.Literal("jpeg")]),
),
}),
Type.Object({
action: Type.Literal("navigate"),
controlUrl: Type.Optional(Type.String()),
targetUrl: Type.String(),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("console"),
controlUrl: Type.Optional(Type.String()),
level: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("pdf"),
controlUrl: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
action: Type.Literal("upload"),
controlUrl: Type.Optional(Type.String()),
paths: Type.Array(Type.String()),
ref: Type.Optional(Type.String()),
inputRef: Type.Optional(Type.String()),
element: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
}),
Type.Object({
action: Type.Literal("dialog"),
controlUrl: Type.Optional(Type.String()),
accept: Type.Boolean(),
promptText: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
}),
Type.Object({
action: Type.Literal("act"),
controlUrl: Type.Optional(Type.String()),
request: BrowserActSchema,
}),
]);
// IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`.
// A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`),
// which OpenAI rejects ("Invalid schema ... type: None"). Keep this schema an object.
const BrowserToolSchema = Type.Object({
action: Type.Union([
Type.Literal("status"),
Type.Literal("start"),
Type.Literal("stop"),
Type.Literal("tabs"),
Type.Literal("open"),
Type.Literal("focus"),
Type.Literal("close"),
Type.Literal("snapshot"),
Type.Literal("screenshot"),
Type.Literal("navigate"),
Type.Literal("console"),
Type.Literal("pdf"),
Type.Literal("upload"),
Type.Literal("dialog"),
Type.Literal("act"),
]),
controlUrl: Type.Optional(Type.String()),
targetUrl: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
limit: Type.Optional(Type.Number()),
format: Type.Optional(Type.Union([Type.Literal("aria"), Type.Literal("ai")])),
fullPage: Type.Optional(Type.Boolean()),
ref: Type.Optional(Type.String()),
element: Type.Optional(Type.String()),
type: Type.Optional(Type.Union([Type.Literal("png"), Type.Literal("jpeg")])),
level: Type.Optional(Type.String()),
paths: Type.Optional(Type.Array(Type.String())),
inputRef: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
accept: Type.Optional(Type.Boolean()),
promptText: Type.Optional(Type.String()),
request: Type.Optional(BrowserActSchema),
});
function resolveBrowserBaseUrl(controlUrl?: string) {
const cfg = loadConfig();