fix(tools): flatten nested anyOf schemas for Vertex AI compatibility

Claude API on Vertex AI (Cloud Code Assist) rejects nested anyOf schemas
as invalid JSON Schema draft 2020-12. This change:

- Add tryFlattenLiteralAnyOf() to convert Type.Union([Type.Literal(...)])
  patterns from anyOf with const values to flat enum arrays
- Update stringEnum helper in bash-tools to use Type.Unsafe with flat enum
- Flatten BrowserActSchema from discriminated union to single object
- Simplify TelegramToolSchema to use Type.String() for IDs

Fixes 400 errors when sending messages through WhatsApp/Telegram providers.
This commit is contained in:
Kit
2026-01-07 16:54:13 +00:00
committed by Peter Steinberger
parent de55f4e111
commit a2b3f2c18a
4 changed files with 132 additions and 80 deletions

View File

@@ -39,17 +39,19 @@ const DEFAULT_PATH =
process.env.PATH ?? process.env.PATH ??
"/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
const stringEnum = ( // NOTE: Using Type.Unsafe with enum instead of Type.Union([Type.Literal(...)])
values: readonly string[], // because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
options?: Parameters<typeof Type.Union>[1], // Type.Union of literals compiles to { anyOf: [{ const: "a", type: "string" }, { const: "b", type: "string" }, ...] }
// which is valid but not accepted. A flat enum { type: "string", enum: [...] } works.
const stringEnum = <T extends readonly string[]>(
values: T,
options?: { description?: string },
) => ) =>
Type.Union( Type.Unsafe<T[number]>({
values.map((value) => Type.Literal(value)) as [ type: "string",
ReturnType<typeof Type.Literal>, enum: values as unknown as string[],
...ReturnType<typeof Type.Literal>[], ...options,
], });
options,
);
export type BashToolDefaults = { export type BashToolDefaults = {
backgroundMs?: number; backgroundMs?: number;

View File

@@ -154,12 +154,73 @@ function mergePropertySchemas(existing: unknown, incoming: unknown): unknown {
return existing; return existing;
} }
/**
 * Collapse an `anyOf` made up solely of literal variants into one flat enum schema.
 *
 * Two literal encodings are recognised:
 * - `{ const: "value", type: "string" }` (what TypeBox `Type.Literal` generates)
 * - `{ enum: ["value"], type: "string" }` (a single-element enum)
 *
 * Both are flattened to `{ type: "string", enum: ["a", "b", ...] }`. Values keep
 * their first-seen order; a literal that appears in more than one variant is
 * emitted once, because JSON Schema expects `enum` elements to be unique.
 *
 * @param anyOf - The variants listed under an `anyOf` keyword.
 * @returns The flattened `{ type, enum }` schema, or `null` when the array is
 *   empty, a variant is not an object, a variant is not a literal pattern, a
 *   variant has no non-empty string `type`, or the variants disagree on `type`.
 */
function tryFlattenLiteralAnyOf(
  anyOf: unknown[],
): { type: string; enum: unknown[] } | null {
  if (anyOf.length === 0) return null;

  const allValues: unknown[] = [];
  let commonType: string | null = null;

  for (const variant of anyOf) {
    if (!variant || typeof variant !== "object") return null;
    const v = variant as Record<string, unknown>;

    // Extract the literal value - either from const or a single-element enum.
    let literalValue: unknown;
    if ("const" in v) {
      literalValue = v.const;
    } else if (Array.isArray(v.enum) && v.enum.length === 1) {
      literalValue = v.enum[0];
    } else {
      return null; // Not a literal pattern
    }

    // Every variant must declare the same type (usually "string").
    const variantType = typeof v.type === "string" ? v.type : null;
    if (!variantType) return null;
    if (commonType === null) commonType = variantType;
    else if (commonType !== variantType) return null;

    // Skip repeats so the resulting enum never lists the same literal twice.
    if (!allValues.includes(literalValue)) allValues.push(literalValue);
  }

  // The loop ran at least once without bailing out, so commonType is set here;
  // the null check only exists to narrow the type for the compiler.
  return commonType === null ? null : { type: commonType, enum: allValues };
}
function cleanSchemaForGemini(schema: unknown): unknown { function cleanSchemaForGemini(schema: unknown): unknown {
if (!schema || typeof schema !== "object") return schema; if (!schema || typeof schema !== "object") return schema;
if (Array.isArray(schema)) return schema.map(cleanSchemaForGemini); if (Array.isArray(schema)) return schema.map(cleanSchemaForGemini);
const obj = schema as Record<string, unknown>; const obj = schema as Record<string, unknown>;
const hasAnyOf = "anyOf" in obj && Array.isArray(obj.anyOf); const hasAnyOf = "anyOf" in obj && Array.isArray(obj.anyOf);
// Try to flatten anyOf of literals to a single enum BEFORE processing
// This handles Type.Union([Type.Literal("a"), Type.Literal("b")]) patterns
if (hasAnyOf) {
const flattened = tryFlattenLiteralAnyOf(obj.anyOf as unknown[]);
if (flattened) {
// Return flattened enum, preserving metadata (description, title, default, examples)
const result: Record<string, unknown> = {
type: flattened.type,
enum: flattened.enum,
};
for (const key of ["description", "title", "default", "examples"]) {
if (key in obj && obj[key] !== undefined) {
result[key] = obj[key];
}
}
return result;
}
}
const cleaned: Record<string, unknown> = {}; const cleaned: Record<string, unknown> = {};
for (const [key, value] of Object.entries(obj)) { for (const [key, value] of Object.entries(obj)) {
@@ -409,8 +470,13 @@ function createWhatsAppLoginTool(): AnyAgentTool {
name: "whatsapp_login", name: "whatsapp_login",
description: description:
"Generate a WhatsApp QR code for linking, or wait for the scan to complete.", "Generate a WhatsApp QR code for linking, or wait for the scan to complete.",
// NOTE: Using Type.Unsafe for action enum instead of Type.Union([Type.Literal(...)])
// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
parameters: Type.Object({ parameters: Type.Object({
action: Type.Union([Type.Literal("start"), Type.Literal("wait")]), action: Type.Unsafe<"start" | "wait">({
type: "string",
enum: ["start", "wait"],
}),
timeoutMs: Type.Optional(Type.Number()), timeoutMs: Type.Optional(Type.Number()),
force: Type.Optional(Type.Boolean()), force: Type.Optional(Type.Boolean()),
}), }),

View File

@@ -28,74 +28,55 @@ import {
readStringParam, readStringParam,
} from "./common.js"; } from "./common.js";
const BrowserActSchema = Type.Union([ // NOTE: Using a flattened object schema instead of Type.Union([Type.Object(...), ...])
Type.Object({ // because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
kind: Type.Literal("click"), // The discriminator (kind) determines which properties are relevant; runtime validates.
ref: Type.String(), const BrowserActSchema = Type.Object({
targetId: Type.Optional(Type.String()), kind: Type.Unsafe<string>({
doubleClick: Type.Optional(Type.Boolean()), type: "string",
button: Type.Optional(Type.String()), enum: [
modifiers: Type.Optional(Type.Array(Type.String())), "click",
"type",
"press",
"hover",
"drag",
"select",
"fill",
"resize",
"wait",
"evaluate",
"close",
],
}), }),
Type.Object({ // Common fields
kind: Type.Literal("type"), targetId: Type.Optional(Type.String()),
ref: Type.String(), ref: Type.Optional(Type.String()),
text: Type.String(), // click
targetId: Type.Optional(Type.String()), doubleClick: Type.Optional(Type.Boolean()),
submit: Type.Optional(Type.Boolean()), button: Type.Optional(Type.String()),
slowly: Type.Optional(Type.Boolean()), modifiers: Type.Optional(Type.Array(Type.String())),
}), // type
Type.Object({ text: Type.Optional(Type.String()),
kind: Type.Literal("press"), submit: Type.Optional(Type.Boolean()),
key: Type.String(), slowly: Type.Optional(Type.Boolean()),
targetId: Type.Optional(Type.String()), // press
}), key: Type.Optional(Type.String()),
Type.Object({ // drag
kind: Type.Literal("hover"), startRef: Type.Optional(Type.String()),
ref: Type.String(), endRef: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()), // select
}), values: Type.Optional(Type.Array(Type.String())),
Type.Object({ // fill - use permissive array of objects
kind: Type.Literal("drag"), fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))),
startRef: Type.String(), // resize
endRef: Type.String(), width: Type.Optional(Type.Number()),
targetId: Type.Optional(Type.String()), height: Type.Optional(Type.Number()),
}), // wait
Type.Object({ timeMs: Type.Optional(Type.Number()),
kind: Type.Literal("select"), textGone: Type.Optional(Type.String()),
ref: Type.String(), // evaluate
values: Type.Array(Type.String()), fn: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()), });
}),
Type.Object({
kind: Type.Literal("fill"),
fields: Type.Array(Type.Record(Type.String(), Type.Unknown())),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("resize"),
width: Type.Number(),
height: Type.Number(),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("wait"),
timeMs: Type.Optional(Type.Number()),
text: Type.Optional(Type.String()),
textGone: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("evaluate"),
fn: Type.String(),
ref: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("close"),
targetId: Type.Optional(Type.String()),
}),
]);
// IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`. // IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`.
// A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`), // A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`),

View File

@@ -2,11 +2,14 @@ import { Type } from "@sinclair/typebox";
import { createReactionSchema } from "./reaction-schema.js"; import { createReactionSchema } from "./reaction-schema.js";
// NOTE: chatId and messageId use Type.String() instead of Type.Union([Type.String(), Type.Number()])
// because nested anyOf schemas cause JSON Schema validation failures with Claude API on Vertex AI.
// Telegram IDs are coerced to strings at runtime in telegram-actions.ts.
export const TelegramToolSchema = Type.Union([ export const TelegramToolSchema = Type.Union([
createReactionSchema({ createReactionSchema({
ids: { ids: {
chatId: Type.Union([Type.String(), Type.Number()]), chatId: Type.String(),
messageId: Type.Union([Type.String(), Type.Number()]), messageId: Type.String(),
}, },
includeRemove: true, includeRemove: true,
}), }),