fix(tools): flatten nested anyOf schemas for Vertex AI compatibility

Claude API on Vertex AI (Cloud Code Assist) rejects nested anyOf schemas
as invalid under JSON Schema draft 2020-12. Changes:

- Add tryFlattenLiteralAnyOf() to convert Type.Union([Type.Literal(...)])
  patterns from anyOf with const values to flat enum arrays
- Update stringEnum helper in bash-tools to use Type.Unsafe with flat enum
- Flatten BrowserActSchema from discriminated union to single object
- Simplify TelegramToolSchema to use Type.String() for IDs

Fixes 400 errors when sending messages through WhatsApp/Telegram providers.
This commit is contained in:
Kit
2026-01-07 16:54:13 +00:00
committed by Peter Steinberger
parent de55f4e111
commit a2b3f2c18a
4 changed files with 132 additions and 80 deletions

View File

@@ -39,17 +39,19 @@ const DEFAULT_PATH =
process.env.PATH ??
"/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
const stringEnum = (
values: readonly string[],
options?: Parameters<typeof Type.Union>[1],
// NOTE: Using Type.Unsafe with enum instead of Type.Union([Type.Literal(...)])
// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
// Type.Union of literals compiles to { anyOf: [{ const: "a", type: "string" }, { const: "b", type: "string" }, ...] }
// which is valid but not accepted. A flat enum { type: "string", enum: [...] } works.
const stringEnum = <T extends readonly string[]>(
values: T,
options?: { description?: string },
) =>
Type.Union(
values.map((value) => Type.Literal(value)) as [
ReturnType<typeof Type.Literal>,
...ReturnType<typeof Type.Literal>[],
],
options,
);
Type.Unsafe<T[number]>({
type: "string",
enum: values as unknown as string[],
...options,
});
export type BashToolDefaults = {
backgroundMs?: number;

View File

@@ -154,12 +154,73 @@ function mergePropertySchemas(existing: unknown, incoming: unknown): unknown {
return existing;
}
// Try to collapse an `anyOf` whose variants are all literal schemas into a
// single flat enum schema.
//
// Recognised variant shapes (each must also carry a string `type`):
//   - { const: "value", type: "string" }      (what TypeBox Type.Literal emits)
//   - { enum: ["value", ...], type: "string" } (one or more allowed values)
//
// All variants must share the same `type`. The result is
// { type, enum: [...] } with values in first-seen order and duplicates
// removed (JSON Schema expects enum elements to be unique). Returns null when
// the array is empty or any variant is not one of the shapes above, so the
// caller can fall back to its regular anyOf handling.
function tryFlattenLiteralAnyOf(
  anyOf: unknown[],
): { type: string; enum: unknown[] } | null {
  if (anyOf.length === 0) return null;

  const seen = new Set<unknown>();
  const allValues: unknown[] = [];
  let commonType: string | null = null;

  for (const variant of anyOf) {
    if (!variant || typeof variant !== "object") return null;
    const v = variant as Record<string, unknown>;

    // Collect the literal value(s) this variant allows: `const` takes
    // precedence, otherwise a non-empty `enum` list.
    let literals: unknown[];
    if ("const" in v) {
      literals = [v.const];
    } else if (Array.isArray(v.enum) && v.enum.length > 0) {
      literals = v.enum;
    } else {
      return null; // Not a literal pattern
    }

    // Every variant must declare the same type (usually "string").
    const variantType = typeof v.type === "string" ? v.type : null;
    if (!variantType) return null;
    if (commonType === null) commonType = variantType;
    else if (commonType !== variantType) return null;

    // Keep first-seen order while dropping repeated values.
    for (const value of literals) {
      if (seen.has(value)) continue;
      seen.add(value);
      allValues.push(value);
    }
  }

  if (commonType === null || allValues.length === 0) return null;
  return { type: commonType, enum: allValues };
}
function cleanSchemaForGemini(schema: unknown): unknown {
if (!schema || typeof schema !== "object") return schema;
if (Array.isArray(schema)) return schema.map(cleanSchemaForGemini);
const obj = schema as Record<string, unknown>;
const hasAnyOf = "anyOf" in obj && Array.isArray(obj.anyOf);
// Try to flatten anyOf of literals to a single enum BEFORE processing
// This handles Type.Union([Type.Literal("a"), Type.Literal("b")]) patterns
if (hasAnyOf) {
const flattened = tryFlattenLiteralAnyOf(obj.anyOf as unknown[]);
if (flattened) {
// Return flattened enum, preserving metadata (description, title, default, examples)
const result: Record<string, unknown> = {
type: flattened.type,
enum: flattened.enum,
};
for (const key of ["description", "title", "default", "examples"]) {
if (key in obj && obj[key] !== undefined) {
result[key] = obj[key];
}
}
return result;
}
}
const cleaned: Record<string, unknown> = {};
for (const [key, value] of Object.entries(obj)) {
@@ -409,8 +470,13 @@ function createWhatsAppLoginTool(): AnyAgentTool {
name: "whatsapp_login",
description:
"Generate a WhatsApp QR code for linking, or wait for the scan to complete.",
// NOTE: Using Type.Unsafe for action enum instead of Type.Union([Type.Literal(...)])
// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
parameters: Type.Object({
action: Type.Union([Type.Literal("start"), Type.Literal("wait")]),
action: Type.Unsafe<"start" | "wait">({
type: "string",
enum: ["start", "wait"],
}),
timeoutMs: Type.Optional(Type.Number()),
force: Type.Optional(Type.Boolean()),
}),

View File

@@ -28,74 +28,55 @@ import {
readStringParam,
} from "./common.js";
const BrowserActSchema = Type.Union([
Type.Object({
kind: Type.Literal("click"),
ref: Type.String(),
targetId: Type.Optional(Type.String()),
doubleClick: Type.Optional(Type.Boolean()),
button: Type.Optional(Type.String()),
modifiers: Type.Optional(Type.Array(Type.String())),
// NOTE: Using a flattened object schema instead of Type.Union([Type.Object(...), ...])
// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
// The discriminator (kind) determines which properties are relevant; runtime validates.
const BrowserActSchema = Type.Object({
kind: Type.Unsafe<string>({
type: "string",
enum: [
"click",
"type",
"press",
"hover",
"drag",
"select",
"fill",
"resize",
"wait",
"evaluate",
"close",
],
}),
Type.Object({
kind: Type.Literal("type"),
ref: Type.String(),
text: Type.String(),
targetId: Type.Optional(Type.String()),
submit: Type.Optional(Type.Boolean()),
slowly: Type.Optional(Type.Boolean()),
}),
Type.Object({
kind: Type.Literal("press"),
key: Type.String(),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("hover"),
ref: Type.String(),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("drag"),
startRef: Type.String(),
endRef: Type.String(),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("select"),
ref: Type.String(),
values: Type.Array(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("fill"),
fields: Type.Array(Type.Record(Type.String(), Type.Unknown())),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("resize"),
width: Type.Number(),
height: Type.Number(),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("wait"),
timeMs: Type.Optional(Type.Number()),
text: Type.Optional(Type.String()),
textGone: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("evaluate"),
fn: Type.String(),
ref: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
}),
Type.Object({
kind: Type.Literal("close"),
targetId: Type.Optional(Type.String()),
}),
]);
// Common fields
targetId: Type.Optional(Type.String()),
ref: Type.Optional(Type.String()),
// click
doubleClick: Type.Optional(Type.Boolean()),
button: Type.Optional(Type.String()),
modifiers: Type.Optional(Type.Array(Type.String())),
// type
text: Type.Optional(Type.String()),
submit: Type.Optional(Type.Boolean()),
slowly: Type.Optional(Type.Boolean()),
// press
key: Type.Optional(Type.String()),
// drag
startRef: Type.Optional(Type.String()),
endRef: Type.Optional(Type.String()),
// select
values: Type.Optional(Type.Array(Type.String())),
// fill - use permissive array of objects
fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))),
// resize
width: Type.Optional(Type.Number()),
height: Type.Optional(Type.Number()),
// wait
timeMs: Type.Optional(Type.Number()),
textGone: Type.Optional(Type.String()),
// evaluate
fn: Type.Optional(Type.String()),
});
// IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`.
// A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`),

View File

@@ -2,11 +2,14 @@ import { Type } from "@sinclair/typebox";
import { createReactionSchema } from "./reaction-schema.js";
// NOTE: chatId and messageId use Type.String() instead of Type.Union([Type.String(), Type.Number()])
// because nested anyOf schemas cause JSON Schema validation failures with Claude API on Vertex AI.
// Telegram IDs are coerced to strings at runtime in telegram-actions.ts.
export const TelegramToolSchema = Type.Union([
createReactionSchema({
ids: {
chatId: Type.Union([Type.String(), Type.Number()]),
messageId: Type.Union([Type.String(), Type.Number()]),
chatId: Type.String(),
messageId: Type.String(),
},
includeRemove: true,
}),