diff --git a/src/agents/bash-tools.ts b/src/agents/bash-tools.ts index c380710a3..6aedf1e13 100644 --- a/src/agents/bash-tools.ts +++ b/src/agents/bash-tools.ts @@ -39,17 +39,19 @@ const DEFAULT_PATH = process.env.PATH ?? "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; -const stringEnum = ( - values: readonly string[], - options?: Parameters[1], +// NOTE: Using Type.Unsafe with enum instead of Type.Union([Type.Literal(...)]) +// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema. +// Type.Union of literals compiles to { anyOf: [{ const: "a", type: "string" }, { const: "b", type: "string" }, ...] } +// which is valid but not accepted. A flat enum { type: "string", enum: [...] } works. +const stringEnum = ( + values: T, + options?: { description?: string }, ) => - Type.Union( - values.map((value) => Type.Literal(value)) as [ - ReturnType, - ...ReturnType[], - ], - options, - ); + Type.Unsafe({ + type: "string", + enum: values as unknown as string[], + ...options, + }); export type BashToolDefaults = { backgroundMs?: number; diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts index 80de703fd..ffbd038e8 100644 --- a/src/agents/pi-tools.ts +++ b/src/agents/pi-tools.ts @@ -154,12 +154,73 @@ function mergePropertySchemas(existing: unknown, incoming: unknown): unknown { return existing; } +// Check if an anyOf array contains only literal values that can be flattened +// TypeBox Type.Literal generates { const: "value", type: "string" } +// Some schemas may use { enum: ["value"], type: "string" } +// Both patterns are flattened to { type: "string", enum: ["a", "b", ...] 
} +function tryFlattenLiteralAnyOf( + anyOf: unknown[], +): { type: string; enum: unknown[] } | null { + if (anyOf.length === 0) return null; + + const allValues: unknown[] = []; + let commonType: string | null = null; + + for (const variant of anyOf) { + if (!variant || typeof variant !== "object") return null; + const v = variant as Record; + + // Extract the literal value - either from const or single-element enum + let literalValue: unknown; + if ("const" in v) { + literalValue = v.const; + } else if (Array.isArray(v.enum) && v.enum.length === 1) { + literalValue = v.enum[0]; + } else { + return null; // Not a literal pattern + } + + // Must have consistent type (usually "string") + const variantType = typeof v.type === "string" ? v.type : null; + if (!variantType) return null; + if (commonType === null) commonType = variantType; + else if (commonType !== variantType) return null; + + allValues.push(literalValue); + } + + if (commonType && allValues.length > 0) { + return { type: commonType, enum: allValues }; + } + return null; +} + function cleanSchemaForGemini(schema: unknown): unknown { if (!schema || typeof schema !== "object") return schema; if (Array.isArray(schema)) return schema.map(cleanSchemaForGemini); const obj = schema as Record; const hasAnyOf = "anyOf" in obj && Array.isArray(obj.anyOf); + + // Try to flatten anyOf of literals to a single enum BEFORE processing + // This handles Type.Union([Type.Literal("a"), Type.Literal("b")]) patterns + if (hasAnyOf) { + const flattened = tryFlattenLiteralAnyOf(obj.anyOf as unknown[]); + if (flattened) { + // Return flattened enum, preserving metadata (description, title, default, examples) + const result: Record = { + type: flattened.type, + enum: flattened.enum, + }; + for (const key of ["description", "title", "default", "examples"]) { + if (key in obj && obj[key] !== undefined) { + result[key] = obj[key]; + } + } + return result; + } + } + const cleaned: Record = {}; for (const [key, value] of 
Object.entries(obj)) { @@ -409,8 +470,13 @@ function createWhatsAppLoginTool(): AnyAgentTool { name: "whatsapp_login", description: "Generate a WhatsApp QR code for linking, or wait for the scan to complete.", + // NOTE: Using Type.Unsafe for action enum instead of Type.Union([Type.Literal(...)]) + // because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema. parameters: Type.Object({ - action: Type.Union([Type.Literal("start"), Type.Literal("wait")]), + action: Type.Unsafe<"start" | "wait">({ + type: "string", + enum: ["start", "wait"], + }), timeoutMs: Type.Optional(Type.Number()), force: Type.Optional(Type.Boolean()), }), diff --git a/src/agents/tools/browser-tool.ts b/src/agents/tools/browser-tool.ts index 8681e3abb..12adc177a 100644 --- a/src/agents/tools/browser-tool.ts +++ b/src/agents/tools/browser-tool.ts @@ -28,74 +28,55 @@ import { readStringParam, } from "./common.js"; -const BrowserActSchema = Type.Union([ - Type.Object({ - kind: Type.Literal("click"), - ref: Type.String(), - targetId: Type.Optional(Type.String()), - doubleClick: Type.Optional(Type.Boolean()), - button: Type.Optional(Type.String()), - modifiers: Type.Optional(Type.Array(Type.String())), +// NOTE: Using a flattened object schema instead of Type.Union([Type.Object(...), ...]) +// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema. +// The discriminator (kind) determines which properties are relevant; runtime validates. 
+const BrowserActSchema = Type.Object({ + kind: Type.Unsafe({ + type: "string", + enum: [ + "click", + "type", + "press", + "hover", + "drag", + "select", + "fill", + "resize", + "wait", + "evaluate", + "close", + ], }), - Type.Object({ - kind: Type.Literal("type"), - ref: Type.String(), - text: Type.String(), - targetId: Type.Optional(Type.String()), - submit: Type.Optional(Type.Boolean()), - slowly: Type.Optional(Type.Boolean()), - }), - Type.Object({ - kind: Type.Literal("press"), - key: Type.String(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("hover"), - ref: Type.String(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("drag"), - startRef: Type.String(), - endRef: Type.String(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("select"), - ref: Type.String(), - values: Type.Array(Type.String()), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("fill"), - fields: Type.Array(Type.Record(Type.String(), Type.Unknown())), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("resize"), - width: Type.Number(), - height: Type.Number(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("wait"), - timeMs: Type.Optional(Type.Number()), - text: Type.Optional(Type.String()), - textGone: Type.Optional(Type.String()), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("evaluate"), - fn: Type.String(), - ref: Type.Optional(Type.String()), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("close"), - targetId: Type.Optional(Type.String()), - }), -]); + // Common fields + targetId: Type.Optional(Type.String()), + ref: Type.Optional(Type.String()), + // click + doubleClick: Type.Optional(Type.Boolean()), + button: Type.Optional(Type.String()), + modifiers: 
Type.Optional(Type.Array(Type.String())), + // type + text: Type.Optional(Type.String()), + submit: Type.Optional(Type.Boolean()), + slowly: Type.Optional(Type.Boolean()), + // press + key: Type.Optional(Type.String()), + // drag + startRef: Type.Optional(Type.String()), + endRef: Type.Optional(Type.String()), + // select + values: Type.Optional(Type.Array(Type.String())), + // fill - use permissive array of objects + fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))), + // resize + width: Type.Optional(Type.Number()), + height: Type.Optional(Type.Number()), + // wait + timeMs: Type.Optional(Type.Number()), + textGone: Type.Optional(Type.String()), + // evaluate + fn: Type.Optional(Type.String()), +}); // IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`. // A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`), diff --git a/src/agents/tools/telegram-schema.ts b/src/agents/tools/telegram-schema.ts index b8d999817..a19bb4683 100644 --- a/src/agents/tools/telegram-schema.ts +++ b/src/agents/tools/telegram-schema.ts @@ -2,11 +2,14 @@ import { Type } from "@sinclair/typebox"; import { createReactionSchema } from "./reaction-schema.js"; +// NOTE: chatId and messageId use Type.String() instead of Type.Union([Type.String(), Type.Number()]) +// because nested anyOf schemas cause JSON Schema validation failures with Claude API on Vertex AI. +// Telegram IDs are coerced to strings at runtime in telegram-actions.ts. export const TelegramToolSchema = Type.Union([ createReactionSchema({ ids: { - chatId: Type.Union([Type.String(), Type.Number()]), - messageId: Type.Union([Type.String(), Type.Number()]), + chatId: Type.String(), + messageId: Type.String(), }, includeRemove: true, }),