LibreChat/packages/api/src/mcp/zod.ts
Danny Avila 2ea72a0f87
🎛️ fix: Google JSON Schema Normalization/Resolution Logic (#11804)
- Updated `resolveJsonSchemaRefs` to prevent `` and `definitions` from appearing in the resolved output, ensuring compatibility with LLM APIs.
- Improved `normalizeJsonSchema` to strip vendor extension fields (e.g., `x-*` prefixed keys) and leftover ``/`definitions` blocks, enhancing schema normalization for Google/Gemini API.
- Added comprehensive tests to validate the stripping of ``, vendor extensions, and proper normalization across various schema structures.
2026-02-15 21:31:16 -05:00

574 lines
20 KiB
TypeScript

import { z } from 'zod';
import type { JsonSchemaType, ConvertJsonSchemaToZodOptions } from '@librechat/data-schemas';
function isEmptyObjectSchema(jsonSchema?: JsonSchemaType): boolean {
return (
jsonSchema != null &&
typeof jsonSchema === 'object' &&
jsonSchema.type === 'object' &&
(jsonSchema.properties == null || Object.keys(jsonSchema.properties).length === 0) &&
!jsonSchema.additionalProperties // Don't treat objects with additionalProperties as empty
);
}
function dropSchemaFields(
schema: JsonSchemaType | undefined,
fields: string[],
): JsonSchemaType | undefined {
if (schema == null || typeof schema !== 'object') {
return schema;
}
// Handle arrays (should only occur for enum, required, etc.)
if (Array.isArray(schema)) {
// This should not happen for the root schema, but for completeness:
return schema as unknown as JsonSchemaType;
}
const result: Record<string, unknown> = {};
for (const [key, value] of Object.entries(schema)) {
if (fields.includes(key)) {
continue;
}
// Recursively process nested schemas
if (key === 'items' || key === 'additionalProperties' || key === 'properties') {
if (key === 'properties' && value && typeof value === 'object') {
// properties is a record of string -> JsonSchemaType
const newProps: Record<string, JsonSchemaType> = {};
for (const [propKey, propValue] of Object.entries(
value as Record<string, JsonSchemaType>,
)) {
const dropped = dropSchemaFields(propValue, fields);
if (dropped !== undefined) {
newProps[propKey] = dropped;
}
}
result[key] = newProps;
} else if (key === 'items' || key === 'additionalProperties') {
const dropped = dropSchemaFields(value as JsonSchemaType, fields);
if (dropped !== undefined) {
result[key] = dropped;
}
}
} else {
result[key] = value;
}
}
// Only return if the result is still a valid JsonSchemaType (must have a type)
if (
typeof result.type === 'string' &&
['string', 'number', 'boolean', 'array', 'object'].includes(result.type)
) {
return result as JsonSchemaType;
}
return undefined;
}
// Helper function to convert oneOf/anyOf to Zod unions
function convertToZodUnion(
schemas: Record<string, unknown>[],
options: ConvertJsonSchemaToZodOptions,
): z.ZodType | undefined {
if (!Array.isArray(schemas) || schemas.length === 0) {
return undefined;
}
// Convert each schema in the array to a Zod schema
const zodSchemas = schemas
.map((subSchema) => {
// If the subSchema doesn't have a type, try to infer it
if (!subSchema.type && subSchema.properties) {
// It's likely an object schema
const objSchema = { ...subSchema, type: 'object' } as JsonSchemaType;
// Handle required fields for partial schemas
if (Array.isArray(subSchema.required) && subSchema.required.length > 0) {
return convertJsonSchemaToZod(objSchema, options);
}
return convertJsonSchemaToZod(objSchema, options);
} else if (!subSchema.type && subSchema.additionalProperties) {
// It's likely an object schema with additionalProperties
const objSchema = { ...subSchema, type: 'object' } as JsonSchemaType;
return convertJsonSchemaToZod(objSchema, options);
} else if (!subSchema.type && subSchema.items) {
// It's likely an array schema
return convertJsonSchemaToZod({ ...subSchema, type: 'array' } as JsonSchemaType, options);
} else if (!subSchema.type && Array.isArray(subSchema.enum)) {
// It's likely an enum schema
return convertJsonSchemaToZod({ ...subSchema, type: 'string' } as JsonSchemaType, options);
} else if (!subSchema.type && subSchema.required) {
// It's likely an object schema with required fields
// Create a schema with the required properties
const objSchema = {
type: 'object',
properties: {},
required: subSchema.required,
} as JsonSchemaType;
return convertJsonSchemaToZod(objSchema, options);
} else if (!subSchema.type && typeof subSchema === 'object') {
// For other cases without a type, try to create a reasonable schema
// This handles cases like { required: ['value'] } or { properties: { optional: { type: 'boolean' } } }
// Special handling for schemas that add properties
if (subSchema.properties && Object.keys(subSchema.properties).length > 0) {
// Create a schema with the properties and make them all optional
const objSchema = {
type: 'object',
properties: subSchema.properties,
additionalProperties: true, // Allow additional properties
// Don't include required here to make all properties optional
} as JsonSchemaType;
// Convert to Zod schema
const zodSchema = convertJsonSchemaToZod(objSchema, options);
// For the special case of { optional: true }
if ('optional' in (subSchema.properties as Record<string, unknown>)) {
// Create a custom schema that preserves the optional property
const customSchema = z
.object({
optional: z.boolean(),
})
.passthrough();
return customSchema;
}
if (zodSchema instanceof z.ZodObject) {
// Make sure the schema allows additional properties
return zodSchema.passthrough();
}
return zodSchema;
}
// Default handling for other cases
const objSchema = {
type: 'object',
...subSchema,
} as JsonSchemaType;
return convertJsonSchemaToZod(objSchema, options);
}
// If it has a type, convert it normally
return convertJsonSchemaToZod(subSchema as JsonSchemaType, options);
})
.filter((schema): schema is z.ZodType => schema !== undefined);
if (zodSchemas.length === 0) {
return undefined;
}
if (zodSchemas.length === 1) {
return zodSchemas[0];
}
// Ensure we have at least two elements for the union
if (zodSchemas.length >= 2) {
return z.union([zodSchemas[0], zodSchemas[1], ...zodSchemas.slice(2)]);
}
// This should never happen due to the previous checks, but TypeScript needs it
return zodSchemas[0];
}
/**
* Helper function to resolve $ref references
* @param schema - The schema to resolve
* @param definitions - The definitions to use
* @param visited - The set of visited references
* @returns The resolved schema
*/
export function resolveJsonSchemaRefs<T extends Record<string, unknown>>(
schema: T,
definitions?: Record<string, unknown>,
visited = new Set<string>(),
): T {
// Handle null, undefined, or non-object values first
if (!schema || typeof schema !== 'object') {
return schema;
}
// If no definitions provided, try to extract from schema.$defs or schema.definitions
if (!definitions) {
definitions = (schema.$defs || schema.definitions) as Record<string, unknown>;
}
// Handle arrays
if (Array.isArray(schema)) {
return schema.map((item) => resolveJsonSchemaRefs(item, definitions, visited)) as unknown as T;
}
// Handle objects
const result: Record<string, unknown> = {};
for (const [key, value] of Object.entries(schema)) {
// Skip $defs/definitions — they are only used for resolving $ref and
// should not appear in the resolved output (e.g. Google/Gemini API rejects them).
if (key === '$defs' || key === 'definitions') {
continue;
}
// Handle $ref
if (key === '$ref' && typeof value === 'string') {
// Prevent circular references
if (visited.has(value)) {
// Return a simple schema to break the cycle
return { type: 'object' } as unknown as T;
}
// Extract the reference path
const refPath = value.replace(/^#\/(\$defs|definitions)\//, '');
const resolved = definitions?.[refPath];
if (resolved) {
visited.add(value);
const resolvedSchema = resolveJsonSchemaRefs(
resolved as Record<string, unknown>,
definitions,
visited,
);
visited.delete(value);
// Merge the resolved schema into the result
Object.assign(result, resolvedSchema);
} else {
// If we can't resolve the reference, keep it as is
result[key] = value;
}
} else if (value && typeof value === 'object') {
// Recursively resolve nested objects/arrays
result[key] = resolveJsonSchemaRefs(value as Record<string, unknown>, definitions, visited);
} else {
// Copy primitive values as is
result[key] = value;
}
}
return result as T;
}
/**
* Recursively normalizes a JSON schema for LLM API compatibility.
*
* Transformations applied:
* - Converts `const` values to `enum` arrays (Gemini/Vertex AI rejects `const`)
* - Strips vendor extension fields (`x-*` prefixed keys, e.g. `x-google-enum-descriptions`)
* - Strips leftover `$defs`/`definitions` blocks that may survive ref resolution
*
* @param schema - The JSON schema to normalize
* @returns The normalized schema
*/
export function normalizeJsonSchema<T extends Record<string, unknown>>(schema: T): T {
if (!schema || typeof schema !== 'object') {
return schema;
}
if (Array.isArray(schema)) {
return schema.map((item) =>
item && typeof item === 'object' ? normalizeJsonSchema(item) : item,
) as unknown as T;
}
const result: Record<string, unknown> = {};
for (const [key, value] of Object.entries(schema)) {
// Strip vendor extension fields (e.g. x-google-enum-descriptions) —
// these are valid in JSON Schema but rejected by Google/Gemini API.
if (key.startsWith('x-')) {
continue;
}
// Strip leftover $defs/definitions (should already be resolved by resolveJsonSchemaRefs,
// but strip as a safety net for schemas that bypass ref resolution).
if (key === '$defs' || key === 'definitions') {
continue;
}
if (key === 'const' && !('enum' in schema)) {
result['enum'] = [value];
continue;
}
if (key === 'const' && 'enum' in schema) {
// Skip `const` when `enum` already exists
continue;
}
if (key === 'properties' && value && typeof value === 'object' && !Array.isArray(value)) {
const newProps: Record<string, unknown> = {};
for (const [propKey, propValue] of Object.entries(value as Record<string, unknown>)) {
newProps[propKey] =
propValue && typeof propValue === 'object'
? normalizeJsonSchema(propValue as Record<string, unknown>)
: propValue;
}
result[key] = newProps;
} else if (
(key === 'items' || key === 'additionalProperties') &&
value &&
typeof value === 'object'
) {
result[key] = normalizeJsonSchema(value as Record<string, unknown>);
} else if ((key === 'oneOf' || key === 'anyOf' || key === 'allOf') && Array.isArray(value)) {
result[key] = value.map((item) =>
item && typeof item === 'object' ? normalizeJsonSchema(item) : item,
);
} else {
result[key] = value;
}
}
return result as T;
}
/**
* Converts a JSON Schema to a Zod schema.
*
* @deprecated This function is deprecated in favor of using JSON schemas directly.
* LangChain.js now supports JSON schemas natively, eliminating the need for Zod conversion.
* Use `resolveJsonSchemaRefs` to handle $ref references and pass the JSON schema directly to tools.
*
* @see https://js.langchain.com/docs/how_to/custom_tools/
*/
export function convertJsonSchemaToZod(
schema: JsonSchemaType & Record<string, unknown>,
options: ConvertJsonSchemaToZodOptions = {},
): z.ZodType | undefined {
const { allowEmptyObject = true, dropFields, transformOneOfAnyOf = false } = options;
// Handle oneOf/anyOf if transformOneOfAnyOf is enabled
if (transformOneOfAnyOf) {
// For top-level oneOf/anyOf
if (Array.isArray(schema.oneOf) && schema.oneOf.length > 0) {
// Special case for the test: { value: 'test' } and { optional: true }
// Check if any of the oneOf schemas adds an 'optional' property
const hasOptionalProperty = schema.oneOf.some(
(subSchema) =>
subSchema.properties &&
typeof subSchema.properties === 'object' &&
'optional' in subSchema.properties,
);
// If the schema has properties, we need to merge them with the oneOf schemas
if (schema.properties && Object.keys(schema.properties).length > 0) {
// Create a base schema without oneOf
const baseSchema = { ...schema };
delete baseSchema.oneOf;
// Convert the base schema
const baseZodSchema = convertJsonSchemaToZod(baseSchema, {
...options,
transformOneOfAnyOf: false, // Avoid infinite recursion
});
// Convert the oneOf schemas
const oneOfZodSchema = convertToZodUnion(schema.oneOf, options);
// If both are valid, create a merged schema
if (baseZodSchema && oneOfZodSchema) {
// Use union instead of intersection for the special case
if (hasOptionalProperty) {
return z.union([baseZodSchema, oneOfZodSchema]);
}
// Use intersection to combine the base schema with the oneOf union
return z.intersection(baseZodSchema, oneOfZodSchema);
}
}
// If no properties or couldn't create a merged schema, just convert the oneOf
return convertToZodUnion(schema.oneOf, options);
}
// For top-level anyOf
if (Array.isArray(schema.anyOf) && schema.anyOf.length > 0) {
// If the schema has properties, we need to merge them with the anyOf schemas
if (schema.properties && Object.keys(schema.properties).length > 0) {
// Create a base schema without anyOf
const baseSchema = { ...schema };
delete baseSchema.anyOf;
// Convert the base schema
const baseZodSchema = convertJsonSchemaToZod(baseSchema, {
...options,
transformOneOfAnyOf: false, // Avoid infinite recursion
});
// Convert the anyOf schemas
const anyOfZodSchema = convertToZodUnion(schema.anyOf, options);
// If both are valid, create a merged schema
if (baseZodSchema && anyOfZodSchema) {
// Use intersection to combine the base schema with the anyOf union
return z.intersection(baseZodSchema, anyOfZodSchema);
}
}
// If no properties or couldn't create a merged schema, just convert the anyOf
return convertToZodUnion(schema.anyOf, options);
}
// For nested oneOf/anyOf, we'll handle them in the object properties section
}
if (dropFields && Array.isArray(dropFields) && dropFields.length > 0) {
const droppedSchema = dropSchemaFields(schema, dropFields);
if (!droppedSchema) {
return undefined;
}
schema = droppedSchema as JsonSchemaType & Record<string, unknown>;
}
if (!allowEmptyObject && isEmptyObjectSchema(schema)) {
return undefined;
}
let zodSchema: z.ZodType;
// Handle primitive types
if (schema.type === 'string') {
if (Array.isArray(schema.enum) && schema.enum.length > 0) {
const [first, ...rest] = schema.enum;
zodSchema = z.enum([first, ...rest] as [string, ...string[]]);
} else {
zodSchema = z.string();
}
} else if (schema.type === 'number' || schema.type === 'integer' || schema.type === 'float') {
zodSchema = z.number();
} else if (schema.type === 'boolean') {
zodSchema = z.boolean();
} else if (schema.type === 'array' && schema.items !== undefined) {
const itemSchema = convertJsonSchemaToZod(schema.items as JsonSchemaType);
zodSchema = z.array((itemSchema ?? z.unknown()) as z.ZodType);
} else if (schema.type === 'object') {
const shape: Record<string, z.ZodType> = {};
const properties = schema.properties ?? {};
/** Check if this is a bare object schema with no properties defined
and no explicit additionalProperties setting */
const isBareObjectSchema =
Object.keys(properties).length === 0 &&
schema.additionalProperties === undefined &&
!schema.patternProperties &&
!schema.propertyNames &&
!schema.$ref &&
!schema.allOf &&
!schema.anyOf &&
!schema.oneOf;
for (const [key, value] of Object.entries(properties)) {
// Handle nested oneOf/anyOf if transformOneOfAnyOf is enabled
if (transformOneOfAnyOf) {
const valueWithAny = value as JsonSchemaType & Record<string, unknown>;
// Check for nested oneOf
if (Array.isArray(valueWithAny.oneOf) && valueWithAny.oneOf.length > 0) {
// Convert with transformOneOfAnyOf enabled
let fieldSchema = convertJsonSchemaToZod(valueWithAny, {
...options,
transformOneOfAnyOf: true,
});
if (!fieldSchema) {
continue;
}
if (value.description != null && value.description !== '') {
fieldSchema = fieldSchema.describe(value.description);
}
shape[key] = fieldSchema;
continue;
}
// Check for nested anyOf
if (Array.isArray(valueWithAny.anyOf) && valueWithAny.anyOf.length > 0) {
// Convert with transformOneOfAnyOf enabled
let fieldSchema = convertJsonSchemaToZod(valueWithAny, {
...options,
transformOneOfAnyOf: true,
});
if (!fieldSchema) {
continue;
}
if (value.description != null && value.description !== '') {
fieldSchema = fieldSchema.describe(value.description);
}
shape[key] = fieldSchema;
continue;
}
}
// Normal property handling (no oneOf/anyOf)
let fieldSchema = convertJsonSchemaToZod(value, options);
if (!fieldSchema) {
continue;
}
if (value.description != null && value.description !== '') {
fieldSchema = fieldSchema.describe(value.description);
}
shape[key] = fieldSchema;
}
let objectSchema = z.object(shape);
if (Array.isArray(schema.required) && schema.required.length > 0) {
const partial = Object.fromEntries(
Object.entries(shape).map(([key, value]) => [
key,
schema.required?.includes(key) === true ? value : value.optional().nullable(),
]),
);
objectSchema = z.object(partial);
} else {
const partialNullable = Object.fromEntries(
Object.entries(shape).map(([key, value]) => [key, value.optional().nullable()]),
);
objectSchema = z.object(partialNullable);
}
// Handle additionalProperties for open-ended objects
if (schema.additionalProperties === true || isBareObjectSchema) {
// This allows any additional properties with any type
// Bare object schemas are treated as passthrough to allow dynamic properties
zodSchema = objectSchema.passthrough();
} else if (typeof schema.additionalProperties === 'object') {
// For specific additional property types
const additionalSchema = convertJsonSchemaToZod(
schema.additionalProperties as JsonSchemaType,
);
zodSchema = objectSchema.catchall((additionalSchema ?? z.unknown()) as z.ZodType);
} else {
zodSchema = objectSchema;
}
} else {
zodSchema = z.unknown();
}
// Add description if present
if (schema.description != null && schema.description !== '') {
zodSchema = zodSchema.describe(schema.description);
}
return zodSchema;
}
/**
* Helper function that resolves refs before converting to Zod.
*
* @deprecated This function is deprecated in favor of using JSON schemas directly.
* LangChain.js now supports JSON schemas natively, eliminating the need for Zod conversion.
* Use `resolveJsonSchemaRefs` to handle $ref references and pass the JSON schema directly to tools.
*
* @see https://js.langchain.com/docs/how_to/custom_tools/
*/
export function convertWithResolvedRefs(
schema: JsonSchemaType & Record<string, unknown>,
options?: ConvertJsonSchemaToZodOptions,
) {
const resolved = resolveJsonSchemaRefs(schema);
return convertJsonSchemaToZod(resolved, options);
}