LibreChat/packages/data-provider/src/file-config.ts
Danny Avila 046e92217f
🧩 feat: OpenDocument Format File Upload and Native ODS Parsing (#11959)
* feat: Add support for OpenDocument MIME types in file configuration

Updated the applicationMimeTypes regex to include support for OASIS OpenDocument formats, enhancing the file type recognition capabilities of the data provider.

* feat: document processing with OpenDocument support

Added support for OpenDocument Spreadsheet (ODS) MIME type in the file processing service and updated the document parser to handle ODS files. Included tests to verify correct parsing of ODS documents and updated file configuration to recognize OpenDocument formats.

* refactor: Enhance document processing to support additional Excel MIME types

Updated the document processing logic to utilize a regex for matching Excel MIME types, improving flexibility in handling various Excel file formats. Added tests to ensure correct parsing of new MIME types, including multiple Excel variants and OpenDocument formats. Adjusted file configuration to include these MIME types for better recognition in the file processing service.

* feat: Add support for additional OpenDocument MIME types in file processing

Enhanced the document processing service to support ODT, ODP, and ODG MIME types. Updated tests to verify correct routing through the OCR strategy for these new formats. Adjusted documentation to reflect changes in handled MIME types for improved clarity.
2026-02-26 14:39:49 -05:00

756 lines
25 KiB
TypeScript

import { z } from 'zod';
import type { EndpointFileConfig, FileConfig } from './types/files';
import { EModelEndpoint, isAgentsEndpoint, isDocumentSupportedProvider } from './schemas';
import { normalizeEndpointName } from './utils';
/**
 * Lookup of endpoints flagged as supporting files, keyed by `EModelEndpoint` member.
 * Every endpoint listed here is set to `true`; endpoints not listed have no entry.
 * NOTE(review): presumably consulted to decide whether file uploads are offered for an
 * endpoint — confirm against callers.
 */
export const supportsFiles = {
[EModelEndpoint.openAI]: true,
[EModelEndpoint.google]: true,
[EModelEndpoint.assistants]: true,
[EModelEndpoint.azureAssistants]: true,
[EModelEndpoint.agents]: true,
[EModelEndpoint.azureOpenAI]: true,
[EModelEndpoint.anthropic]: true,
[EModelEndpoint.custom]: true,
[EModelEndpoint.bedrock]: true,
};
/**
 * MIME type strings used for Excel workbooks: the legacy `application/*excel*` / `xls`
 * aliases followed by the OOXML spreadsheet type. Each entry is an `application/` subtype,
 * so only the subtypes are spelled out and the prefix is applied once.
 */
export const excelFileTypes = [
  'vnd.ms-excel',
  'msexcel',
  'x-msexcel',
  'x-ms-excel',
  'x-excel',
  'x-dos_ms_excel',
  'xls',
  'x-xls',
  'vnd.openxmlformats-officedocument.spreadsheetml.sheet',
].map((subtype) => `application/${subtype}`);
/**
 * Flat list of MIME type strings covering source-code/text, document, image, archive,
 * OpenDocument, SVG, video and audio types, followed by every entry of `excelFileTypes`.
 * NOTE(review): the consumers of this list are not visible in this section — confirm
 * against callers before reordering or removing entries.
 */
export const fullMimeTypesList = [
'text/x-c',
'text/x-c++',
'application/csv',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'text/html',
'text/x-java',
'application/json',
'text/markdown',
'application/pdf',
'text/x-php',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'text/x-python',
'text/x-script.python',
'text/x-ruby',
'text/x-tex',
'text/plain',
'text/css',
'text/vtt',
'image/jpeg',
'text/javascript',
'image/gif',
'image/png',
'image/heic',
'image/heif',
'application/x-tar',
'application/x-sh',
'application/typescript',
'application/sql',
'application/yaml',
'application/vnd.coffeescript',
'application/xml',
'application/zip',
'application/x-parquet',
// OpenDocument formats (text, spreadsheet, presentation, graphics)
'application/vnd.oasis.opendocument.text',
'application/vnd.oasis.opendocument.spreadsheet',
'application/vnd.oasis.opendocument.presentation',
'application/vnd.oasis.opendocument.graphics',
// SVG images, under both the short and the `+xml` subtype
'image/svg',
'image/svg+xml',
// Video formats
'video/mp4',
'video/avi',
'video/mov',
'video/wmv',
'video/flv',
'video/webm',
'video/mkv',
'video/m4v',
'video/3gp',
'video/ogv',
// Audio formats
'audio/mp3',
'audio/wav',
'audio/ogg',
'audio/m4a',
'audio/aac',
'audio/flac',
'audio/wma',
'audio/opus',
'audio/mpeg',
// Excel variants are appended last
...excelFileTypes,
];
/**
 * Flat list of MIME type strings: source-code/text, document, image and archive types,
 * followed by every entry of `excelFileTypes`.
 * NOTE(review): going by the name, these are the types accepted for code-interpreter
 * uploads — confirm against callers.
 */
export const codeInterpreterMimeTypesList = [
'text/x-c',
'text/x-c++',
'application/csv',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'text/html',
'text/x-java',
'application/json',
'text/markdown',
'application/pdf',
'text/x-php',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'text/x-python',
'text/x-script.python',
'text/x-ruby',
'text/x-tex',
'text/plain',
'text/css',
'image/jpeg',
'text/javascript',
'image/gif',
'image/png',
'image/heic',
'image/heif',
'application/x-tar',
'application/typescript',
'application/xml',
'application/zip',
'application/x-parquet',
// Excel variants are appended last
...excelFileTypes,
];
/**
 * Flat list of MIME type strings limited to text/source-code types, JSON, PDF and the
 * OOXML word-processing and presentation types (no images, archives or spreadsheets).
 * NOTE(review): going by the name, these are the types accepted for retrieval — confirm
 * against callers.
 */
export const retrievalMimeTypesList = [
'text/x-c',
'text/x-c++',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'text/html',
'text/x-java',
'application/json',
'text/markdown',
'application/pdf',
'text/x-php',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'text/x-python',
'text/x-script.python',
'text/x-ruby',
'text/x-tex',
'text/plain',
];
/**
 * Matches a name that ends in one of the image extensions
 * `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.heic` or `.heif` (case-insensitive).
 */
export const imageExtRegex = /\.(jpg|jpeg|png|gif|webp|heic|heif)$/i;
/**
 * Document format identifiers used as values of `bedrockDocumentFormats`.
 * @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html
 */
export type BedrockDocumentFormat =
  | 'pdf'
  | 'csv'
  | 'doc'
  | 'docx'
  | 'xls'
  | 'xlsx'
  | 'html'
  | 'txt'
  | 'md';

/** Maps MIME types to Bedrock Converse API document format values */
export const bedrockDocumentFormats: Record<string, BedrockDocumentFormat> = {
  'application/pdf': 'pdf',
  'text/csv': 'csv',
  'application/csv': 'csv',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
  'application/vnd.ms-excel': 'xls',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
  'text/html': 'html',
  'text/plain': 'txt',
  'text/markdown': 'md',
};

/**
 * Reports whether `mimeType` is one of the keys of `bedrockDocumentFormats`.
 *
 * The check is restricted to the map's own keys: the `in` operator also matches
 * properties inherited from `Object.prototype`, which would wrongly accept values such
 * as `'toString'`, `'constructor'` or `'__proto__'`.
 *
 * @param mimeType - MIME type to test; `undefined`/`null` yields `false`
 * @returns `true` only when the MIME type has an entry in `bedrockDocumentFormats`
 */
export const isBedrockDocumentType = (mimeType?: string): boolean =>
  mimeType != null && Object.prototype.hasOwnProperty.call(bedrockDocumentFormats, mimeType);
/**
 * Comma-separated value for a file input's `accept` attribute on Bedrock document
 * uploads: the accepted file extensions first, then the matching MIME types.
 */
export const bedrockDocumentExtensions = [
  // File extensions
  '.pdf',
  '.csv',
  '.doc',
  '.docx',
  '.xls',
  '.xlsx',
  '.html',
  '.htm',
  '.txt',
  '.md',
  // MIME types
  'application/pdf',
  'text/csv',
  'application/csv',
  'application/msword',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'application/vnd.ms-excel',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  'text/html',
  'text/plain',
  'text/markdown',
].join(',');
/**
 * Full-string, case-sensitive match for the Excel MIME types — the same nine
 * `application/` subtypes that are listed in `excelFileTypes`.
 */
export const excelMimeTypes =
/^application\/(vnd\.ms-excel|msexcel|x-msexcel|x-ms-excel|x-excel|x-dos_ms_excel|xls|x-xls|vnd\.openxmlformats-officedocument\.spreadsheetml\.sheet)$/;
/** Full-string match for the accepted `text/*` subtypes (source code, markup, CSV/TSV, plain text, ...). */
export const textMimeTypes =
/^(text\/(x-c|x-csharp|tab-separated-values|x-c\+\+|x-h|x-java|html|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|css|vtt|javascript|csv|xml))$/;
/**
 * Full-string match for the accepted `application/*` subtypes: EPUB, CSV, JSON, MS Word,
 * PDF, tar/zip archives, shell/TypeScript/SQL/YAML/CoffeeScript sources, Parquet, XML,
 * the OOXML document/presentation/spreadsheet types and the OpenDocument
 * text/spreadsheet/presentation/graphics types.
 */
export const applicationMimeTypes =
/^(application\/(epub\+zip|csv|json|msword|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|vnd\.oasis\.opendocument\.(text|spreadsheet|presentation|graphics)|xml|zip))$/;
/** Full-string match for `image/jpeg`, `gif`, `png`, `webp`, `heic` and `heif`. */
export const imageMimeTypes = /^image\/(jpeg|gif|png|webp|heic|heif)$/;
/** Full-string match for the accepted `audio/*` subtypes, including common aliases (e.g. `mp3`/`mpeg`/`mpeg3`, `wav`/`wave`/`x-wav`). */
export const audioMimeTypes =
/^audio\/(mp3|mpeg|mpeg3|wav|wave|x-wav|ogg|vorbis|mp4|m4a|x-m4a|flac|x-flac|webm|aac|wma|opus)$/;
/** Full-string match for the accepted `video/*` subtypes. */
export const videoMimeTypes = /^video\/(mp4|avi|mov|wmv|flv|webm|mkv|m4v|3gp|ogv)$/;
/**
 * Default MIME type patterns for OCR; used as `fileConfig.ocr.supportedMimeTypes`.
 * Covers images, Excel variants, PDF, OOXML word-processing/presentation documents,
 * `vnd.ms-word`/`vnd.ms-powerpoint`, EPUB and the OpenDocument formats.
 */
export const defaultOCRMimeTypes = [
imageMimeTypes,
excelMimeTypes,
/^application\/pdf$/,
/^application\/vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation)$/,
/^application\/vnd\.ms-(word|powerpoint)$/,
/^application\/epub\+zip$/,
/^application\/vnd\.oasis\.opendocument\.(text|spreadsheet|presentation|graphics)$/,
];
/**
 * MIME types handled by the built-in document parser (pdf, docx, excel variants, ods).
 * Of the OpenDocument formats only the spreadsheet type is listed here; the text,
 * presentation and graphics types appear in `defaultOCRMimeTypes` only.
 */
export const documentParserMimeTypes = [
excelMimeTypes,
/^application\/pdf$/,
/^application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.document$/,
/^application\/vnd\.oasis\.opendocument\.spreadsheet$/,
];
/**
 * Default patterns used as `fileConfig.text.supportedMimeTypes`: a single regex that
 * accepts any `type/subtype` string made up only of word characters, dots and hyphens
 * (so subtypes containing `+`, such as `svg+xml`, do not match).
 */
export const defaultTextMimeTypes = [/^[\w.-]+\/[\w.-]+$/];
/** Default patterns used as `fileConfig.stt.supportedMimeTypes`: the audio MIME types. */
export const defaultSTTMimeTypes = [audioMimeTypes];
/**
 * General list of accepted MIME type patterns: text, Excel, application, image, video and
 * audio types plus SVG. It is the default for `fileConfig.checkType`, the
 * `supportedMimeTypes` of the built-in endpoint configs, and the fallback that
 * `mergeWithDefault` uses for document-supported providers.
 */
export const supportedMimeTypes = [
textMimeTypes,
excelMimeTypes,
applicationMimeTypes,
imageMimeTypes,
videoMimeTypes,
audioMimeTypes,
/** Supported by LC Code Interpreter API */
/^image\/(svg|svg\+xml)$/,
];
/**
 * MIME type patterns made up of the text, Excel, application and image patterns only
 * (no video, audio or SVG patterns, unlike `supportedMimeTypes`).
 * NOTE(review): going by the name, this is the set accepted for code-interpreter
 * uploads — confirm against callers.
 */
export const codeInterpreterMimeTypes = [
textMimeTypes,
excelMimeTypes,
applicationMimeTypes,
imageMimeTypes,
];
/**
 * Maps a lower-case file extension (without the dot) to the MIME type to assume for it.
 * Extensions without a dedicated MIME type are mapped to `text/plain`.
 */
export const codeTypeMapping: { [key: string]: string } = {
  c: 'text/x-c', // .c - C source
  cs: 'text/x-csharp', // .cs - C# source
  cpp: 'text/x-c++', // .cpp - C++ source
  h: 'text/x-h', // .h - C/C++ header
  md: 'text/markdown', // .md - Markdown
  php: 'text/x-php', // .php - PHP source
  py: 'text/x-python', // .py - Python source
  rb: 'text/x-ruby', // .rb - Ruby source
  tex: 'text/x-tex', // .tex - LaTeX source
  js: 'text/javascript', // .js - JavaScript source
  sh: 'application/x-sh', // .sh - Shell script
  ts: 'application/typescript', // .ts - TypeScript source
  tar: 'application/x-tar', // .tar - Tar archive
  zip: 'application/zip', // .zip - ZIP archive
  txt: 'text/plain', // .txt - Plain text file
  log: 'text/plain', // .log - Log file
  csv: 'text/csv', // .csv - Comma-separated values
  tsv: 'text/tab-separated-values', // .tsv - Tab-separated values
  parquet: 'application/x-parquet', // .parquet - Apache Parquet columnar storage
  json: 'application/json', // .json - JSON file
  xml: 'application/xml', // .xml - XML file
  html: 'text/html', // .html - HTML file
  htm: 'text/html', // .htm - HTML file
  css: 'text/css', // .css - CSS file
  yml: 'application/yaml', // .yml - YAML
  yaml: 'application/yaml', // .yaml - YAML
  sql: 'application/sql', // .sql - SQL (IANA registered)
  dart: 'text/plain', // .dart - Dart source
  coffee: 'application/vnd.coffeescript', // .coffee - CoffeeScript (IANA registered)
  go: 'text/plain', // .go - Go source
  rs: 'text/plain', // .rs - Rust source
  swift: 'text/plain', // .swift - Swift source
  kt: 'text/plain', // .kt - Kotlin source
  kts: 'text/plain', // .kts - Kotlin script
  scala: 'text/plain', // .scala - Scala source
  lua: 'text/plain', // .lua - Lua source
  r: 'text/plain', // .r - R source
  pl: 'text/plain', // .pl - Perl source
  pm: 'text/plain', // .pm - Perl module
  groovy: 'text/plain', // .groovy - Groovy source
  gradle: 'text/plain', // .gradle - Gradle build script
  clj: 'text/plain', // .clj - Clojure source
  cljs: 'text/plain', // .cljs - ClojureScript source
  cljc: 'text/plain', // .cljc - Clojure common source
  elm: 'text/plain', // .elm - Elm source
  erl: 'text/plain', // .erl - Erlang source
  hrl: 'text/plain', // .hrl - Erlang header
  ex: 'text/plain', // .ex - Elixir source
  exs: 'text/plain', // .exs - Elixir script
  hs: 'text/plain', // .hs - Haskell source
  lhs: 'text/plain', // .lhs - Literate Haskell source
  ml: 'text/plain', // .ml - OCaml source
  mli: 'text/plain', // .mli - OCaml interface
  fs: 'text/plain', // .fs - F# source
  fsx: 'text/plain', // .fsx - F# script
  lisp: 'text/plain', // .lisp - Lisp source
  cl: 'text/plain', // .cl - Common Lisp source
  scm: 'text/plain', // .scm - Scheme source
  rkt: 'text/plain', // .rkt - Racket source
  jsx: 'text/plain', // .jsx - React JSX
  tsx: 'text/plain', // .tsx - React TSX
  vue: 'text/plain', // .vue - Vue component
  svelte: 'text/plain', // .svelte - Svelte component
  astro: 'text/plain', // .astro - Astro component
  scss: 'text/plain', // .scss - SCSS source
  sass: 'text/plain', // .sass - Sass source
  less: 'text/plain', // .less - Less source
  styl: 'text/plain', // .styl - Stylus source
  toml: 'text/plain', // .toml - TOML config
  ini: 'text/plain', // .ini - INI config
  cfg: 'text/plain', // .cfg - Config file
  conf: 'text/plain', // .conf - Config file
  env: 'text/plain', // .env - Environment file
  properties: 'text/plain', // .properties - Java properties
  graphql: 'text/plain', // .graphql - GraphQL schema/query
  gql: 'text/plain', // .gql - GraphQL schema/query
  proto: 'text/plain', // .proto - Protocol Buffers
  dockerfile: 'text/plain', // Dockerfile
  makefile: 'text/plain', // Makefile
  cmake: 'text/plain', // .cmake - CMake script
  rake: 'text/plain', // .rake - Rake task
  gemspec: 'text/plain', // .gemspec - Ruby gem spec
  bash: 'text/plain', // .bash - Bash script
  zsh: 'text/plain', // .zsh - Zsh script
  fish: 'text/plain', // .fish - Fish script
  ps1: 'text/plain', // .ps1 - PowerShell script
  psm1: 'text/plain', // .psm1 - PowerShell module
  bat: 'text/plain', // .bat - Batch script
  cmd: 'text/plain', // .cmd - Windows command script
  asm: 'text/plain', // .asm - Assembly source
  s: 'text/plain', // .s - Assembly source
  v: 'text/plain', // .v - V or Verilog source
  zig: 'text/plain', // .zig - Zig source
  nim: 'text/plain', // .nim - Nim source
  cr: 'text/plain', // .cr - Crystal source
  d: 'text/plain', // .d - D source
  pas: 'text/plain', // .pas - Pascal source
  pp: 'text/plain', // .pp - Pascal/Puppet source
  f90: 'text/plain', // .f90 - Fortran 90 source
  f95: 'text/plain', // .f95 - Fortran 95 source
  f03: 'text/plain', // .f03 - Fortran 2003 source
  jl: 'text/plain', // .jl - Julia source
  m: 'text/plain', // .m - Objective-C/MATLAB source
  mm: 'text/plain', // .mm - Objective-C++ source
  ada: 'text/plain', // .ada - Ada source
  adb: 'text/plain', // .adb - Ada body
  ads: 'text/plain', // .ads - Ada spec
  cob: 'text/plain', // .cob - COBOL source
  cbl: 'text/plain', // .cbl - COBOL source
  tcl: 'text/plain', // .tcl - Tcl source
  awk: 'text/plain', // .awk - AWK script
  sed: 'text/plain', // .sed - Sed script
  odt: 'application/vnd.oasis.opendocument.text', // .odt - OpenDocument Text
  ods: 'application/vnd.oasis.opendocument.spreadsheet', // .ods - OpenDocument Spreadsheet
  odp: 'application/vnd.oasis.opendocument.presentation', // .odp - OpenDocument Presentation
  odg: 'application/vnd.oasis.opendocument.graphics', // .odg - OpenDocument Graphics
};

/** Maps image extensions to MIME types for formats browsers may not recognize */
export const imageTypeMapping: { [key: string]: string } = {
  heic: 'image/heic',
  heif: 'image/heif',
};

/**
 * Infers the MIME type from a file's extension when the browser doesn't recognize it.
 *
 * The extension is the lower-cased text after the last dot; a name without any dot is
 * treated as its own extension, which is how `Dockerfile` and `Makefile` reach their
 * `codeTypeMapping` entries. `codeTypeMapping` is consulted before `imageTypeMapping`.
 *
 * Only the mappings' own keys are considered. A plain indexed read would also resolve
 * members inherited from `Object.prototype`, so names like `foo.constructor` or
 * `toString` would return a function or object instead of a string.
 *
 * @param fileName - The name of the file including extension
 * @param currentType - The current MIME type reported by the browser (may be empty)
 * @returns The inferred MIME type if browser didn't provide one, otherwise the original type
 */
export function inferMimeType(fileName: string, currentType: string): string {
  if (currentType) {
    return currentType;
  }
  const extension = fileName.split('.').pop()?.toLowerCase() ?? '';
  for (const mapping of [codeTypeMapping, imageTypeMapping]) {
    const inferred = Object.prototype.hasOwnProperty.call(mapping, extension)
      ? mapping[extension]
      : undefined;
    if (inferred) {
      return inferred;
    }
  }
  return currentType;
}
/**
 * MIME type patterns limited to a subset of `text/*` subtypes plus JSON, PDF and the
 * OOXML word-processing and presentation types.
 * NOTE(review): going by the name, this is the set accepted for retrieval — confirm
 * against callers.
 */
export const retrievalMimeTypes = [
/^(text\/(x-c|x-c\+\+|x-h|html|x-java|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|vtt|xml))$/,
/^(application\/(json|pdf|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation)))$/,
];
/** Number of bytes in one megabyte (1024 x 1024). */
export const megabyte = 1024 ** 2;

/**
 * Converts a size expressed in megabytes to bytes.
 * @param mb - Size in megabytes
 * @returns The same size in bytes
 */
export const mbToBytes = (mb: number): number => {
  return megabyte * mb;
};

/** Default size limit: 512 MB, expressed in bytes. */
const defaultSizeLimit = 512 * megabyte;
/** Default value for `fileConfig.fileTokenLimit`. */
const defaultTokenLimit = 100000;
/**
 * Endpoint settings object that `fileConfig.endpoints` uses — as one shared reference —
 * for the `assistants`, `azureAssistants` and `agents` endpoints: up to 10 files, with
 * per-file and total size limits of `defaultSizeLimit` bytes, accepting
 * `supportedMimeTypes`.
 */
const assistantsFileConfig = {
fileLimit: 10,
fileSizeLimit: defaultSizeLimit,
totalSizeLimit: defaultSizeLimit,
supportedMimeTypes,
disabled: false,
};
/**
 * Built-in file configuration; `mergeFileConfig` layers user-provided settings over a
 * copy of it and `getEndpointFileConfig` falls back to its `default` endpoint entry.
 * Size limits are stored in bytes (they are produced with `mbToBytes`).
 */
export const fileConfig = {
endpoints: {
// These three endpoints share the same `assistantsFileConfig` object
[EModelEndpoint.assistants]: assistantsFileConfig,
[EModelEndpoint.azureAssistants]: assistantsFileConfig,
[EModelEndpoint.agents]: assistantsFileConfig,
[EModelEndpoint.anthropic]: {
fileLimit: 10,
fileSizeLimit: defaultSizeLimit,
totalSizeLimit: defaultSizeLimit,
supportedMimeTypes,
disabled: false,
},
// Used for endpoints without an entry of their own
default: {
fileLimit: 10,
fileSizeLimit: defaultSizeLimit,
totalSizeLimit: defaultSizeLimit,
supportedMimeTypes,
disabled: false,
},
},
serverFileSizeLimit: defaultSizeLimit,
// 2 MB, in bytes
avatarSizeLimit: mbToBytes(2),
fileTokenLimit: defaultTokenLimit,
// NOTE(review): maxWidth/maxHeight are presumably pixels and quality a 0-1 factor
// (the schema bounds quality to [0, 1]) — confirm against the resize code
clientImageResize: {
enabled: false,
maxWidth: 1900,
maxHeight: 1900,
quality: 0.92,
},
ocr: {
supportedMimeTypes: defaultOCRMimeTypes,
},
text: {
supportedMimeTypes: defaultTextMimeTypes,
},
stt: {
supportedMimeTypes: defaultSTTMimeTypes,
},
/**
 * Reports whether `fileType` matches at least one of the given patterns.
 * @param fileType - MIME type to test
 * @param supportedTypes - Patterns to test against; defaults to `supportedMimeTypes`
 * @returns `true` when any pattern matches
 */
checkType: function (fileType: string, supportedTypes: RegExp[] = supportedMimeTypes) {
return supportedTypes.some((regex) => regex.test(fileType));
},
};
/**
 * Schema for an optional list of MIME type patterns.
 * An absent list is valid; a present list is valid only when every entry is either a
 * string or a `RegExp` instance.
 */
const supportedMimeTypesSchema = z
  .array(z.any())
  .optional()
  .refine(
    (mimeTypes) =>
      !mimeTypes ||
      mimeTypes.every((entry) => typeof entry === 'string' || entry instanceof RegExp),
    { message: 'Each mimeType must be a string or a RegExp object.' },
  );
/**
 * Schema for the user-provided settings of a single endpoint. Every field is optional
 * and numeric fields must be >= 0.
 * `mergeFileConfig` passes `fileSizeLimit` and `totalSizeLimit` through `mbToBytes`,
 * i.e. it treats these two values as megabytes; `fileLimit` is copied as-is.
 */
export const endpointFileConfigSchema = z.object({
disabled: z.boolean().optional(),
fileLimit: z.number().min(0).optional(),
fileSizeLimit: z.number().min(0).optional(),
totalSizeLimit: z.number().min(0).optional(),
supportedMimeTypes: supportedMimeTypesSchema.optional(),
});
/**
 * Schema for the user-provided file configuration accepted by `mergeFileConfig`.
 * Every field is optional and numeric fields must be >= 0.
 * `mergeFileConfig` converts `serverFileSizeLimit` and `avatarSizeLimit` with
 * `mbToBytes` (i.e. treats them as megabytes) and copies `fileTokenLimit` as-is.
 */
export const fileConfigSchema = z.object({
// Per-endpoint settings keyed by endpoint name
endpoints: z.record(endpointFileConfigSchema).optional(),
serverFileSizeLimit: z.number().min(0).optional(),
avatarSizeLimit: z.number().min(0).optional(),
fileTokenLimit: z.number().min(0).optional(),
// NOTE(review): `imageGeneration` is validated here but is not read by
// `mergeFileConfig` in this file — confirm where it is consumed
imageGeneration: z
.object({
percentage: z.number().min(0).max(100).optional(),
px: z.number().min(0).optional(),
})
.optional(),
clientImageResize: z
.object({
enabled: z.boolean().optional(),
maxWidth: z.number().min(0).optional(),
maxHeight: z.number().min(0).optional(),
quality: z.number().min(0).max(1).optional(),
})
.optional(),
ocr: z
.object({
supportedMimeTypes: supportedMimeTypesSchema.optional(),
})
.optional(),
text: z
.object({
supportedMimeTypes: supportedMimeTypesSchema.optional(),
})
.optional(),
});
/** Type of a value that has been parsed by `fileConfigSchema`. */
export type TFileConfig = z.infer<typeof fileConfigSchema>;
/**
 * Compiles each pattern string into a `RegExp`, preserving input order.
 * A pattern that fails to compile is reported through `console.error` and left out of
 * the result instead of aborting the whole conversion.
 *
 * @param patterns - Regular-expression source strings
 * @returns The successfully compiled expressions
 */
export const convertStringsToRegex = (patterns: string[]): RegExp[] => {
  const compiled: RegExp[] = [];
  for (const pattern of patterns) {
    try {
      compiled.push(new RegExp(pattern));
    } catch (error) {
      console.error(`Invalid regex pattern "${pattern}" skipped.`, error);
    }
  }
  return compiled;
};
/**
 * Gets the appropriate endpoint file configuration with standardized lookup logic.
 * (Describes `getEndpointFileConfig`, which is defined after the `mergeWithDefault` helper.)
 *
 * @param params - Object containing fileConfig, endpoint, and optional endpointType
 * @param params.fileConfig - The merged file configuration
 * @param params.endpoint - The endpoint name to look up
 * @param params.endpointType - Optional endpoint type for additional context
 * @returns The endpoint file configuration; the default configuration when nothing more specific matches
 */
/**
 * Completes an endpoint config by taking every field it leaves unset (`null` or
 * `undefined`) from a fallback config.
 *
 * The fallback for `supportedMimeTypes` depends on the endpoint: when
 * `isDocumentSupportedProvider(endpoint)` holds, the module-level `supportedMimeTypes`
 * list (which includes video and audio patterns) is used instead of the fallback
 * config's own list.
 *
 * @param endpointConfig - Settings that take precedence
 * @param defaultConfig - Settings used for fields missing from `endpointConfig`
 * @param endpoint - Endpoint name used to pick the MIME type fallback
 * @returns A new config object with all five fields resolved
 */
function mergeWithDefault(
  endpointConfig: EndpointFileConfig,
  defaultConfig: EndpointFileConfig,
  endpoint?: string | null,
): EndpointFileConfig {
  let fallbackMimeTypes = defaultConfig.supportedMimeTypes;
  if (isDocumentSupportedProvider(endpoint)) {
    fallbackMimeTypes = supportedMimeTypes;
  }

  const {
    disabled,
    fileLimit,
    fileSizeLimit,
    totalSizeLimit,
    supportedMimeTypes: ownMimeTypes,
  } = endpointConfig;

  return {
    disabled: disabled ?? defaultConfig.disabled,
    fileLimit: fileLimit ?? defaultConfig.fileLimit,
    fileSizeLimit: fileSizeLimit ?? defaultConfig.fileSizeLimit,
    totalSizeLimit: totalSizeLimit ?? defaultConfig.totalSizeLimit,
    supportedMimeTypes: ownMimeTypes ?? fallbackMimeTypes,
  };
}
/**
 * Resolves the file configuration that applies to an endpoint.
 *
 * Without configured endpoints the built-in default entry is returned as-is. Otherwise
 * an effective default is computed (the configured `default` entry layered over the
 * built-in one) and the first match below is completed from it via `mergeWithDefault`.
 *
 * Custom endpoints (`endpointType` is `custom`, or neither the normalized type nor the
 * non-empty normalized endpoint name is a standard key):
 *   1. the entry keyed by the raw `endpoint`
 *   2. a non-standard entry whose normalized key equals the normalized endpoint
 *   3. the generic `custom` entry
 *   4. the `agents` entry
 *   5. the effective default
 *
 * Standard endpoints:
 *   1. the entry keyed by `endpointType`
 *   2. the entry keyed by the raw `endpoint`
 *   3. the entry keyed by the normalized endpoint
 *   4. the `agents` entry, when `isAgentsEndpoint` accepts the normalized type/name
 *   5. the effective default
 */
export function getEndpointFileConfig(params: {
  fileConfig?: FileConfig | null;
  endpoint?: string | null;
  endpointType?: string | null;
}): EndpointFileConfig {
  const { fileConfig: mergedFileConfig, endpoint, endpointType } = params;
  const builtInDefault = fileConfig.endpoints.default;
  const configured = mergedFileConfig?.endpoints;
  if (!configured) {
    return builtInDefault;
  }

  /** User-configured default layered over the built-in default */
  const userDefault = configured.default;
  const effectiveDefault = userDefault
    ? mergeWithDefault(userDefault, builtInDefault, 'default')
    : builtInDefault;
  const overDefault = (config: EndpointFileConfig, name?: string | null): EndpointFileConfig =>
    mergeWithDefault(config, effectiveDefault, name);

  const normalizedEndpoint = normalizeEndpointName(endpoint ?? '');
  const standardEndpoints = new Set([
    'default',
    EModelEndpoint.agents,
    EModelEndpoint.assistants,
    EModelEndpoint.azureAssistants,
    EModelEndpoint.openAI,
    EModelEndpoint.azureOpenAI,
    EModelEndpoint.anthropic,
    EModelEndpoint.google,
    EModelEndpoint.bedrock,
  ]);
  const normalizedEndpointType = normalizeEndpointName(endpointType ?? '');

  const hasNonStandardName =
    !standardEndpoints.has(normalizedEndpointType) &&
    normalizedEndpoint &&
    !standardEndpoints.has(normalizedEndpoint);
  const isCustomEndpoint = endpointType === EModelEndpoint.custom || hasNonStandardName;

  if (isCustomEndpoint) {
    /** 1. Entry keyed by the endpoint name exactly as given */
    const direct = endpoint ? configured[endpoint] : undefined;
    if (direct) {
      return overDefault(direct, endpoint);
    }

    /** 2. Non-standard entry whose normalized key matches */
    for (const key in configured) {
      if (standardEndpoints.has(key)) {
        continue;
      }
      if (normalizeEndpointName(key) !== normalizedEndpoint) {
        continue;
      }
      return overDefault(configured[key], key);
    }

    /** 3. Generic 'custom' entry, then 4. 'agents' (all custom endpoints are non-assistants) */
    for (const fallbackKey of [EModelEndpoint.custom, EModelEndpoint.agents]) {
      const fallback = configured[fallbackKey];
      if (fallback) {
        return overDefault(fallback, endpoint);
      }
    }

    /** 5. Nothing configured for this custom endpoint */
    return effectiveDefault;
  }

  /** endpointType is checked first (most reliable for standard endpoints) */
  const byType = endpointType ? configured[endpointType] : undefined;
  if (byType) {
    return overDefault(byType, endpointType);
  }

  /** Then the endpoint name exactly as given */
  const byName = endpoint ? configured[endpoint] : undefined;
  if (byName) {
    return overDefault(byName, endpoint);
  }

  /** Then the normalized endpoint name */
  const byNormalizedName = normalizedEndpoint ? configured[normalizedEndpoint] : undefined;
  if (byNormalizedName) {
    return overDefault(byNormalizedName, normalizedEndpoint);
  }

  /** The 'agents' entry applies only when the endpoint is explicitly agents */
  const isAgents = isAgentsEndpoint(normalizedEndpointType || normalizedEndpoint);
  const agentsConfig = isAgents ? configured[EModelEndpoint.agents] : undefined;
  if (agentsConfig) {
    return overDefault(agentsConfig, EModelEndpoint.agents);
  }

  return effectiveDefault;
}
/**
 * Builds the effective file configuration by layering user-provided settings over a
 * copy of the built-in `fileConfig`.
 *
 * - `serverFileSizeLimit`, `avatarSizeLimit` and the per-endpoint `fileSizeLimit` /
 *   `totalSizeLimit` are converted with `mbToBytes`; `fileTokenLimit` and `fileLimit`
 *   are copied as-is.
 * - `clientImageResize`, `ocr` and `text` are shallow-merged; provided
 *   `supportedMimeTypes` lists are compiled with `convertStringsToRegex`.
 * - Each configured endpoint is merged into a copy of the built-in entry (or an empty
 *   object). An endpoint with `disabled: true` gets zeroed limits and an empty MIME list.
 * - `stt` is always taken from the built-in config.
 *
 * @param dynamic - Parsed user configuration, or `undefined` to use built-ins only
 * @returns A new configuration object; the built-in endpoint objects are not mutated
 */
export function mergeFileConfig(dynamic: z.infer<typeof fileConfigSchema> | undefined): FileConfig {
  const mergedConfig: FileConfig = {
    ...fileConfig,
    endpoints: { ...fileConfig.endpoints },
    ocr: { ...fileConfig.ocr, supportedMimeTypes: fileConfig.ocr?.supportedMimeTypes || [] },
    text: { ...fileConfig.text, supportedMimeTypes: fileConfig.text?.supportedMimeTypes || [] },
    stt: { ...fileConfig.stt, supportedMimeTypes: fileConfig.stt?.supportedMimeTypes || [] },
  };

  if (!dynamic) {
    return mergedConfig;
  }

  const {
    serverFileSizeLimit,
    avatarSizeLimit,
    fileTokenLimit,
    clientImageResize,
    ocr: ocrOverrides,
    text: textOverrides,
  } = dynamic;

  if (serverFileSizeLimit !== undefined) {
    mergedConfig.serverFileSizeLimit = mbToBytes(serverFileSizeLimit);
  }
  if (avatarSizeLimit !== undefined) {
    mergedConfig.avatarSizeLimit = mbToBytes(avatarSizeLimit);
  }
  if (fileTokenLimit !== undefined) {
    mergedConfig.fileTokenLimit = fileTokenLimit;
  }

  if (clientImageResize !== undefined) {
    mergedConfig.clientImageResize = { ...mergedConfig.clientImageResize, ...clientImageResize };
  }

  if (ocrOverrides !== undefined) {
    const ocrSettings = { ...mergedConfig.ocr, ...ocrOverrides };
    if (ocrOverrides.supportedMimeTypes) {
      ocrSettings.supportedMimeTypes = convertStringsToRegex(ocrOverrides.supportedMimeTypes);
    }
    mergedConfig.ocr = ocrSettings;
  }

  if (textOverrides !== undefined) {
    const textSettings = { ...mergedConfig.text, ...textOverrides };
    if (textOverrides.supportedMimeTypes) {
      textSettings.supportedMimeTypes = convertStringsToRegex(textOverrides.supportedMimeTypes);
    }
    mergedConfig.text = textSettings;
  }

  if (!dynamic.endpoints) {
    return mergedConfig;
  }

  const overridesByEndpoint = dynamic.endpoints as Record<string, EndpointFileConfig>;
  for (const key in overridesByEndpoint) {
    const overrides = overridesByEndpoint[key];

    /** Work on a copy so the shared built-in endpoint objects are never mutated */
    const existing = mergedConfig.endpoints[key];
    const target: EndpointFileConfig = existing ? { ...existing } : {};
    mergedConfig.endpoints[key] = target;

    if (overrides.disabled === true) {
      /** A disabled endpoint accepts nothing, whatever else was configured */
      target.disabled = true;
      target.fileLimit = 0;
      target.fileSizeLimit = 0;
      target.totalSizeLimit = 0;
      target.supportedMimeTypes = [];
      continue;
    }

    const { fileSizeLimit, totalSizeLimit, fileLimit, disabled, supportedMimeTypes: mimePatterns } =
      overrides;

    if (fileSizeLimit !== undefined) {
      target.fileSizeLimit = mbToBytes(fileSizeLimit);
    }
    if (totalSizeLimit !== undefined) {
      target.totalSizeLimit = mbToBytes(totalSizeLimit);
    }
    if (fileLimit !== undefined) {
      target.fileLimit = fileLimit;
    }
    if (disabled !== undefined) {
      target.disabled = disabled;
    }
    if (mimePatterns) {
      target.supportedMimeTypes = convertStringsToRegex(mimePatterns as unknown as string[]);
    }
  }

  return mergedConfig;
}