From 4b5b46604cb10694afd87f9abe0edea056f4fcda Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Fri, 26 Sep 2025 11:56:11 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=8D=20refactor:=20OCR=20Fully=20Option?= =?UTF-8?q?al=20with=20Defaults=20for=20"Upload=20as=20Text"=20(#9856)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: move `loadOCRConfig` from `packages/data-provider` to `packages/api` and return `undefined` if not explicitly configured * fix: loadOCRConfig import from @librechat/api * refactor: update defaultTextMimeTypes to support virtually all file types for text parsing * fix: improve OCR capability check and error message for unsupported file types * ci: remove unnecessary ocr expectation from AppService test --- api/server/services/AppService.js | 8 ++------ api/server/services/AppService.spec.js | 1 - api/server/services/Files/process.js | 9 ++++----- packages/api/src/files/index.ts | 5 +++-- packages/api/src/files/mistral/crud.ts | 2 +- packages/{data-provider/src => api/src/files}/ocr.ts | 7 ++++--- packages/data-provider/src/file-config.ts | 2 +- packages/data-provider/src/index.ts | 1 - 8 files changed, 15 insertions(+), 20 deletions(-) rename packages/{data-provider/src => api/src/files}/ocr.ts (51%) diff --git a/api/server/services/AppService.js b/api/server/services/AppService.js index 5c5bf186e0..49f9e324fb 100644 --- a/api/server/services/AppService.js +++ b/api/server/services/AppService.js @@ -1,16 +1,12 @@ +const { FileSources, EModelEndpoint, getConfigDefaults } = require('librechat-data-provider'); const { isEnabled, + loadOCRConfig, loadMemoryConfig, agentsConfigSetup, loadWebSearchConfig, loadDefaultInterface, } = require('@librechat/api'); -const { - FileSources, - loadOCRConfig, - EModelEndpoint, - getConfigDefaults, -} = require('librechat-data-provider'); const { checkWebSearchConfig, checkVariables, diff --git a/api/server/services/AppService.spec.js b/api/server/services/AppService.spec.js index 6243164ed4..1b540c96c0 100644 --- a/api/server/services/AppService.spec.js +++ b/api/server/services/AppService.spec.js @@ -142,7 +142,6 @@ describe('AppService', () => { turnstileConfig: mockedTurnstileConfig, modelSpecs: undefined, paths: expect.anything(), - ocr: expect.anything(), imageOutputType: expect.any(String), fileConfig: undefined, secureImageLinks: undefined, diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index 367e7bf348..c8221a6de5 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -594,10 +594,9 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { const fileConfig = mergeFileConfig(appConfig.fileConfig); - const shouldUseOCR = fileConfig.checkType( - file.mimetype, - fileConfig.ocr?.supportedMimeTypes || [], - ); + const shouldUseOCR = + appConfig?.ocr != null && + fileConfig.checkType(file.mimetype, fileConfig.ocr?.supportedMimeTypes || []); if (shouldUseOCR && !(await checkCapability(req, AgentCapabilities.ocr))) { throw new Error('OCR capability is not enabled for Agents'); @@ -626,7 +625,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { ); if (!shouldUseText) { - throw new Error(`File type ${file.mimetype} is not supported for OCR or text parsing`); + throw new Error(`File type ${file.mimetype} is not supported for text parsing.`); } const { text, bytes } = await parseText({ req, file, file_id }); diff --git a/packages/api/src/files/index.ts b/packages/api/src/files/index.ts index fa156f15f1..49e5bb4151 100644 --- a/packages/api/src/files/index.ts +++ b/packages/api/src/files/index.ts @@ -1,4 +1,5 @@ -export * from './mistral/crud'; export * from './audio'; -export * from './text'; +export * from './mistral/crud'; +export * from './ocr'; export * from './parse'; +export * from './text'; diff --git a/packages/api/src/files/mistral/crud.ts b/packages/api/src/files/mistral/crud.ts index e0ef69ab31..fefe4a4675 100644 --- a/packages/api/src/files/mistral/crud.ts +++ b/packages/api/src/files/mistral/crud.ts @@ -303,7 +303,7 @@ async function loadAuthConfig(context: OCRContext): Promise { /** * Gets the model configuration */ -function getModelConfig(ocrConfig: TCustomConfig['ocr']): string { +function getModelConfig(ocrConfig?: TCustomConfig['ocr']): string { const modelConfig = ocrConfig?.mistralModel || ''; if (!modelConfig.trim()) { diff --git a/packages/data-provider/src/ocr.ts b/packages/api/src/files/ocr.ts similarity index 51% rename from packages/data-provider/src/ocr.ts rename to packages/api/src/files/ocr.ts index cfde43025b..02060a858f 100644 --- a/packages/data-provider/src/ocr.ts +++ b/packages/api/src/files/ocr.ts @@ -1,7 +1,8 @@ -import type { TCustomConfig } from '../src/config'; -import { OCRStrategy } from '../src/config'; +import { OCRStrategy } from 'librechat-data-provider'; +import type { TCustomConfig } from 'librechat-data-provider'; -export function loadOCRConfig(config: TCustomConfig['ocr']): TCustomConfig['ocr'] { +export function loadOCRConfig(config?: TCustomConfig['ocr']): TCustomConfig['ocr'] | undefined { + if (!config) return; const baseURL = config?.baseURL ?? ''; const apiKey = config?.apiKey ?? ''; const mistralModel = config?.mistralModel ?? ''; diff --git a/packages/data-provider/src/file-config.ts b/packages/data-provider/src/file-config.ts index f6ede89ae2..d43308373f 100644 --- a/packages/data-provider/src/file-config.ts +++ b/packages/data-provider/src/file-config.ts @@ -133,7 +133,7 @@ export const defaultOCRMimeTypes = [ /^application\/epub\+zip$/, ]; -export const defaultTextMimeTypes = [textMimeTypes]; +export const defaultTextMimeTypes = [/^[\w.-]+\/[\w.-]+$/]; export const defaultSTTMimeTypes = [audioMimeTypes]; diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts index 3f51d34c4e..f96f6b0249 100644 --- a/packages/data-provider/src/index.ts +++ b/packages/data-provider/src/index.ts @@ -9,7 +9,6 @@ export * from './messages'; export * from './artifacts'; /* schema helpers */ export * from './parsers'; -export * from './ocr'; /* custom/dynamic configurations */ export * from './generate'; export * from './models';