🔍 refactor: OCR Fully Optional with Defaults for "Upload as Text" (#9856)

* refactor: move `loadOCRConfig` from `packages/data-provider` to `packages/api` and return `undefined` if not explicitly configured
* fix: loadOCRConfig import from @librechat/api
* refactor: update defaultTextMimeTypes to support virtually all file types for text parsing
* fix: improve OCR capability check and error message for unsupported file types
* ci: remove unnecessary ocr expectation from AppService test
2026-03-18 21:56:33 +01:00 · 2025-09-26 11:56:11 -04:00 · 2025-09-26 11:56:11 -04:00 · 4b5b46604c
commit 4b5b46604c
parent 3d7eaf0fcc
8 changed files with 15 additions and 20 deletions
--- a/api/server/services/AppService.js
+++ b/api/server/services/AppService.js
@@ -1,16 +1,12 @@
+const { FileSources, EModelEndpoint, getConfigDefaults } = require('librechat-data-provider');
 const {
  isEnabled,
+  loadOCRConfig,
  loadMemoryConfig,
  agentsConfigSetup,
  loadWebSearchConfig,
  loadDefaultInterface,
 } = require('@librechat/api');
-const {
-  FileSources,
-  loadOCRConfig,
-  EModelEndpoint,
-  getConfigDefaults,
-} = require('librechat-data-provider');
 const {
  checkWebSearchConfig,
  checkVariables,
--- a/api/server/services/AppService.spec.js
+++ b/api/server/services/AppService.spec.js
@@ -142,7 +142,6 @@ describe('AppService', () => {
        turnstileConfig: mockedTurnstileConfig,
        modelSpecs: undefined,
        paths: expect.anything(),
-        ocr: expect.anything(),
        imageOutputType: expect.any(String),
        fileConfig: undefined,
        secureImageLinks: undefined,
--- a/api/server/services/Files/process.js
+++ b/api/server/services/Files/process.js
@@ -594,10 +594,9 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {

    const fileConfig = mergeFileConfig(appConfig.fileConfig);

-    const shouldUseOCR = fileConfig.checkType(
-      file.mimetype,
-      fileConfig.ocr?.supportedMimeTypes || [],
-    );
+    const shouldUseOCR =
+      appConfig?.ocr != null &&
+      fileConfig.checkType(file.mimetype, fileConfig.ocr?.supportedMimeTypes || []);

    if (shouldUseOCR && !(await checkCapability(req, AgentCapabilities.ocr))) {
      throw new Error('OCR capability is not enabled for Agents');
@@ -626,7 +625,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
    );

    if (!shouldUseText) {
-      throw new Error(`File type ${file.mimetype} is not supported for OCR or text parsing`);
+      throw new Error(`File type ${file.mimetype} is not supported for text parsing.`);
    }

    const { text, bytes } = await parseText({ req, file, file_id });