mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
🔍 refactor: OCR Fully Optional with Defaults for "Upload as Text" (#9856)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
* refactor: move `loadOCRConfig` from `packages/data-provider` to `packages/api` and return `undefined` if not explicitly configured
* fix: loadOCRConfig import from @librechat/api
* refactor: update defaultTextMimeTypes to support virtually all file types for text parsing
* fix: improve OCR capability check and error message for unsupported file types
* ci: remove unnecessary ocr expectation from AppService test
This commit is contained in:
parent
3d7eaf0fcc
commit
4b5b46604c
8 changed files with 15 additions and 20 deletions
|
|
@ -1,16 +1,12 @@
|
||||||
|
const { FileSources, EModelEndpoint, getConfigDefaults } = require('librechat-data-provider');
|
||||||
const {
|
const {
|
||||||
isEnabled,
|
isEnabled,
|
||||||
|
loadOCRConfig,
|
||||||
loadMemoryConfig,
|
loadMemoryConfig,
|
||||||
agentsConfigSetup,
|
agentsConfigSetup,
|
||||||
loadWebSearchConfig,
|
loadWebSearchConfig,
|
||||||
loadDefaultInterface,
|
loadDefaultInterface,
|
||||||
} = require('@librechat/api');
|
} = require('@librechat/api');
|
||||||
const {
|
|
||||||
FileSources,
|
|
||||||
loadOCRConfig,
|
|
||||||
EModelEndpoint,
|
|
||||||
getConfigDefaults,
|
|
||||||
} = require('librechat-data-provider');
|
|
||||||
const {
|
const {
|
||||||
checkWebSearchConfig,
|
checkWebSearchConfig,
|
||||||
checkVariables,
|
checkVariables,
|
||||||
|
|
|
||||||
|
|
@ -142,7 +142,6 @@ describe('AppService', () => {
|
||||||
turnstileConfig: mockedTurnstileConfig,
|
turnstileConfig: mockedTurnstileConfig,
|
||||||
modelSpecs: undefined,
|
modelSpecs: undefined,
|
||||||
paths: expect.anything(),
|
paths: expect.anything(),
|
||||||
ocr: expect.anything(),
|
|
||||||
imageOutputType: expect.any(String),
|
imageOutputType: expect.any(String),
|
||||||
fileConfig: undefined,
|
fileConfig: undefined,
|
||||||
secureImageLinks: undefined,
|
secureImageLinks: undefined,
|
||||||
|
|
|
||||||
|
|
@ -594,10 +594,9 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
||||||
|
|
||||||
const fileConfig = mergeFileConfig(appConfig.fileConfig);
|
const fileConfig = mergeFileConfig(appConfig.fileConfig);
|
||||||
|
|
||||||
const shouldUseOCR = fileConfig.checkType(
|
const shouldUseOCR =
|
||||||
file.mimetype,
|
appConfig?.ocr != null &&
|
||||||
fileConfig.ocr?.supportedMimeTypes || [],
|
fileConfig.checkType(file.mimetype, fileConfig.ocr?.supportedMimeTypes || []);
|
||||||
);
|
|
||||||
|
|
||||||
if (shouldUseOCR && !(await checkCapability(req, AgentCapabilities.ocr))) {
|
if (shouldUseOCR && !(await checkCapability(req, AgentCapabilities.ocr))) {
|
||||||
throw new Error('OCR capability is not enabled for Agents');
|
throw new Error('OCR capability is not enabled for Agents');
|
||||||
|
|
@ -626,7 +625,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!shouldUseText) {
|
if (!shouldUseText) {
|
||||||
throw new Error(`File type ${file.mimetype} is not supported for OCR or text parsing`);
|
throw new Error(`File type ${file.mimetype} is not supported for text parsing.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const { text, bytes } = await parseText({ req, file, file_id });
|
const { text, bytes } = await parseText({ req, file, file_id });
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
export * from './mistral/crud';
|
|
||||||
export * from './audio';
|
export * from './audio';
|
||||||
export * from './text';
|
export * from './mistral/crud';
|
||||||
|
export * from './ocr';
|
||||||
export * from './parse';
|
export * from './parse';
|
||||||
|
export * from './text';
|
||||||
|
|
|
||||||
|
|
@ -303,7 +303,7 @@ async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
|
||||||
/**
|
/**
|
||||||
* Gets the model configuration
|
* Gets the model configuration
|
||||||
*/
|
*/
|
||||||
function getModelConfig(ocrConfig: TCustomConfig['ocr']): string {
|
function getModelConfig(ocrConfig?: TCustomConfig['ocr']): string {
|
||||||
const modelConfig = ocrConfig?.mistralModel || '';
|
const modelConfig = ocrConfig?.mistralModel || '';
|
||||||
|
|
||||||
if (!modelConfig.trim()) {
|
if (!modelConfig.trim()) {
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
import type { TCustomConfig } from '../src/config';
|
import { OCRStrategy } from 'librechat-data-provider';
|
||||||
import { OCRStrategy } from '../src/config';
|
import type { TCustomConfig } from 'librechat-data-provider';
|
||||||
|
|
||||||
export function loadOCRConfig(config: TCustomConfig['ocr']): TCustomConfig['ocr'] {
|
export function loadOCRConfig(config?: TCustomConfig['ocr']): TCustomConfig['ocr'] | undefined {
|
||||||
|
if (!config) return;
|
||||||
const baseURL = config?.baseURL ?? '';
|
const baseURL = config?.baseURL ?? '';
|
||||||
const apiKey = config?.apiKey ?? '';
|
const apiKey = config?.apiKey ?? '';
|
||||||
const mistralModel = config?.mistralModel ?? '';
|
const mistralModel = config?.mistralModel ?? '';
|
||||||
|
|
@ -133,7 +133,7 @@ export const defaultOCRMimeTypes = [
|
||||||
/^application\/epub\+zip$/,
|
/^application\/epub\+zip$/,
|
||||||
];
|
];
|
||||||
|
|
||||||
export const defaultTextMimeTypes = [textMimeTypes];
|
export const defaultTextMimeTypes = [/^[\w.-]+\/[\w.-]+$/];
|
||||||
|
|
||||||
export const defaultSTTMimeTypes = [audioMimeTypes];
|
export const defaultSTTMimeTypes = [audioMimeTypes];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ export * from './messages';
|
||||||
export * from './artifacts';
|
export * from './artifacts';
|
||||||
/* schema helpers */
|
/* schema helpers */
|
||||||
export * from './parsers';
|
export * from './parsers';
|
||||||
export * from './ocr';
|
|
||||||
/* custom/dynamic configurations */
|
/* custom/dynamic configurations */
|
||||||
export * from './generate';
|
export * from './generate';
|
||||||
export * from './models';
|
export * from './models';
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue