mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 08:50:15 +01:00
* 👁️ feat: Add Azure Mistral OCR strategy and endpoint integration This commit introduces a new OCR strategy named 'azure_mistral_ocr', allowing the use of a Mistral OCR endpoint deployed on Azure. The configuration, schemas, and file upload strategies have been updated to support this integration, enabling seamless OCR processing via Azure-hosted Mistral services. * 🗑️ chore: Clean up .gitignore by removing commented-out uncommon directory name * chore: remove unused vars * refactor: Move createAxiosInstance to packages/api/utils and update imports - Removed the createAxiosInstance function from the config module and relocated it to a new utils module for better organization. - Updated import paths in relevant files to reflect the new location of createAxiosInstance. - Added tests for createAxiosInstance to ensure proper functionality and proxy configuration handling. * chore: move axios helpers to packages/api - Added logAxiosError function to @librechat/api for centralized error logging. - Updated imports across various files to use the new logAxiosError function. - Removed the old axios.js utility file as it is no longer needed. * chore: Update Jest moduleNameMapper for improved path resolution - Added a new mapping for '~/' to resolve module paths in Jest configuration, enhancing import handling for the project. * feat: Implement Mistral OCR API integration in TS * chore: Update MistralOCR tests based on new imports * fix: Enhance MistralOCR configuration handling and tests - Introduced helper functions for resolving configuration values from environment variables or hardcoded settings. - Updated the uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration resolution logic. - Improved test cases to ensure correct behavior when mixing environment variables and hardcoded values. - Mocked file upload and signed URL responses in tests to validate functionality without external dependencies. 
* feat: Enhance MistralOCR functionality with improved configuration and error handling - Introduced helper functions for loading authentication configuration and resolving values from environment variables. - Updated uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration logic. - Added utility functions for processing OCR results and creating error messages. - Improved document type determination and result aggregation for better OCR processing. * refactor: Reorganize OCR type imports in Mistral CRUD file - Moved OCRResult, OCRResultPage, and OCRImage imports to a more logical grouping for better readability and maintainability. * feat: Add file exports to API and create files index * chore: Update OCR types for enhanced structure and clarity - Redesigned OCRImage interface to include mandatory fields and improved naming conventions. - Added PageDimensions interface for better representation of page metrics. - Updated OCRResultPage to include dimensions and mandatory images array. - Refined OCRResult to include document annotation and usage information. * refactor: use TS counterpart of uploadOCR methods * ci: Update MistralOCR tests to reflect new OCR result structure * chore: Bump version of @librechat/api to 1.2.3 in package.json and package-lock.json * chore: Update CONFIG_VERSION to 1.2.8 * chore: remove unused sendEvent function from config module (now imported from '@librechat/api') * chore: remove MistralOCR service files and tests (now in '@librechat/api') * ci: update logger import in ModelService tests to use @librechat/data-schemas --------- Co-authored-by: arthurolivierfortin <arthurolivier.fortin@gmail.com>
120 lines
4.2 KiB
JavaScript
const fs = require('fs');
|
|
const axios = require('axios');
|
|
const FormData = require('form-data');
|
|
const { logAxiosError } = require('@librechat/api');
|
|
const { logger } = require('@librechat/data-schemas');
|
|
const { FileSources } = require('librechat-data-provider');
|
|
|
|
/**
 * Removes a file's embeddings from the vector database by sending a DELETE request to
 * `${RAG_API_URL}/documents` with the file's ID as the request body.
 *
 * No request is made when the file is not flagged as `embedded` or when `RAG_API_URL` is unset.
 *
 * @param {ServerRequest} req - The request object from Express. The second space-separated part of
 *  its `authorization` header is forwarded to the RAG API as the bearer token.
 * @param {MongoFile} file - The file record; its `embedded` and `file_id` properties are read.
 *
 * @returns {Promise<Object | undefined>}
 *  The axios response when the request succeeds; `undefined` when the call is skipped or when the
 *  failure is tolerated (a 404 response, or an error that carries no HTTP response).
 * @throws {Error} When the RAG API answers with a non-2xx status other than 404. The original
 *  error is attached as `cause`.
 */
const deleteVectors = async (req, file) => {
  if (!file.embedded || !process.env.RAG_API_URL) {
    return;
  }

  try {
    const jwtToken = req.headers.authorization.split(' ')[1];
    return await axios.delete(`${process.env.RAG_API_URL}/documents`, {
      headers: {
        Authorization: `Bearer ${jwtToken}`,
        'Content-Type': 'application/json',
        accept: 'application/json',
      },
      data: [file.file_id],
    });
  } catch (error) {
    logAxiosError({
      error,
      message: 'Error deleting vectors',
    });

    const status = error.response && error.response.status;
    // Only a real HTTP failure other than 404 is escalated; every other error is logged above
    // and the function resolves with `undefined`.
    if (error.response && status !== 404 && (status < 200 || status >= 300)) {
      logger.warn('Error deleting vectors, file will not be deleted');
      // Keep the original error reachable for callers/loggers instead of discarding its stack.
      throw new Error(error.message || 'An error occurred during file deletion.', {
        cause: error,
      });
    }
  }
};
|
|
|
|
/**
 * Uploads a file to the configured vector database by POSTing it as multipart form data to
 * `${RAG_API_URL}/embed`.
 *
 * @param {Object} params - The params object.
 * @param {Object} params.req - The request object from Express. The second space-separated part of
 *  its `authorization` header is forwarded to the RAG API as the bearer token.
 * @param {Express.Multer.File} params.file - The uploaded file. Its `path` is streamed to the API;
 *  `size`, `originalname` and `mimetype` are also read.
 * @param {string} params.file_id - The file ID.
 * @param {string} [params.entity_id] - The entity ID for shared resources; only sent when truthy.
 *
 * @returns {Promise<{ bytes: number, filename: string, filepath: string, embedded: boolean }>}
 *  A promise that resolves to an object containing:
 *    - bytes: The size of the file in bytes (`file.size`).
 *    - filename: The original name of the file (`file.originalname`).
 *    - filepath: Always `FileSources.vectordb`.
 *    - embedded: The API's `known_type` flag coerced to a boolean.
 * @throws {Error} When `RAG_API_URL` is not set, when the API reports `known_type === false` or a
 *  falsy `status`, or when the request itself fails. Failures raised inside the upload attempt are
 *  logged and rethrown with the original error attached as `cause`.
 */
async function uploadVectors({ req, file, file_id, entity_id }) {
  if (!process.env.RAG_API_URL) {
    throw new Error('RAG_API_URL not defined');
  }

  let fileStream;
  try {
    const jwtToken = req.headers.authorization.split(' ')[1];

    const formData = new FormData();
    formData.append('file_id', file_id);
    fileStream = fs.createReadStream(file.path);
    formData.append('file', fileStream);
    if (entity_id) {
      formData.append('entity_id', entity_id);
    }

    const response = await axios.post(`${process.env.RAG_API_URL}/embed`, formData, {
      headers: {
        Authorization: `Bearer ${jwtToken}`,
        accept: 'application/json',
        // Spread last so the multipart content-type (with boundary) from the form wins.
        ...formData.getHeaders(),
      },
    });

    const responseData = response.data;
    logger.debug('Response from embedding file', responseData);

    if (responseData.known_type === false) {
      throw new Error(`File embedding failed. The filetype ${file.mimetype} is not supported`);
    }

    if (!responseData.status) {
      throw new Error('File embedding failed.');
    }

    return {
      bytes: file.size,
      filename: file.originalname,
      filepath: FileSources.vectordb,
      embedded: Boolean(responseData.known_type),
    };
  } catch (error) {
    logAxiosError({
      error,
      message: 'Error uploading vectors',
    });
    // Keep the original error reachable for callers/loggers instead of discarding its stack.
    throw new Error(error.message || 'An error occurred during file upload.', { cause: error });
  } finally {
    // If the request failed before the stream was consumed, nothing else closes the file
    // descriptor. Destroying an already finished stream is a no-op.
    if (fileStream) {
      fileStream.destroy();
    }
  }
}
|
|
|
|
module.exports = {
|
|
deleteVectors,
|
|
uploadVectors,
|
|
};
|