mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-19 18:00:15 +01:00
📂 refactor: File Read Operations (#9747)
* fix: axios response logging for text parsing, remove console logging, remove jsdoc * refactor: error logging in logAxiosError function to handle various error types with type guards * refactor: enhance text parsing with improved error handling and async file reading * refactor: replace synchronous file reading with asynchronous methods for improved performance and memory management * ci: update tests
This commit is contained in:
parent
0352067da2
commit
2489670f54
10 changed files with 692 additions and 83 deletions
|
|
@ -45,6 +45,10 @@ jest.mock('~/utils/axios', () => ({
|
|||
logAxiosError: jest.fn(({ message }) => message || 'Error'),
|
||||
}));
|
||||
|
||||
jest.mock('~/utils/files', () => ({
|
||||
readFileAsBuffer: jest.fn(),
|
||||
}));
|
||||
|
||||
import * as fs from 'fs';
|
||||
import axios from 'axios';
|
||||
import { HttpsProxyAgent } from 'https-proxy-agent';
|
||||
|
|
@ -56,6 +60,7 @@ import type {
|
|||
OCRResult,
|
||||
} from '~/types';
|
||||
import { logger as mockLogger } from '@librechat/data-schemas';
|
||||
import { readFileAsBuffer } from '~/utils/files';
|
||||
import {
|
||||
uploadDocumentToMistral,
|
||||
uploadAzureMistralOCR,
|
||||
|
|
@ -1978,9 +1983,10 @@ describe('MistralOCR Service', () => {
|
|||
|
||||
describe('Azure Mistral OCR with proxy', () => {
|
||||
beforeEach(() => {
|
||||
(jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(
|
||||
Buffer.from('mock-file-content'),
|
||||
);
|
||||
(readFileAsBuffer as jest.Mock).mockResolvedValue({
|
||||
content: Buffer.from('mock-file-content'),
|
||||
bytes: Buffer.from('mock-file-content').length,
|
||||
});
|
||||
});
|
||||
|
||||
it('should use proxy for Azure Mistral OCR requests', async () => {
|
||||
|
|
@ -2098,7 +2104,10 @@ describe('MistralOCR Service', () => {
|
|||
|
||||
describe('uploadAzureMistralOCR', () => {
|
||||
beforeEach(() => {
|
||||
(jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(Buffer.from('mock-file-content'));
|
||||
(readFileAsBuffer as jest.Mock).mockResolvedValue({
|
||||
content: Buffer.from('mock-file-content'),
|
||||
bytes: Buffer.from('mock-file-content').length,
|
||||
});
|
||||
// Reset the HttpsProxyAgent mock to its default implementation for Azure tests
|
||||
(HttpsProxyAgent as unknown as jest.Mock).mockImplementation((url) => ({ proxyUrl: url }));
|
||||
// Clean up any PROXY env var from previous tests
|
||||
|
|
@ -2172,7 +2181,9 @@ describe('MistralOCR Service', () => {
|
|||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(jest.mocked(fs).readFileSync).toHaveBeenCalledWith('/tmp/upload/azure-file.pdf');
|
||||
expect(readFileAsBuffer).toHaveBeenCalledWith('/tmp/upload/azure-file.pdf', {
|
||||
fileSize: undefined,
|
||||
});
|
||||
|
||||
// Verify OCR was called with base64 data URL
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import type {
|
|||
OCRImage,
|
||||
} from '~/types';
|
||||
import { logAxiosError, createAxiosInstance } from '~/utils/axios';
|
||||
import { readFileAsBuffer } from '~/utils/files';
|
||||
import { loadServiceKey } from '~/utils/key';
|
||||
|
||||
const axios = createAxiosInstance();
|
||||
|
|
@ -464,7 +465,9 @@ export const uploadAzureMistralOCR = async (
|
|||
const { apiKey, baseURL } = await loadAuthConfig(context);
|
||||
const model = getModelConfig(context.req.config?.ocr);
|
||||
|
||||
const buffer = fs.readFileSync(context.file.path);
|
||||
const { content: buffer } = await readFileAsBuffer(context.file.path, {
|
||||
fileSize: context.file.size,
|
||||
});
|
||||
const base64 = buffer.toString('base64');
|
||||
/** Uses actual mimetype of the file, 'image/jpeg' as fallback since it seems to be accepted regardless of mismatch */
|
||||
const base64Prefix = `data:${context.file.mimetype || 'image/jpeg'};base64,`;
|
||||
|
|
@ -691,7 +694,9 @@ export const uploadGoogleVertexMistralOCR = async (
|
|||
const { serviceAccount, accessToken } = await loadGoogleAuthConfig();
|
||||
const model = getModelConfig(context.req.config?.ocr);
|
||||
|
||||
const buffer = fs.readFileSync(context.file.path);
|
||||
const { content: buffer } = await readFileAsBuffer(context.file.path, {
|
||||
fileSize: context.file.size,
|
||||
});
|
||||
const base64 = buffer.toString('base64');
|
||||
const base64Prefix = `data:${context.file.mimetype || 'application/pdf'};base64,`;
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,6 @@ jest.mock('@librechat/data-schemas', () => ({
|
|||
},
|
||||
}));
|
||||
|
||||
import { parseTextNative, parseText } from './text';
|
||||
|
||||
jest.mock('fs', () => ({
|
||||
readFileSync: jest.fn(),
|
||||
createReadStream: jest.fn(),
|
||||
|
|
@ -36,10 +34,24 @@ jest.mock('form-data', () => {
|
|||
}));
|
||||
});
|
||||
|
||||
// Mock the utils module to avoid AWS SDK issues
|
||||
jest.mock('../utils', () => ({
|
||||
logAxiosError: jest.fn((args) => {
|
||||
if (typeof args === 'object' && args.message) {
|
||||
return args.message;
|
||||
}
|
||||
return 'Error';
|
||||
}),
|
||||
readFileAsString: jest.fn(),
|
||||
}));
|
||||
|
||||
// Now import everything after mocks are in place
|
||||
import { parseTextNative, parseText } from './text';
|
||||
import fs, { ReadStream } from 'fs';
|
||||
import axios from 'axios';
|
||||
import FormData from 'form-data';
|
||||
import { generateShortLivedToken } from '../crypto/jwt';
|
||||
import { readFileAsString } from '../utils';
|
||||
|
||||
const mockedFs = fs as jest.Mocked<typeof fs>;
|
||||
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
||||
|
|
@ -47,6 +59,7 @@ const mockedFormData = FormData as jest.MockedClass<typeof FormData>;
|
|||
const mockedGenerateShortLivedToken = generateShortLivedToken as jest.MockedFunction<
|
||||
typeof generateShortLivedToken
|
||||
>;
|
||||
const mockedReadFileAsString = readFileAsString as jest.MockedFunction<typeof readFileAsString>;
|
||||
|
||||
describe('text', () => {
|
||||
const mockFile: Express.Multer.File = {
|
||||
|
|
@ -74,29 +87,32 @@ describe('text', () => {
|
|||
});
|
||||
|
||||
describe('parseTextNative', () => {
|
||||
it('should successfully parse a text file', () => {
|
||||
it('should successfully parse a text file', async () => {
|
||||
const mockText = 'Hello, world!';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
const result = parseTextNative(mockFile);
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
expect(mockedFs.readFileSync).toHaveBeenCalledWith('/tmp/test.txt', 'utf8');
|
||||
const result = await parseTextNative(mockFile);
|
||||
|
||||
expect(mockedReadFileAsString).toHaveBeenCalledWith('/tmp/test.txt', {
|
||||
fileSize: 100,
|
||||
});
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
||||
it('should throw an error when file cannot be read', () => {
|
||||
it('should handle file read errors', async () => {
|
||||
const mockError = new Error('File not found');
|
||||
mockedFs.readFileSync.mockImplementation(() => {
|
||||
throw mockError;
|
||||
});
|
||||
mockedReadFileAsString.mockRejectedValue(mockError);
|
||||
|
||||
expect(() => parseTextNative(mockFile)).toThrow(
|
||||
'Failed to read file as text: Error: File not found',
|
||||
);
|
||||
await expect(parseTextNative(mockFile)).rejects.toThrow('File not found');
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -115,7 +131,12 @@ describe('text', () => {
|
|||
|
||||
it('should fall back to native parsing when RAG_API_URL is not defined', async () => {
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
const result = await parseText({
|
||||
req: mockReq,
|
||||
|
|
@ -125,7 +146,7 @@ describe('text', () => {
|
|||
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
expect(mockedAxios.get).not.toHaveBeenCalled();
|
||||
|
|
@ -134,7 +155,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when health check fails', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
mockedAxios.get.mockRejectedValue(new Error('Health check failed'));
|
||||
|
||||
|
|
@ -145,11 +171,11 @@ describe('text', () => {
|
|||
});
|
||||
|
||||
expect(mockedAxios.get).toHaveBeenCalledWith('http://rag-api.test/health', {
|
||||
timeout: 5000,
|
||||
timeout: 10000,
|
||||
});
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
|
@ -157,7 +183,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when health check returns non-OK status', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
mockedAxios.get.mockResolvedValue({
|
||||
status: 500,
|
||||
|
|
@ -172,7 +203,7 @@ describe('text', () => {
|
|||
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
|
@ -207,7 +238,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when RAG API response lacks text property', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
mockedAxios.get.mockResolvedValue({
|
||||
status: 200,
|
||||
|
|
@ -226,7 +262,7 @@ describe('text', () => {
|
|||
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
|
@ -234,7 +270,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when user is undefined', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
const result = await parseText({
|
||||
req: { user: undefined },
|
||||
|
|
@ -247,7 +288,7 @@ describe('text', () => {
|
|||
expect(mockedAxios.post).not.toHaveBeenCalled();
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,18 +1,19 @@
|
|||
import fs from 'fs';
|
||||
import axios from 'axios';
|
||||
import FormData from 'form-data';
|
||||
import { createReadStream } from 'fs';
|
||||
import { logger } from '@librechat/data-schemas';
|
||||
import { FileSources } from 'librechat-data-provider';
|
||||
import type { Request as ServerRequest } from 'express';
|
||||
import { logAxiosError, readFileAsString } from '~/utils';
|
||||
import { generateShortLivedToken } from '~/crypto/jwt';
|
||||
|
||||
/**
|
||||
* Attempts to parse text using RAG API, falls back to native text parsing
|
||||
* @param {Object} params - The parameters object
|
||||
* @param {Express.Request} params.req - The Express request object
|
||||
* @param {Express.Multer.File} params.file - The uploaded file
|
||||
* @param {string} params.file_id - The file ID
|
||||
* @returns {Promise<{text: string, bytes: number, source: string}>}
|
||||
* @param params - The parameters object
|
||||
* @param params.req - The Express request object
|
||||
* @param params.file - The uploaded file
|
||||
* @param params.file_id - The file ID
|
||||
* @returns
|
||||
*/
|
||||
export async function parseText({
|
||||
req,
|
||||
|
|
@ -30,32 +31,33 @@ export async function parseText({
|
|||
return parseTextNative(file);
|
||||
}
|
||||
|
||||
if (!req.user?.id) {
|
||||
const userId = req.user?.id;
|
||||
if (!userId) {
|
||||
logger.debug('[parseText] No user ID provided, falling back to native text parsing');
|
||||
return parseTextNative(file);
|
||||
}
|
||||
|
||||
try {
|
||||
const healthResponse = await axios.get(`${process.env.RAG_API_URL}/health`, {
|
||||
timeout: 5000,
|
||||
timeout: 10000,
|
||||
});
|
||||
if (healthResponse?.statusText !== 'OK' && healthResponse?.status !== 200) {
|
||||
logger.debug('[parseText] RAG API health check failed, falling back to native parsing');
|
||||
return parseTextNative(file);
|
||||
}
|
||||
} catch (healthError) {
|
||||
logger.debug(
|
||||
'[parseText] RAG API health check failed, falling back to native parsing',
|
||||
healthError,
|
||||
);
|
||||
logAxiosError({
|
||||
message: '[parseText] RAG API health check failed, falling back to native parsing:',
|
||||
error: healthError,
|
||||
});
|
||||
return parseTextNative(file);
|
||||
}
|
||||
|
||||
try {
|
||||
const jwtToken = generateShortLivedToken(req.user.id);
|
||||
const jwtToken = generateShortLivedToken(userId);
|
||||
const formData = new FormData();
|
||||
formData.append('file_id', file_id);
|
||||
formData.append('file', fs.createReadStream(file.path));
|
||||
formData.append('file', createReadStream(file.path));
|
||||
|
||||
const formHeaders = formData.getHeaders();
|
||||
|
||||
|
|
@ -69,7 +71,7 @@ export async function parseText({
|
|||
});
|
||||
|
||||
const responseData = response.data;
|
||||
logger.debug('[parseText] Response from RAG API', responseData);
|
||||
logger.debug(`[parseText] RAG API completed successfully (${response.status})`);
|
||||
|
||||
if (!('text' in responseData)) {
|
||||
throw new Error('RAG API did not return parsed text');
|
||||
|
|
@ -81,7 +83,10 @@ export async function parseText({
|
|||
source: FileSources.text,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.warn('[parseText] RAG API text parsing failed, falling back to native parsing', error);
|
||||
logAxiosError({
|
||||
message: '[parseText] RAG API text parsing failed, falling back to native parsing',
|
||||
error,
|
||||
});
|
||||
return parseTextNative(file);
|
||||
}
|
||||
}
|
||||
|
|
@ -89,25 +94,21 @@ export async function parseText({
|
|||
/**
|
||||
* Native JavaScript text parsing fallback
|
||||
* Simple text file reading - complex formats handled by RAG API
|
||||
* @param {Express.Multer.File} file - The uploaded file
|
||||
* @returns {{text: string, bytes: number, source: string}}
|
||||
* @param file - The uploaded file
|
||||
* @returns
|
||||
*/
|
||||
export function parseTextNative(file: Express.Multer.File): {
|
||||
export async function parseTextNative(file: Express.Multer.File): Promise<{
|
||||
text: string;
|
||||
bytes: number;
|
||||
source: string;
|
||||
} {
|
||||
try {
|
||||
const text = fs.readFileSync(file.path, 'utf8');
|
||||
const bytes = Buffer.byteLength(text, 'utf8');
|
||||
}> {
|
||||
const { content: text, bytes } = await readFileAsString(file.path, {
|
||||
fileSize: file.size,
|
||||
});
|
||||
|
||||
return {
|
||||
text,
|
||||
bytes,
|
||||
source: FileSources.text,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('[parseTextNative] Failed to parse file:', error);
|
||||
throw new Error(`Failed to read file as text: ${error}`);
|
||||
}
|
||||
return {
|
||||
text,
|
||||
bytes,
|
||||
source: FileSources.text,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue