mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-21 21:50:49 +02:00
📂 refactor: File Read Operations (#9747)
* fix: axios response logging for text parsing, remove console logging, remove jsdoc
* refactor: error logging in logAxiosError function to handle various error types with type guards
* refactor: enhance text parsing with improved error handling and async file reading
* refactor: replace synchronous file reading with asynchronous methods for improved performance and memory management
* ci: update tests
This commit is contained in:
parent
0352067da2
commit
2489670f54
10 changed files with 692 additions and 83 deletions
|
@ -1,11 +1,10 @@
|
|||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const axios = require('axios');
|
||||
const FormData = require('form-data');
|
||||
const nodemailer = require('nodemailer');
|
||||
const handlebars = require('handlebars');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const { logAxiosError, isEnabled } = require('@librechat/api');
|
||||
const { logAxiosError, isEnabled, readFileAsString } = require('@librechat/api');
|
||||
|
||||
/**
|
||||
* Sends an email using Mailgun API.
|
||||
|
@ -93,8 +92,7 @@ const sendEmailViaSMTP = async ({ transporterOptions, mailOptions }) => {
|
|||
*/
|
||||
const sendEmail = async ({ email, subject, payload, template, throwError = true }) => {
|
||||
try {
|
||||
// Read and compile the email template
|
||||
const source = fs.readFileSync(path.join(__dirname, 'emails', template), 'utf8');
|
||||
const { content: source } = await readFileAsString(path.join(__dirname, 'emails', template));
|
||||
const compiledTemplate = handlebars.compile(source);
|
||||
const html = compiledTemplate(payload);
|
||||
|
||||
|
|
|
@ -45,6 +45,10 @@ jest.mock('~/utils/axios', () => ({
|
|||
logAxiosError: jest.fn(({ message }) => message || 'Error'),
|
||||
}));
|
||||
|
||||
jest.mock('~/utils/files', () => ({
|
||||
readFileAsBuffer: jest.fn(),
|
||||
}));
|
||||
|
||||
import * as fs from 'fs';
|
||||
import axios from 'axios';
|
||||
import { HttpsProxyAgent } from 'https-proxy-agent';
|
||||
|
@ -56,6 +60,7 @@ import type {
|
|||
OCRResult,
|
||||
} from '~/types';
|
||||
import { logger as mockLogger } from '@librechat/data-schemas';
|
||||
import { readFileAsBuffer } from '~/utils/files';
|
||||
import {
|
||||
uploadDocumentToMistral,
|
||||
uploadAzureMistralOCR,
|
||||
|
@ -1978,9 +1983,10 @@ describe('MistralOCR Service', () => {
|
|||
|
||||
describe('Azure Mistral OCR with proxy', () => {
|
||||
beforeEach(() => {
|
||||
(jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(
|
||||
Buffer.from('mock-file-content'),
|
||||
);
|
||||
(readFileAsBuffer as jest.Mock).mockResolvedValue({
|
||||
content: Buffer.from('mock-file-content'),
|
||||
bytes: Buffer.from('mock-file-content').length,
|
||||
});
|
||||
});
|
||||
|
||||
it('should use proxy for Azure Mistral OCR requests', async () => {
|
||||
|
@ -2098,7 +2104,10 @@ describe('MistralOCR Service', () => {
|
|||
|
||||
describe('uploadAzureMistralOCR', () => {
|
||||
beforeEach(() => {
|
||||
(jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(Buffer.from('mock-file-content'));
|
||||
(readFileAsBuffer as jest.Mock).mockResolvedValue({
|
||||
content: Buffer.from('mock-file-content'),
|
||||
bytes: Buffer.from('mock-file-content').length,
|
||||
});
|
||||
// Reset the HttpsProxyAgent mock to its default implementation for Azure tests
|
||||
(HttpsProxyAgent as unknown as jest.Mock).mockImplementation((url) => ({ proxyUrl: url }));
|
||||
// Clean up any PROXY env var from previous tests
|
||||
|
@ -2172,7 +2181,9 @@ describe('MistralOCR Service', () => {
|
|||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(jest.mocked(fs).readFileSync).toHaveBeenCalledWith('/tmp/upload/azure-file.pdf');
|
||||
expect(readFileAsBuffer).toHaveBeenCalledWith('/tmp/upload/azure-file.pdf', {
|
||||
fileSize: undefined,
|
||||
});
|
||||
|
||||
// Verify OCR was called with base64 data URL
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
|
|
|
@ -22,6 +22,7 @@ import type {
|
|||
OCRImage,
|
||||
} from '~/types';
|
||||
import { logAxiosError, createAxiosInstance } from '~/utils/axios';
|
||||
import { readFileAsBuffer } from '~/utils/files';
|
||||
import { loadServiceKey } from '~/utils/key';
|
||||
|
||||
const axios = createAxiosInstance();
|
||||
|
@ -464,7 +465,9 @@ export const uploadAzureMistralOCR = async (
|
|||
const { apiKey, baseURL } = await loadAuthConfig(context);
|
||||
const model = getModelConfig(context.req.config?.ocr);
|
||||
|
||||
const buffer = fs.readFileSync(context.file.path);
|
||||
const { content: buffer } = await readFileAsBuffer(context.file.path, {
|
||||
fileSize: context.file.size,
|
||||
});
|
||||
const base64 = buffer.toString('base64');
|
||||
/** Uses actual mimetype of the file, 'image/jpeg' as fallback since it seems to be accepted regardless of mismatch */
|
||||
const base64Prefix = `data:${context.file.mimetype || 'image/jpeg'};base64,`;
|
||||
|
@ -691,7 +694,9 @@ export const uploadGoogleVertexMistralOCR = async (
|
|||
const { serviceAccount, accessToken } = await loadGoogleAuthConfig();
|
||||
const model = getModelConfig(context.req.config?.ocr);
|
||||
|
||||
const buffer = fs.readFileSync(context.file.path);
|
||||
const { content: buffer } = await readFileAsBuffer(context.file.path, {
|
||||
fileSize: context.file.size,
|
||||
});
|
||||
const base64 = buffer.toString('base64');
|
||||
const base64Prefix = `data:${context.file.mimetype || 'application/pdf'};base64,`;
|
||||
|
||||
|
|
|
@ -9,8 +9,6 @@ jest.mock('@librechat/data-schemas', () => ({
|
|||
},
|
||||
}));
|
||||
|
||||
import { parseTextNative, parseText } from './text';
|
||||
|
||||
jest.mock('fs', () => ({
|
||||
readFileSync: jest.fn(),
|
||||
createReadStream: jest.fn(),
|
||||
|
@ -36,10 +34,24 @@ jest.mock('form-data', () => {
|
|||
}));
|
||||
});
|
||||
|
||||
// Mock the utils module to avoid AWS SDK issues
|
||||
jest.mock('../utils', () => ({
|
||||
logAxiosError: jest.fn((args) => {
|
||||
if (typeof args === 'object' && args.message) {
|
||||
return args.message;
|
||||
}
|
||||
return 'Error';
|
||||
}),
|
||||
readFileAsString: jest.fn(),
|
||||
}));
|
||||
|
||||
// Now import everything after mocks are in place
|
||||
import { parseTextNative, parseText } from './text';
|
||||
import fs, { ReadStream } from 'fs';
|
||||
import axios from 'axios';
|
||||
import FormData from 'form-data';
|
||||
import { generateShortLivedToken } from '../crypto/jwt';
|
||||
import { readFileAsString } from '../utils';
|
||||
|
||||
const mockedFs = fs as jest.Mocked<typeof fs>;
|
||||
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
||||
|
@ -47,6 +59,7 @@ const mockedFormData = FormData as jest.MockedClass<typeof FormData>;
|
|||
const mockedGenerateShortLivedToken = generateShortLivedToken as jest.MockedFunction<
|
||||
typeof generateShortLivedToken
|
||||
>;
|
||||
const mockedReadFileAsString = readFileAsString as jest.MockedFunction<typeof readFileAsString>;
|
||||
|
||||
describe('text', () => {
|
||||
const mockFile: Express.Multer.File = {
|
||||
|
@ -74,29 +87,32 @@ describe('text', () => {
|
|||
});
|
||||
|
||||
describe('parseTextNative', () => {
|
||||
it('should successfully parse a text file', () => {
|
||||
it('should successfully parse a text file', async () => {
|
||||
const mockText = 'Hello, world!';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
const result = parseTextNative(mockFile);
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
expect(mockedFs.readFileSync).toHaveBeenCalledWith('/tmp/test.txt', 'utf8');
|
||||
const result = await parseTextNative(mockFile);
|
||||
|
||||
expect(mockedReadFileAsString).toHaveBeenCalledWith('/tmp/test.txt', {
|
||||
fileSize: 100,
|
||||
});
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
||||
it('should throw an error when file cannot be read', () => {
|
||||
it('should handle file read errors', async () => {
|
||||
const mockError = new Error('File not found');
|
||||
mockedFs.readFileSync.mockImplementation(() => {
|
||||
throw mockError;
|
||||
});
|
||||
mockedReadFileAsString.mockRejectedValue(mockError);
|
||||
|
||||
expect(() => parseTextNative(mockFile)).toThrow(
|
||||
'Failed to read file as text: Error: File not found',
|
||||
);
|
||||
await expect(parseTextNative(mockFile)).rejects.toThrow('File not found');
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -115,7 +131,12 @@ describe('text', () => {
|
|||
|
||||
it('should fall back to native parsing when RAG_API_URL is not defined', async () => {
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
const result = await parseText({
|
||||
req: mockReq,
|
||||
|
@ -125,7 +146,7 @@ describe('text', () => {
|
|||
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
expect(mockedAxios.get).not.toHaveBeenCalled();
|
||||
|
@ -134,7 +155,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when health check fails', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
mockedAxios.get.mockRejectedValue(new Error('Health check failed'));
|
||||
|
||||
|
@ -145,11 +171,11 @@ describe('text', () => {
|
|||
});
|
||||
|
||||
expect(mockedAxios.get).toHaveBeenCalledWith('http://rag-api.test/health', {
|
||||
timeout: 5000,
|
||||
timeout: 10000,
|
||||
});
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
@ -157,7 +183,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when health check returns non-OK status', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
mockedAxios.get.mockResolvedValue({
|
||||
status: 500,
|
||||
|
@ -172,7 +203,7 @@ describe('text', () => {
|
|||
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
@ -207,7 +238,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when RAG API response lacks text property', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
mockedAxios.get.mockResolvedValue({
|
||||
status: 200,
|
||||
|
@ -226,7 +262,7 @@ describe('text', () => {
|
|||
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
@ -234,7 +270,12 @@ describe('text', () => {
|
|||
it('should fall back to native parsing when user is undefined', async () => {
|
||||
process.env.RAG_API_URL = 'http://rag-api.test';
|
||||
const mockText = 'Native parsing result';
|
||||
mockedFs.readFileSync.mockReturnValue(mockText);
|
||||
const mockBytes = Buffer.byteLength(mockText, 'utf8');
|
||||
|
||||
mockedReadFileAsString.mockResolvedValue({
|
||||
content: mockText,
|
||||
bytes: mockBytes,
|
||||
});
|
||||
|
||||
const result = await parseText({
|
||||
req: { user: undefined },
|
||||
|
@ -247,7 +288,7 @@ describe('text', () => {
|
|||
expect(mockedAxios.post).not.toHaveBeenCalled();
|
||||
expect(result).toEqual({
|
||||
text: mockText,
|
||||
bytes: Buffer.byteLength(mockText, 'utf8'),
|
||||
bytes: mockBytes,
|
||||
source: FileSources.text,
|
||||
});
|
||||
});
|
||||
|
|
|
@ -1,18 +1,19 @@
|
|||
import fs from 'fs';
|
||||
import axios from 'axios';
|
||||
import FormData from 'form-data';
|
||||
import { createReadStream } from 'fs';
|
||||
import { logger } from '@librechat/data-schemas';
|
||||
import { FileSources } from 'librechat-data-provider';
|
||||
import type { Request as ServerRequest } from 'express';
|
||||
import { logAxiosError, readFileAsString } from '~/utils';
|
||||
import { generateShortLivedToken } from '~/crypto/jwt';
|
||||
|
||||
/**
|
||||
* Attempts to parse text using RAG API, falls back to native text parsing
|
||||
* @param {Object} params - The parameters object
|
||||
* @param {Express.Request} params.req - The Express request object
|
||||
* @param {Express.Multer.File} params.file - The uploaded file
|
||||
* @param {string} params.file_id - The file ID
|
||||
* @returns {Promise<{text: string, bytes: number, source: string}>}
|
||||
* @param params - The parameters object
|
||||
* @param params.req - The Express request object
|
||||
* @param params.file - The uploaded file
|
||||
* @param params.file_id - The file ID
|
||||
* @returns
|
||||
*/
|
||||
export async function parseText({
|
||||
req,
|
||||
|
@ -30,32 +31,33 @@ export async function parseText({
|
|||
return parseTextNative(file);
|
||||
}
|
||||
|
||||
if (!req.user?.id) {
|
||||
const userId = req.user?.id;
|
||||
if (!userId) {
|
||||
logger.debug('[parseText] No user ID provided, falling back to native text parsing');
|
||||
return parseTextNative(file);
|
||||
}
|
||||
|
||||
try {
|
||||
const healthResponse = await axios.get(`${process.env.RAG_API_URL}/health`, {
|
||||
timeout: 5000,
|
||||
timeout: 10000,
|
||||
});
|
||||
if (healthResponse?.statusText !== 'OK' && healthResponse?.status !== 200) {
|
||||
logger.debug('[parseText] RAG API health check failed, falling back to native parsing');
|
||||
return parseTextNative(file);
|
||||
}
|
||||
} catch (healthError) {
|
||||
logger.debug(
|
||||
'[parseText] RAG API health check failed, falling back to native parsing',
|
||||
healthError,
|
||||
);
|
||||
logAxiosError({
|
||||
message: '[parseText] RAG API health check failed, falling back to native parsing:',
|
||||
error: healthError,
|
||||
});
|
||||
return parseTextNative(file);
|
||||
}
|
||||
|
||||
try {
|
||||
const jwtToken = generateShortLivedToken(req.user.id);
|
||||
const jwtToken = generateShortLivedToken(userId);
|
||||
const formData = new FormData();
|
||||
formData.append('file_id', file_id);
|
||||
formData.append('file', fs.createReadStream(file.path));
|
||||
formData.append('file', createReadStream(file.path));
|
||||
|
||||
const formHeaders = formData.getHeaders();
|
||||
|
||||
|
@ -69,7 +71,7 @@ export async function parseText({
|
|||
});
|
||||
|
||||
const responseData = response.data;
|
||||
logger.debug('[parseText] Response from RAG API', responseData);
|
||||
logger.debug(`[parseText] RAG API completed successfully (${response.status})`);
|
||||
|
||||
if (!('text' in responseData)) {
|
||||
throw new Error('RAG API did not return parsed text');
|
||||
|
@ -81,7 +83,10 @@ export async function parseText({
|
|||
source: FileSources.text,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.warn('[parseText] RAG API text parsing failed, falling back to native parsing', error);
|
||||
logAxiosError({
|
||||
message: '[parseText] RAG API text parsing failed, falling back to native parsing',
|
||||
error,
|
||||
});
|
||||
return parseTextNative(file);
|
||||
}
|
||||
}
|
||||
|
@ -89,25 +94,21 @@ export async function parseText({
|
|||
/**
|
||||
* Native JavaScript text parsing fallback
|
||||
* Simple text file reading - complex formats handled by RAG API
|
||||
* @param {Express.Multer.File} file - The uploaded file
|
||||
* @returns {{text: string, bytes: number, source: string}}
|
||||
* @param file - The uploaded file
|
||||
* @returns
|
||||
*/
|
||||
export function parseTextNative(file: Express.Multer.File): {
|
||||
export async function parseTextNative(file: Express.Multer.File): Promise<{
|
||||
text: string;
|
||||
bytes: number;
|
||||
source: string;
|
||||
} {
|
||||
try {
|
||||
const text = fs.readFileSync(file.path, 'utf8');
|
||||
const bytes = Buffer.byteLength(text, 'utf8');
|
||||
}> {
|
||||
const { content: text, bytes } = await readFileAsString(file.path, {
|
||||
fileSize: file.size,
|
||||
});
|
||||
|
||||
return {
|
||||
text,
|
||||
bytes,
|
||||
source: FileSources.text,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('[parseTextNative] Failed to parse file:', error);
|
||||
throw new Error(`Failed to read file as text: ${error}`);
|
||||
}
|
||||
return {
|
||||
text,
|
||||
bytes,
|
||||
source: FileSources.text,
|
||||
};
|
||||
}
|
||||
|
|
414
packages/api/src/utils/__tests__/files.test.ts
Normal file
414
packages/api/src/utils/__tests__/files.test.ts
Normal file
|
@ -0,0 +1,414 @@
|
|||
import { createReadStream } from 'fs';
|
||||
import { readFile, stat } from 'fs/promises';
|
||||
import { Readable } from 'stream';
|
||||
import { readFileAsString, readFileAsBuffer, readJsonFile } from '../files';
|
||||
|
||||
jest.mock('fs');
|
||||
jest.mock('fs/promises');
|
||||
|
||||
describe('File utilities', () => {
|
||||
const mockFilePath = '/test/file.txt';
|
||||
const smallContent = 'Hello, World!';
|
||||
const largeContent = 'x'.repeat(11 * 1024 * 1024); // 11MB of 'x'
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('readFileAsString', () => {
|
||||
it('should read small files directly without streaming', async () => {
|
||||
const fileSize = Buffer.byteLength(smallContent);
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
(readFile as jest.Mock).mockResolvedValue(smallContent);
|
||||
|
||||
const result = await readFileAsString(mockFilePath);
|
||||
|
||||
expect(result).toEqual({
|
||||
content: smallContent,
|
||||
bytes: fileSize,
|
||||
});
|
||||
expect(stat).toHaveBeenCalledWith(mockFilePath);
|
||||
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
|
||||
expect(createReadStream).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should use provided fileSize to avoid stat call', async () => {
|
||||
const fileSize = Buffer.byteLength(smallContent);
|
||||
|
||||
(readFile as jest.Mock).mockResolvedValue(smallContent);
|
||||
|
||||
const result = await readFileAsString(mockFilePath, { fileSize });
|
||||
|
||||
expect(result).toEqual({
|
||||
content: smallContent,
|
||||
bytes: fileSize,
|
||||
});
|
||||
expect(stat).not.toHaveBeenCalled();
|
||||
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
|
||||
});
|
||||
|
||||
it('should stream large files', async () => {
|
||||
const fileSize = Buffer.byteLength(largeContent);
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
|
||||
// Create a mock readable stream
|
||||
const chunks = [
|
||||
largeContent.substring(0, 5000000),
|
||||
largeContent.substring(5000000, 10000000),
|
||||
largeContent.substring(10000000),
|
||||
];
|
||||
|
||||
const mockStream = new Readable({
|
||||
read() {
|
||||
if (chunks.length > 0) {
|
||||
this.push(chunks.shift());
|
||||
} else {
|
||||
this.push(null); // End stream
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
(createReadStream as jest.Mock).mockReturnValue(mockStream);
|
||||
|
||||
const result = await readFileAsString(mockFilePath);
|
||||
|
||||
expect(result).toEqual({
|
||||
content: largeContent,
|
||||
bytes: fileSize,
|
||||
});
|
||||
expect(stat).toHaveBeenCalledWith(mockFilePath);
|
||||
expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
|
||||
encoding: 'utf8',
|
||||
highWaterMark: 64 * 1024,
|
||||
});
|
||||
expect(readFile).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should use custom encoding', async () => {
|
||||
const fileSize = 100;
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
(readFile as jest.Mock).mockResolvedValue(smallContent);
|
||||
|
||||
await readFileAsString(mockFilePath, { encoding: 'latin1' });
|
||||
|
||||
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'latin1');
|
||||
});
|
||||
|
||||
it('should respect custom stream threshold', async () => {
|
||||
const customThreshold = 1024; // 1KB
|
||||
const fileSize = 2048; // 2KB
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
|
||||
const mockStream = new Readable({
|
||||
read() {
|
||||
this.push('test content');
|
||||
this.push(null);
|
||||
},
|
||||
});
|
||||
|
||||
(createReadStream as jest.Mock).mockReturnValue(mockStream);
|
||||
|
||||
await readFileAsString(mockFilePath, { streamThreshold: customThreshold });
|
||||
|
||||
expect(createReadStream).toHaveBeenCalled();
|
||||
expect(readFile).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle empty files', async () => {
|
||||
const fileSize = 0;
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
(readFile as jest.Mock).mockResolvedValue('');
|
||||
|
||||
const result = await readFileAsString(mockFilePath);
|
||||
|
||||
expect(result).toEqual({
|
||||
content: '',
|
||||
bytes: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it('should propagate read errors', async () => {
|
||||
const error = new Error('File not found');
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: 100 });
|
||||
(readFile as jest.Mock).mockRejectedValue(error);
|
||||
|
||||
await expect(readFileAsString(mockFilePath)).rejects.toThrow('File not found');
|
||||
});
|
||||
|
||||
it('should propagate stat errors when fileSize not provided', async () => {
|
||||
const error = new Error('Permission denied');
|
||||
|
||||
(stat as jest.Mock).mockRejectedValue(error);
|
||||
|
||||
await expect(readFileAsString(mockFilePath)).rejects.toThrow('Permission denied');
|
||||
});
|
||||
|
||||
it('should propagate stream errors', async () => {
|
||||
const fileSize = 11 * 1024 * 1024; // 11MB
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
|
||||
const mockStream = new Readable({
|
||||
read() {
|
||||
this.emit('error', new Error('Stream error'));
|
||||
},
|
||||
});
|
||||
|
||||
(createReadStream as jest.Mock).mockReturnValue(mockStream);
|
||||
|
||||
await expect(readFileAsString(mockFilePath)).rejects.toThrow('Stream error');
|
||||
});
|
||||
});
|
||||
|
||||
describe('readFileAsBuffer', () => {
|
||||
const smallBuffer = Buffer.from(smallContent);
|
||||
const largeBuffer = Buffer.from(largeContent);
|
||||
|
||||
it('should read small files directly without streaming', async () => {
|
||||
const fileSize = smallBuffer.length;
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
(readFile as jest.Mock).mockResolvedValue(smallBuffer);
|
||||
|
||||
const result = await readFileAsBuffer(mockFilePath);
|
||||
|
||||
expect(result).toEqual({
|
||||
content: smallBuffer,
|
||||
bytes: fileSize,
|
||||
});
|
||||
expect(stat).toHaveBeenCalledWith(mockFilePath);
|
||||
expect(readFile).toHaveBeenCalledWith(mockFilePath);
|
||||
expect(createReadStream).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should use provided fileSize to avoid stat call', async () => {
|
||||
const fileSize = smallBuffer.length;
|
||||
|
||||
(readFile as jest.Mock).mockResolvedValue(smallBuffer);
|
||||
|
||||
const result = await readFileAsBuffer(mockFilePath, { fileSize });
|
||||
|
||||
expect(result).toEqual({
|
||||
content: smallBuffer,
|
||||
bytes: fileSize,
|
||||
});
|
||||
expect(stat).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should stream large files', async () => {
|
||||
const fileSize = largeBuffer.length;
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
|
||||
// Split large buffer into chunks
|
||||
const chunk1 = largeBuffer.slice(0, 5000000);
|
||||
const chunk2 = largeBuffer.slice(5000000, 10000000);
|
||||
const chunk3 = largeBuffer.slice(10000000);
|
||||
|
||||
const chunks = [chunk1, chunk2, chunk3];
|
||||
|
||||
const mockStream = new Readable({
|
||||
read() {
|
||||
if (chunks.length > 0) {
|
||||
this.push(chunks.shift());
|
||||
} else {
|
||||
this.push(null);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
(createReadStream as jest.Mock).mockReturnValue(mockStream);
|
||||
|
||||
const result = await readFileAsBuffer(mockFilePath);
|
||||
|
||||
expect(result.bytes).toBe(fileSize);
|
||||
expect(Buffer.compare(result.content, largeBuffer)).toBe(0);
|
||||
expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
|
||||
highWaterMark: 64 * 1024,
|
||||
});
|
||||
expect(readFile).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should respect custom highWaterMark', async () => {
|
||||
const fileSize = 11 * 1024 * 1024; // 11MB
|
||||
const customHighWaterMark = 128 * 1024; // 128KB
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
|
||||
const mockStream = new Readable({
|
||||
read() {
|
||||
this.push(Buffer.from('test'));
|
||||
this.push(null);
|
||||
},
|
||||
});
|
||||
|
||||
(createReadStream as jest.Mock).mockReturnValue(mockStream);
|
||||
|
||||
await readFileAsBuffer(mockFilePath, { highWaterMark: customHighWaterMark });
|
||||
|
||||
expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
|
||||
highWaterMark: customHighWaterMark,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle empty buffer files', async () => {
|
||||
const emptyBuffer = Buffer.alloc(0);
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: 0 });
|
||||
(readFile as jest.Mock).mockResolvedValue(emptyBuffer);
|
||||
|
||||
const result = await readFileAsBuffer(mockFilePath);
|
||||
|
||||
expect(result).toEqual({
|
||||
content: emptyBuffer,
|
||||
bytes: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it('should propagate errors', async () => {
|
||||
const error = new Error('Access denied');
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: 100 });
|
||||
(readFile as jest.Mock).mockRejectedValue(error);
|
||||
|
||||
await expect(readFileAsBuffer(mockFilePath)).rejects.toThrow('Access denied');
|
||||
});
|
||||
});
|
||||
|
||||
describe('readJsonFile', () => {
|
||||
const validJson = { name: 'test', value: 123, nested: { key: 'value' } };
|
||||
const jsonString = JSON.stringify(validJson);
|
||||
|
||||
it('should parse valid JSON files', async () => {
|
||||
(stat as jest.Mock).mockResolvedValue({ size: jsonString.length });
|
||||
(readFile as jest.Mock).mockResolvedValue(jsonString);
|
||||
|
||||
const result = await readJsonFile(mockFilePath);
|
||||
|
||||
expect(result).toEqual(validJson);
|
||||
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
|
||||
});
|
||||
|
||||
it('should parse JSON with provided fileSize', async () => {
|
||||
const fileSize = jsonString.length;
|
||||
|
||||
(readFile as jest.Mock).mockResolvedValue(jsonString);
|
||||
|
||||
const result = await readJsonFile(mockFilePath, { fileSize });
|
||||
|
||||
expect(result).toEqual(validJson);
|
||||
expect(stat).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle JSON arrays', async () => {
|
||||
const jsonArray = [1, 2, 3, { key: 'value' }];
|
||||
const arrayString = JSON.stringify(jsonArray);
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: arrayString.length });
|
||||
(readFile as jest.Mock).mockResolvedValue(arrayString);
|
||||
|
||||
const result = await readJsonFile(mockFilePath);
|
||||
|
||||
expect(result).toEqual(jsonArray);
|
||||
});
|
||||
|
||||
it('should throw on invalid JSON', async () => {
|
||||
const invalidJson = '{ invalid json }';
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: invalidJson.length });
|
||||
(readFile as jest.Mock).mockResolvedValue(invalidJson);
|
||||
|
||||
await expect(readJsonFile(mockFilePath)).rejects.toThrow();
|
||||
});
|
||||
|
||||
it('should throw on empty file', async () => {
|
||||
(stat as jest.Mock).mockResolvedValue({ size: 0 });
|
||||
(readFile as jest.Mock).mockResolvedValue('');
|
||||
|
||||
await expect(readJsonFile(mockFilePath)).rejects.toThrow();
|
||||
});
|
||||
|
||||
it('should handle large JSON files with streaming', async () => {
|
||||
const largeJson = { data: 'x'.repeat(11 * 1024 * 1024) }; // >10MB
|
||||
const largeJsonString = JSON.stringify(largeJson);
|
||||
const fileSize = largeJsonString.length;
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
|
||||
// Create chunks for streaming
|
||||
const chunks: string[] = [];
|
||||
let offset = 0;
|
||||
const chunkSize = 5 * 1024 * 1024; // 5MB chunks
|
||||
|
||||
while (offset < largeJsonString.length) {
|
||||
chunks.push(largeJsonString.slice(offset, offset + chunkSize));
|
||||
offset += chunkSize;
|
||||
}
|
||||
|
||||
const mockStream = new Readable({
|
||||
read() {
|
||||
if (chunks.length > 0) {
|
||||
this.push(chunks.shift());
|
||||
} else {
|
||||
this.push(null);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
(createReadStream as jest.Mock).mockReturnValue(mockStream);
|
||||
|
||||
const result = await readJsonFile(mockFilePath);
|
||||
|
||||
expect(result).toEqual(largeJson);
|
||||
expect(createReadStream).toHaveBeenCalled();
|
||||
expect(readFile).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should use custom stream threshold', async () => {
|
||||
const customThreshold = 100;
|
||||
const json = { test: 'x'.repeat(200) };
|
||||
const jsonStr = JSON.stringify(json);
|
||||
const fileSize = jsonStr.length;
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
|
||||
|
||||
const mockStream = new Readable({
|
||||
read() {
|
||||
this.push(jsonStr);
|
||||
this.push(null);
|
||||
},
|
||||
});
|
||||
|
||||
(createReadStream as jest.Mock).mockReturnValue(mockStream);
|
||||
|
||||
await readJsonFile(mockFilePath, { streamThreshold: customThreshold });
|
||||
|
||||
expect(createReadStream).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should preserve type with generics', async () => {
|
||||
interface TestType {
|
||||
id: number;
|
||||
name: string;
|
||||
}
|
||||
|
||||
const typedJson: TestType = { id: 1, name: 'test' };
|
||||
const jsonString = JSON.stringify(typedJson);
|
||||
|
||||
(stat as jest.Mock).mockResolvedValue({ size: jsonString.length });
|
||||
(readFile as jest.Mock).mockResolvedValue(jsonString);
|
||||
|
||||
const result = await readJsonFile<TestType>(mockFilePath);
|
||||
|
||||
expect(result).toEqual(typedJson);
|
||||
expect(result.id).toBe(1);
|
||||
expect(result.name).toBe('test');
|
||||
});
|
||||
});
|
||||
});
|
|
@ -9,12 +9,25 @@ import type { AxiosInstance, AxiosProxyConfig, AxiosError } from 'axios';
|
|||
* @param options.error - The Axios error object.
|
||||
* @returns The log message.
|
||||
*/
|
||||
export const logAxiosError = ({ message, error }: { message: string; error: AxiosError }) => {
|
||||
export const logAxiosError = ({
|
||||
message,
|
||||
error,
|
||||
}: {
|
||||
message: string;
|
||||
error: AxiosError | Error | unknown;
|
||||
}) => {
|
||||
let logMessage = message;
|
||||
try {
|
||||
const stack = error.stack || 'No stack trace available';
|
||||
const stack =
|
||||
error != null
|
||||
? (error as Error | AxiosError)?.stack || 'No stack trace available'
|
||||
: 'No stack trace available';
|
||||
const errorMessage =
|
||||
error != null
|
||||
? (error as Error | AxiosError)?.message || 'No error message available'
|
||||
: 'No error message available';
|
||||
|
||||
if (error.response?.status) {
|
||||
if (axios.isAxiosError(error) && error.response && error.response?.status) {
|
||||
const { status, headers, data } = error.response;
|
||||
logMessage = `${message} The server responded with status ${status}: ${error.message}`;
|
||||
logger.error(logMessage, {
|
||||
|
@ -23,18 +36,18 @@ export const logAxiosError = ({ message, error }: { message: string; error: Axio
|
|||
data,
|
||||
stack,
|
||||
});
|
||||
} else if (error.request) {
|
||||
} else if (axios.isAxiosError(error) && error.request) {
|
||||
const { method, url } = error.config || {};
|
||||
logMessage = `${message} No response received for ${method ? method.toUpperCase() : ''} ${url || ''}: ${error.message}`;
|
||||
logger.error(logMessage, {
|
||||
requestInfo: { method, url },
|
||||
stack,
|
||||
});
|
||||
} else if (error?.message?.includes("Cannot read properties of undefined (reading 'status')")) {
|
||||
logMessage = `${message} It appears the request timed out or was unsuccessful: ${error.message}`;
|
||||
} else if (errorMessage?.includes("Cannot read properties of undefined (reading 'status')")) {
|
||||
logMessage = `${message} It appears the request timed out or was unsuccessful: ${errorMessage}`;
|
||||
logger.error(logMessage, { stack });
|
||||
} else {
|
||||
logMessage = `${message} An error occurred while setting up the request: ${error.message}`;
|
||||
logMessage = `${message} An error occurred while setting up the request: ${errorMessage}`;
|
||||
logger.error(logMessage, { stack });
|
||||
}
|
||||
} catch (err: unknown) {
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
import path from 'path';
|
||||
import crypto from 'node:crypto';
|
||||
import { createReadStream } from 'fs';
|
||||
import { readFile, stat } from 'fs/promises';
|
||||
|
||||
/**
|
||||
* Sanitize a filename by removing any directory components, replacing non-alphanumeric characters
|
||||
|
@ -31,3 +33,122 @@ export function sanitizeFilename(inputName: string): string {
|
|||
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
 * Tuning knobs accepted by the file-reading helpers in this module.
 * Every field is optional.
 */
export interface ReadFileOptions {
  /** Character encoding applied when the file is read as text. */
  encoding?: BufferEncoding;
  /**
   * Size limit in bytes for a single direct read. Files strictly larger than
   * this are read through a stream instead. Default: 10MB.
   */
  streamThreshold?: number;
  /** Chunk size in bytes used by the stream. Default: 64KB. */
  highWaterMark?: number;
  /**
   * Size of the file in bytes when the caller already knows it (e.g. from multer).
   * When supplied, the extra stat() call is skipped.
   */
  fileSize?: number;
}
|
||||
|
||||
/**
 * What the file-reading helpers resolve to: the data that was read plus a size.
 */
export interface ReadFileResult<T> {
  /** The file contents, in the representation chosen by the helper (`T`). */
  content: T;
  /** File size in bytes, as supplied by the caller or reported by stat(). */
  bytes: number;
}
|
||||
|
||||
/**
 * Loads a file's text content asynchronously.
 *
 * The file size decides the strategy: anything strictly larger than
 * `streamThreshold` is pulled through a read stream in `highWaterMark`-sized
 * chunks and concatenated; everything else is fetched with one `readFile` call.
 *
 * @param filePath - Path to the file to read
 * @param options - Encoding, thresholds and an optional pre-known file size
 * @returns Promise resolving to the decoded text and the file size in bytes
 *   (the caller-supplied `fileSize` when given, otherwise the size from `stat`)
 * @throws Error if the file cannot be stat'ed or read
 */
export async function readFileAsString(
  filePath: string,
  options: ReadFileOptions = {},
): Promise<ReadFileResult<string>> {
  const defaultStreamThreshold = 10 * 1024 * 1024; // 10MB
  const defaultHighWaterMark = 64 * 1024; // 64KB

  const {
    encoding = 'utf8',
    streamThreshold = defaultStreamThreshold,
    highWaterMark = defaultHighWaterMark,
    fileSize,
  } = options;

  // Only hit the filesystem for the size when the caller did not provide one.
  const bytes = fileSize ?? (await stat(filePath)).size;

  // Small enough: a single direct read.
  if (!(bytes > streamThreshold)) {
    const content = await readFile(filePath, encoding);
    return { content, bytes };
  }

  // Large file: consume it chunk by chunk. Because an encoding is set,
  // the stream yields already-decoded strings.
  const reader = createReadStream(filePath, { encoding, highWaterMark });
  let text = '';
  for await (const piece of reader) {
    text += piece as string;
  }

  return { content: text, bytes };
}
|
||||
|
||||
/**
 * Loads a file's raw bytes asynchronously.
 *
 * Files strictly larger than `streamThreshold` are read through a stream in
 * `highWaterMark`-sized chunks and concatenated into one Buffer; smaller files
 * are fetched with a single `readFile` call.
 *
 * @param filePath - Path to the file to read
 * @param options - Thresholds and an optional pre-known file size
 * @returns Promise resolving to the file bytes and the file size in bytes
 *   (the caller-supplied `fileSize` when given, otherwise the size from `stat`)
 * @throws Error if the file cannot be stat'ed or read
 */
export async function readFileAsBuffer(
  filePath: string,
  options: Omit<ReadFileOptions, 'encoding'> = {},
): Promise<ReadFileResult<Buffer>> {
  const defaultStreamThreshold = 10 * 1024 * 1024; // 10MB
  const defaultHighWaterMark = 64 * 1024; // 64KB

  const {
    streamThreshold = defaultStreamThreshold,
    highWaterMark = defaultHighWaterMark,
    fileSize,
  } = options;

  // Only hit the filesystem for the size when the caller did not provide one.
  const bytes = fileSize ?? (await stat(filePath)).size;

  // Small enough: a single direct read.
  if (!(bytes > streamThreshold)) {
    const content = await readFile(filePath);
    return { content, bytes };
  }

  // Large file: gather the raw chunks, then stitch them together once.
  const reader = createReadStream(filePath, { highWaterMark });
  const pieces: Buffer[] = [];
  for await (const piece of reader) {
    pieces.push(piece as Buffer);
  }

  return { content: Buffer.concat(pieces), bytes };
}
|
||||
|
||||
/**
 * Reads a file asynchronously and parses its contents as JSON.
 *
 * The file is always decoded as UTF-8. A leading byte order mark (U+FEFF), which
 * some editors prepend to UTF-8 files, is removed before parsing because
 * `JSON.parse` rejects it as an unexpected token.
 *
 * The parsed value is not validated at runtime: `T` is an assertion made by the
 * caller about the shape of the file's contents.
 *
 * @param filePath - Path to the JSON file to read
 * @param options - Options for reading the file (encoding is fixed to UTF-8)
 * @returns Promise resolving to the parsed JSON value, typed as `T`
 * @throws Error if the file cannot be read, or SyntaxError if it is not valid JSON
 */
export async function readJsonFile<T = unknown>(
  filePath: string,
  options: Omit<ReadFileOptions, 'encoding'> = {},
): Promise<T> {
  const { content } = await readFileAsString(filePath, { ...options, encoding: 'utf8' });
  // UTF-8 decoding keeps a BOM as U+FEFF at index 0; drop it so parsing succeeds.
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
  return JSON.parse(text) as T;
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import axios from 'axios';
|
||||
import { readFileAsString } from './files';
|
||||
import { loadServiceKey } from './key';
|
||||
|
||||
jest.mock('fs');
|
||||
|
@ -11,6 +11,10 @@ jest.mock('@librechat/data-schemas', () => ({
|
|||
},
|
||||
}));
|
||||
|
||||
jest.mock('./files', () => ({
|
||||
readFileAsString: jest.fn(),
|
||||
}));
|
||||
|
||||
describe('loadServiceKey', () => {
|
||||
const mockServiceKey = {
|
||||
type: 'service_account',
|
||||
|
@ -49,10 +53,13 @@ describe('loadServiceKey', () => {
|
|||
|
||||
it('should load from file path', async () => {
|
||||
const filePath = '/path/to/service-key.json';
|
||||
(fs.readFileSync as jest.Mock).mockReturnValue(JSON.stringify(mockServiceKey));
|
||||
(readFileAsString as jest.Mock).mockResolvedValue({
|
||||
content: JSON.stringify(mockServiceKey),
|
||||
bytes: JSON.stringify(mockServiceKey).length,
|
||||
});
|
||||
|
||||
const result = await loadServiceKey(filePath);
|
||||
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve(filePath), 'utf8');
|
||||
expect(readFileAsString).toHaveBeenCalledWith(path.resolve(filePath));
|
||||
expect(result).toEqual(mockServiceKey);
|
||||
});
|
||||
|
||||
|
@ -73,9 +80,7 @@ describe('loadServiceKey', () => {
|
|||
|
||||
it('should handle file read errors', async () => {
|
||||
const filePath = '/path/to/nonexistent.json';
|
||||
(fs.readFileSync as jest.Mock).mockImplementation(() => {
|
||||
throw new Error('File not found');
|
||||
});
|
||||
(readFileAsString as jest.Mock).mockRejectedValue(new Error('File not found'));
|
||||
|
||||
const result = await loadServiceKey(filePath);
|
||||
expect(result).toBeNull();
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import axios from 'axios';
|
||||
import { logger } from '@librechat/data-schemas';
|
||||
import { readFileAsString } from './files';
|
||||
|
||||
export interface GoogleServiceKey {
|
||||
type?: string;
|
||||
|
@ -63,7 +63,7 @@ export async function loadServiceKey(keyPath: string): Promise<GoogleServiceKey
|
|||
// It's a file path
|
||||
try {
|
||||
const absolutePath = path.isAbsolute(keyPath) ? keyPath : path.resolve(keyPath);
|
||||
const fileContent = fs.readFileSync(absolutePath, 'utf8');
|
||||
const { content: fileContent } = await readFileAsString(absolutePath);
|
||||
serviceKey = JSON.parse(fileContent);
|
||||
} catch (error) {
|
||||
logger.error(`Failed to load service key from file: ${keyPath}`, error);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue