LibreChat/packages/api/src/files/text.ts
rossbg 959984f959
⏱️ fix: Increase RAG API Text Parsing Timeout (#10562)
* fix: increase RAG API text parsing timeout for large files

* ci: Update text.spec.ts

---------

Co-authored-by: Rosen Simov <rosen.simov@endurosat.com>
Co-authored-by: Danny Avila <danny@librechat.ai>
2025-11-25 14:54:53 -05:00

112 lines
3.2 KiB
TypeScript

import axios from 'axios';
import FormData from 'form-data';
import { createReadStream } from 'fs';
import { logger } from '@librechat/data-schemas';
import { FileSources } from 'librechat-data-provider';
import type { ServerRequest } from '~/types';
import { logAxiosError, readFileAsString } from '~/utils';
import { generateShortLivedToken } from '~/crypto/jwt';
/**
* Attempts to parse text using RAG API, falls back to native text parsing
* @param params - The parameters object
* @param params.req - The Express request object
* @param params.file - The uploaded file
* @param params.file_id - The file ID
* @returns
*/
export async function parseText({
req,
file,
file_id,
}: {
req: ServerRequest;
file: Express.Multer.File;
file_id: string;
}): Promise<{ text: string; bytes: number; source: string }> {
if (!process.env.RAG_API_URL) {
logger.debug('[parseText] RAG_API_URL not defined, falling back to native text parsing');
return parseTextNative(file);
}
const userId = req.user?.id;
if (!userId) {
logger.debug('[parseText] No user ID provided, falling back to native text parsing');
return parseTextNative(file);
}
try {
const healthResponse = await axios.get(`${process.env.RAG_API_URL}/health`, {
timeout: 10000,
});
if (healthResponse?.statusText !== 'OK' && healthResponse?.status !== 200) {
logger.debug('[parseText] RAG API health check failed, falling back to native parsing');
return parseTextNative(file);
}
} catch (healthError) {
logAxiosError({
message: '[parseText] RAG API health check failed, falling back to native parsing:',
error: healthError,
});
return parseTextNative(file);
}
try {
const jwtToken = generateShortLivedToken(userId);
const formData = new FormData();
formData.append('file_id', file_id);
formData.append('file', createReadStream(file.path));
const formHeaders = formData.getHeaders();
const response = await axios.post(`${process.env.RAG_API_URL}/text`, formData, {
headers: {
Authorization: `Bearer ${jwtToken}`,
accept: 'application/json',
...formHeaders,
},
timeout: 300000,
});
const responseData = response.data;
logger.debug(`[parseText] RAG API completed successfully (${response.status})`);
if (!('text' in responseData)) {
throw new Error('RAG API did not return parsed text');
}
return {
text: responseData.text,
bytes: Buffer.byteLength(responseData.text, 'utf8'),
source: FileSources.text,
};
} catch (error) {
logAxiosError({
message: '[parseText] RAG API text parsing failed, falling back to native parsing',
error,
});
return parseTextNative(file);
}
}
/**
* Native JavaScript text parsing fallback
* Simple text file reading - complex formats handled by RAG API
* @param file - The uploaded file
* @returns
*/
export async function parseTextNative(file: Express.Multer.File): Promise<{
text: string;
bytes: number;
source: string;
}> {
const { content: text, bytes } = await readFileAsString(file.path, {
fileSize: file.size,
});
return {
text,
bytes,
source: FileSources.text,
};
}