Mirror of https://github.com/danny-avila/LibreChat.git, synced 2025-12-16 16:30:15 +01:00
* 🪶 feat: Add Support for Uploading Plaintext Files
  - feat: delineate between OCR and text handling in fileConfig field of config file - also adds support for passing in mimetypes as just plain file extensions
  - feat: add showLabel bool to support future synthetic component DynamicDropdownInput
  - feat: add new combination dropdown-input component in params panel to support file type token limits
  - refactor: move hovercard to side to align with other hovercards
  - chore: clean up autogenerated comments
  - feat: add delineation to file upload path between text and ocr configured filetypes
  - feat: add token limit checks during file upload
  - refactor: move textParsing out of ocrEnabled logic
  - refactor: clean up types for filetype config
  - refactor: finish decoupling DynamicDropdownInput from fileTokenLimits
  - fix: move image token cost function into file to fix circular dependency causing unittest to fail and remove unused var for linter
  - chore: remove out of scope code following review
  - refactor: make fileTokenLimit conform to existing styles
  - chore: remove unused localization string
  - chore: undo changes to DynamicInput and other strays
  - feat: add fileTokenLimit to all provider config panels
  - fix: move textParsing back into ocr tool_resource block for now so that it doesn't interfere with other upload types

* 📤 feat: Add RAG API Endpoint Support for Text Parsing (#8849)
  - feat: implement RAG API integration for text parsing with fallback to native parsing
  - chore: remove TODO now that placeholder and fallback are implemented

* ✈️ refactor: Migrate Text Parsing to TS (#8892)
  - refactor: move generateShortLivedToken to packages/api
  - refactor: move textParsing logic into packages/api
  - refactor: reduce nesting and DRY up code with createTextFile
  - fix: add proper source handling
  - fix: mock new parseText and parseTextNative functions in jest file
  - ci: add test coverage for textParser

* 💬 feat: Add Audio File Support to Upload as Text (#8893)
  - feat: add STT support for Upload as Text
  - refactor: move processAudioFile to packages/api
  - refactor: move textParsing from utils to files
  - fix: remove audio/mp3 from unsupported mimetypes test since it is now supported

* ✂️ feat: Configurable File Token Limits and Truncation (#8911)
  - feat: add configurable fileTokenLimit default value
  - fix: add stt to fileConfig merge logic
  - fix: add fileTokenLimit to mergeFileConfig logic so the configurable value is actually respected from yaml
  - feat: add token limiting to parsed text files
  - fix: add extraction logic and update tests so fileTokenLimit isn't sent to LLM providers
  - fix: address comments
  - refactor: rename textTokenLimiter.ts to text.ts
  - chore: update form-data package to address CVE-2025-7783 and update package-lock
  - feat: use default supported mime types for ocr on frontend file validation
  - fix: should be using logger.debug not console.debug
  - fix: mock existsSync in text.spec.ts
  - fix: mock logger rather than every one of its function calls
  - fix: reorganize imports and streamline file upload processing logic
  - refactor: update createTextFile function to use destructured parameters and improve readability
  - chore: update file validation to use EToolResources for improved type safety
  - chore: update import path for types in audio processing module
  - fix: update file configuration access and replace console.debug with logger.debug for improved logging

Co-authored-by: Dustin Healy <dustinhealy1@gmail.com>
Co-authored-by: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
160 lines
4.2 KiB
JavaScript
const axios = require('axios');
const { logger } = require('@librechat/data-schemas');
const { isEnabled, generateShortLivedToken } = require('@librechat/api');

// Guidance appended to every generated context prompt.
const footer = `Use the context as your learned knowledge to better answer the user.

In your response, remember to follow these guidelines:
- If you don't know the answer, simply say that you don't know.
- If you are unsure how to answer, ask for clarification.
- Avoid mentioning that you obtained the information from the context.
`;
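
/**
 * Creates handlers for injecting RAG-retrieved file context into the conversation prompt.
 * Returns `undefined` when `RAG_API_URL` is not configured, so callers should treat the
 * result as optional. Parameter notes below are inferred from how the function body uses
 * its arguments rather than from upstream documentation.
 *
 * @param {Object} req - Request object; only `req.user.id` is used, to mint a short-lived token for the RAG API.
 * @param {string} userMessageContent - The user's message text, used as the semantic search query.
 * @returns {{ processFile: (file: Object) => Promise<void>, createContext: () => Promise<string> } | undefined}
 */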
function createContextHandlers(req, userMessageContent) {
  if (!process.env.RAG_API_URL) {
    return;
  }

  const queryPromises = [];
  const processedFiles = [];
  const processedIds = new Set();
  const jwtToken = generateShortLivedToken(req.user.id);
  const useFullContext = isEnabled(process.env.RAG_USE_FULL_CONTEXT);

  const query = async (file) => {
    // Full-context mode: fetch the entire parsed document rather than running a semantic search.
    if (useFullContext) {
      return axios.get(`${process.env.RAG_API_URL}/documents/${file.file_id}/context`, {
        headers: {
          Authorization: `Bearer ${jwtToken}`,
        },
      });
    }

    // Default mode: semantic search over the file's embeddings, retrieving the top k chunks.
    return axios.post(
      `${process.env.RAG_API_URL}/query`,
      {
        file_id: file.file_id,
        query: userMessageContent,
        k: 4,
      },
      {
        headers: {
          Authorization: `Bearer ${jwtToken}`,
          'Content-Type': 'application/json',
        },
      },
    );
  };

  // Queues a RAG request for each embedded file, deduplicating by file_id.
  const processFile = async (file) => {
    if (file.embedded && !processedIds.has(file.file_id)) {
      try {
        const promise = query(file);
        queryPromises.push(promise);
        processedFiles.push(file);
        processedIds.add(file.file_id);
      } catch (error) {
        logger.error(`Error processing file ${file.filename}:`, error);
      }
    }
  };
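
  /*
   * createContext resolves the queued requests and renders them into the final prompt.
   * The response shapes assumed here are inferred from how the data is consumed below,
   * not from RAG API documentation:
   * - full-context mode: `response.data` is treated as the document's raw text;
   * - query mode: `response.data` is treated as an array of results, where
   *   `item[0].page_content` holds the text of a retrieved chunk.
   */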
  const createContext = async () => {
    try {
      if (!queryPromises.length || !processedFiles.length) {
        return '';
      }

      const oneFile = processedFiles.length === 1;
      const header = `The user has attached ${oneFile ? 'a' : processedFiles.length} file${
        !oneFile ? 's' : ''
      } to the conversation:`;

      // Lists the attached files; the <files> wrapper is omitted when only one file is attached.
      const files = `${
        oneFile
          ? ''
          : `
      <files>`
      }${processedFiles
        .map(
          (file) => `
          <file>
            <filename>${file.filename}</filename>
            <type>${file.type}</type>
          </file>`,
        )
        .join('')}${
        oneFile
          ? ''
          : `
      </files>`
      }`;

      const resolvedQueries = await Promise.all(queryPromises);

      const context =
        resolvedQueries.length === 0
          ? '\n\tThe semantic search did not return any results.'
          : resolvedQueries
              .map((queryResult, index) => {
                const file = processedFiles[index];
                let contextItems = queryResult.data;

                const generateContext = (currentContext) =>
                  `
          <file>
            <filename>${file.filename}</filename>
            <context>${currentContext}
            </context>
          </file>`;

                if (useFullContext) {
                  return generateContext(`\n${contextItems}`);
                }

                contextItems = queryResult.data
                  .map((item) => {
                    const pageContent = item[0].page_content;
                    return `
            <contextItem>
              <![CDATA[${pageContent?.trim()}]]>
            </contextItem>`;
                  })
                  .join('');

                return generateContext(contextItems);
              })
              .join('');

      if (useFullContext) {
        const prompt = `${header}
        ${context}
        ${footer}`;

        return prompt;
      }

      const prompt = `${header}
        ${files}

        A semantic search was executed with the user's message as the query, retrieving the following context inside <context></context> XML tags.

        <context>${context}
        </context>

        ${footer}`;

      return prompt;
    } catch (error) {
      logger.error('Error creating context:', error);
      throw error;
    }
  };

  return {
    processFile,
    createContext,
  };
}

module.exports = createContextHandlers;
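
/*
 * Illustrative usage sketch (not part of the original module). The calling function,
 * the `attachments` array, and the exact file shape ({ file_id, filename, type, embedded })
 * are assumptions based on how this module consumes its inputs.
 *
 * const createContextHandlers = require('./createContextHandlers');
 *
 * async function buildAugmentedPrompt(req, userMessage, attachments) {
 *   const handlers = createContextHandlers(req, userMessage);
 *   if (!handlers) {
 *     return userMessage; // RAG_API_URL is not set; skip context injection
 *   }
 *
 *   // Queue one RAG request per embedded attachment (deduplicated by file_id).
 *   for (const file of attachments) {
 *     await handlers.processFile(file);
 *   }
 *
 *   // Resolves the queued requests and renders the <files>/<context> prompt block;
 *   // returns '' when nothing was queued.
 *   const context = await handlers.createContext();
 *   return context ? `${context}\n\n${userMessage}` : userMessage;
 * }
 */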