LibreChat/api/server/utils/handleText.js
Danny Avila ded3cd8876
🔍 feat: Mistral OCR API / Upload Files as Text (#6274)
* refactor: move `loadAuthValues` to `~/services/Tools/credentials`

* feat: add createAxiosInstance function to configure axios with proxy support

* WIP: First pass mistral ocr

* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic

* refactor: improve document formatting in encodeAndFormat function

* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)

* fix: update getFiles call to include files with `text` property as well

* refactor: move file handling to `initializeAgentOptions`

* refactor: enhance addImageURLs method to handle OCR text and improve message formatting

* refactor: update message formatting to handle OCR text in various content types

* refactor: remove unused resendFiles property from compactAgentsSchema

* fix: add error handling for Mistral OCR document upload and logging

* refactor: integrate OCR capability into file upload options and configuration

* refactor: skip processing for text source files in delete request, as they are directly tied to database

* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling

* fix: source icon styling

* wip: first pass, frontend file context agent resources

* refactor: add hover card with contextual information for File Context (OCR) in FileContext component

* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization

* feat: implement OCR config; fix: agent resource deletion for ocr files

* feat: enhance agent initialization by adding OCR capability check in resource priming

* ci: fix `~/config` module mock

* ci: add OCR property expectation in AppService tests

* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed

* ci: add unit test to ensure environment variable references are not parsed in OCR config

* refactor: disable base64 image inclusion in OCR request

* refactor: enhance OCR configuration handling by validating environment variables and providing defaults

* refactor: use file stream from disk for mistral ocr api
2025-03-10 17:23:46 -04:00

272 lines
7.4 KiB
JavaScript

const path = require('path');
const crypto = require('crypto');
const {
Capabilities,
EModelEndpoint,
isAgentsEndpoint,
AgentCapabilities,
isAssistantsEndpoint,
defaultRetrievalModels,
defaultAssistantsVersion,
} = require('librechat-data-provider');
const { Providers } = require('@librechat/agents');
const partialRight = require('lodash/partialRight');
const { sendMessage } = require('./streamResponse');
/** Helper function to escape special characters in regex
* @param {string} string - The string to escape.
* @returns {string} The escaped string.
*/
function escapeRegExp(string) {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
const addSpaceIfNeeded = (text) => (text.length > 0 && !text.endsWith(' ') ? text + ' ' : text);
const base = { message: true, initial: true };
const createOnProgress = (
{ generation = '', onProgress: _onProgress } = {
generation: '',
onProgress: null,
},
) => {
let i = 0;
let tokens = addSpaceIfNeeded(generation);
const basePayload = Object.assign({}, base, { text: tokens || '' });
const progressCallback = (chunk, { res, ...rest }) => {
basePayload.text = basePayload.text + chunk;
const payload = Object.assign({}, basePayload, rest);
sendMessage(res, payload);
if (_onProgress) {
_onProgress(payload);
}
if (i === 0) {
basePayload.initial = false;
}
i++;
};
const sendIntermediateMessage = (res, payload, extraTokens = '') => {
basePayload.text = basePayload.text + extraTokens;
const message = Object.assign({}, basePayload, payload);
sendMessage(res, message);
if (i === 0) {
basePayload.initial = false;
}
i++;
};
const onProgress = (opts) => {
return partialRight(progressCallback, opts);
};
const getPartialText = () => {
return basePayload.text;
};
return { onProgress, getPartialText, sendIntermediateMessage };
};
const handleText = async (response) => {
let { text } = response;
response.text = text;
return text;
};
const isObject = (item) => item && typeof item === 'object' && !Array.isArray(item);
const getString = (input) => (isObject(input) ? JSON.stringify(input) : input);
function formatSteps(steps) {
let output = '';
for (let i = 0; i < steps.length; i++) {
const step = steps[i];
const actionInput = getString(step.action.toolInput);
const observation = step.observation;
if (actionInput === 'N/A' || observation?.trim()?.length === 0) {
continue;
}
output += `Input: ${actionInput}\nOutput: ${getString(observation)}`;
if (steps.length > 1 && i !== steps.length - 1) {
output += '\n---\n';
}
}
return output;
}
function formatAction(action) {
const formattedAction = {
plugin: action.tool,
input: getString(action.toolInput),
thought: action.log.includes('Thought: ')
? action.log.split('\n')[0].replace('Thought: ', '')
: action.log.split('\n')[0],
};
formattedAction.thought = getString(formattedAction.thought);
if (action.tool.toLowerCase() === 'self-reflection' || formattedAction.plugin === 'N/A') {
formattedAction.inputStr = `{\n\tthought: ${formattedAction.input}${
!formattedAction.thought.includes(formattedAction.input)
? ' - ' + formattedAction.thought
: ''
}\n}`;
formattedAction.inputStr = formattedAction.inputStr.replace('N/A - ', '');
} else {
const hasThought = formattedAction.thought.length > 0;
const thought = hasThought ? `\n\tthought: ${formattedAction.thought}` : '';
formattedAction.inputStr = `{\n\tplugin: ${formattedAction.plugin}\n\tinput: ${formattedAction.input}\n${thought}}`;
}
return formattedAction;
}
/**
* Checks if the given value is truthy by being either the boolean `true` or a string
* that case-insensitively matches 'true'.
*
* @function
* @param {string|boolean|null|undefined} value - The value to check.
* @returns {boolean} Returns `true` if the value is the boolean `true` or a case-insensitive
* match for the string 'true', otherwise returns `false`.
* @example
*
* isEnabled("True"); // returns true
* isEnabled("TRUE"); // returns true
* isEnabled(true); // returns true
* isEnabled("false"); // returns false
* isEnabled(false); // returns false
* isEnabled(null); // returns false
* isEnabled(); // returns false
*/
function isEnabled(value) {
if (typeof value === 'boolean') {
return value;
}
if (typeof value === 'string') {
return value.toLowerCase().trim() === 'true';
}
return false;
}
/**
* Checks if the provided value is 'user_provided'.
*
* @param {string} value - The value to check.
* @returns {boolean} - Returns true if the value is 'user_provided', otherwise false.
*/
const isUserProvided = (value) => value === 'user_provided';
/**
* Generate the configuration for a given key and base URL.
* @param {string} key
* @param {string} [baseURL]
* @param {string} [endpoint]
* @returns {boolean | { userProvide: boolean, userProvideURL?: boolean }}
*/
function generateConfig(key, baseURL, endpoint) {
if (!key) {
return false;
}
/** @type {{ userProvide: boolean, userProvideURL?: boolean }} */
const config = { userProvide: isUserProvided(key) };
if (baseURL) {
config.userProvideURL = isUserProvided(baseURL);
}
const assistants = isAssistantsEndpoint(endpoint);
const agents = isAgentsEndpoint(endpoint);
if (assistants) {
config.retrievalModels = defaultRetrievalModels;
config.capabilities = [
Capabilities.code_interpreter,
Capabilities.image_vision,
Capabilities.retrieval,
Capabilities.actions,
Capabilities.tools,
];
}
if (agents) {
config.capabilities = [
AgentCapabilities.execute_code,
AgentCapabilities.file_search,
AgentCapabilities.artifacts,
AgentCapabilities.actions,
AgentCapabilities.tools,
AgentCapabilities.ocr,
];
}
if (assistants && endpoint === EModelEndpoint.azureAssistants) {
config.version = defaultAssistantsVersion.azureAssistants;
} else if (assistants) {
config.version = defaultAssistantsVersion.assistants;
}
return config;
}
/**
* Normalize the endpoint name to system-expected value.
* @param {string} name
* @returns {string}
*/
function normalizeEndpointName(name = '') {
return name.toLowerCase() === Providers.OLLAMA ? Providers.OLLAMA : name;
}
/**
* Sanitize a filename by removing any directory components, replacing non-alphanumeric characters
* @param {string} inputName
* @returns {string}
*/
function sanitizeFilename(inputName) {
// Remove any directory components
let name = path.basename(inputName);
// Replace any non-alphanumeric characters except for '.' and '-'
name = name.replace(/[^a-zA-Z0-9.-]/g, '_');
// Ensure the name doesn't start with a dot (hidden file in Unix-like systems)
if (name.startsWith('.') || name === '') {
name = '_' + name;
}
// Limit the length of the filename
const MAX_LENGTH = 255;
if (name.length > MAX_LENGTH) {
const ext = path.extname(name);
const nameWithoutExt = path.basename(name, ext);
name =
nameWithoutExt.slice(0, MAX_LENGTH - ext.length - 7) +
'-' +
crypto.randomBytes(3).toString('hex') +
ext;
}
return name;
}
module.exports = {
isEnabled,
handleText,
formatSteps,
escapeRegExp,
formatAction,
isUserProvided,
generateConfig,
addSpaceIfNeeded,
createOnProgress,
sanitizeFilename,
normalizeEndpointName,
};