Mirror of https://github.com/danny-avila/LibreChat.git, synced 2025-12-17 17:00:15 +01:00
🔍 feat: Mistral OCR API / Upload Files as Text (#6274)
* refactor: move `loadAuthValues` to `~/services/Tools/credentials`
* feat: add createAxiosInstance function to configure axios with proxy support
* WIP: first pass, Mistral OCR
* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic
* refactor: improve document formatting in encodeAndFormat function
* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)
* fix: update getFiles call to also include files with a `text` property
* refactor: move file handling to `initializeAgentOptions`
* refactor: enhance addImageURLs method to handle OCR text and improve message formatting
* refactor: update message formatting to handle OCR text in various content types
* refactor: remove unused resendFiles property from compactAgentsSchema
* fix: add error handling and logging for Mistral OCR document upload
* refactor: integrate OCR capability into file upload options and configuration
* refactor: skip processing for text-source files in delete requests, as they are tied directly to the database
* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling
* fix: source icon styling
* wip: first pass, frontend file context agent resources
* refactor: add hover card with contextual information for File Context (OCR) in FileContext component
* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization
* feat: implement OCR config; fix: agent resource deletion for OCR files
* feat: enhance agent initialization by adding OCR capability check in resource priming
* ci: fix `~/config` module mock
* ci: add OCR property expectation in AppService tests
* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed
* ci: add unit test to ensure environment variable references are not parsed in OCR config
* refactor: disable base64 image inclusion in OCR request
* refactor: enhance OCR configuration handling by validating environment variables and providing defaults
* refactor: use file stream from disk for Mistral OCR API
parent 9db00edfc4 / commit ded3cd8876
48 changed files with 1621 additions and 131 deletions
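Before the diff, a quick orientation: the new `loadOCRConfig` reads an `ocr` block from the custom config and stores it untouched on `app.locals.ocr`. The sketch below is illustrative only; it mirrors the shapes exercised by the new AppService and MistralOCR tests, and the environment variable names are placeholders. Note that `${...}` references are intentionally left unparsed at load time and are only resolved when an OCR upload actually runs.

// Hedged sketch of an ocr config block as loaded onto app.locals.ocr (values are examples).
const ocr = {
  strategy: 'mistral_ocr',            // the only OCR strategy wired up in this commit
  mistralModel: 'mistral-ocr-latest', // empty values fall back to this default at upload time
  apiKey: '${OCR_API_KEY}',           // stored as a literal reference, resolved via loadAuthValues later
  baseURL: '${OCR_BASEURL}',          // optional; defaults to https://api.mistral.ai/v1
};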
@@ -1121,9 +1121,13 @@ class BaseClient {
       return message;
     }

-    const files = await getFiles({
+    const files = await getFiles(
+      {
         file_id: { $in: fileIds },
-    });
+      },
+      {},
+      {},
+    );

     await this.addImageURLs(message, files, this.visionMode);
@ -21,6 +21,7 @@ const {
|
|||
} = require('../');
|
||||
const { primeFiles: primeCodeFiles } = require('~/server/services/Files/Code/process');
|
||||
const { createFileSearchTool, primeFiles: primeSearchFiles } = require('./fileSearch');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { createMCPTool } = require('~/server/services/MCP');
|
||||
const { loadSpecs } = require('./loadSpecs');
|
||||
const { logger } = require('~/config');
|
||||
|
|
@ -90,45 +91,6 @@ const validateTools = async (user, tools = []) => {
|
|||
}
|
||||
};
|
||||
|
||||
const loadAuthValues = async ({ userId, authFields, throwError = true }) => {
|
||||
let authValues = {};
|
||||
|
||||
/**
|
||||
* Finds the first non-empty value for the given authentication field, supporting alternate fields.
|
||||
* @param {string[]} fields Array of strings representing the authentication fields. Supports alternate fields delimited by "||".
|
||||
* @returns {Promise<{ authField: string, authValue: string} | null>} An object containing the authentication field and value, or null if not found.
|
||||
*/
|
||||
const findAuthValue = async (fields) => {
|
||||
for (const field of fields) {
|
||||
let value = process.env[field];
|
||||
if (value) {
|
||||
return { authField: field, authValue: value };
|
||||
}
|
||||
try {
|
||||
value = await getUserPluginAuthValue(userId, field, throwError);
|
||||
} catch (err) {
|
||||
if (field === fields[fields.length - 1] && !value) {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
if (value) {
|
||||
return { authField: field, authValue: value };
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
for (let authField of authFields) {
|
||||
const fields = authField.split('||');
|
||||
const result = await findAuthValue(fields);
|
||||
if (result) {
|
||||
authValues[result.authField] = result.authValue;
|
||||
}
|
||||
}
|
||||
|
||||
return authValues;
|
||||
};
|
||||
|
||||
/** @typedef {typeof import('@langchain/core/tools').Tool} ToolConstructor */
|
||||
/** @typedef {import('@langchain/core/tools').Tool} Tool */
|
||||
|
||||
|
|
@ -348,7 +310,6 @@ const loadTools = async ({
|
|||
|
||||
module.exports = {
|
||||
loadToolWithAuth,
|
||||
loadAuthValues,
|
||||
validateTools,
|
||||
loadTools,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
const { validateTools, loadTools, loadAuthValues } = require('./handleTools');
|
||||
const { validateTools, loadTools } = require('./handleTools');
|
||||
const handleOpenAIErrors = require('./handleOpenAIErrors');
|
||||
|
||||
module.exports = {
|
||||
handleOpenAIErrors,
|
||||
loadAuthValues,
|
||||
validateTools,
|
||||
loadTools,
|
||||
};
|
||||
|
|
|
|||
|
|
@@ -1,3 +1,4 @@
+const axios = require('axios');
 const { EventSource } = require('eventsource');
 const { Time, CacheKeys } = require('librechat-data-provider');
 const logger = require('./winston');

@@ -47,9 +48,24 @@ const sendEvent = (res, event) => {
   res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`);
 };

+function createAxiosInstance() {
+  const instance = axios.create();
+
+  if (process.env.proxy) {
+    const url = new URL(process.env.proxy);
+    instance.defaults.proxy = {
+      host: url.hostname,
+      protocol: url.protocol.replace(':', ''),
+    };
+  }
+
+  return instance;
+}
+
 module.exports = {
   logger,
   sendEvent,
   getMCPManager,
+  createAxiosInstance,
   getFlowStateManager,
 };
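To make the proxy behavior concrete, here is a small usage sketch (the proxy URL is an example value, not something this commit configures):

// Illustrative usage of the new helper; only host and protocol are copied from process.env.proxy.
const { createAxiosInstance } = require('~/config');

process.env.proxy = 'http://127.0.0.1:8888'; // example value
const instance = createAxiosInstance();
console.log(instance.defaults.proxy); // { host: '127.0.0.1', protocol: 'http' }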
@ -15,19 +15,6 @@ const searchConversation = async (conversationId) => {
|
|||
throw new Error('Error searching conversation');
|
||||
}
|
||||
};
|
||||
/**
|
||||
* Searches for a conversation by conversationId and returns associated file ids.
|
||||
* @param {string} conversationId - The conversation's ID.
|
||||
* @returns {Promise<string[] | null>}
|
||||
*/
|
||||
const getConvoFiles = async (conversationId) => {
|
||||
try {
|
||||
return (await Conversation.findOne({ conversationId }, 'files').lean())?.files ?? [];
|
||||
} catch (error) {
|
||||
logger.error('[getConvoFiles] Error getting conversation files', error);
|
||||
throw new Error('Error getting conversation files');
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves a single conversation for a given user and conversation ID.
|
||||
|
|
@ -73,9 +60,46 @@ const deleteNullOrEmptyConversations = async () => {
|
|||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves files from a conversation that have either embedded=true
|
||||
* or a metadata.fileIdentifier. Simplified and efficient query.
|
||||
*
|
||||
* @param {string} conversationId - The conversation ID
|
||||
* @returns {Promise<MongoFile[]>} - Filtered array of matching file objects
|
||||
*/
|
||||
const getToolFiles = async (conversationId) => {
|
||||
try {
|
||||
const [result] = await Conversation.aggregate([
|
||||
{ $match: { conversationId } },
|
||||
{
|
||||
$project: {
|
||||
files: {
|
||||
$filter: {
|
||||
input: '$files',
|
||||
as: 'file',
|
||||
cond: {
|
||||
$or: [
|
||||
{ $eq: ['$$file.embedded', true] },
|
||||
{ $ifNull: ['$$file.metadata.fileIdentifier', false] },
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
_id: 0,
|
||||
},
|
||||
},
|
||||
]).exec();
|
||||
|
||||
return result?.files || [];
|
||||
} catch (error) {
|
||||
logger.error('[getConvoEmbeddedFiles] Error fetching embedded files:', error);
|
||||
throw new Error('Error fetching embedded files');
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
Conversation,
|
||||
getConvoFiles,
|
||||
getToolFiles,
|
||||
searchConversation,
|
||||
deleteNullOrEmptyConversations,
|
||||
/**
|
||||
|
|
|
|||
|
|
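The call pattern for the new helper, as used later in `initializeAgentOptions` when re-sending conversation files (the conversation ID below is a placeholder), is roughly:

// Hedged sketch: getToolFiles only returns files with embedded=true or a metadata.fileIdentifier.
const { getToolFiles } = require('~/models/Conversation');

async function collectResendFileIds(conversationId) {
  const toolFiles = await getToolFiles(conversationId);
  return toolFiles.map((file) => file.file_id);
}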
@@ -17,11 +17,13 @@ const findFileById = async (file_id, options = {}) => {
  * Retrieves files matching a given filter, sorted by the most recently updated.
  * @param {Object} filter - The filter criteria to apply.
  * @param {Object} [_sortOptions] - Optional sort parameters.
+ * @param {Object|String} [selectFields={ text: 0 }] - Fields to include/exclude in the query results.
+ *   Default excludes the 'text' field.
  * @returns {Promise<Array<IMongoFile>>} A promise that resolves to an array of file documents.
  */
-const getFiles = async (filter, _sortOptions) => {
+const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => {
   const sortOptions = { updatedAt: -1, ..._sortOptions };
-  return await File.find(filter).sort(sortOptions).lean();
+  return await File.find(filter).select(selectFields).sort(sortOptions).lean();
 };

 /**
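The new third parameter matters for OCR text: by default the potentially large `text` field is projected out, and callers that need it (such as `primeResources` and the `BaseClient` change above) pass empty sort and projection objects. A small sketch, with example filter values:

// Illustrative only; the filters below are placeholders.
const { getFiles } = require('~/models/File');

async function getFilesExamples() {
  // Default projection: documents come back without the 'text' field.
  const withoutText = await getFiles({ user: 'user123' });
  // Empty projection ({}): include every field, OCR 'text' included,
  // matching the new getFiles(filter, {}, {}) calls elsewhere in this commit.
  const withText = await getFiles({ file_id: { $in: ['file-1', 'file-2'] } }, {}, {});
  return { withoutText, withText };
}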
@ -10,8 +10,8 @@ const {
|
|||
ChatModelStreamHandler,
|
||||
} = require('@librechat/agents');
|
||||
const { processCodeOutput } = require('~/server/services/Files/Code/process');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { saveBase64Image } = require('~/server/services/Files/process');
|
||||
const { loadAuthValues } = require('~/app/clients/tools/util');
|
||||
const { logger, sendEvent } = require('~/config');
|
||||
|
||||
/** @typedef {import('@librechat/agents').Graph} Graph */
|
||||
|
|
|
|||
|
|
@ -223,14 +223,23 @@ class AgentClient extends BaseClient {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {TMessage} message
|
||||
* @param {Array<MongoFile>} attachments
|
||||
* @returns {Promise<Array<Partial<MongoFile>>>}
|
||||
*/
|
||||
async addImageURLs(message, attachments) {
|
||||
const { files, image_urls } = await encodeAndFormat(
|
||||
const { files, text, image_urls } = await encodeAndFormat(
|
||||
this.options.req,
|
||||
attachments,
|
||||
this.options.agent.provider,
|
||||
VisionModes.agents,
|
||||
);
|
||||
message.image_urls = image_urls.length ? image_urls : undefined;
|
||||
if (text && text.length) {
|
||||
message.ocr = text;
|
||||
}
|
||||
return files;
|
||||
}
|
||||
|
||||
|
|
@ -308,7 +317,21 @@ class AgentClient extends BaseClient {
|
|||
assistantName: this.options?.modelLabel,
|
||||
});
|
||||
|
||||
const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
|
||||
if (message.ocr && i !== orderedMessages.length - 1) {
|
||||
if (typeof formattedMessage.content === 'string') {
|
||||
formattedMessage.content = message.ocr + '\n' + formattedMessage.content;
|
||||
} else {
|
||||
const textPart = formattedMessage.content.find((part) => part.type === 'text');
|
||||
textPart
|
||||
? (textPart.text = message.ocr + '\n' + textPart.text)
|
||||
: formattedMessage.content.unshift({ type: 'text', text: message.ocr });
|
||||
}
|
||||
} else if (message.ocr && i === orderedMessages.length - 1) {
|
||||
systemContent = [systemContent, message.ocr].join('\n');
|
||||
}
|
||||
|
||||
const needsTokenCount =
|
||||
(this.contextStrategy && !orderedMessages[i].tokenCount) || message.ocr;
|
||||
|
||||
/* If tokens were never counted, or, is a Vision request and the message has files, count again */
|
||||
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ const {
|
|||
const { processFileURL, uploadImageBuffer } = require('~/server/services/Files/process');
|
||||
const { processCodeOutput } = require('~/server/services/Files/Code/process');
|
||||
const { createToolCall, getToolCallsByConvo } = require('~/models/ToolCall');
|
||||
const { loadAuthValues, loadTools } = require('~/app/clients/tools/util');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { loadTools } = require('~/app/clients/tools/util');
|
||||
const { checkAccess } = require('~/server/middleware');
|
||||
const { getMessage } = require('~/models/Message');
|
||||
const { logger } = require('~/config');
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ const openAI = require('~/server/services/Endpoints/openAI');
|
|||
const agents = require('~/server/services/Endpoints/agents');
|
||||
const custom = require('~/server/services/Endpoints/custom');
|
||||
const google = require('~/server/services/Endpoints/google');
|
||||
const { getConvoFiles } = require('~/models/Conversation');
|
||||
const { handleError } = require('~/server/utils');
|
||||
|
||||
const buildFunction = {
|
||||
|
|
@ -87,16 +86,8 @@ async function buildEndpointOption(req, res, next) {
|
|||
|
||||
// TODO: use `getModelsConfig` only when necessary
|
||||
const modelsConfig = await getModelsConfig(req);
|
||||
const { resendFiles = true } = req.body.endpointOption;
|
||||
req.body.endpointOption.modelsConfig = modelsConfig;
|
||||
if (isAgents && resendFiles && req.body.conversationId) {
|
||||
const fileIds = await getConvoFiles(req.body.conversationId);
|
||||
const requestFiles = req.body.files ?? [];
|
||||
if (requestFiles.length || fileIds.length) {
|
||||
req.body.endpointOption.attachments = processFiles(requestFiles, fileIds);
|
||||
}
|
||||
} else if (req.body.files) {
|
||||
// hold the promise
|
||||
if (req.body.files && !isAgents) {
|
||||
req.body.endpointOption.attachments = processFiles(req.body.files);
|
||||
}
|
||||
next();
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ const {
|
|||
} = require('~/server/services/Files/process');
|
||||
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
||||
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
|
||||
const { loadAuthValues } = require('~/app/clients/tools/util');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { getAgent } = require('~/models/Agent');
|
||||
const { getFiles } = require('~/models/File');
|
||||
const { logger } = require('~/config');
|
||||
|
|
|
|||
|
|
@ -1,4 +1,9 @@
|
|||
const { FileSources, EModelEndpoint, getConfigDefaults } = require('librechat-data-provider');
|
||||
const {
|
||||
FileSources,
|
||||
EModelEndpoint,
|
||||
loadOCRConfig,
|
||||
getConfigDefaults,
|
||||
} = require('librechat-data-provider');
|
||||
const { checkVariables, checkHealth, checkConfig, checkAzureVariables } = require('./start/checks');
|
||||
const { azureAssistantsDefaults, assistantsConfigSetup } = require('./start/assistants');
|
||||
const { initializeFirebase } = require('./Files/Firebase/initialize');
|
||||
|
|
@ -25,6 +30,7 @@ const AppService = async (app) => {
|
|||
const config = (await loadCustomConfig()) ?? {};
|
||||
const configDefaults = getConfigDefaults();
|
||||
|
||||
const ocr = loadOCRConfig(config.ocr);
|
||||
const filteredTools = config.filteredTools;
|
||||
const includedTools = config.includedTools;
|
||||
const fileStrategy = config.fileStrategy ?? configDefaults.fileStrategy;
|
||||
|
|
@ -57,6 +63,7 @@ const AppService = async (app) => {
|
|||
const interfaceConfig = await loadDefaultInterface(config, configDefaults);
|
||||
|
||||
const defaultLocals = {
|
||||
ocr,
|
||||
paths,
|
||||
fileStrategy,
|
||||
socialLogins,
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ describe('AppService', () => {
|
|||
},
|
||||
},
|
||||
paths: expect.anything(),
|
||||
ocr: expect.anything(),
|
||||
imageOutputType: expect.any(String),
|
||||
fileConfig: undefined,
|
||||
secureImageLinks: undefined,
|
||||
|
|
@ -588,4 +589,33 @@ describe('AppService updating app.locals and issuing warnings', () => {
|
|||
);
|
||||
});
|
||||
});
|
||||
|
||||
it('should not parse environment variable references in OCR config', async () => {
|
||||
// Mock custom configuration with env variable references in OCR config
|
||||
const mockConfig = {
|
||||
ocr: {
|
||||
apiKey: '${OCR_API_KEY_CUSTOM_VAR_NAME}',
|
||||
baseURL: '${OCR_BASEURL_CUSTOM_VAR_NAME}',
|
||||
strategy: 'mistral_ocr',
|
||||
mistralModel: 'mistral-medium',
|
||||
},
|
||||
};
|
||||
|
||||
require('./Config/loadCustomConfig').mockImplementationOnce(() => Promise.resolve(mockConfig));
|
||||
|
||||
// Set actual environment variables with different values
|
||||
process.env.OCR_API_KEY_CUSTOM_VAR_NAME = 'actual-api-key';
|
||||
process.env.OCR_BASEURL_CUSTOM_VAR_NAME = 'https://actual-ocr-url.com';
|
||||
|
||||
// Initialize app
|
||||
const app = { locals: {} };
|
||||
await AppService(app);
|
||||
|
||||
// Verify that the raw string references were preserved and not interpolated
|
||||
expect(app.locals.ocr).toBeDefined();
|
||||
expect(app.locals.ocr.apiKey).toEqual('${OCR_API_KEY_CUSTOM_VAR_NAME}');
|
||||
expect(app.locals.ocr.baseURL).toEqual('${OCR_BASEURL_CUSTOM_VAR_NAME}');
|
||||
expect(app.locals.ocr.strategy).toEqual('mistral_ocr');
|
||||
expect(app.locals.ocr.mistralModel).toEqual('mistral-medium');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,15 +2,8 @@ const { loadAgent } = require('~/models/Agent');
|
|||
const { logger } = require('~/config');
|
||||
|
||||
const buildOptions = (req, endpoint, parsedBody) => {
|
||||
const {
|
||||
spec,
|
||||
iconURL,
|
||||
agent_id,
|
||||
instructions,
|
||||
maxContextTokens,
|
||||
resendFiles = true,
|
||||
...model_parameters
|
||||
} = parsedBody;
|
||||
const { spec, iconURL, agent_id, instructions, maxContextTokens, ...model_parameters } =
|
||||
parsedBody;
|
||||
const agentPromise = loadAgent({
|
||||
req,
|
||||
agent_id,
|
||||
|
|
@ -24,7 +17,6 @@ const buildOptions = (req, endpoint, parsedBody) => {
|
|||
iconURL,
|
||||
endpoint,
|
||||
agent_id,
|
||||
resendFiles,
|
||||
instructions,
|
||||
maxContextTokens,
|
||||
model_parameters,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ const { createContentAggregator, Providers } = require('@librechat/agents');
|
|||
const {
|
||||
EModelEndpoint,
|
||||
getResponseSender,
|
||||
AgentCapabilities,
|
||||
providerEndpointMap,
|
||||
} = require('librechat-data-provider');
|
||||
const {
|
||||
|
|
@ -15,10 +16,13 @@ const initCustom = require('~/server/services/Endpoints/custom/initialize');
|
|||
const initGoogle = require('~/server/services/Endpoints/google/initialize');
|
||||
const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
|
||||
const { getCustomEndpointConfig } = require('~/server/services/Config');
|
||||
const { processFiles } = require('~/server/services/Files/process');
|
||||
const { loadAgentTools } = require('~/server/services/ToolService');
|
||||
const AgentClient = require('~/server/controllers/agents/client');
|
||||
const { getToolFiles } = require('~/models/Conversation');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { getAgent } = require('~/models/Agent');
|
||||
const { getFiles } = require('~/models/File');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const providerConfigMap = {
|
||||
|
|
@ -34,20 +38,38 @@ const providerConfigMap = {
|
|||
};
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {ServerRequest} req
|
||||
* @param {Promise<Array<MongoFile | null>> | undefined} _attachments
|
||||
* @param {AgentToolResources | undefined} _tool_resources
|
||||
* @returns {Promise<{ attachments: Array<MongoFile | undefined> | undefined, tool_resources: AgentToolResources | undefined }>}
|
||||
*/
|
||||
const primeResources = async (_attachments, _tool_resources) => {
|
||||
const primeResources = async (req, _attachments, _tool_resources) => {
|
||||
try {
|
||||
/** @type {Array<MongoFile | undefined> | undefined} */
|
||||
let attachments;
|
||||
const tool_resources = _tool_resources ?? {};
|
||||
const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
|
||||
AgentCapabilities.ocr,
|
||||
);
|
||||
if (tool_resources.ocr?.file_ids && isOCREnabled) {
|
||||
const context = await getFiles(
|
||||
{
|
||||
file_id: { $in: tool_resources.ocr.file_ids },
|
||||
},
|
||||
{},
|
||||
{},
|
||||
);
|
||||
attachments = (attachments ?? []).concat(context);
|
||||
}
|
||||
if (!_attachments) {
|
||||
return { attachments: undefined, tool_resources: _tool_resources };
|
||||
return { attachments, tool_resources };
|
||||
}
|
||||
/** @type {Array<MongoFile | undefined> | undefined} */
|
||||
const files = await _attachments;
|
||||
const attachments = [];
|
||||
const tool_resources = _tool_resources ?? {};
|
||||
if (!attachments) {
|
||||
/** @type {Array<MongoFile | undefined>} */
|
||||
attachments = [];
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
if (!file) {
|
||||
|
|
@ -82,7 +104,6 @@ const primeResources = async (_attachments, _tool_resources) => {
|
|||
* @param {ServerResponse} params.res
|
||||
* @param {Agent} params.agent
|
||||
* @param {object} [params.endpointOption]
|
||||
* @param {AgentToolResources} [params.tool_resources]
|
||||
* @param {boolean} [params.isInitialAgent]
|
||||
* @returns {Promise<Agent>}
|
||||
*/
|
||||
|
|
@ -91,9 +112,28 @@ const initializeAgentOptions = async ({
|
|||
res,
|
||||
agent,
|
||||
endpointOption,
|
||||
tool_resources,
|
||||
isInitialAgent = false,
|
||||
}) => {
|
||||
let currentFiles;
|
||||
const requestFiles = req.body.files ?? [];
|
||||
if (
|
||||
isInitialAgent &&
|
||||
req.body.conversationId != null &&
|
||||
agent.model_parameters?.resendFiles === true
|
||||
) {
|
||||
const fileIds = (await getToolFiles(req.body.conversationId)).map((f) => f.file_id);
|
||||
if (requestFiles.length || fileIds.length) {
|
||||
currentFiles = await processFiles(requestFiles, fileIds);
|
||||
}
|
||||
} else if (isInitialAgent && requestFiles.length) {
|
||||
currentFiles = await processFiles(requestFiles);
|
||||
}
|
||||
|
||||
const { attachments, tool_resources } = await primeResources(
|
||||
req,
|
||||
currentFiles,
|
||||
agent.tool_resources,
|
||||
);
|
||||
const { tools, toolContextMap } = await loadAgentTools({
|
||||
req,
|
||||
res,
|
||||
|
|
@ -160,6 +200,7 @@ const initializeAgentOptions = async ({
|
|||
return {
|
||||
...agent,
|
||||
tools,
|
||||
attachments,
|
||||
toolContextMap,
|
||||
maxContextTokens:
|
||||
agent.max_context_tokens ??
|
||||
|
|
@ -197,11 +238,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
throw new Error('Agent not found');
|
||||
}
|
||||
|
||||
const { attachments, tool_resources } = await primeResources(
|
||||
endpointOption.attachments,
|
||||
primaryAgent.tool_resources,
|
||||
);
|
||||
|
||||
const agentConfigs = new Map();
|
||||
|
||||
// Handle primary agent
|
||||
|
|
@ -210,7 +246,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
res,
|
||||
agent: primaryAgent,
|
||||
endpointOption,
|
||||
tool_resources,
|
||||
isInitialAgent: true,
|
||||
});
|
||||
|
||||
|
|
@ -240,18 +275,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
|
||||
const client = new AgentClient({
|
||||
req,
|
||||
agent: primaryConfig,
|
||||
sender,
|
||||
attachments,
|
||||
contentParts,
|
||||
agentConfigs,
|
||||
eventHandlers,
|
||||
collectedUsage,
|
||||
artifactPromises,
|
||||
agent: primaryConfig,
|
||||
spec: endpointOption.spec,
|
||||
iconURL: endpointOption.iconURL,
|
||||
agentConfigs,
|
||||
endpoint: EModelEndpoint.agents,
|
||||
attachments: primaryConfig.attachments,
|
||||
maxContextTokens: primaryConfig.maxContextTokens,
|
||||
resendFiles: primaryConfig.model_parameters?.resendFiles ?? true,
|
||||
});
|
||||
|
||||
return { client };
|
||||
|
|
|
|||
207
api/server/services/Files/MistralOCR/crud.js
Normal file
207
api/server/services/Files/MistralOCR/crud.js
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
// ~/server/services/Files/MistralOCR/crud.js
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const FormData = require('form-data');
|
||||
const { FileSources, envVarRegex, extractEnvVariable } = require('librechat-data-provider');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { logger, createAxiosInstance } = require('~/config');
|
||||
const { logAxiosError } = require('~/utils');
|
||||
|
||||
const axios = createAxiosInstance();
|
||||
|
||||
/**
|
||||
* Uploads a document to Mistral API using file streaming to avoid loading the entire file into memory
|
||||
*
|
||||
* @param {Object} params Upload parameters
|
||||
* @param {string} params.filePath The path to the file on disk
|
||||
* @param {string} [params.fileName] Optional filename to use (defaults to the name from filePath)
|
||||
* @param {string} params.apiKey Mistral API key
|
||||
* @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
|
||||
* @returns {Promise<Object>} The response from Mistral API
|
||||
*/
|
||||
async function uploadDocumentToMistral({
|
||||
filePath,
|
||||
fileName = '',
|
||||
apiKey,
|
||||
baseURL = 'https://api.mistral.ai/v1',
|
||||
}) {
|
||||
const form = new FormData();
|
||||
form.append('purpose', 'ocr');
|
||||
const actualFileName = fileName || path.basename(filePath);
|
||||
const fileStream = fs.createReadStream(filePath);
|
||||
form.append('file', fileStream, { filename: actualFileName });
|
||||
|
||||
return axios
|
||||
.post(`${baseURL}/files`, form, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
...form.getHeaders(),
|
||||
},
|
||||
maxBodyLength: Infinity,
|
||||
maxContentLength: Infinity,
|
||||
})
|
||||
.then((res) => res.data)
|
||||
.catch((error) => {
|
||||
logger.error('Error uploading document to Mistral:', error.message);
|
||||
throw error;
|
||||
});
|
||||
}
|
||||
|
||||
async function getSignedUrl({
|
||||
apiKey,
|
||||
fileId,
|
||||
expiry = 24,
|
||||
baseURL = 'https://api.mistral.ai/v1',
|
||||
}) {
|
||||
return axios
|
||||
.get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
.then((res) => res.data)
|
||||
.catch((error) => {
|
||||
logger.error('Error fetching signed URL:', error.message);
|
||||
throw error;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Object} params
|
||||
* @param {string} params.apiKey
|
||||
* @param {string} params.documentUrl
|
||||
* @param {string} [params.baseURL]
|
||||
* @returns {Promise<OCRResult>}
|
||||
*/
|
||||
async function performOCR({
|
||||
apiKey,
|
||||
documentUrl,
|
||||
model = 'mistral-ocr-latest',
|
||||
baseURL = 'https://api.mistral.ai/v1',
|
||||
}) {
|
||||
return axios
|
||||
.post(
|
||||
`${baseURL}/ocr`,
|
||||
{
|
||||
model,
|
||||
include_image_base64: false,
|
||||
document: {
|
||||
type: 'document_url',
|
||||
document_url: documentUrl,
|
||||
},
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
},
|
||||
)
|
||||
.then((res) => res.data)
|
||||
.catch((error) => {
|
||||
logger.error('Error performing OCR:', error.message);
|
||||
throw error;
|
||||
});
|
||||
}
|
||||
|
||||
function extractVariableName(str) {
|
||||
const match = str.match(envVarRegex);
|
||||
return match ? match[1] : null;
|
||||
}
|
||||
|
||||
const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
|
||||
try {
|
||||
/** @type {TCustomConfig['ocr']} */
|
||||
const ocrConfig = req.app.locals?.ocr;
|
||||
|
||||
const apiKeyConfig = ocrConfig.apiKey || '';
|
||||
const baseURLConfig = ocrConfig.baseURL || '';
|
||||
|
||||
const isApiKeyEnvVar = envVarRegex.test(apiKeyConfig);
|
||||
const isBaseURLEnvVar = envVarRegex.test(baseURLConfig);
|
||||
|
||||
const isApiKeyEmpty = !apiKeyConfig.trim();
|
||||
const isBaseURLEmpty = !baseURLConfig.trim();
|
||||
|
||||
let apiKey, baseURL;
|
||||
|
||||
if (isApiKeyEnvVar || isBaseURLEnvVar || isApiKeyEmpty || isBaseURLEmpty) {
|
||||
const apiKeyVarName = isApiKeyEnvVar ? extractVariableName(apiKeyConfig) : 'OCR_API_KEY';
|
||||
const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL';
|
||||
|
||||
const authValues = await loadAuthValues({
|
||||
userId: req.user.id,
|
||||
authFields: [baseURLVarName, apiKeyVarName],
|
||||
optional: new Set([baseURLVarName]),
|
||||
});
|
||||
|
||||
apiKey = authValues[apiKeyVarName];
|
||||
baseURL = authValues[baseURLVarName];
|
||||
} else {
|
||||
apiKey = apiKeyConfig;
|
||||
baseURL = baseURLConfig;
|
||||
}
|
||||
|
||||
const mistralFile = await uploadDocumentToMistral({
|
||||
filePath: file.path,
|
||||
fileName: file.originalname,
|
||||
apiKey,
|
||||
baseURL,
|
||||
});
|
||||
|
||||
const modelConfig = ocrConfig.mistralModel || '';
|
||||
const model = envVarRegex.test(modelConfig)
|
||||
? extractEnvVariable(modelConfig)
|
||||
: modelConfig.trim() || 'mistral-ocr-latest';
|
||||
|
||||
const signedUrlResponse = await getSignedUrl({
|
||||
apiKey,
|
||||
baseURL,
|
||||
fileId: mistralFile.id,
|
||||
});
|
||||
|
||||
const ocrResult = await performOCR({
|
||||
apiKey,
|
||||
baseURL,
|
||||
model,
|
||||
documentUrl: signedUrlResponse.url,
|
||||
});
|
||||
|
||||
let aggregatedText = '';
|
||||
const images = [];
|
||||
ocrResult.pages.forEach((page, index) => {
|
||||
if (ocrResult.pages.length > 1) {
|
||||
aggregatedText += `# PAGE ${index + 1}\n`;
|
||||
}
|
||||
|
||||
aggregatedText += page.markdown + '\n\n';
|
||||
|
||||
if (page.images && page.images.length > 0) {
|
||||
page.images.forEach((image) => {
|
||||
if (image.image_base64) {
|
||||
images.push(image.image_base64);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
return {
|
||||
filename: file.originalname,
|
||||
bytes: aggregatedText.length * 4,
|
||||
filepath: FileSources.mistral_ocr,
|
||||
text: aggregatedText,
|
||||
images,
|
||||
};
|
||||
} catch (error) {
|
||||
const message = 'Error uploading document to Mistral OCR API';
|
||||
logAxiosError({ error, message });
|
||||
throw new Error(message);
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
uploadDocumentToMistral,
|
||||
uploadMistralOCR,
|
||||
getSignedUrl,
|
||||
performOCR,
|
||||
};
|
||||
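End to end, `uploadMistralOCR` chains the three exported helpers: upload the file for the `ocr` purpose, fetch a signed URL, then run OCR and concatenate the per-page markdown. A hedged sketch of the same flow called directly, with the config/credential resolution left out and placeholder arguments:

// Illustrative only; uploadMistralOCR adds config validation, env resolution, and page headers on top.
const {
  uploadDocumentToMistral,
  getSignedUrl,
  performOCR,
} = require('~/server/services/Files/MistralOCR');

async function ocrFlowSketch(filePath, apiKey) {
  const uploaded = await uploadDocumentToMistral({ filePath, apiKey }); // streams the file from disk
  const { url } = await getSignedUrl({ apiKey, fileId: uploaded.id });  // 24-hour signed URL by default
  const result = await performOCR({ apiKey, documentUrl: url });        // model defaults to mistral-ocr-latest
  return result.pages.map((page) => page.markdown).join('\n\n');
}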
737
api/server/services/Files/MistralOCR/crud.spec.js
Normal file
737
api/server/services/Files/MistralOCR/crud.spec.js
Normal file
|
|
@ -0,0 +1,737 @@
|
|||
const fs = require('fs');
|
||||
|
||||
const mockAxios = {
|
||||
interceptors: {
|
||||
request: { use: jest.fn(), eject: jest.fn() },
|
||||
response: { use: jest.fn(), eject: jest.fn() },
|
||||
},
|
||||
create: jest.fn().mockReturnValue({
|
||||
defaults: {
|
||||
proxy: null,
|
||||
},
|
||||
get: jest.fn().mockResolvedValue({ data: {} }),
|
||||
post: jest.fn().mockResolvedValue({ data: {} }),
|
||||
put: jest.fn().mockResolvedValue({ data: {} }),
|
||||
delete: jest.fn().mockResolvedValue({ data: {} }),
|
||||
}),
|
||||
get: jest.fn().mockResolvedValue({ data: {} }),
|
||||
post: jest.fn().mockResolvedValue({ data: {} }),
|
||||
put: jest.fn().mockResolvedValue({ data: {} }),
|
||||
delete: jest.fn().mockResolvedValue({ data: {} }),
|
||||
reset: jest.fn().mockImplementation(function () {
|
||||
this.get.mockClear();
|
||||
this.post.mockClear();
|
||||
this.put.mockClear();
|
||||
this.delete.mockClear();
|
||||
this.create.mockClear();
|
||||
}),
|
||||
};
|
||||
|
||||
jest.mock('axios', () => mockAxios);
|
||||
jest.mock('fs');
|
||||
jest.mock('~/utils', () => ({
|
||||
logAxiosError: jest.fn(),
|
||||
}));
|
||||
jest.mock('~/config', () => ({
|
||||
logger: {
|
||||
error: jest.fn(),
|
||||
},
|
||||
createAxiosInstance: () => mockAxios,
|
||||
}));
|
||||
jest.mock('~/server/services/Tools/credentials', () => ({
|
||||
loadAuthValues: jest.fn(),
|
||||
}));
|
||||
|
||||
const { uploadDocumentToMistral, uploadMistralOCR, getSignedUrl, performOCR } = require('./crud');
|
||||
|
||||
describe('MistralOCR Service', () => {
|
||||
afterEach(() => {
|
||||
mockAxios.reset();
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('uploadDocumentToMistral', () => {
|
||||
beforeEach(() => {
|
||||
// Create a more complete mock for file streams that FormData can work with
|
||||
const mockReadStream = {
|
||||
on: jest.fn().mockImplementation(function (event, handler) {
|
||||
// Simulate immediate 'end' event to make FormData complete processing
|
||||
if (event === 'end') {
|
||||
handler();
|
||||
}
|
||||
return this;
|
||||
}),
|
||||
pipe: jest.fn().mockImplementation(function () {
|
||||
return this;
|
||||
}),
|
||||
pause: jest.fn(),
|
||||
resume: jest.fn(),
|
||||
emit: jest.fn(),
|
||||
once: jest.fn(),
|
||||
destroy: jest.fn(),
|
||||
};
|
||||
|
||||
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
|
||||
|
||||
// Mock FormData's append to avoid actual stream processing
|
||||
jest.mock('form-data', () => {
|
||||
const mockFormData = function () {
|
||||
return {
|
||||
append: jest.fn(),
|
||||
getHeaders: jest
|
||||
.fn()
|
||||
.mockReturnValue({ 'content-type': 'multipart/form-data; boundary=---boundary' }),
|
||||
getBuffer: jest.fn().mockReturnValue(Buffer.from('mock-form-data')),
|
||||
getLength: jest.fn().mockReturnValue(100),
|
||||
};
|
||||
};
|
||||
return mockFormData;
|
||||
});
|
||||
});
|
||||
|
||||
it('should upload a document to Mistral API using file streaming', async () => {
|
||||
const mockResponse = { data: { id: 'file-123', purpose: 'ocr' } };
|
||||
mockAxios.post.mockResolvedValueOnce(mockResponse);
|
||||
|
||||
const result = await uploadDocumentToMistral({
|
||||
filePath: '/path/to/test.pdf',
|
||||
fileName: 'test.pdf',
|
||||
apiKey: 'test-api-key',
|
||||
});
|
||||
|
||||
// Check that createReadStream was called with the correct file path
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/path/to/test.pdf');
|
||||
|
||||
// Since we're mocking FormData, we'll just check that axios was called correctly
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://api.mistral.ai/v1/files',
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
Authorization: 'Bearer test-api-key',
|
||||
}),
|
||||
maxBodyLength: Infinity,
|
||||
maxContentLength: Infinity,
|
||||
}),
|
||||
);
|
||||
expect(result).toEqual(mockResponse.data);
|
||||
});
|
||||
|
||||
it('should handle errors during document upload', async () => {
|
||||
const errorMessage = 'API error';
|
||||
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
|
||||
|
||||
await expect(
|
||||
uploadDocumentToMistral({
|
||||
filePath: '/path/to/test.pdf',
|
||||
fileName: 'test.pdf',
|
||||
apiKey: 'test-api-key',
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
|
||||
const { logger } = require('~/config');
|
||||
expect(logger.error).toHaveBeenCalledWith(
|
||||
expect.stringContaining('Error uploading document to Mistral:'),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getSignedUrl', () => {
|
||||
it('should fetch signed URL from Mistral API', async () => {
|
||||
const mockResponse = { data: { url: 'https://document-url.com' } };
|
||||
mockAxios.get.mockResolvedValueOnce(mockResponse);
|
||||
|
||||
const result = await getSignedUrl({
|
||||
fileId: 'file-123',
|
||||
apiKey: 'test-api-key',
|
||||
});
|
||||
|
||||
expect(mockAxios.get).toHaveBeenCalledWith(
|
||||
'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
|
||||
{
|
||||
headers: {
|
||||
Authorization: 'Bearer test-api-key',
|
||||
},
|
||||
},
|
||||
);
|
||||
expect(result).toEqual(mockResponse.data);
|
||||
});
|
||||
|
||||
it('should handle errors when fetching signed URL', async () => {
|
||||
const errorMessage = 'API error';
|
||||
mockAxios.get.mockRejectedValueOnce(new Error(errorMessage));
|
||||
|
||||
await expect(
|
||||
getSignedUrl({
|
||||
fileId: 'file-123',
|
||||
apiKey: 'test-api-key',
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
|
||||
const { logger } = require('~/config');
|
||||
expect(logger.error).toHaveBeenCalledWith('Error fetching signed URL:', errorMessage);
|
||||
});
|
||||
});
|
||||
|
||||
describe('performOCR', () => {
|
||||
it('should perform OCR using Mistral API', async () => {
|
||||
const mockResponse = {
|
||||
data: {
|
||||
pages: [{ markdown: 'Page 1 content' }, { markdown: 'Page 2 content' }],
|
||||
},
|
||||
};
|
||||
mockAxios.post.mockResolvedValueOnce(mockResponse);
|
||||
|
||||
const result = await performOCR({
|
||||
apiKey: 'test-api-key',
|
||||
documentUrl: 'https://document-url.com',
|
||||
model: 'mistral-ocr-latest',
|
||||
});
|
||||
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://api.mistral.ai/v1/ocr',
|
||||
{
|
||||
model: 'mistral-ocr-latest',
|
||||
include_image_base64: false,
|
||||
document: {
|
||||
type: 'document_url',
|
||||
document_url: 'https://document-url.com',
|
||||
},
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: 'Bearer test-api-key',
|
||||
},
|
||||
},
|
||||
);
|
||||
expect(result).toEqual(mockResponse.data);
|
||||
});
|
||||
|
||||
it('should handle errors during OCR processing', async () => {
|
||||
const errorMessage = 'OCR processing error';
|
||||
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
|
||||
|
||||
await expect(
|
||||
performOCR({
|
||||
apiKey: 'test-api-key',
|
||||
documentUrl: 'https://document-url.com',
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
|
||||
const { logger } = require('~/config');
|
||||
expect(logger.error).toHaveBeenCalledWith('Error performing OCR:', errorMessage);
|
||||
});
|
||||
});
|
||||
|
||||
describe('uploadMistralOCR', () => {
|
||||
beforeEach(() => {
|
||||
const mockReadStream = {
|
||||
on: jest.fn().mockImplementation(function (event, handler) {
|
||||
if (event === 'end') {
|
||||
handler();
|
||||
}
|
||||
return this;
|
||||
}),
|
||||
pipe: jest.fn().mockImplementation(function () {
|
||||
return this;
|
||||
}),
|
||||
pause: jest.fn(),
|
||||
resume: jest.fn(),
|
||||
emit: jest.fn(),
|
||||
once: jest.fn(),
|
||||
destroy: jest.fn(),
|
||||
};
|
||||
|
||||
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
|
||||
});
|
||||
|
||||
it('should process OCR for a file with standard configuration', async () => {
|
||||
// Setup mocks
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'test-api-key',
|
||||
OCR_BASEURL: 'https://api.mistral.ai/v1',
|
||||
});
|
||||
|
||||
// Mock file upload response
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: { id: 'file-123', purpose: 'ocr' },
|
||||
});
|
||||
|
||||
// Mock signed URL response
|
||||
mockAxios.get.mockResolvedValueOnce({
|
||||
data: { url: 'https://signed-url.com' },
|
||||
});
|
||||
|
||||
// Mock OCR response with text and images
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: {
|
||||
pages: [
|
||||
{
|
||||
markdown: 'Page 1 content',
|
||||
images: [{ image_base64: 'base64image1' }],
|
||||
},
|
||||
{
|
||||
markdown: 'Page 2 content',
|
||||
images: [{ image_base64: 'base64image2' }],
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
// Use environment variable syntax to ensure loadAuthValues is called
|
||||
apiKey: '${OCR_API_KEY}',
|
||||
baseURL: '${OCR_BASEURL}',
|
||||
mistralModel: 'mistral-medium',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'document.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
expect(loadAuthValues).toHaveBeenCalledWith({
|
||||
userId: 'user123',
|
||||
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
|
||||
optional: expect.any(Set),
|
||||
});
|
||||
|
||||
// Verify OCR result
|
||||
expect(result).toEqual({
|
||||
filename: 'document.pdf',
|
||||
bytes: expect.any(Number),
|
||||
filepath: 'mistral_ocr',
|
||||
text: expect.stringContaining('# PAGE 1'),
|
||||
images: ['base64image1', 'base64image2'],
|
||||
});
|
||||
});
|
||||
|
||||
it('should process variable references in configuration', async () => {
|
||||
// Setup mocks with environment variables
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
CUSTOM_API_KEY: 'custom-api-key',
|
||||
CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1',
|
||||
});
|
||||
|
||||
// Mock API responses
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: { id: 'file-123', purpose: 'ocr' },
|
||||
});
|
||||
mockAxios.get.mockResolvedValueOnce({
|
||||
data: { url: 'https://signed-url.com' },
|
||||
});
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: {
|
||||
pages: [{ markdown: 'Content from custom API' }],
|
||||
},
|
||||
});
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
apiKey: '${CUSTOM_API_KEY}',
|
||||
baseURL: '${CUSTOM_BASEURL}',
|
||||
mistralModel: '${CUSTOM_MODEL}',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
// Set environment variable for model
|
||||
process.env.CUSTOM_MODEL = 'mistral-large';
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'document.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Verify that custom environment variables were extracted and used
|
||||
expect(loadAuthValues).toHaveBeenCalledWith({
|
||||
userId: 'user123',
|
||||
authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'],
|
||||
optional: expect.any(Set),
|
||||
});
|
||||
|
||||
// Check that mistral-large was used in the OCR API call
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
model: 'mistral-large',
|
||||
}),
|
||||
expect.anything(),
|
||||
);
|
||||
|
||||
expect(result.text).toEqual('Content from custom API\n\n');
|
||||
});
|
||||
|
||||
it('should fall back to default values when variables are not properly formatted', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'default-api-key',
|
||||
OCR_BASEURL: undefined, // Testing optional parameter
|
||||
});
|
||||
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: { id: 'file-123', purpose: 'ocr' },
|
||||
});
|
||||
mockAxios.get.mockResolvedValueOnce({
|
||||
data: { url: 'https://signed-url.com' },
|
||||
});
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: {
|
||||
pages: [{ markdown: 'Default API result' }],
|
||||
},
|
||||
});
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
// Use environment variable syntax to ensure loadAuthValues is called
|
||||
apiKey: '${INVALID_FORMAT}', // Using valid env var format but with an invalid name
|
||||
baseURL: '${OCR_BASEURL}', // Using valid env var format
|
||||
mistralModel: 'mistral-ocr-latest', // Plain string value
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'document.pdf',
|
||||
};
|
||||
|
||||
await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Should use the default values
|
||||
expect(loadAuthValues).toHaveBeenCalledWith({
|
||||
userId: 'user123',
|
||||
authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
|
||||
optional: expect.any(Set),
|
||||
});
|
||||
|
||||
// Should use the default model when not using environment variable format
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
model: 'mistral-ocr-latest',
|
||||
}),
|
||||
expect.anything(),
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle API errors during OCR process', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'test-api-key',
|
||||
});
|
||||
|
||||
// Mock file upload to fail
|
||||
mockAxios.post.mockRejectedValueOnce(new Error('Upload failed'));
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
apiKey: 'OCR_API_KEY',
|
||||
baseURL: 'OCR_BASEURL',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'document.pdf',
|
||||
};
|
||||
|
||||
await expect(
|
||||
uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
}),
|
||||
).rejects.toThrow('Error uploading document to Mistral OCR API');
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
const { logAxiosError } = require('~/utils');
|
||||
expect(logAxiosError).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle single page documents without page numbering', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'test-api-key',
|
||||
OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included
|
||||
});
|
||||
|
||||
// Clear all previous mocks
|
||||
mockAxios.post.mockClear();
|
||||
mockAxios.get.mockClear();
|
||||
|
||||
// 1. First mock: File upload response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
|
||||
);
|
||||
|
||||
// 2. Second mock: Signed URL response
|
||||
mockAxios.get.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
|
||||
);
|
||||
|
||||
// 3. Third mock: OCR response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({
|
||||
data: {
|
||||
pages: [{ markdown: 'Single page content' }],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
apiKey: 'OCR_API_KEY',
|
||||
baseURL: 'OCR_BASEURL',
|
||||
mistralModel: 'mistral-ocr-latest',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'single-page.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Verify that single page documents don't include page numbering
|
||||
expect(result.text).not.toContain('# PAGE');
|
||||
expect(result.text).toEqual('Single page content\n\n');
|
||||
});
|
||||
|
||||
it('should use literal values in configuration when provided directly', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
// We'll still mock this but it should not be used for literal values
|
||||
loadAuthValues.mockResolvedValue({});
|
||||
|
||||
// Clear all previous mocks
|
||||
mockAxios.post.mockClear();
|
||||
mockAxios.get.mockClear();
|
||||
|
||||
// 1. First mock: File upload response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
|
||||
);
|
||||
|
||||
// 2. Second mock: Signed URL response
|
||||
mockAxios.get.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
|
||||
);
|
||||
|
||||
// 3. Third mock: OCR response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({
|
||||
data: {
|
||||
pages: [{ markdown: 'Processed with literal config values' }],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
// Direct values that should be used as-is, without variable substitution
|
||||
apiKey: 'actual-api-key-value',
|
||||
baseURL: 'https://direct-api-url.mistral.ai/v1',
|
||||
mistralModel: 'mistral-direct-model',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'direct-values.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Verify the correct URL was used with the direct baseURL value
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://direct-api-url.mistral.ai/v1/files',
|
||||
expect.any(Object),
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
Authorization: 'Bearer actual-api-key-value',
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
// Check the OCR call was made with the direct model value
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://direct-api-url.mistral.ai/v1/ocr',
|
||||
expect.objectContaining({
|
||||
model: 'mistral-direct-model',
|
||||
}),
|
||||
expect.any(Object),
|
||||
);
|
||||
|
||||
// Verify the result
|
||||
expect(result.text).toEqual('Processed with literal config values\n\n');
|
||||
|
||||
// Verify loadAuthValues was never called since we used direct values
|
||||
expect(loadAuthValues).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle empty configuration values and use defaults', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
// Set up the mock values to be returned by loadAuthValues
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'default-from-env-key',
|
||||
OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
|
||||
});
|
||||
|
||||
// Clear all previous mocks
|
||||
mockAxios.post.mockClear();
|
||||
mockAxios.get.mockClear();
|
||||
|
||||
// 1. First mock: File upload response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
|
||||
);
|
||||
|
||||
// 2. Second mock: Signed URL response
|
||||
mockAxios.get.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
|
||||
);
|
||||
|
||||
// 3. Third mock: OCR response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({
|
||||
data: {
|
||||
pages: [{ markdown: 'Content from default configuration' }],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
// Empty string values - should fall back to defaults
|
||||
apiKey: '',
|
||||
baseURL: '',
|
||||
mistralModel: '',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'empty-config.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Verify loadAuthValues was called with the default variable names
|
||||
expect(loadAuthValues).toHaveBeenCalledWith({
|
||||
userId: 'user123',
|
||||
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
|
||||
optional: expect.any(Set),
|
||||
});
|
||||
|
||||
// Verify the API calls used the default values from loadAuthValues
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://default-from-env.mistral.ai/v1/files',
|
||||
expect.any(Object),
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
Authorization: 'Bearer default-from-env-key',
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
// Verify the OCR model defaulted to mistral-ocr-latest
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://default-from-env.mistral.ai/v1/ocr',
|
||||
expect.objectContaining({
|
||||
model: 'mistral-ocr-latest',
|
||||
}),
|
||||
expect.any(Object),
|
||||
);
|
||||
|
||||
// Check result
|
||||
expect(result.text).toEqual('Content from default configuration\n\n');
|
||||
});
|
||||
});
|
||||
});
|
||||
5
api/server/services/Files/MistralOCR/index.js
Normal file
5
api/server/services/Files/MistralOCR/index.js
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
const crud = require('./crud');
|
||||
|
||||
module.exports = {
|
||||
...crud,
|
||||
};
|
||||
|
|
@ -49,6 +49,7 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
|||
const promises = [];
|
||||
const encodingMethods = {};
|
||||
const result = {
|
||||
text: '',
|
||||
files: [],
|
||||
image_urls: [],
|
||||
};
|
||||
|
|
@ -59,6 +60,9 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
|||
|
||||
for (let file of files) {
|
||||
const source = file.source ?? FileSources.local;
|
||||
if (source === FileSources.text && file.text) {
|
||||
result.text += `${!result.text ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${file.text}\n`;
|
||||
}
|
||||
|
||||
if (!file.height) {
|
||||
promises.push([file, null]);
|
||||
|
|
@ -85,6 +89,10 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
|||
promises.push(preparePayload(req, file));
|
||||
}
|
||||
|
||||
if (result.text) {
|
||||
result.text += '\n```';
|
||||
}
|
||||
|
||||
const detail = req.body.imageDetail ?? ImageDetail.auto;
|
||||
|
||||
/** @type {Array<[MongoFile, string]>} */
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ const { addResourceFileId, deleteResourceFileId } = require('~/server/controller
|
|||
const { addAgentResourceFile, removeAgentResourceFiles } = require('~/models/Agent');
|
||||
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
|
||||
const { createFile, updateFileUsage, deleteFiles } = require('~/models/File');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { getEndpointsConfig } = require('~/server/services/Config');
|
||||
const { loadAuthValues } = require('~/app/clients/tools/util');
|
||||
const { LB_QueueAsyncCall } = require('~/server/utils/queue');
|
||||
const { getStrategyFunctions } = require('./strategies');
|
||||
const { determineFileType } = require('~/server/utils');
|
||||
|
|
@ -162,7 +162,6 @@ const processDeleteRequest = async ({ req, files }) => {
|
|||
|
||||
for (const file of files) {
|
||||
const source = file.source ?? FileSources.local;
|
||||
|
||||
if (req.body.agent_id && req.body.tool_resource) {
|
||||
agentFiles.push({
|
||||
tool_resource: req.body.tool_resource,
|
||||
|
|
@ -170,6 +169,11 @@ const processDeleteRequest = async ({ req, files }) => {
|
|||
});
|
||||
}
|
||||
|
||||
if (source === FileSources.text) {
|
||||
resolvedFileIds.push(file.file_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (checkOpenAIStorage(source) && !client[source]) {
|
||||
await initializeClients();
|
||||
}
|
||||
|
|
@@ -521,6 +525,52 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
      if (!isFileSearchEnabled) {
        throw new Error('File search is not enabled for Agents');
      }
+   } else if (tool_resource === EToolResources.ocr) {
+     const isOCREnabled = await checkCapability(req, AgentCapabilities.ocr);
+     if (!isOCREnabled) {
+       throw new Error('OCR capability is not enabled for Agents');
+     }
+
+     const { handleFileUpload } = getStrategyFunctions(
+       req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr,
+     );
+     const { file_id, temp_file_id } = metadata;
+
+     const {
+       text,
+       bytes,
+       // TODO: OCR images support?
+       images,
+       filename,
+       filepath: ocrFileURL,
+     } = await handleFileUpload({ req, file, file_id, entity_id: agent_id });
+
+     const fileInfo = removeNullishValues({
+       text,
+       bytes,
+       file_id,
+       temp_file_id,
+       user: req.user.id,
+       type: file.mimetype,
+       filepath: ocrFileURL,
+       source: FileSources.text,
+       filename: filename ?? file.originalname,
+       model: messageAttachment ? undefined : req.body.model,
+       context: messageAttachment ? FileContext.message_attachment : FileContext.agents,
+     });
+
+     if (!messageAttachment && tool_resource) {
+       await addAgentResourceFile({
+         req,
+         file_id,
+         agent_id,
+         tool_resource,
+       });
+     }
+     const result = await createFile(fileInfo, true);
+     return res
+       .status(200)
+       .json({ message: 'Agent file uploaded and processed successfully', ...result });
    }

    const source =

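
For orientation (hypothetical values, not part of the diff), the `fileInfo` record created by this new OCR branch looks roughly like:

    {
      file_id: '<uuid from metadata>',
      user: '<req.user.id>',
      type: 'application/pdf',
      filename: 'contract.pdf',
      filepath: '<URL returned by the OCR upload>',
      source: 'text',               // FileSources.text: the extracted text lives on the document itself
      text: '# ...markdown extracted by OCR...',
      bytes: 48210,
      context: 'agents',
    }

Fields that come back nullish (for example `temp_file_id` on some uploads) are dropped by `removeNullishValues` before `createFile` persists the record.
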
@@ -24,6 +24,7 @@ const {
const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./OpenAI');
const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code');
const { uploadVectors, deleteVectors } = require('./VectorDB');
+ const { uploadMistralOCR } = require('./MistralOCR');

/**
 * Firebase Storage Strategy Functions

@@ -127,6 +128,26 @@ const codeOutputStrategy = () => ({
  getDownloadStream: getCodeOutputDownloadStream,
});

+ const mistralOCRStrategy = () => ({
+   /** @type {typeof saveFileFromURL | null} */
+   saveURL: null,
+   /** @type {typeof getLocalFileURL | null} */
+   getFileURL: null,
+   /** @type {typeof saveLocalBuffer | null} */
+   saveBuffer: null,
+   /** @type {typeof processLocalAvatar | null} */
+   processAvatar: null,
+   /** @type {typeof uploadLocalImage | null} */
+   handleImageUpload: null,
+   /** @type {typeof prepareImagesLocal | null} */
+   prepareImagePayload: null,
+   /** @type {typeof deleteLocalFile | null} */
+   deleteFile: null,
+   /** @type {typeof getLocalFileStream | null} */
+   getDownloadStream: null,
+   handleFileUpload: uploadMistralOCR,
+ });
+
// Strategy Selector
const getStrategyFunctions = (fileSource) => {
  if (fileSource === FileSources.firebase) {

@@ -141,6 +162,8 @@ const getStrategyFunctions = (fileSource) => {
    return vectorStrategy();
  } else if (fileSource === FileSources.execute_code) {
    return codeOutputStrategy();
+ } else if (fileSource === FileSources.mistral_ocr) {
+   return mistralOCRStrategy();
  } else {
    throw new Error('Invalid file source');
  }

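
As a quick usage sketch (assumed wiring, mirroring the call made in `processAgentFileUpload` above):

    // Resolving the OCR upload handler from the strategy selector:
    const { getStrategyFunctions } = require('./strategies');
    const { FileSources } = require('librechat-data-provider');

    const { handleFileUpload } = getStrategyFunctions(FileSources.mistral_ocr);
    // handleFileUpload === uploadMistralOCR; every other function on this strategy is null,
    // so image, avatar, and delete operations must not be routed through this source.
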
api/server/services/Tools/credentials.js (new file, 56 lines)

@@ -0,0 +1,56 @@
const { getUserPluginAuthValue } = require('~/server/services/PluginService');

/**
 *
 * @param {Object} params
 * @param {string} params.userId
 * @param {string[]} params.authFields
 * @param {Set<string>} [params.optional]
 * @param {boolean} [params.throwError]
 * @returns
 */
const loadAuthValues = async ({ userId, authFields, optional, throwError = true }) => {
  let authValues = {};

  /**
   * Finds the first non-empty value for the given authentication field, supporting alternate fields.
   * @param {string[]} fields Array of strings representing the authentication fields. Supports alternate fields delimited by "||".
   * @returns {Promise<{ authField: string, authValue: string} | null>} An object containing the authentication field and value, or null if not found.
   */
  const findAuthValue = async (fields) => {
    for (const field of fields) {
      let value = process.env[field];
      if (value) {
        return { authField: field, authValue: value };
      }
      try {
        value = await getUserPluginAuthValue(userId, field, throwError);
      } catch (err) {
        if (optional && optional.has(field)) {
          return { authField: field, authValue: undefined };
        }
        if (field === fields[fields.length - 1] && !value) {
          throw err;
        }
      }
      if (value) {
        return { authField: field, authValue: value };
      }
    }
    return null;
  };

  for (let authField of authFields) {
    const fields = authField.split('||');
    const result = await findAuthValue(fields);
    if (result) {
      authValues[result.authField] = result.authValue;
    }
  }

  return authValues;
};

module.exports = {
  loadAuthValues,
};

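
A minimal usage sketch for the new helper (the credential names reuse `OCR_API_KEY` / `OCR_BASEURL` from the OCR schema later in this diff; the wrapper function itself is illustrative only):

    const { loadAuthValues } = require('~/server/services/Tools/credentials');

    async function getOcrCredentials(userId) {
      const result = await loadAuthValues({
        userId,
        authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
        optional: new Set(['OCR_BASEURL']), // a missing baseURL is tolerated; a missing key still throws
      });
      // process.env values win; otherwise the user's stored plugin auth value is used.
      return result; // e.g. { OCR_API_KEY: '...', OCR_BASEURL: '...' }
    }
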
@@ -203,6 +203,7 @@ function generateConfig(key, baseURL, endpoint) {
      AgentCapabilities.artifacts,
      AgentCapabilities.actions,
      AgentCapabilities.tools,
+     AgentCapabilities.ocr,
    ];
  }

@@ -39,7 +39,10 @@ jest.mock('winston-daily-rotate-file', () => {
});

jest.mock('~/config', () => {
+ const actualModule = jest.requireActual('~/config');
  return {
+   sendEvent: actualModule.sendEvent,
+   createAxiosInstance: actualModule.createAxiosInstance,
    logger: {
      info: jest.fn(),
      warn: jest.fn(),

@@ -1787,3 +1787,51 @@
 * @typedef {Promise<{ message: TMessage, conversation: TConversation }> | undefined} ClientDatabaseSavePromise
 * @memberof typedefs
 */
+
+/**
+ * @exports OCRImage
+ * @typedef {Object} OCRImage
+ * @property {string} id - The identifier of the image.
+ * @property {number} top_left_x - X-coordinate of the top left corner of the image.
+ * @property {number} top_left_y - Y-coordinate of the top left corner of the image.
+ * @property {number} bottom_right_x - X-coordinate of the bottom right corner of the image.
+ * @property {number} bottom_right_y - Y-coordinate of the bottom right corner of the image.
+ * @property {string} image_base64 - Base64-encoded image data.
+ * @memberof typedefs
+ */
+
+/**
+ * @exports PageDimensions
+ * @typedef {Object} PageDimensions
+ * @property {number} dpi - The dots per inch resolution of the page.
+ * @property {number} height - The height of the page in pixels.
+ * @property {number} width - The width of the page in pixels.
+ * @memberof typedefs
+ */
+
+/**
+ * @exports OCRPage
+ * @typedef {Object} OCRPage
+ * @property {number} index - The index of the page in the document.
+ * @property {string} markdown - The extracted text content of the page in markdown format.
+ * @property {OCRImage[]} images - Array of images found on the page.
+ * @property {PageDimensions} dimensions - The dimensions of the page.
+ * @memberof typedefs
+ */
+
+/**
+ * @exports OCRUsageInfo
+ * @typedef {Object} OCRUsageInfo
+ * @property {number} pages_processed - Number of pages processed in the document.
+ * @property {number} doc_size_bytes - Size of the document in bytes.
+ * @memberof typedefs
+ */
+
+/**
+ * @exports OCRResult
+ * @typedef {Object} OCRResult
+ * @property {OCRPage[]} pages - Array of pages extracted from the document.
+ * @property {string} model - The model used for OCR processing.
+ * @property {OCRUsageInfo} usage_info - Usage information for the OCR operation.
+ * @memberof typedefs
+ */

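
To make these shapes concrete, an illustrative object matching the typedefs above (every value below is invented):

    /** @type {OCRResult} */
    const exampleResult = {
      model: 'mistral-ocr-model',        // placeholder name, not a real default
      usage_info: { pages_processed: 1, doc_size_bytes: 48210 },
      pages: [
        {
          index: 0,
          markdown: '# Invoice\n\nTotal: $120.00',
          images: [],                    // OCRImage entries would carry ids, bounding boxes, and base64 data
          dimensions: { dpi: 200, height: 2200, width: 1700 },
        },
      ],
    };
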
@@ -5,6 +5,7 @@ import type { OptionWithIcon, ExtendedFile } from './types';
export type TAgentOption = OptionWithIcon &
  Agent & {
    knowledge_files?: Array<[string, ExtendedFile]>;
+   context_files?: Array<[string, ExtendedFile]>;
    code_files?: Array<[string, ExtendedFile]>;
  };

@@ -483,6 +483,7 @@ export interface ExtendedFile {
  attached?: boolean;
  embedded?: boolean;
  tool_resource?: string;
+ metadata?: t.TFile['metadata'];
}

export type ContextType = { navVisible: boolean; setNavVisible: (visible: boolean) => void };

@@ -1,7 +1,7 @@
import * as Ariakit from '@ariakit/react';
import React, { useRef, useState, useMemo } from 'react';
- import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react';
import { EToolResources, EModelEndpoint } from 'librechat-data-provider';
+ import { FileSearch, ImageUpIcon, TerminalSquareIcon, FileType2Icon } from 'lucide-react';
import { FileUpload, TooltipAnchor, DropdownPopup } from '~/components/ui';
import { useGetEndpointsQuery } from '~/data-provider';
import { AttachmentIcon } from '~/components/svg';
@@ -49,6 +49,17 @@ const AttachFile = ({ isRTL, disabled, handleFileChange }: AttachFileProps) => {
      },
    ];

+   if (capabilities.includes(EToolResources.ocr)) {
+     items.push({
+       label: localize('com_ui_upload_ocr_text'),
+       onClick: () => {
+         setToolResource(EToolResources.ocr);
+         handleUploadClick();
+       },
+       icon: <FileType2Icon className="icon-md" />,
+     });
+   }
+
    if (capabilities.includes(EToolResources.file_search)) {
      items.push({
        label: localize('com_ui_upload_file_search'),

@@ -1,6 +1,6 @@
import React, { useMemo } from 'react';
import { EModelEndpoint, EToolResources } from 'librechat-data-provider';
- import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react';
+ import { FileSearch, ImageUpIcon, FileType2Icon, TerminalSquareIcon } from 'lucide-react';
import OGDialogTemplate from '~/components/ui/OGDialogTemplate';
import { useGetEndpointsQuery } from '~/data-provider';
import useLocalize from '~/hooks/useLocalize';
@@ -50,6 +50,12 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
        value: EToolResources.execute_code,
        icon: <TerminalSquareIcon className="icon-md" />,
      });
+   } else if (capability === EToolResources.ocr) {
+     _options.push({
+       label: localize('com_ui_upload_ocr_text'),
+       value: EToolResources.ocr,
+       icon: <FileType2Icon className="icon-md" />,
+     });
    }
  }

@@ -19,7 +19,7 @@ const FilePreview = ({
  };
  className?: string;
}) => {
- const radius = 55; // Radius of the SVG circle
+ const radius = 55;
  const circumference = 2 * Math.PI * radius;
  const progress = useProgress(
    file?.['progress'] ?? 1,
@@ -27,16 +27,15 @@ const FilePreview = ({
    (file as ExtendedFile | undefined)?.size ?? 1,
  );

- // Calculate the offset based on the loading progress
  const offset = circumference - progress * circumference;
  const circleCSSProperties = {
    transition: 'stroke-dashoffset 0.5s linear',
  };

  return (
-   <div className={cn('size-10 shrink-0 overflow-hidden rounded-xl', className)}>
+   <div className={cn('relative size-10 shrink-0 overflow-hidden rounded-xl', className)}>
      <FileIcon file={file} fileType={fileType} />
-     <SourceIcon source={file?.source} />
+     <SourceIcon source={file?.source} isCodeFile={!!file?.['metadata']?.fileIdentifier} />
      {progress < 1 && (
        <ProgressCircle
          circumference={circumference}

@@ -1,3 +1,4 @@
+ import { Terminal, Type, Database } from 'lucide-react';
import { EModelEndpoint, FileSources } from 'librechat-data-provider';
import { MinimalIcon } from '~/components/Endpoints';
import { cn } from '~/utils';
@@ -6,9 +7,13 @@ const sourceToEndpoint = {
  [FileSources.openai]: EModelEndpoint.openAI,
  [FileSources.azure]: EModelEndpoint.azureOpenAI,
};

const sourceToClassname = {
  [FileSources.openai]: 'bg-white/75 dark:bg-black/65',
  [FileSources.azure]: 'azure-bg-color opacity-85',
+ [FileSources.execute_code]: 'bg-black text-white opacity-85',
+ [FileSources.text]: 'bg-blue-100 dark:bg-blue-900 opacity-85 text-white',
+ [FileSources.vectordb]: 'bg-yellow-100 dark:bg-yellow-900 opacity-85 text-white',
};

const defaultClassName =
@@ -16,13 +21,41 @@ const defaultClassName =

export default function SourceIcon({
  source,
+ isCodeFile,
  className = defaultClassName,
}: {
  source?: FileSources;
+ isCodeFile?: boolean;
  className?: string;
}) {
- if (source === FileSources.local || source === FileSources.firebase) {
-   return null;
+ if (isCodeFile === true) {
+   return (
+     <div className={cn(className, sourceToClassname[FileSources.execute_code] ?? '')}>
+       <span className="flex items-center justify-center">
+         <Terminal className="h-3 w-3" />
+       </span>
+     </div>
+   );
+ }
+
+ if (source === FileSources.text) {
+   return (
+     <div className={cn(className, sourceToClassname[source] ?? '')}>
+       <span className="flex items-center justify-center">
+         <Type className="h-3 w-3" />
+       </span>
+     </div>
+   );
+ }
+
+ if (source === FileSources.vectordb) {
+   return (
+     <div className={cn(className, sourceToClassname[source] ?? '')}>
+       <span className="flex items-center justify-center">
+         <Database className="h-3 w-3" />
+       </span>
+     </div>
+   );
+ }
+
  const endpoint = sourceToEndpoint[source ?? ''];
@@ -31,7 +64,7 @@ export default function SourceIcon({
    return null;
  }
  return (
-   <button type="button" className={cn(className, sourceToClassname[source ?? ''] ?? '')}>
+   <div className={cn(className, sourceToClassname[source ?? ''] ?? '')}>
      <span className="flex items-center justify-center">
        <MinimalIcon
          endpoint={endpoint}
@@ -40,6 +73,6 @@ export default function SourceIcon({
          iconClassName="h-3 w-3"
        />
      </span>
-   </button>
+   </div>
  );
}

@@ -23,6 +23,7 @@ import { processAgentOption } from '~/utils';
import AdminSettings from './AdminSettings';
import DeleteButton from './DeleteButton';
import AgentAvatar from './AgentAvatar';
+ import FileContext from './FileContext';
import { Spinner } from '~/components';
import FileSearch from './FileSearch';
import ShareAgent from './ShareAgent';
@@ -82,6 +83,10 @@ export default function AgentConfig({
    () => agentsConfig?.capabilities.includes(AgentCapabilities.artifacts) ?? false,
    [agentsConfig],
  );
+ const ocrEnabled = useMemo(
+   () => agentsConfig?.capabilities.includes(AgentCapabilities.ocr) ?? false,
+   [agentsConfig],
+ );
  const fileSearchEnabled = useMemo(
    () => agentsConfig?.capabilities.includes(AgentCapabilities.file_search) ?? false,
    [agentsConfig],
@@ -91,6 +96,26 @@ export default function AgentConfig({
    [agentsConfig],
  );

+ const context_files = useMemo(() => {
+   if (typeof agent === 'string') {
+     return [];
+   }
+
+   if (agent?.id !== agent_id) {
+     return [];
+   }
+
+   if (agent.context_files) {
+     return agent.context_files;
+   }
+
+   const _agent = processAgentOption({
+     agent,
+     fileMap,
+   });
+   return _agent.context_files ?? [];
+ }, [agent, agent_id, fileMap]);
+
  const knowledge_files = useMemo(() => {
    if (typeof agent === 'string') {
      return [];
@@ -334,7 +359,7 @@ export default function AgentConfig({
          </div>
        </button>
      </div>
-     {(codeEnabled || fileSearchEnabled || artifactsEnabled) && (
+     {(codeEnabled || fileSearchEnabled || artifactsEnabled || ocrEnabled) && (
        <div className="mb-4 flex w-full flex-col items-start gap-3">
          <label className="text-token-text-primary block font-medium">
            {localize('com_assistants_capabilities')}
@@ -345,6 +370,8 @@ export default function AgentConfig({
          {fileSearchEnabled && <FileSearch agent_id={agent_id} files={knowledge_files} />}
          {/* Artifacts */}
          {artifactsEnabled && <Artifacts />}
+         {/* File Context (OCR) */}
+         {ocrEnabled && <FileContext agent_id={agent_id} files={context_files} />}
        </div>
      )}
      {/* Agent Tools & Actions */}

client/src/components/SidePanel/Agents/FileContext.tsx (new file, 128 lines)

@@ -0,0 +1,128 @@
import { useState, useRef } from 'react';
import {
  EModelEndpoint,
  EToolResources,
  mergeFileConfig,
  fileConfig as defaultFileConfig,
} from 'librechat-data-provider';
import type { ExtendedFile } from '~/common';
import { useFileHandling, useLocalize, useLazyEffect } from '~/hooks';
import FileRow from '~/components/Chat/Input/Files/FileRow';
import { useGetFileConfig } from '~/data-provider';
import { HoverCard, HoverCardContent, HoverCardPortal, HoverCardTrigger } from '~/components/ui';
import { AttachmentIcon, CircleHelpIcon } from '~/components/svg';
import { useChatContext } from '~/Providers';
import { ESide } from '~/common';

export default function FileContext({
  agent_id,
  files: _files,
}: {
  agent_id: string;
  files?: [string, ExtendedFile][];
}) {
  const localize = useLocalize();
  const { setFilesLoading } = useChatContext();
  const fileInputRef = useRef<HTMLInputElement>(null);
  const [files, setFiles] = useState<Map<string, ExtendedFile>>(new Map());

  const { data: fileConfig = defaultFileConfig } = useGetFileConfig({
    select: (data) => mergeFileConfig(data),
  });

  const { handleFileChange } = useFileHandling({
    overrideEndpoint: EModelEndpoint.agents,
    additionalMetadata: { agent_id, tool_resource: EToolResources.ocr },
    fileSetter: setFiles,
  });

  useLazyEffect(
    () => {
      if (_files) {
        setFiles(new Map(_files));
      }
    },
    [_files],
    750,
  );

  const endpointFileConfig = fileConfig.endpoints[EModelEndpoint.agents];
  const isUploadDisabled = endpointFileConfig.disabled ?? false;

  if (isUploadDisabled) {
    return null;
  }

  const handleButtonClick = () => {
    // necessary to reset the input
    if (fileInputRef.current) {
      fileInputRef.current.value = '';
    }
    fileInputRef.current?.click();
  };

  return (
    <div className="w-full">
      <HoverCard openDelay={50}>
        <div className="mb-2 flex items-center gap-2">
          <HoverCardTrigger asChild>
            <span className="flex items-center gap-2">
              <label className="text-token-text-primary block font-medium">
                {localize('com_agents_file_context')}
              </label>
              <CircleHelpIcon className="h-4 w-4 text-text-tertiary" />
            </span>
          </HoverCardTrigger>
          <HoverCardPortal>
            <HoverCardContent side={ESide.Top} className="w-80">
              <div className="space-y-2">
                <p className="text-sm text-text-secondary">
                  {localize('com_agents_file_context_info')}
                </p>
              </div>
            </HoverCardContent>
          </HoverCardPortal>
        </div>
      </HoverCard>
      <div className="flex flex-col gap-3">
        {/* File Context (OCR) Files */}
        <FileRow
          files={files}
          setFiles={setFiles}
          setFilesLoading={setFilesLoading}
          agent_id={agent_id}
          tool_resource={EToolResources.ocr}
          Wrapper={({ children }) => <div className="flex flex-wrap gap-2">{children}</div>}
        />
        <div>
          <button
            type="button"
            disabled={!agent_id}
            className="btn btn-neutral border-token-border-light relative h-9 w-full rounded-lg font-medium"
            onClick={handleButtonClick}
          >
            <div className="flex w-full items-center justify-center gap-1">
              <AttachmentIcon className="text-token-text-primary h-4 w-4" />
              <input
                multiple={true}
                type="file"
                style={{ display: 'none' }}
                tabIndex={-1}
                ref={fileInputRef}
                disabled={!agent_id}
                onChange={handleFileChange}
              />
              {localize('com_ui_upload_file_context')}
            </div>
          </button>
        </div>
        {/* Disabled Message */}
        {agent_id ? null : (
          <div className="text-xs text-text-secondary">
            {localize('com_agents_file_context_disabled')}
          </div>
        )}
      </div>
    </div>
  );
}

@@ -1,21 +1,23 @@
import { ArrowUpDown } from 'lucide-react';
import type { ColumnDef } from '@tanstack/react-table';
import type { TFile } from 'librechat-data-provider';
+ import useLocalize from '~/hooks/useLocalize';
import PanelFileCell from './PanelFileCell';
import { Button } from '~/components/ui';
import { formatDate } from '~/utils';

- export const columns: ColumnDef<TFile>[] = [
+ export const columns: ColumnDef<TFile | undefined>[] = [
  {
    accessorKey: 'filename',
    header: ({ column }) => {
+     const localize = useLocalize();
      return (
        <Button
          variant="ghost"
          className="hover:bg-surface-hover"
          onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
        >
-         Name
+         {localize('com_ui_name')}
          <ArrowUpDown className="ml-2 h-4 w-4" />
        </Button>
      );
@@ -31,20 +33,21 @@ export const columns: ColumnDef<TFile>[] = [
      size: '10%',
    },
    header: ({ column }) => {
+     const localize = useLocalize();
      return (
        <Button
          variant="ghost"
          className="hover:bg-surface-hover"
          onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
        >
-         Date
+         {localize('com_ui_date')}
          <ArrowUpDown className="ml-2 h-4 w-4" />
        </Button>
      );
    },
    cell: ({ row }) => (
      <span className="flex justify-end text-xs">
-       {formatDate(row.original.updatedAt?.toString() ?? '')}
+       {formatDate(row.original?.updatedAt?.toString() ?? '')}
      </span>
    ),
  },

@@ -6,7 +6,6 @@ import { getFileType } from '~/utils';

export default function PanelFileCell({ row }: { row: Row<TFile | undefined> }) {
  const file = row.original;

  return (
    <div className="flex w-full items-center gap-2">
      {file?.type.startsWith('image') === true ? (

@@ -159,6 +159,7 @@ export default function DataTable<TData, TValue>({ columns, data }: DataTablePro
        filename: fileData.filename,
        source: fileData.source,
        size: fileData.bytes,
+       metadata: fileData.metadata,
      });
    },
    [addFile, fileMap, conversation, localize, showToast, fileConfig.endpoints],

@@ -63,8 +63,9 @@ export const useUploadFileMutation = (

      const update = {};
      const prevResources = agent.tool_resources ?? {};
-     const prevResource: t.ExecuteCodeResource | t.AgentFileSearchResource = agent
-       .tool_resources?.[tool_resource] ?? {
+     const prevResource: t.ExecuteCodeResource | t.AgentFileResource = agent.tool_resources?.[
+       tool_resource
+     ] ?? {
        file_ids: [],
      };
      if (!prevResource.file_ids) {

@@ -11,6 +11,9 @@
  "com_agents_create_error": "There was an error creating your agent.",
  "com_agents_description_placeholder": "Optional: Describe your Agent here",
  "com_agents_enable_file_search": "Enable File Search",
+ "com_agents_file_context": "File Context (OCR)",
+ "com_agents_file_context_disabled": "Agent must be created before uploading files for File Context.",
+ "com_agents_file_context_info": "Files uploaded as \"Context\" are processed using OCR to extract text, which is then added to the Agent's instructions. Ideal for documents, images with text, or PDFs where you need the full text content of a file",
  "com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.",
  "com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.",
  "com_agents_instructions_placeholder": "The system instructions that the agent uses",
@@ -811,10 +814,12 @@
  "com_ui_upload_code_files": "Upload for Code Interpreter",
  "com_ui_upload_delay": "Uploading \"{{0}}\" is taking more time than anticipated. Please wait while the file finishes indexing for retrieval.",
  "com_ui_upload_error": "There was an error uploading your file",
+ "com_ui_upload_file_context": "Upload File Context",
  "com_ui_upload_file_search": "Upload for File Search",
  "com_ui_upload_files": "Upload files",
  "com_ui_upload_image": "Upload an image",
  "com_ui_upload_image_input": "Upload Image",
+ "com_ui_upload_ocr_text": "Upload as Text",
  "com_ui_upload_invalid": "Invalid file for upload. Must be an image not exceeding the limit",
  "com_ui_upload_invalid_var": "Invalid file for upload. Must be an image not exceeding {{0}} MB",
  "com_ui_upload_success": "Successfully uploaded file",

@@ -58,6 +58,9 @@ export const processAgentOption = ({
    label: _agent?.name ?? '',
    value: _agent?.id ?? '',
    icon: isGlobal ? <EarthIcon className="icon-md text-green-400" /> : null,
+   context_files: _agent?.tool_resources?.ocr?.file_ids
+     ? ([] as Array<[string, ExtendedFile]>)
+     : undefined,
    knowledge_files: _agent?.tool_resources?.file_search?.file_ids
      ? ([] as Array<[string, ExtendedFile]>)
      : undefined,
@@ -83,7 +86,7 @@ export const processAgentOption = ({
    const source =
      tool_resource === EToolResources.file_search
        ? FileSources.vectordb
-       : file?.source ?? FileSources.local;
+       : (file?.source ?? FileSources.local);

    if (file) {
      list?.push([
@@ -97,6 +100,7 @@ export const processAgentOption = ({
          height: file.height,
          size: file.bytes,
          preview: file.filepath,
+         metadata: file.metadata,
          progress: 1,
          source,
        },
@@ -117,6 +121,16 @@ export const processAgentOption = ({
    }
  };

+ if (agent.context_files && _agent?.tool_resources?.ocr?.file_ids) {
+   _agent.tool_resources.ocr.file_ids.forEach((file_id) =>
+     handleFile({
+       file_id,
+       list: agent.context_files,
+       tool_resource: EToolResources.ocr,
+     }),
+   );
+ }
+
  if (agent.knowledge_files && _agent?.tool_resources?.file_search?.file_ids) {
    _agent.tool_resources.file_search.file_ids.forEach((file_id) =>
      handleFile({

package-lock.json (generated, 2 lines changed)

@@ -41014,7 +41014,7 @@
    },
    "packages/data-provider": {
      "name": "librechat-data-provider",
-     "version": "0.7.7",
+     "version": "0.7.71",
      "license": "ISC",
      "dependencies": {
        "axios": "^1.8.2",

@@ -1,6 +1,6 @@
{
  "name": "librechat-data-provider",
- "version": "0.7.7",
+ "version": "0.7.71",
  "description": "data services for librechat apps",
  "main": "dist/index.js",
  "module": "dist/index.es.js",

@@ -168,6 +168,7 @@ export enum AgentCapabilities {
  artifacts = 'artifacts',
  actions = 'actions',
  tools = 'tools',
+ ocr = 'ocr',
}

export const defaultAssistantsVersion = {
@@ -242,6 +243,7 @@ export const agentsEndpointSChema = baseEndpointSchema.merge(
      AgentCapabilities.artifacts,
      AgentCapabilities.actions,
      AgentCapabilities.tools,
+     AgentCapabilities.ocr,
    ]),
  }),
);
@@ -534,9 +536,22 @@ export type TStartupConfig = {
  bundlerURL?: string;
};

+ export enum OCRStrategy {
+   MISTRAL_OCR = 'mistral_ocr',
+   CUSTOM_OCR = 'custom_ocr',
+ }
+
+ export const ocrSchema = z.object({
+   mistralModel: z.string().optional(),
+   apiKey: z.string().optional().default('OCR_API_KEY'),
+   baseURL: z.string().optional().default('OCR_BASEURL'),
+   strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR),
+ });
+
export const configSchema = z.object({
  version: z.string(),
  cache: z.boolean().default(true),
+ ocr: ocrSchema.optional(),
  secureImageLinks: z.boolean().optional(),
  imageOutputType: z.nativeEnum(EImageOutputType).default(EImageOutputType.PNG),
  includedTools: z.array(z.string()).optional(),
@@ -1175,7 +1190,7 @@ export enum Constants {
  /** Key for the app's version. */
  VERSION = 'v0.7.7',
  /** Key for the Custom Config's version (librechat.yaml). */
- CONFIG_VERSION = '1.2.1',
+ CONFIG_VERSION = '1.2.2',
  /** Standard value for the first message's `parentMessageId` value, to indicate no parent exists. */
  NO_PARENT = '00000000-0000-0000-0000-000000000000',
  /** Standard value for the initial conversationId before a request is sent */

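
A small illustration of how the new schema's defaults behave (a sketch, not part of the diff; the import path assumes a sibling module, and note the defaults are environment-variable names, resolved to real values only when OCR is actually performed):

    import { ocrSchema, OCRStrategy } from './config';

    const parsed = ocrSchema.parse({});
    // => { apiKey: 'OCR_API_KEY', baseURL: 'OCR_BASEURL', strategy: OCRStrategy.MISTRAL_OCR }
    // mistralModel stays undefined until it is set in librechat.yaml.
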
@@ -7,6 +7,7 @@ export * from './file-config';
export * from './artifacts';
/* schema helpers */
export * from './parsers';
+ export * from './ocr';
export * from './zod';
/* custom/dynamic configurations */
export * from './generate';

packages/data-provider/src/ocr.ts (new file, 14 lines)

@@ -0,0 +1,14 @@
import type { TCustomConfig } from '../src/config';
import { OCRStrategy } from '../src/config';

export function loadOCRConfig(config: TCustomConfig['ocr']): TCustomConfig['ocr'] {
  const baseURL = config?.baseURL ?? '';
  const apiKey = config?.apiKey ?? '';
  const mistralModel = config?.mistralModel ?? '';
  return {
    apiKey,
    baseURL,
    mistralModel,
    strategy: config?.strategy ?? OCRStrategy.MISTRAL_OCR,
  };
}

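
For orientation, a sketch of what this normalizer returns (values illustrative; environment-style references are passed through untouched, to be resolved later when OCR actually runs):

    import { loadOCRConfig } from './ocr';
    import { OCRStrategy } from './config';

    // No ocr section configured:
    loadOCRConfig(undefined);
    // => { apiKey: '', baseURL: '', mistralModel: '', strategy: OCRStrategy.MISTRAL_OCR }

    // Section present, with env references kept verbatim:
    loadOCRConfig({ apiKey: '${OCR_API_KEY}', baseURL: '${OCR_BASEURL}', strategy: OCRStrategy.MISTRAL_OCR });
    // => { apiKey: '${OCR_API_KEY}', baseURL: '${OCR_BASEURL}', mistralModel: '', strategy: OCRStrategy.MISTRAL_OCR }
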
@@ -1152,7 +1152,6 @@ export const compactAgentsSchema = tConversationSchema
    iconURL: true,
    greeting: true,
    agent_id: true,
-   resendFiles: true,
    instructions: true,
    additional_instructions: true,
  })

@@ -27,6 +27,7 @@ export enum EToolResources {
  code_interpreter = 'code_interpreter',
  execute_code = 'execute_code',
  file_search = 'file_search',
+ ocr = 'ocr',
}

export type Tool = {
@@ -163,7 +164,8 @@ export type AgentModelParameters = {

export interface AgentToolResources {
  execute_code?: ExecuteCodeResource;
- file_search?: AgentFileSearchResource;
+ file_search?: AgentFileResource;
+ ocr?: Omit<AgentFileResource, 'vector_store_ids'>;
}
export interface ExecuteCodeResource {
  /**
@@ -177,7 +179,7 @@ export interface ExecuteCodeResource {
  files?: Array<TFile>;
}

- export interface AgentFileSearchResource {
+ export interface AgentFileResource {
  /**
   * The ID of the vector store attached to this agent. There
   * can be a maximum of 1 vector store attached to the agent.

@@ -8,6 +8,8 @@ export enum FileSources {
  s3 = 's3',
  vectordb = 'vectordb',
  execute_code = 'execute_code',
+ mistral_ocr = 'mistral_ocr',
+ text = 'text',
}

export const checkOpenAIStorage = (source: string) =>

@@ -8,6 +8,7 @@ export interface IMongoFile extends Document {
  file_id: string;
  temp_file_id?: string;
  bytes: number;
+ text?: string;
  filename: string;
  filepath: string;
  object: 'file';
@@ -72,6 +73,9 @@ const file: Schema<IMongoFile> = new Schema(
      type: String,
      required: true,
    },
+   text: {
+     type: String,
+   },
    context: {
      type: String,
    },