2025-03-10 17:23:46 -04:00
|
|
|
// ~/server/services/Files/MistralOCR/crud.js
|
|
|
|
|
const fs = require('fs');
|
|
|
|
|
const path = require('path');
|
|
|
|
|
const FormData = require('form-data');
|
🔎 feat: Native Web Search with Citation References (#7516)
* WIP: search tool integration
* WIP: Add web search capabilities and API key management to agent actions
* WIP: web search capability to agent configuration and selection
* WIP: Add web search capability to backend agent configuration
* WIP: add web search option to default agent form values
* WIP: add attachments for web search
* feat: add plugin for processing web search citations
* WIP: first pass, Citation UI
* chore: remove console.log
* feat: Add AnimatedTabs component for tabbed UI functionality
* refactor: AnimatedTabs component with CSS animations and stable ID generation
* WIP example content
* feat: SearchContext for managing search results apart from MessageContext
* feat: Enhance AnimatedTabs with underline animation and state management
* WIP: first pass, Implement dynamic tab functionality in Sources component with search results integration
* fix: Update class names for improved styling in Sources and AnimatedTabs components
* feat: Improve styling and layout in Sources component with enhanced button and item designs
* feat: Refactor Sources component to integrate OGDialog for source display and improve layout
* style: Update background color in SourceItem and SourcesGroup components for improved visibility
* refactor: Sources component to enhance SourceItem structure and improve favicon handling
* style: Adjust font size of domain text in SourceItem for better readability
* feat: Add localization for citation source and details in CompositeCitation component
* style: add theming to Citation components
* feat: Enhance SourceItem component with dialog support and improved hovercard functionality
* feat: Add localization for sources tab and image alt text in Sources component
* style: Replace divs with spans for better semantic structure in CompositeCitation and Citation components
* refactor: Sources component to use useMemo for tab generation and improve performance
* chore: bump @librechat/agents to v2.4.318
* chore: update search result types
* fix: search results retrieval in ContentParts component, re-render attachments when expected
* feat: update sources style/types to use latest search result structure
* style: enhance Dialog (expanded) SourceItem component with link wrapping and improved styling
* style: update ImageItem component styling for improved title visibility
* refactor: remove SourceItemBase component and adjust SourceItem layout for improved styling
* chore: linting twcss order
* fix: prevent FileAttachment from rendering search attachments
* fix: append underscore to responseMessageId for unique identification to prevent mapping of previous latest message's attachments
* chore: remove unused parameter 'useSpecs' from loadTools function
* chore: twcss order
* WIP: WebSearch Tool UI
* refactor: add limit parameter to StackedFavicons for customizable source display
* refactor: optimize search results memoization by making more granular and separate concerns
* refactor: integrated StackedFavicons to WebSearch mid-run
* chore: bump @librechat/agents to expose handleToolCallChunks
* chore: use typedefs from dedicated file instead of defining them in AgentClient module
* WIP: first pass, search progress results
* refactor: move createOnSearchResults function to a dedicated search module
* chore: bump @librechat/agents to v2.4.320
* WIP: first pass, search results processed UX
* refactor: consolidate context variables in createOnSearchResults function
* chore: bump @librechat/agents to v2.4.321
* feat: add guidelines for web search tool response formatting in loadTools function
* feat: add isLast prop to Part component and update WebSearch logic for improved state handling
* style: update Hovercard styles for improved UI consistency
* feat: export FaviconImage component for improved accessibility in other modules
* refactor: export getCleanDomain function and use FaviconImage in Citation component for improved source representation
* refactor: implement SourceHovercard component for consistency and DRY compliance
* fix: replace <p> with <span> for snippet and title in SourceItem and SourceHovercard for consistency
* style: `not-prose`
* style: remove 'not-prose' class for consistency in SourceItem, Citation, and SourceHovercard components, adjust style classes
* refactor: `imageUrl` on hover and prevent duplicate sources
* refactor: enhance SourcesGroup dialog layout and improve source item presentation
* refactor: reorganize Web Components, save in same directory
* feat: add 'news' refType to refTypeMap for citation sources
* style: adjust Hovercard width for improved layout
* refactor: update tool usage guidelines for improved clarity and execution
* chore: linting
* feat: add Web Search badge with initial permissions and local storage logic
* feat: add webSearch support to interface and permissions schemas
* feat: implement Web Search API key management and localization updates
* feat: refactor Web Search API key handling and integrate new search API key form
* fix: remove unnecessary visibility state from FileAttachment component
* feat: update WebSearch component to use Globe icon and localized search label
* feat: enhance ApiKeyDialog with dropdown for reranker selection and update translations
* feat: implement dropdown menus for engine, scraper, and reranker selection in ApiKeyDialog
* chore: linting and add unknown instead of `any` type
* feat: refactor ApiKeyDialog and useAuthSearchTool for improved API key management
* refactor: update ocrSchema to use template literals for default apiKey and baseURL
* feat: add web search configuration and utility functions for environment variable extraction
* fix: ensure filepath is defined before checking its prefix in useAttachmentHandler
* feat: enhance web search functionality with improved configuration and environment variable extraction for authFields
* fix: update auth type in TPluginAction and TUpdateUserPlugins to use Partial<Record<string, string>>
* feat: implement web search authentication verification and enhance webSearchAuth structure
* feat: enhance ephemeral agent handling with new web search capability and type definition
* feat: enhance isEphemeralAgent function to include web search selection
* feat: refactor verifyWebSearchAuth to improve key handling and authentication checks
* feat: implement loadWebSearchAuth function for improved web search authentication handling
* feat: enhance web search authentication with new configuration options and refactor related types
* refactor: rename search engine to search provider and update related localization keys
* feat: update verifyWebSearchAuth to handle multiple authentication types and improve error handling
* feat: update ApiKeyDialog to accept authTypes prop and remove isUserProvided check
* feat: add tests for extractWebSearchEnvVars and loadWebSearchAuth functions
* feat: enhance loadWebSearchAuth to support specific service checks for providers, scrapers, and rerankers
* fix: update web search configuration key and adjust auth result handling in loadTools function
* feat: add new progress key for repeated web searching and update localization
* chore: bump @librechat/agents to 2.4.322
* feat: enhance loadTools function to include ISO time and improve search tool logging
* feat: update StackedFavicons to handle negative start index and improve citation attribution styling and text
* chore: update .gitignore to categorize AI-related files
* fix: mobile responsiveness of sources/citations hovercards
* feat: enhance source display with improved line clamping for better readability
* chore: bump @librechat/agents to v2.4.33
* feat: add handling for image sources in references mapping
* chore: bump librechat-data-provider version to 0.7.84
* chore: bump @librechat/agents version to 2.4.34
* fix: update auth handling to support multiple auth types in tools and allow key configuration in agent panel
* chore: remove redundant agent attribution text from search form
* fix: web search auth uninstall
* refactor: convert CheckboxButton to a forwardRef component and update setValue callback signature
* feat: add triggerRef prop to ApiKeyDialog components for improved dialog control
* feat: integrate triggerRef in CodeInterpreter and WebSearch components for enhanced dialog management
* feat: enhance ApiKeyDialog with additional links for Firecrawl and Jina API key guidance
* feat: implement web search configuration handling in ApiKeyDialog and add tests for dropdown visibility
* fix: update webSearchConfig reference in config route for correct payload assignment
* feat: update ApiKeyDialog to conditionally render sections based on authTypes and modify loadWebSearchAuth to correctly categorize authentication types
* feat: refactor ApiKeyDialog and related tests to use SearchCategories and RerankerTypes enums and remove nested ternaries
* refactor: move ThinkingButton rendering to improve layout consistency in ContentParts
* feat: integrate search context into Markdown component to conditionally include unicodeCitation plugin
* chore: bump @librechat/agents to v2.4.35
* chore: remove unused i18n key
* ci: add WEB_SEARCH permission testing and update AppService tests for new webSearch configuration
* ci: add more comprehensive tests for loadWebSearchAuth to validate authentication handling and authTypes structure
* chore: remove debugging console log from web.spec.ts to clean up test output
2025-05-23 00:14:04 -04:00
|
|
|
const {
|
|
|
|
|
FileSources,
|
|
|
|
|
envVarRegex,
|
|
|
|
|
extractEnvVariable,
|
|
|
|
|
extractVariableName,
|
|
|
|
|
} = require('librechat-data-provider');
|
2025-03-10 17:23:46 -04:00
|
|
|
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
|
|
|
|
const { logger, createAxiosInstance } = require('~/config');
|
2025-03-26 14:10:52 -04:00
|
|
|
const { logAxiosError } = require('~/utils/axios');
|
2025-03-10 17:23:46 -04:00
|
|
|
|
|
|
|
|
// Module-wide HTTP client, built by `createAxiosInstance` from `~/config`; the request helpers below
// (file upload, signed-URL lookup, OCR call) all send their requests through it.
const axios = createAxiosInstance();
|
|
|
|
|
|
|
|
|
|
/**
 * Uploads a document to Mistral API using file streaming to avoid loading the entire file into memory
 *
 * @param {Object} params Upload parameters
 * @param {string} params.filePath The path to the file on disk
 * @param {string} [params.fileName] Optional filename to use (defaults to the name from filePath)
 * @param {string} params.apiKey Mistral API key
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<Object>} The response body (`res.data`) of the `POST {baseURL}/files` request
 * @throws Rejects with the unmodified error from the HTTP request when the upload fails
 */
async function uploadDocumentToMistral({
  filePath,
  fileName = '',
  apiKey,
  baseURL = 'https://api.mistral.ai/v1',
}) {
  const form = new FormData();
  form.append('purpose', 'ocr');
  const actualFileName = fileName || path.basename(filePath);
  const fileStream = fs.createReadStream(filePath);
  form.append('file', fileStream, { filename: actualFileName });

  try {
    const res = await axios.post(`${baseURL}/files`, form, {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        // Multipart content type, including the boundary generated by the form.
        ...form.getHeaders(),
      },
      // Lift the client-side size caps so the streamed upload is not rejected for its length.
      maxBodyLength: Infinity,
      maxContentLength: Infinity,
    });
    return res.data;
  } finally {
    // If the request fails before the stream is fully read, the stream would otherwise stay
    // open and hold its file descriptor. Destroying an already-finished stream is a no-op.
    fileStream.destroy();
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Requests a signed URL for a file previously uploaded to the Mistral API.
 *
 * @param {Object} params
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} params.fileId ID of the uploaded file
 * @param {number} [params.expiry=24] Value sent as the `expiry` query parameter
 *   (NOTE(review): presumably hours — confirm against the Mistral API reference)
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<Object>} The response body (`res.data`); `uploadMistralOCR` reads its `url` property
 * @throws Logs the error message and rethrows the original error when the request fails
 */
async function getSignedUrl({
  apiKey,
  fileId,
  expiry = 24,
  baseURL = 'https://api.mistral.ai/v1',
}) {
  // Encode the ID so reserved characters (`/`, `?`, `#`, ...) cannot change the request path
  // or query. The URL is unchanged for IDs made only of unreserved characters.
  const endpoint = `${baseURL}/files/${encodeURIComponent(fileId)}/url?expiry=${expiry}`;

  try {
    const res = await axios.get(endpoint, {
      headers: {
        Authorization: `Bearer ${apiKey}`,
      },
    });
    return res.data;
  } catch (error) {
    logger.error('Error fetching signed URL:', error.message);
    throw error;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sends an OCR request for a document or image URL to the Mistral `/ocr` endpoint.
 *
 * @param {Object} params
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} params.url - The document or image URL
 * @param {string} [params.documentType='document_url'] - 'document_url' or 'image_url';
 *   any other value is treated as 'document_url'
 * @param {string} [params.model='mistral-ocr-latest'] OCR model name
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<OCRResult>} The response body (`res.data`) of the OCR request
 * @throws Logs the error message and rethrows the original error when the request fails
 */
async function performOCR({
  apiKey,
  url,
  documentType = 'document_url',
  model = 'mistral-ocr-latest',
  baseURL = 'https://api.mistral.ai/v1',
}) {
  // Normalize once and use the result for BOTH the `type` field and the URL key, so the two
  // can never disagree (e.g. `type: 'foo'` paired with a `document_url` key).
  const documentKey = documentType === 'image_url' ? 'image_url' : 'document_url';

  const payload = {
    model,
    // NOTE(review): presumably these two settings ask the API not to return extracted
    // images — confirm against the Mistral OCR API reference.
    image_limit: 0,
    include_image_base64: false,
    document: {
      type: documentKey,
      [documentKey]: url,
    },
  };

  try {
    const res = await axios.post(`${baseURL}/ocr`, payload, {
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
    });
    return res.data;
  } catch (error) {
    logger.error('Error performing OCR:', error.message);
    throw error;
  }
}
|
|
|
|
|
|
2025-04-15 18:03:56 -04:00
|
|
|
/**
 * Resolves the Mistral API key and base URL from the OCR configuration.
 *
 * Each setting is resolved on its own: a literal value in the config is used as-is, while a value
 * that is an environment-variable reference (matches `envVarRegex`) or is empty is looked up
 * through `loadAuthValues`. Empty settings fall back to the `OCR_API_KEY` / `OCR_BASEURL` names.
 *
 * @param {Object} params
 * @param {ServerRequest} params.req - Request whose `user.id` is passed to `loadAuthValues`
 * @param {TCustomConfig['ocr']} params.ocrConfig - The OCR section of the app config
 * @returns {Promise<{ apiKey: string, baseURL: string | undefined }>} The resolved credentials
 */
async function resolveMistralOCRAuth({ req, ocrConfig }) {
  const apiKeyConfig = ocrConfig.apiKey || '';
  const baseURLConfig = ocrConfig.baseURL || '';

  const isApiKeyEnvVar = envVarRegex.test(apiKeyConfig);
  const isBaseURLEnvVar = envVarRegex.test(baseURLConfig);

  const shouldLoadApiKey = isApiKeyEnvVar || !apiKeyConfig.trim();
  const shouldLoadBaseURL = isBaseURLEnvVar || !baseURLConfig.trim();

  if (!shouldLoadApiKey && !shouldLoadBaseURL) {
    return { apiKey: apiKeyConfig, baseURL: baseURLConfig };
  }

  const apiKeyVarName = isApiKeyEnvVar ? extractVariableName(apiKeyConfig) : 'OCR_API_KEY';
  const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL';

  // Only request the values that actually need resolving, so a literal setting is never
  // discarded just because the other setting is an env reference or empty.
  const authFields = [];
  if (shouldLoadBaseURL) {
    authFields.push(baseURLVarName);
  }
  if (shouldLoadApiKey) {
    authFields.push(apiKeyVarName);
  }

  const authValues = await loadAuthValues({
    userId: req.user.id,
    authFields,
    optional: new Set([baseURLVarName]),
  });

  return {
    apiKey: shouldLoadApiKey ? authValues[apiKeyVarName] : apiKeyConfig,
    // May be `undefined` when the optional base URL is not set; the request helpers then
    // apply their own default base URL parameter.
    baseURL: shouldLoadBaseURL ? authValues[baseURLVarName] : baseURLConfig,
  };
}

/**
 * Uploads a file to the Mistral OCR API and processes the OCR result.
 *
 * Steps: resolve credentials, upload the file, request a signed URL for it, run OCR on that URL
 * (as an image or a document, depending on the mimetype / file extension), then concatenate the
 * markdown of every page.
 *
 * @param {Object} params - The params object.
 * @param {ServerRequest} params.req - The request object from Express. It should have a `user` property with an `id`
 *                       representing the user
 * @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
 *                                     have a `mimetype` property that tells us the file type
 * @param {string} params.file_id - The file ID, not used here.
 * @param {string} [params.entity_id] - The entity ID, not used here but passed for consistency.
 * @returns {Promise<{ filename: string, bytes: number, filepath: string, text: string, images: string[] }>}
 *   `filename` is the original file name, `text` the aggregated page markdown, `images` any
 *   base64 images present in the OCR pages, `filepath` the `FileSources.mistral_ocr` marker and
 *   `bytes` a size estimate derived from the text length.
 * @throws {Error} With the message produced by `logAxiosError` (plus the API's `message` field,
 *   when present); the original error is attached as `cause`.
 */
const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
  try {
    /** @type {TCustomConfig['ocr']} */
    const ocrConfig = req.app.locals?.ocr ?? {};

    const { apiKey, baseURL } = await resolveMistralOCRAuth({ req, ocrConfig });

    const mistralFile = await uploadDocumentToMistral({
      filePath: file.path,
      fileName: file.originalname,
      apiKey,
      baseURL,
    });

    const modelConfig = ocrConfig.mistralModel || '';
    const model = envVarRegex.test(modelConfig)
      ? extractEnvVariable(modelConfig)
      : modelConfig.trim() || 'mistral-ocr-latest';

    const signedUrlResponse = await getSignedUrl({
      apiKey,
      baseURL,
      fileId: mistralFile.id,
    });

    // Treat the upload as an image when either the mimetype or the file extension says so.
    const mimetype = (file.mimetype || '').toLowerCase();
    const originalname = file.originalname || '';
    const isImage =
      mimetype.startsWith('image') || /\.(png|jpe?g|gif|bmp|webp|tiff?)$/i.test(originalname);
    const documentType = isImage ? 'image_url' : 'document_url';

    const ocrResult = await performOCR({
      apiKey,
      baseURL,
      model,
      url: signedUrlResponse.url,
      documentType,
    });

    let aggregatedText = '';
    const images = [];
    // Page headings are only added when there is more than one page.
    const isMultiPage = ocrResult.pages.length > 1;

    ocrResult.pages.forEach((page, index) => {
      if (isMultiPage) {
        aggregatedText += `# PAGE ${index + 1}\n`;
      }

      aggregatedText += page.markdown + '\n\n';

      if (page.images && page.images.length > 0) {
        page.images.forEach((image) => {
          if (image.image_base64) {
            images.push(image.image_base64);
          }
        });
      }
    });

    return {
      filename: file.originalname,
      // Estimate (4 bytes per UTF-16 code unit of the text), not a measured byte length.
      bytes: aggregatedText.length * 4,
      filepath: FileSources.mistral_ocr,
      text: aggregatedText,
      images,
    };
  } catch (error) {
    let message = 'Error uploading document to Mistral OCR API';
    const detail = error?.response?.data?.detail;
    if (detail) {
      // Keep `message` a string, like the default above, even when the API sends a
      // structured (array/object) `detail` payload.
      message = typeof detail === 'string' ? detail : JSON.stringify(detail);
    }

    const responseMessage = error?.response?.data?.message;
    const suffix = responseMessage ? ` - ${responseMessage}` : '';
    throw new Error(`${logAxiosError({ error, message })}${suffix}`, { cause: error });
  }
};
|
|
|
|
|
|
|
|
|
|
module.exports = {
|
|
|
|
|
uploadDocumentToMistral,
|
|
|
|
|
uploadMistralOCR,
|
|
|
|
|
getSignedUrl,
|
|
|
|
|
performOCR,
|
|
|
|
|
};
|