📝 fix: Mistral OCR Image Support and Azure Agent Titles (#6901)

* fix: azure title model

* refactor: typing for uploadMistralOCR

* fix: update conversation ID handling in useSSE for better state management, only use PENDING_CONVO for new conversations

* fix: streamline conversation ID handling in useSSE for simplicity, only needs state update to prevent draft from applying

* fix: update performOCR and tests to support document and image URLs with appropriate types
This commit is contained in:
Danny Avila 2025-04-15 18:03:56 -04:00 committed by GitHub
parent 650e9b4f6c
commit d32f34e5d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 185 additions and 15 deletions

View file

@ -69,16 +69,20 @@ async function getSignedUrl({
/**
* @param {Object} params
* @param {string} params.apiKey
* @param {string} params.documentUrl
* @param {string} params.url - The document or image URL
* @param {string} [params.documentType='document_url'] - 'document_url' or 'image_url'
* @param {string} [params.model]
* @param {string} [params.baseURL]
* @returns {Promise<OCRResult>}
*/
async function performOCR({
apiKey,
documentUrl,
url,
documentType = 'document_url',
model = 'mistral-ocr-latest',
baseURL = 'https://api.mistral.ai/v1',
}) {
const documentKey = documentType === 'image_url' ? 'image_url' : 'document_url';
return axios
.post(
`${baseURL}/ocr`,
@ -86,8 +90,8 @@ async function performOCR({
model,
include_image_base64: false,
document: {
type: 'document_url',
document_url: documentUrl,
type: documentType,
[documentKey]: url,
},
},
{
@ -109,6 +113,19 @@ function extractVariableName(str) {
return match ? match[1] : null;
}
/**
* Uploads a file to the Mistral OCR API and processes the OCR result.
*
* @param {Object} params - The params object.
* @param {ServerRequest} params.req - The request object from Express. It should have a `user` property with an `id`
* representing the user
* @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
* have a `mimetype` property that tells us the file type
* @param {string} params.file_id - The file ID.
* @param {string} [params.entity_id] - The entity ID, not used here but passed for consistency.
* @returns {Promise<{ filepath: string, bytes: number }>} - The result object containing the processed `text` and `images` (not currently used),
* along with the `filename` and `bytes` properties.
*/
const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
try {
/** @type {TCustomConfig['ocr']} */
@ -160,11 +177,18 @@ const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
fileId: mistralFile.id,
});
const mimetype = (file.mimetype || '').toLowerCase();
const originalname = file.originalname || '';
const isImage =
mimetype.startsWith('image') || /\.(png|jpe?g|gif|bmp|webp|tiff?)$/i.test(originalname);
const documentType = isImage ? 'image_url' : 'document_url';
const ocrResult = await performOCR({
apiKey,
baseURL,
model,
documentUrl: signedUrlResponse.url,
url: signedUrlResponse.url,
documentType,
});
let aggregatedText = '';