🧹 chore: Clean up base64 Handling for Azure Mistral OCR (#7892)

* 🧹 chore: Remove Comments and Clean up base64 handling for Azure Mistral OCR

* chore: Remove unnecessary await from MCP instructions formatting in AgentClient

* ci: Update document_url regex in MistralOCR tests to support PDF format
This commit is contained in:
Danny Avila 2025-06-13 18:17:25 -04:00 committed by GitHub
parent 5eb0703f78
commit 0103b4b08a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 5 additions and 13 deletions

View file

@ -330,7 +330,7 @@ class AgentClient extends BaseClient {
if (mcpServers.length > 0) { if (mcpServers.length > 0) {
try { try {
const mcpInstructions = await getMCPManager().formatInstructionsForContext(mcpServers); const mcpInstructions = getMCPManager().formatInstructionsForContext(mcpServers);
if (mcpInstructions) { if (mcpInstructions) {
systemContent = [systemContent, mcpInstructions].filter(Boolean).join('\n\n'); systemContent = [systemContent, mcpInstructions].filter(Boolean).join('\n\n');
logger.debug('[AgentClient] Injected MCP instructions for servers:', mcpServers); logger.debug('[AgentClient] Injected MCP instructions for servers:', mcpServers);

View file

@ -1421,7 +1421,7 @@ describe('MistralOCR Service', () => {
expect.objectContaining({ expect.objectContaining({
document: expect.objectContaining({ document: expect.objectContaining({
type: 'document_url', type: 'document_url',
document_url: expect.stringMatching(/^data:image\/jpeg;base64,/), document_url: expect.stringMatching(/^data:application\/pdf;base64,/),
}), }),
}), }),
expect.any(Object), expect.any(Object),

View file

@ -208,7 +208,6 @@ async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
const apiKeyConfig = ocrConfig?.apiKey || ''; const apiKeyConfig = ocrConfig?.apiKey || '';
const baseURLConfig = ocrConfig?.baseURL || ''; const baseURLConfig = ocrConfig?.baseURL || '';
// If both are hardcoded, return them directly
if (!needsEnvLoad(apiKeyConfig) && !needsEnvLoad(baseURLConfig)) { if (!needsEnvLoad(apiKeyConfig) && !needsEnvLoad(baseURLConfig)) {
return { return {
apiKey: apiKeyConfig, apiKey: apiKeyConfig,
@ -216,7 +215,6 @@ async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
}; };
} }
// Build auth fields array
const authFields: string[] = []; const authFields: string[] = [];
if (needsEnvLoad(baseURLConfig)) { if (needsEnvLoad(baseURLConfig)) {
@ -227,14 +225,12 @@ async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
authFields.push(getEnvVarName(apiKeyConfig, 'OCR_API_KEY')); authFields.push(getEnvVarName(apiKeyConfig, 'OCR_API_KEY'));
} }
// Load auth values
const authValues = await context.loadAuthValues({ const authValues = await context.loadAuthValues({
userId: context.req.user?.id || '', userId: context.req.user?.id || '',
authFields, authFields,
optional: new Set(['OCR_BASEURL']), optional: new Set(['OCR_BASEURL']),
}); });
// Resolve each value
const apiKey = await resolveConfigValue(apiKeyConfig, 'OCR_API_KEY', authValues); const apiKey = await resolveConfigValue(apiKeyConfig, 'OCR_API_KEY', authValues);
const baseURL = await resolveConfigValue( const baseURL = await resolveConfigValue(
baseURLConfig, baseURLConfig,
@ -335,7 +331,6 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
const { apiKey, baseURL } = await loadAuthConfig(context); const { apiKey, baseURL } = await loadAuthConfig(context);
const model = getModelConfig(context.req.app.locals?.ocr); const model = getModelConfig(context.req.app.locals?.ocr);
// Upload file
const mistralFile = await uploadDocumentToMistral({ const mistralFile = await uploadDocumentToMistral({
filePath: context.file.path, filePath: context.file.path,
fileName: context.file.originalname, fileName: context.file.originalname,
@ -343,14 +338,12 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
baseURL, baseURL,
}); });
// Get signed URL
const signedUrlResponse = await getSignedUrl({ const signedUrlResponse = await getSignedUrl({
apiKey, apiKey,
baseURL, baseURL,
fileId: mistralFile.id, fileId: mistralFile.id,
}); });
// Perform OCR
const documentType = getDocumentType(context.file); const documentType = getDocumentType(context.file);
const ocrResult = await performOCR({ const ocrResult = await performOCR({
apiKey, apiKey,
@ -394,21 +387,20 @@ export const uploadAzureMistralOCR = async (
const { apiKey, baseURL } = await loadAuthConfig(context); const { apiKey, baseURL } = await loadAuthConfig(context);
const model = getModelConfig(context.req.app.locals?.ocr); const model = getModelConfig(context.req.app.locals?.ocr);
// Read file as base64
const buffer = fs.readFileSync(context.file.path); const buffer = fs.readFileSync(context.file.path);
const base64 = buffer.toString('base64'); const base64 = buffer.toString('base64');
/** Uses actual mimetype of the file, 'image/jpeg' as fallback since it seems to be accepted regardless of mismatch */
const base64Prefix = `data:${context.file.mimetype || 'image/jpeg'};base64,`;
// Perform OCR directly with base64
const documentType = getDocumentType(context.file); const documentType = getDocumentType(context.file);
const ocrResult = await performOCR({ const ocrResult = await performOCR({
apiKey, apiKey,
baseURL, baseURL,
model, model,
url: `data:image/jpeg;base64,${base64}`, url: `${base64Prefix}${base64}`,
documentType, documentType,
}); });
// Process result
const { text, images } = processOCRResult(ocrResult); const { text, images } = processOCRResult(ocrResult);
return { return {