From 03814337a4ce0eac42c918ece73ddc1e2f961a55 Mon Sep 17 00:00:00 2001 From: devanchohan <87763613+devanchohan@users.noreply.github.com> Date: Wed, 28 Jan 2026 16:50:04 +0700 Subject: [PATCH 1/2] feat: add region configuration for Mistral OCR on Vertex AI Add separate region configuration for Mistral OCR to resolve conflicts with Gemini 3 models that require the global endpoint. Changes: - Add `region` field to OCR schema in data-provider - Add `MISTRAL_VERTEX_REGION` environment variable support - Add `getRegionConfig()` helper function - Update `performGoogleVertexOCR()` to accept an optional `region` parameter - Add region resolution logic with priority: `ocr.region` config > `MISTRAL_VERTEX_REGION` > `GOOGLE_LOC` > default `us-central1` - Maintain full backward compatibility This follows the pattern established by Anthropic Vertex AI configuration. --- packages/api/src/files/mistral/crud.ts | 24 +++++++++++++++++++++++- packages/data-provider/src/config.ts | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/api/src/files/mistral/crud.ts b/packages/api/src/files/mistral/crud.ts index fefe4a4675..3d124aa4c5 100644 --- a/packages/api/src/files/mistral/crud.ts +++ b/packages/api/src/files/mistral/crud.ts @@ -317,6 +317,23 @@ function getModelConfig(ocrConfig?: TCustomConfig['ocr']): string { return modelConfig.trim(); } +/** + * Gets the region configuration for Vertex AI + */ +function getRegionConfig(ocrConfig?: TCustomConfig['ocr']): string | undefined { + const regionConfig = ocrConfig?.region || ''; + + if (!regionConfig.trim()) { + return undefined; + } + + if (envVarRegex.test(regionConfig)) { + return extractEnvVariable(regionConfig) || undefined; + } + + return regionConfig.trim(); +} + /** * Determines document type based on file */ @@ -606,15 +623,18 @@ async function performGoogleVertexOCR({ accessToken, projectId, model, + region, documentType = 'document_url', }: { url: string; accessToken: string; projectId: string; model: string; + region?: string; documentType?: 'document_url' 
| 'image_url'; }): Promise { - const location = process.env.GOOGLE_LOC || 'us-central1'; + // Priority: function parameter > MISTRAL_VERTEX_REGION > GOOGLE_LOC > default + const location = region || process.env.MISTRAL_VERTEX_REGION || process.env.GOOGLE_LOC || 'us-central1'; const modelId = model || 'mistral-ocr-2505'; let baseURL: string; @@ -693,6 +713,7 @@ export const uploadGoogleVertexMistralOCR = async ( try { const { serviceAccount, accessToken } = await loadGoogleAuthConfig(); const model = getModelConfig(context.req.config?.ocr); + const region = getRegionConfig(context.req.config?.ocr); const { content: buffer } = await readFileAsBuffer(context.file.path, { fileSize: context.file.size, @@ -706,6 +727,7 @@ export const uploadGoogleVertexMistralOCR = async ( accessToken, projectId: serviceAccount.project_id!, model, + region, documentType, }); diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 45c964cbd8..2b68964630 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -882,6 +882,7 @@ export const ocrSchema = z.object({ mistralModel: z.string().optional(), apiKey: z.string().optional().default('${OCR_API_KEY}'), baseURL: z.string().optional().default('${OCR_BASEURL}'), + region: z.string().optional().default('${MISTRAL_VERTEX_REGION}'), strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR), }); From 2925597365ae322323f4252f1e785c0f3a142330 Mon Sep 17 00:00:00 2001 From: devanchohan <87763613+devanchohan@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:10:45 +0700 Subject: [PATCH 2/2] Update packages/data-provider/src/config.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/data-provider/src/config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 2b68964630..8df567020c 100644 --- a/packages/data-provider/src/config.ts 
+++ b/packages/data-provider/src/config.ts @@ -882,7 +882,7 @@ export const ocrSchema = z.object({ mistralModel: z.string().optional(), apiKey: z.string().optional().default('${OCR_API_KEY}'), baseURL: z.string().optional().default('${OCR_BASEURL}'), - region: z.string().optional().default('${MISTRAL_VERTEX_REGION}'), + region: z.string().optional(), strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR), });