feat: add region configuration for Mistral OCR on Vertex AI

Add separate region configuration for Mistral OCR to resolve conflicts with
Gemini 3 models that require the global endpoint.

Changes:
- Add `region` field to OCR schema in data-provider
- Add `MISTRAL_VERTEX_REGION` environment variable support
- Add `getRegionConfig()` helper function
- Update `performGoogleVertexOCR()` to accept region parameter
- Add region resolution logic with priority: config > MISTRAL_VERTEX_REGION > GOOGLE_LOC > default (`us-central1`)
- Maintain full backward compatibility

This follows the pattern established by Anthropic Vertex AI configuration.
This commit is contained in:
devanchohan 2026-01-28 16:50:04 +07:00
parent 0b4deac953
commit 03814337a4
2 changed files with 24 additions and 1 deletions

View file

@ -317,6 +317,23 @@ function getModelConfig(ocrConfig?: TCustomConfig['ocr']): string {
return modelConfig.trim();
}
/**
 * Resolves the Vertex AI region from the OCR section of the custom config.
 *
 * The configured value may be either a literal region or an environment
 * variable placeholder (anything matched by `envVarRegex`), which is resolved
 * through `extractEnvVariable`.
 *
 * @param ocrConfig - The `ocr` section of the custom config, if present.
 * @returns The resolved region with surrounding whitespace removed, or
 *   `undefined` when no usable region is configured, so the caller can apply
 *   its own fallback chain.
 */
function getRegionConfig(ocrConfig?: TCustomConfig['ocr']): string | undefined {
  // Trim once up front so the placeholder check and the returned value are
  // based on the same string (a padded placeholder would otherwise slip
  // through the check and be returned verbatim).
  const regionConfig = (ocrConfig?.region ?? '').trim();
  if (!regionConfig) {
    return undefined;
  }

  if (!envVarRegex.test(regionConfig)) {
    return regionConfig;
  }

  const resolved = extractEnvVariable(regionConfig)?.trim();
  // NOTE(review): extractEnvVariable is defined elsewhere; presumably it can
  // hand back the placeholder unchanged when the variable is unset — confirm.
  // A value that is empty or still looks like a placeholder is never a valid
  // region, so report "not configured" instead of passing it downstream.
  if (!resolved || envVarRegex.test(resolved)) {
    return undefined;
  }
  return resolved;
}
/**
* Determines document type based on file
*/
@ -606,15 +623,18 @@ async function performGoogleVertexOCR({
accessToken,
projectId,
model,
region,
documentType = 'document_url',
}: {
url: string;
accessToken: string;
projectId: string;
model: string;
region?: string;
documentType?: 'document_url' | 'image_url';
}): Promise<OCRResult> {
const location = process.env.GOOGLE_LOC || 'us-central1';
// Priority: function parameter > MISTRAL_VERTEX_REGION > GOOGLE_LOC > default
const location = region || process.env.MISTRAL_VERTEX_REGION || process.env.GOOGLE_LOC || 'us-central1';
const modelId = model || 'mistral-ocr-2505';
let baseURL: string;
@ -693,6 +713,7 @@ export const uploadGoogleVertexMistralOCR = async (
try {
const { serviceAccount, accessToken } = await loadGoogleAuthConfig();
const model = getModelConfig(context.req.config?.ocr);
const region = getRegionConfig(context.req.config?.ocr);
const { content: buffer } = await readFileAsBuffer(context.file.path, {
fileSize: context.file.size,
@ -706,6 +727,7 @@ export const uploadGoogleVertexMistralOCR = async (
accessToken,
projectId: serviceAccount.project_id!,
model,
region,
documentType,
});

View file

@ -882,6 +882,7 @@ export const ocrSchema = z.object({
mistralModel: z.string().optional(),
apiKey: z.string().optional().default('${OCR_API_KEY}'),
baseURL: z.string().optional().default('${OCR_BASEURL}'),
region: z.string().optional().default('${MISTRAL_VERTEX_REGION}'),
strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR),
});