LibreChat/api/app/clients/OllamaClient.js

169 lines
4.6 KiB
JavaScript
Raw Permalink Normal View History

const { z } = require('zod');
const axios = require('axios');
const { Ollama } = require('ollama');
👁️ feat: Azure Mistral OCR Strategy (#7888) * 👁️ feat: Add Azure Mistral OCR strategy and endpoint integration This commit introduces a new OCR strategy named 'azure_mistral_ocr', allowing the use of a Mistral OCR endpoint deployed on Azure. The configuration, schemas, and file upload strategies have been updated to support this integration, enabling seamless OCR processing via Azure-hosted Mistral services. * 🗑️ chore: Clean up .gitignore by removing commented-out uncommon directory name * chore: remove unused vars * refactor: Move createAxiosInstance to packages/api/utils and update imports - Removed the createAxiosInstance function from the config module and relocated it to a new utils module for better organization. - Updated import paths in relevant files to reflect the new location of createAxiosInstance. - Added tests for createAxiosInstance to ensure proper functionality and proxy configuration handling. * chore: move axios helpers to packages/api - Added logAxiosError function to @librechat/api for centralized error logging. - Updated imports across various files to use the new logAxiosError function. - Removed the old axios.js utility file as it is no longer needed. * chore: Update Jest moduleNameMapper for improved path resolution - Added a new mapping for '~/' to resolve module paths in Jest configuration, enhancing import handling for the project. * feat: Implement Mistral OCR API integration in TS * chore: Update MistralOCR tests based on new imports * fix: Enhance MistralOCR configuration handling and tests - Introduced helper functions for resolving configuration values from environment variables or hardcoded settings. - Updated the uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration resolution logic. - Improved test cases to ensure correct behavior when mixing environment variables and hardcoded values. - Mocked file upload and signed URL responses in tests to validate functionality without external dependencies. * feat: Enhance MistralOCR functionality with improved configuration and error handling - Introduced helper functions for loading authentication configuration and resolving values from environment variables. - Updated uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration logic. - Added utility functions for processing OCR results and creating error messages. - Improved document type determination and result aggregation for better OCR processing. * refactor: Reorganize OCR type imports in Mistral CRUD file - Moved OCRResult, OCRResultPage, and OCRImage imports to a more logical grouping for better readability and maintainability. * feat: Add file exports to API and create files index * chore: Update OCR types for enhanced structure and clarity - Redesigned OCRImage interface to include mandatory fields and improved naming conventions. - Added PageDimensions interface for better representation of page metrics. - Updated OCRResultPage to include dimensions and mandatory images array. - Refined OCRResult to include document annotation and usage information. * refactor: use TS counterpart of uploadOCR methods * ci: Update MistralOCR tests to reflect new OCR result structure * chore: Bump version of @librechat/api to 1.2.3 in package.json and package-lock.json * chore: Update CONFIG_VERSION to 1.2.8 * chore: remove unused sendEvent function from config module (now imported from '@librechat/api') * chore: remove MistralOCR service files and tests (now in '@librechat/api') * ci: update logger import in ModelService tests to use @librechat/data-schemas --------- Co-authored-by: arthurolivierfortin <arthurolivier.fortin@gmail.com>
2025-06-13 15:14:57 -04:00
const { sleep } = require('@librechat/agents');
const { resolveHeaders } = require('@librechat/api');
👁️ feat: Azure Mistral OCR Strategy (#7888) * 👁️ feat: Add Azure Mistral OCR strategy and endpoint integration This commit introduces a new OCR strategy named 'azure_mistral_ocr', allowing the use of a Mistral OCR endpoint deployed on Azure. The configuration, schemas, and file upload strategies have been updated to support this integration, enabling seamless OCR processing via Azure-hosted Mistral services. * 🗑️ chore: Clean up .gitignore by removing commented-out uncommon directory name * chore: remove unused vars * refactor: Move createAxiosInstance to packages/api/utils and update imports - Removed the createAxiosInstance function from the config module and relocated it to a new utils module for better organization. - Updated import paths in relevant files to reflect the new location of createAxiosInstance. - Added tests for createAxiosInstance to ensure proper functionality and proxy configuration handling. * chore: move axios helpers to packages/api - Added logAxiosError function to @librechat/api for centralized error logging. - Updated imports across various files to use the new logAxiosError function. - Removed the old axios.js utility file as it is no longer needed. * chore: Update Jest moduleNameMapper for improved path resolution - Added a new mapping for '~/' to resolve module paths in Jest configuration, enhancing import handling for the project. * feat: Implement Mistral OCR API integration in TS * chore: Update MistralOCR tests based on new imports * fix: Enhance MistralOCR configuration handling and tests - Introduced helper functions for resolving configuration values from environment variables or hardcoded settings. - Updated the uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration resolution logic. - Improved test cases to ensure correct behavior when mixing environment variables and hardcoded values. - Mocked file upload and signed URL responses in tests to validate functionality without external dependencies. * feat: Enhance MistralOCR functionality with improved configuration and error handling - Introduced helper functions for loading authentication configuration and resolving values from environment variables. - Updated uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration logic. - Added utility functions for processing OCR results and creating error messages. - Improved document type determination and result aggregation for better OCR processing. * refactor: Reorganize OCR type imports in Mistral CRUD file - Moved OCRResult, OCRResultPage, and OCRImage imports to a more logical grouping for better readability and maintainability. * feat: Add file exports to API and create files index * chore: Update OCR types for enhanced structure and clarity - Redesigned OCRImage interface to include mandatory fields and improved naming conventions. - Added PageDimensions interface for better representation of page metrics. - Updated OCRResultPage to include dimensions and mandatory images array. - Refined OCRResult to include document annotation and usage information. * refactor: use TS counterpart of uploadOCR methods * ci: Update MistralOCR tests to reflect new OCR result structure * chore: Bump version of @librechat/api to 1.2.3 in package.json and package-lock.json * chore: Update CONFIG_VERSION to 1.2.8 * chore: remove unused sendEvent function from config module (now imported from '@librechat/api') * chore: remove MistralOCR service files and tests (now in '@librechat/api') * ci: update logger import in ModelService tests to use @librechat/data-schemas --------- Co-authored-by: arthurolivierfortin <arthurolivier.fortin@gmail.com>
2025-06-13 15:14:57 -04:00
const { logger } = require('@librechat/data-schemas');
const { Constants } = require('librechat-data-provider');
👁️ feat: Azure Mistral OCR Strategy (#7888) * 👁️ feat: Add Azure Mistral OCR strategy and endpoint integration This commit introduces a new OCR strategy named 'azure_mistral_ocr', allowing the use of a Mistral OCR endpoint deployed on Azure. The configuration, schemas, and file upload strategies have been updated to support this integration, enabling seamless OCR processing via Azure-hosted Mistral services. * 🗑️ chore: Clean up .gitignore by removing commented-out uncommon directory name * chore: remove unused vars * refactor: Move createAxiosInstance to packages/api/utils and update imports - Removed the createAxiosInstance function from the config module and relocated it to a new utils module for better organization. - Updated import paths in relevant files to reflect the new location of createAxiosInstance. - Added tests for createAxiosInstance to ensure proper functionality and proxy configuration handling. * chore: move axios helpers to packages/api - Added logAxiosError function to @librechat/api for centralized error logging. - Updated imports across various files to use the new logAxiosError function. - Removed the old axios.js utility file as it is no longer needed. * chore: Update Jest moduleNameMapper for improved path resolution - Added a new mapping for '~/' to resolve module paths in Jest configuration, enhancing import handling for the project. * feat: Implement Mistral OCR API integration in TS * chore: Update MistralOCR tests based on new imports * fix: Enhance MistralOCR configuration handling and tests - Introduced helper functions for resolving configuration values from environment variables or hardcoded settings. - Updated the uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration resolution logic. - Improved test cases to ensure correct behavior when mixing environment variables and hardcoded values. - Mocked file upload and signed URL responses in tests to validate functionality without external dependencies. * feat: Enhance MistralOCR functionality with improved configuration and error handling - Introduced helper functions for loading authentication configuration and resolving values from environment variables. - Updated uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration logic. - Added utility functions for processing OCR results and creating error messages. - Improved document type determination and result aggregation for better OCR processing. * refactor: Reorganize OCR type imports in Mistral CRUD file - Moved OCRResult, OCRResultPage, and OCRImage imports to a more logical grouping for better readability and maintainability. * feat: Add file exports to API and create files index * chore: Update OCR types for enhanced structure and clarity - Redesigned OCRImage interface to include mandatory fields and improved naming conventions. - Added PageDimensions interface for better representation of page metrics. - Updated OCRResultPage to include dimensions and mandatory images array. - Refined OCRResult to include document annotation and usage information. * refactor: use TS counterpart of uploadOCR methods * ci: Update MistralOCR tests to reflect new OCR result structure * chore: Bump version of @librechat/api to 1.2.3 in package.json and package-lock.json * chore: Update CONFIG_VERSION to 1.2.8 * chore: remove unused sendEvent function from config module (now imported from '@librechat/api') * chore: remove MistralOCR service files and tests (now in '@librechat/api') * ci: update logger import in ModelService tests to use @librechat/data-schemas --------- Co-authored-by: arthurolivierfortin <arthurolivier.fortin@gmail.com>
2025-06-13 15:14:57 -04:00
const { deriveBaseURL } = require('~/utils');
const ollamaPayloadSchema = z.object({
mirostat: z.number().optional(),
mirostat_eta: z.number().optional(),
mirostat_tau: z.number().optional(),
num_ctx: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
temperature: z.number().optional(),
seed: z.number().nullable().optional(),
stop: z.array(z.string()).optional(),
tfs_z: z.number().optional(),
num_predict: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
stream: z.optional(z.boolean()),
model: z.string(),
});
/**
* @param {string} imageUrl
* @returns {string}
* @throws {Error}
*/
const getValidBase64 = (imageUrl) => {
const parts = imageUrl.split(';base64,');
if (parts.length === 2) {
return parts[1];
} else {
logger.error('Invalid or no Base64 string found in URL.');
}
};
class OllamaClient {
constructor(options = {}) {
const host = deriveBaseURL(options.baseURL ?? 'http://localhost:11434');
this.streamRate = options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
this.headers = options.headers ?? {};
/** @type {Ollama} */
this.client = new Ollama({ host });
}
/**
* Fetches Ollama models from the specified base API path.
* @param {string} baseURL
* @param {Object} [options] - Optional configuration
* @param {Partial<IUser>} [options.user] - User object for header resolution
* @param {Record<string, string>} [options.headers] - Headers to include in the request
* @returns {Promise<string[]>} The Ollama models.
* @throws {Error} Throws if the Ollama API request fails
*/
static async fetchModels(baseURL, options = {}) {
if (!baseURL) {
return [];
}
const ollamaEndpoint = deriveBaseURL(baseURL);
const resolvedHeaders = resolveHeaders({
headers: options.headers,
user: options.user,
});
/** @type {Promise<AxiosResponse<OllamaListResponse>>} */
const response = await axios.get(`${ollamaEndpoint}/api/tags`, {
headers: resolvedHeaders,
timeout: 5000,
});
const models = response.data.models.map((tag) => tag.name);
return models;
}
/**
* @param {ChatCompletionMessage[]} messages
* @returns {OllamaMessage[]}
*/
static formatOpenAIMessages(messages) {
const ollamaMessages = [];
for (const message of messages) {
if (typeof message.content === 'string') {
ollamaMessages.push({
role: message.role,
content: message.content,
});
continue;
}
let aggregatedText = '';
let imageUrls = [];
for (const content of message.content) {
if (content.type === 'text') {
aggregatedText += content.text + ' ';
} else if (content.type === 'image_url') {
imageUrls.push(getValidBase64(content.image_url.url));
}
}
const ollamaMessage = {
role: message.role,
content: aggregatedText.trim(),
};
if (imageUrls.length > 0) {
ollamaMessage.images = imageUrls;
}
ollamaMessages.push(ollamaMessage);
}
return ollamaMessages;
}
/***
* @param {Object} params
* @param {ChatCompletionPayload} params.payload
* @param {onTokenProgress} params.onProgress
* @param {AbortController} params.abortController
*/
async chatCompletion({ payload, onProgress, abortController = null }) {
let intermediateReply = '';
const parameters = ollamaPayloadSchema.parse(payload);
const messages = OllamaClient.formatOpenAIMessages(payload.messages);
if (parameters.stream) {
const stream = await this.client.chat({
messages,
...parameters,
});
for await (const chunk of stream) {
const token = chunk.message.content;
intermediateReply += token;
onProgress(token);
if (abortController.signal.aborted) {
stream.controller.abort();
break;
}
await sleep(this.streamRate);
}
}
// TODO: regular completion
else {
// const generation = await this.client.generate(payload);
}
return intermediateReply;
}
catch(err) {
logger.error('[OllamaClient.chatCompletion]', err);
throw err;
}
}
module.exports = { OllamaClient, ollamaPayloadSchema };