🛠️ refactor: Model Loading and Custom Endpoint Error Handling (#1849)

* fix: handle non-assistant role ChatCompletionMessage error

* refactor(ModelController): decouple res.send from loading/caching models

* fix(custom/initializeClient): only fetch custom endpoint models if models.fetch is true

* refactor(validateModel): load models if modelsConfig is not yet cached

* docs: update on file upload rate limiting
Danny Avila authored 2024-02-20 12:57:58 -05:00, committed by GitHub
parent 542494fad6
commit dd8038b375
7 changed files with 47 additions and 10 deletions


```diff
@@ -994,6 +994,7 @@ ${convo}
     }
 
     let chatCompletion;
+    /** @type {OpenAI} */
     const openai = new OpenAI({
       apiKey: this.apiKey,
       ...opts,
@@ -1046,6 +1047,16 @@ ${convo}
         .on('error', (err) => {
           handleOpenAIErrors(err, errorCallback, 'stream');
         })
+        .on('finalChatCompletion', (finalChatCompletion) => {
+          const finalMessage = finalChatCompletion?.choices?.[0]?.message;
+          if (finalMessage && finalMessage?.role !== 'assistant') {
+            finalChatCompletion.choices[0].message.role = 'assistant';
+          }
+          if (finalMessage && !finalMessage?.content?.trim()) {
+            finalChatCompletion.choices[0].message.content = intermediateReply;
+          }
+        })
         .on('finalMessage', (message) => {
           if (message?.role !== 'assistant') {
             stream.messages.push({ role: 'assistant', content: intermediateReply });
@@ -1117,6 +1128,9 @@ ${convo}
       err?.message?.includes(
         'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
       ) ||
+      err?.message?.includes(
+        'stream ended without producing a ChatCompletionMessage with role=assistant',
+      ) ||
      err?.message?.includes('The server had an error processing your request') ||
      err?.message?.includes('missing finish_reason') ||
      err?.message?.includes('missing role') ||
```
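For context, the new handlers hang off the openai Node SDK's beta streaming helper. Below is a minimal standalone sketch of that event flow, assuming openai-node v4's `beta.chat.completions.stream()` API; the model name, prompt, and the `intermediateReply` accumulator are illustrative, not the client's actual wiring:

```js
// Minimal sketch of the streaming event flow the patch hooks into
// (openai-node v4 beta helper); model and prompt are placeholders.
const OpenAI = require('openai');

async function main() {
  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  let intermediateReply = '';

  const stream = openai.beta.chat.completions.stream({
    model: 'gpt-3.5-turbo',
    messages: [{ role: 'user', content: 'Say hello' }],
  });

  stream
    .on('content', (delta) => {
      // Accumulate partial tokens so a usable reply exists even when the
      // provider never emits a well-formed final message.
      intermediateReply += delta;
    })
    .on('finalChatCompletion', (finalChatCompletion) => {
      const finalMessage = finalChatCompletion?.choices?.[0]?.message;
      // Some OpenAI-compatible providers return a final message whose role
      // is not 'assistant', or whose content is empty; normalize both
      // before downstream code consumes the completion.
      if (finalMessage && finalMessage.role !== 'assistant') {
        finalChatCompletion.choices[0].message.role = 'assistant';
      }
      if (finalMessage && !finalMessage.content?.trim()) {
        finalChatCompletion.choices[0].message.content = intermediateReply;
      }
    })
    .on('error', (err) => {
      console.error('stream error:', err.message);
    });

  const completion = await stream.finalChatCompletion();
  console.log(completion.choices[0].message.content);
}

main();
```

The added error-message check in the retry condition covers the case where the provider emits no assistant message at all, so the SDK aborts the stream before any final-message normalization can apply.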


```diff
@@ -2,12 +2,16 @@ const { CacheKeys } = require('librechat-data-provider');
 const { loadDefaultModels, loadConfigModels } = require('~/server/services/Config');
 const { getLogStores } = require('~/cache');
 
-async function modelController(req, res) {
+/**
+ * Loads the models from the config.
+ * @param {Express.Request} req - The Express request object.
+ * @returns {Promise<TModelsConfig>} The models config.
+ */
+async function loadModels(req) {
   const cache = getLogStores(CacheKeys.CONFIG_STORE);
   const cachedModelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
   if (cachedModelsConfig) {
-    res.send(cachedModelsConfig);
-    return;
+    return cachedModelsConfig;
   }
   const defaultModelsConfig = await loadDefaultModels(req);
   const customModelsConfig = await loadConfigModels(req);
@@ -15,7 +19,12 @@ async function modelController(req, res) {
   const modelConfig = { ...defaultModelsConfig, ...customModelsConfig };
 
   await cache.set(CacheKeys.MODELS_CONFIG, modelConfig);
+  return modelConfig;
+}
+
+async function modelController(req, res) {
+  const modelConfig = await loadModels(req);
   res.send(modelConfig);
 }
 
-module.exports = modelController;
+module.exports = { modelController, loadModels };
```
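With loading decoupled from `res.send`, any server-side module can reuse the same load-and-cache path without an HTTP response cycle. A hypothetical consumer (the middleware name is illustrative; the `validateModel` change below is the real one in this commit):

```js
// Hypothetical consumer of the extracted loader; the middleware name is
// made up, but the import path matches the new export above.
const { loadModels } = require('~/server/controllers/ModelController');

async function attachModelsConfig(req, res, next) {
  // Returns the cached config when present, otherwise loads and caches it.
  req.modelsConfig = await loadModels(req);
  next();
}
```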


```diff
@@ -1,4 +1,5 @@
 const { EModelEndpoint, CacheKeys, ViolationTypes } = require('librechat-data-provider');
+const { loadModels } = require('~/server/controllers/ModelController');
 const { logViolation, getLogStores } = require('~/cache');
 const { handleError } = require('~/server/utils');
 
@@ -17,7 +18,11 @@ const validateModel = async (req, res, next) => {
   }
 
   const cache = getLogStores(CacheKeys.CONFIG_STORE);
-  const modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+  let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+  if (!modelsConfig) {
+    modelsConfig = await loadModels(req);
+  }
+
   if (!modelsConfig) {
     return handleError(res, { text: 'Models not loaded' });
   }
```
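Condensed, the middleware's lookup order is now: cache first, then a fresh load, and only then an error. A hypothetical helper capturing that order, using the same imports the diffs above show:

```js
// Hypothetical condensation of the middleware's new lookup order:
// 1) try the cache, 2) load and cache on a miss, 3) error only if both fail.
const { CacheKeys } = require('librechat-data-provider');
const { loadModels } = require('~/server/controllers/ModelController');
const { getLogStores } = require('~/cache');

async function getModelsConfig(req) {
  const cache = getLogStores(CacheKeys.CONFIG_STORE);
  let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
  if (!modelsConfig) {
    modelsConfig = await loadModels(req); // also primes the cache for later requests
  }
  return modelsConfig; // falsy only if loading itself produced nothing
}
```

The practical effect: a request arriving before the models cache is warm (e.g., right after a server restart) no longer fails with "Models not loaded".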


```diff
@@ -1,8 +1,8 @@
 const express = require('express');
-const router = express.Router();
-const controller = require('../controllers/ModelController');
-const { requireJwtAuth } = require('../middleware/');
+const { modelController } = require('~/server/controllers/ModelController');
+const { requireJwtAuth } = require('~/server/middleware/');
 
-router.get('/', requireJwtAuth, controller);
+const router = express.Router();
+router.get('/', requireJwtAuth, modelController);
 
 module.exports = router;
```


```diff
@@ -41,7 +41,7 @@ const initializeClient = async ({ req, res, endpointOption }) => {
 
   const cache = getLogStores(CacheKeys.TOKEN_CONFIG);
   let endpointTokenConfig = await cache.get(endpoint);
-  if (!endpointTokenConfig) {
+  if (endpointConfig && endpointConfig.models.fetch && !endpointTokenConfig) {
     await fetchModels({ apiKey: CUSTOM_API_KEY, baseURL: CUSTOM_BASE_URL, name: endpoint });
     endpointTokenConfig = await cache.get(endpoint);
   }
```
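The `models.fetch` flag that now gates the fetch comes from the custom endpoint's definition in `librechat.yaml`. Expressed as the parsed object the code would see, roughly; the endpoint name, URL, and model names are placeholders:

```js
// Illustrative shape of a parsed custom endpoint config; all values are
// placeholders. Only endpoints with models.fetch === true now trigger a
// fetchModels() call during client initialization.
const endpointConfig = {
  name: 'Mistral',
  apiKey: '${MISTRAL_API_KEY}',
  baseURL: 'https://api.mistral.ai/v1',
  models: {
    default: ['mistral-tiny', 'mistral-small'],
    fetch: true, // set to false to always use the static `default` list
  },
};
```

This avoids needless model-list requests against custom endpoints that were configured with a static model list only.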


```diff
@@ -32,6 +32,12 @@
  * @memberof typedefs
  */
 
+/**
+ * @exports TModelsConfig
+ * @typedef {import('librechat-data-provider').TModelsConfig} TModelsConfig
+ * @memberof typedefs
+ */
+
 /**
  * @exports TPlugin
  * @typedef {import('librechat-data-provider').TPlugin} TPlugin
```
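With the typedef registered, JSDoc elsewhere in the codebase can annotate values with it, as the new `loadModels` return type does. A small illustrative example; the function and its shape assumptions are hypothetical:

```js
/**
 * Illustrative only: annotate any models-config consumer with the new typedef.
 * @param {TModelsConfig} modelsConfig - Map of endpoint names to model lists.
 * @param {string} endpoint - The endpoint to look up.
 * @returns {string[]} The models available for that endpoint.
 */
function getEndpointModels(modelsConfig, endpoint) {
  return modelsConfig?.[endpoint] ?? [];
}
```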


````diff
@@ -31,11 +31,14 @@ The project's current rate limiters are as follows (see below under setup for de
 - Login and registration rate limiting
 - [optional] Concurrent Message limiting (only X messages at a time per user)
 - [optional] Message limiting (how often a user can send a message, configurable by IP and User)
+- [optional] File Upload limiting: configurable through [`librechat.yaml` config file](https://docs.librechat.ai/install/configuration/custom_config.html#rate-limiting).
 
 ### Setup
 
 The following are all of the related env variables to make use of and configure the mod system. Note this is also found in the [/.env.example](https://github.com/danny-avila/LibreChat/blob/main/.env.example) file, to be set in your own `.env` file.
 
+**Note:** currently, most of these values are configured through the .env file, but they may soon migrate to be exclusively configured from the [`librechat.yaml` config file](https://docs.librechat.ai/install/configuration/custom_config.html#rate-limiting).
+
 ```bash
 BAN_VIOLATIONS=true # Whether or not to enable banning users for violations (they will still be logged)
 BAN_DURATION=1000 * 60 * 60 * 2 # how long the user and associated IP are banned for
````