Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-17 08:50:15 +01:00)
🛠️ refactor: Model Loading and Custom Endpoint Error Handling (#1849)
* fix: handle non-assistant role ChatCompletionMessage error
* refactor(ModelController): decouple res.send from loading/caching models
* fix(custom/initializeClient): only fetch custom endpoint models if models.fetch is true
* refactor(validateModel): load models if modelsConfig is not yet cached
* docs: update on file upload rate limiting
This commit is contained in:
parent 542494fad6
commit dd8038b375
7 changed files with 47 additions and 10 deletions
api/app/clients/OpenAIClient.js

```diff
@@ -994,6 +994,7 @@ ${convo}
     }

+    let chatCompletion;
     /** @type {OpenAI} */
     const openai = new OpenAI({
       apiKey: this.apiKey,
       ...opts,
```
```diff
@@ -1046,6 +1047,16 @@ ${convo}
       .on('error', (err) => {
         handleOpenAIErrors(err, errorCallback, 'stream');
       })
+      .on('finalChatCompletion', (finalChatCompletion) => {
+        const finalMessage = finalChatCompletion?.choices?.[0]?.message;
+        if (finalMessage && finalMessage?.role !== 'assistant') {
+          finalChatCompletion.choices[0].message.role = 'assistant';
+        }
+
+        if (finalMessage && !finalMessage?.content?.trim()) {
+          finalChatCompletion.choices[0].message.content = intermediateReply;
+        }
+      })
       .on('finalMessage', (message) => {
         if (message?.role !== 'assistant') {
           stream.messages.push({ role: 'assistant', content: intermediateReply });
```
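For context, these handlers hang off the `openai` Node SDK's beta streaming helper. Below is a minimal, self-contained sketch of the same normalization pattern; `streamWithRoleGuard` and `payload` are illustrative names, and this is a sketch of the technique rather than LibreChat's actual code:

```js
const OpenAI = require('openai');

async function streamWithRoleGuard(apiKey, payload) {
  const openai = new OpenAI({ apiKey });
  let intermediateReply = ''; // our own running copy of the streamed text

  const stream = openai.beta.chat.completions.stream(payload);
  stream
    .on('content', (delta) => {
      intermediateReply += delta; // accumulate tokens as a fallback body
    })
    .on('finalChatCompletion', (finalChatCompletion) => {
      const finalMessage = finalChatCompletion?.choices?.[0]?.message;
      // Some OpenAI-compatible backends return a final message with a
      // missing or non-assistant role, or an empty body; normalize it
      // before downstream consumers read it.
      if (finalMessage && finalMessage.role !== 'assistant') {
        finalChatCompletion.choices[0].message.role = 'assistant';
      }
      if (finalMessage && !finalMessage.content?.trim()) {
        finalChatCompletion.choices[0].message.content = intermediateReply;
      }
    })
    .on('error', (err) => {
      console.error('stream error:', err.message);
    });

  return stream.finalChatCompletion();
}
```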
```diff
@@ -1117,6 +1128,9 @@ ${convo}
         err?.message?.includes(
           'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
         ) ||
+        err?.message?.includes(
+          'stream ended without producing a ChatCompletionMessage with role=assistant',
+        ) ||
         err?.message?.includes('The server had an error processing your request') ||
         err?.message?.includes('missing finish_reason') ||
         err?.message?.includes('missing role') ||
```
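The chain of `includes` checks above is effectively a predicate over error messages the client treats as recoverable. A possible extraction of that predicate, purely illustrative (the constant and helper names are not part of the commit):

```js
// Hypothetical helper: true if the error text matches a known recoverable
// stream failure; the surrounding client code decides how to fall back.
const RECOVERABLE_STREAM_ERRORS = [
  'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
  'stream ended without producing a ChatCompletionMessage with role=assistant',
  'The server had an error processing your request',
  'missing finish_reason',
  'missing role',
];

function isRecoverableStreamError(err) {
  return RECOVERABLE_STREAM_ERRORS.some((text) => err?.message?.includes(text));
}
```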
api/server/controllers/ModelController.js
```diff
@@ -2,12 +2,16 @@ const { CacheKeys } = require('librechat-data-provider');
 const { loadDefaultModels, loadConfigModels } = require('~/server/services/Config');
 const { getLogStores } = require('~/cache');

-async function modelController(req, res) {
+/**
+ * Loads the models from the config.
+ * @param {Express.Request} req - The Express request object.
+ * @returns {Promise<TModelsConfig>} The models config.
+ */
+async function loadModels(req) {
   const cache = getLogStores(CacheKeys.CONFIG_STORE);
   const cachedModelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
   if (cachedModelsConfig) {
-    res.send(cachedModelsConfig);
-    return;
+    return cachedModelsConfig;
   }
   const defaultModelsConfig = await loadDefaultModels(req);
   const customModelsConfig = await loadConfigModels(req);
@@ -15,7 +19,12 @@ async function modelController(req, res) {
   const modelConfig = { ...defaultModelsConfig, ...customModelsConfig };

   await cache.set(CacheKeys.MODELS_CONFIG, modelConfig);
+  return modelConfig;
+}
+
+async function modelController(req, res) {
+  const modelConfig = await loadModels(req);
   res.send(modelConfig);
 }

-module.exports = modelController;
+module.exports = { modelController, loadModels };
```
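The refactor is a standard cache-aside split: a loader that only returns data, and a thin controller that owns the HTTP response. A generic sketch of the shape, with the cache and model fetch stubbed (not LibreChat's real implementations):

```js
const cache = new Map();

// Stand-in for loadDefaultModels/loadConfigModels.
async function fetchModelsConfig() {
  return { openAI: ['gpt-4', 'gpt-3.5-turbo'] };
}

// Loader: returns the config and manages the cache; it never touches `res`.
async function loadModels(req) {
  const cached = cache.get('MODELS_CONFIG');
  if (cached) {
    return cached;
  }
  const modelConfig = await fetchModelsConfig(req);
  cache.set('MODELS_CONFIG', modelConfig);
  return modelConfig;
}

// Controller: the only piece that knows about Express.
async function modelController(req, res) {
  res.send(await loadModels(req));
}
```

Because the loader never touches `res`, other callers, such as the `validateModel` middleware below, can reuse it outside a response cycle.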
api/server/middleware/validateModel.js
```diff
@@ -1,4 +1,5 @@
 const { EModelEndpoint, CacheKeys, ViolationTypes } = require('librechat-data-provider');
+const { loadModels } = require('~/server/controllers/ModelController');
 const { logViolation, getLogStores } = require('~/cache');
 const { handleError } = require('~/server/utils');

@@ -17,7 +18,11 @@ const validateModel = async (req, res, next) => {
   }

   const cache = getLogStores(CacheKeys.CONFIG_STORE);
-  const modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+  let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+
+  if (!modelsConfig) {
+    modelsConfig = await loadModels(req);
+  }

   if (!modelsConfig) {
     return handleError(res, { text: 'Models not loaded' });
   }
```
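With that import in place, the middleware can self-heal on a cold cache instead of rejecting the request. An illustrative version, reusing the stubbed `cache` and `loadModels` from the sketch above:

```js
// Guard order: ensure the models config exists (loading it on a cache miss),
// and fail the request only if it still cannot be built.
async function validateModel(req, res, next) {
  let modelsConfig = cache.get('MODELS_CONFIG');
  if (!modelsConfig) {
    modelsConfig = await loadModels(req); // cold cache: build and cache it now
  }
  if (!modelsConfig) {
    return res.status(500).send({ text: 'Models not loaded' });
  }
  // ...the model-membership check against modelsConfig follows here
  next();
}
```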
api/server/routes/models.js
```diff
@@ -1,8 +1,8 @@
 const express = require('express');
-const router = express.Router();
-const controller = require('../controllers/ModelController');
-const { requireJwtAuth } = require('../middleware/');
+const { modelController } = require('~/server/controllers/ModelController');
+const { requireJwtAuth } = require('~/server/middleware/');

-router.get('/', requireJwtAuth, controller);
+const router = express.Router();
+router.get('/', requireJwtAuth, modelController);

 module.exports = router;
```
api/server/services/Endpoints/custom/initializeClient.js
```diff
@@ -41,7 +41,7 @@ const initializeClient = async ({ req, res, endpointOption }) => {

   const cache = getLogStores(CacheKeys.TOKEN_CONFIG);
   let endpointTokenConfig = await cache.get(endpoint);
-  if (!endpointTokenConfig) {
+  if (endpointConfig && endpointConfig.models.fetch && !endpointTokenConfig) {
     await fetchModels({ apiKey: CUSTOM_API_KEY, baseURL: CUSTOM_BASE_URL, name: endpoint });
     endpointTokenConfig = await cache.get(endpoint);
   }
```
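The re-read after `fetchModels` works because the models service populates the cache as a side effect, and the new `models.fetch` guard means custom endpoints that pin a static model list in `librechat.yaml` skip the network call entirely. A stubbed sketch of that flow (function and parameter names here are illustrative, not LibreChat's exact API):

```js
// Stub standing in for the real models service, which fetches the endpoint's
// model list and caches the derived per-model token config as a side effect.
async function fetchModels({ apiKey, baseURL, name }) {
  /* ...fetch `${baseURL}/models` and populate the cache... */
}

async function getEndpointTokenConfig({ endpoint, endpointConfig, cache, apiKey, baseURL }) {
  let endpointTokenConfig = await cache.get(endpoint);
  // Only fetch when the endpoint opts in via `models.fetch` and nothing is cached.
  if (endpointConfig && endpointConfig.models.fetch && !endpointTokenConfig) {
    await fetchModels({ apiKey, baseURL, name: endpoint });
    endpointTokenConfig = await cache.get(endpoint); // populated by fetchModels
  }
  return endpointTokenConfig;
}
```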
api/typedefs.js
```diff
@@ -32,6 +32,12 @@
  * @memberof typedefs
  */

+/**
+ * @exports TModelsConfig
+ * @typedef {import('librechat-data-provider').TModelsConfig} TModelsConfig
+ * @memberof typedefs
+ */
+
 /**
  * @exports TPlugin
  * @typedef {import('librechat-data-provider').TPlugin} TPlugin
```
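This is the usual JSDoc pattern for importing a package's TypeScript type into plain JavaScript. A generic illustration, assuming `TModelsConfig` is roughly a map from endpoint name to model list (the helper below is hypothetical):

```js
/**
 * @typedef {import('librechat-data-provider').TModelsConfig} TModelsConfig
 */

/**
 * Counts models across all endpoints (illustrative helper, not in the commit).
 * @param {TModelsConfig} modelsConfig
 * @returns {number}
 */
function countModels(modelsConfig) {
  return Object.values(modelsConfig).reduce((sum, models) => sum + (models?.length ?? 0), 0);
}
```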
docs/features/mod_system.md
````diff
@@ -31,11 +31,14 @@ The project's current rate limiters are as follows (see below under setup for details):
 - Login and registration rate limiting
 - [optional] Concurrent Message limiting (only X messages at a time per user)
 - [optional] Message limiting (how often a user can send a message, configurable by IP and User)
+- [optional] File Upload limiting: configurable through [`librechat.yaml` config file](https://docs.librechat.ai/install/configuration/custom_config.html#rate-limiting).

 ### Setup

 The following are all of the related env variables to make use of and configure the mod system. Note this is also found in the [/.env.example](https://github.com/danny-avila/LibreChat/blob/main/.env.example) file, to be set in your own `.env` file.

+**Note:** currently, most of these values are configured through the .env file, but they may soon migrate to be exclusively configured from the [`librechat.yaml` config file](https://docs.librechat.ai/install/configuration/custom_config.html#rate-limiting).
+
 ```bash
 BAN_VIOLATIONS=true # Whether or not to enable banning users for violations (they will still be logged)
 BAN_DURATION=1000 * 60 * 60 * 2 # how long the user and associated IP are banned for
````
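A small aside on the example values: `BAN_DURATION` is a duration in milliseconds written as a product:

```js
// 1000 ms * 60 s * 60 min * 2 = 7,200,000 ms, i.e. a 2-hour ban
const BAN_DURATION = 1000 * 60 * 60 * 2;
```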