Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-17 08:50:15 +01:00)
🛠️ refactor: Model Loading and Custom Endpoint Error Handling (#1849)
* fix: handle non-assistant role ChatCompletionMessage error
* refactor(ModelController): decouple res.send from loading/caching models
* fix(custom/initializeClient): only fetch custom endpoint models if models.fetch is true
* refactor(validateModel): load models if modelsConfig is not yet cached
* docs: update on file upload rate limiting
This commit is contained in:
parent 542494fad6
commit dd8038b375
7 changed files with 47 additions and 10 deletions
api/app/clients/OpenAIClient.js

```diff
@@ -994,6 +994,7 @@ ${convo}
     }

+    let chatCompletion;
     /** @type {OpenAI} */
     const openai = new OpenAI({
       apiKey: this.apiKey,
       ...opts,
```
```diff
@@ -1046,6 +1047,16 @@ ${convo}
       .on('error', (err) => {
         handleOpenAIErrors(err, errorCallback, 'stream');
       })
+      .on('finalChatCompletion', (finalChatCompletion) => {
+        const finalMessage = finalChatCompletion?.choices?.[0]?.message;
+        if (finalMessage && finalMessage?.role !== 'assistant') {
+          finalChatCompletion.choices[0].message.role = 'assistant';
+        }
+
+        if (finalMessage && !finalMessage?.content?.trim()) {
+          finalChatCompletion.choices[0].message.content = intermediateReply;
+        }
+      })
       .on('finalMessage', (message) => {
         if (message?.role !== 'assistant') {
           stream.messages.push({ role: 'assistant', content: intermediateReply });
```
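For context, these handlers hang off the `openai` Node SDK's beta streaming helper. Below is a minimal, self-contained sketch of the same normalization pattern; `streamWithRoleGuard` and `payload` are illustrative names, and this is a sketch of the technique rather than LibreChat's actual code:

```js
const OpenAI = require('openai');

async function streamWithRoleGuard(apiKey, payload) {
  const openai = new OpenAI({ apiKey });
  let intermediateReply = ''; // our own running copy of the streamed text

  const stream = openai.beta.chat.completions.stream(payload);
  stream
    .on('content', (delta) => {
      intermediateReply += delta; // accumulate tokens as a fallback body
    })
    .on('finalChatCompletion', (finalChatCompletion) => {
      const finalMessage = finalChatCompletion?.choices?.[0]?.message;
      // Some OpenAI-compatible backends return a final message with a
      // missing or non-assistant role, or an empty body; normalize it
      // before downstream consumers read it.
      if (finalMessage && finalMessage.role !== 'assistant') {
        finalChatCompletion.choices[0].message.role = 'assistant';
      }
      if (finalMessage && !finalMessage.content?.trim()) {
        finalChatCompletion.choices[0].message.content = intermediateReply;
      }
    })
    .on('error', (err) => {
      console.error('stream error:', err.message);
    });

  return stream.finalChatCompletion();
}
```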
```diff
@@ -1117,6 +1128,9 @@ ${convo}
         err?.message?.includes(
           'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
         ) ||
+        err?.message?.includes(
+          'stream ended without producing a ChatCompletionMessage with role=assistant',
+        ) ||
         err?.message?.includes('The server had an error processing your request') ||
         err?.message?.includes('missing finish_reason') ||
         err?.message?.includes('missing role') ||
```
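The chain of `includes` checks above is effectively a predicate over error messages the client treats as recoverable. A possible extraction of that predicate, purely illustrative (the constant and helper names are not part of the commit):

```js
// Hypothetical helper: true if the error text matches a known recoverable
// stream failure; the surrounding client code decides how to fall back.
const RECOVERABLE_STREAM_ERRORS = [
  'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
  'stream ended without producing a ChatCompletionMessage with role=assistant',
  'The server had an error processing your request',
  'missing finish_reason',
  'missing role',
];

function isRecoverableStreamError(err) {
  return RECOVERABLE_STREAM_ERRORS.some((text) => err?.message?.includes(text));
}
```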
api/server/controllers/ModelController.js
```diff
@@ -2,12 +2,16 @@ const { CacheKeys } = require('librechat-data-provider');
 const { loadDefaultModels, loadConfigModels } = require('~/server/services/Config');
 const { getLogStores } = require('~/cache');

-async function modelController(req, res) {
+/**
+ * Loads the models from the config.
+ * @param {Express.Request} req - The Express request object.
+ * @returns {Promise<TModelsConfig>} The models config.
+ */
+async function loadModels(req) {
   const cache = getLogStores(CacheKeys.CONFIG_STORE);
   const cachedModelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
   if (cachedModelsConfig) {
-    res.send(cachedModelsConfig);
-    return;
+    return cachedModelsConfig;
   }
   const defaultModelsConfig = await loadDefaultModels(req);
   const customModelsConfig = await loadConfigModels(req);
@@ -15,7 +19,12 @@ async function modelController(req, res) {
   const modelConfig = { ...defaultModelsConfig, ...customModelsConfig };

   await cache.set(CacheKeys.MODELS_CONFIG, modelConfig);
+  return modelConfig;
+}
+
+async function modelController(req, res) {
+  const modelConfig = await loadModels(req);
   res.send(modelConfig);
 }

-module.exports = modelController;
+module.exports = { modelController, loadModels };
```
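The refactor is a standard cache-aside split: a loader that only returns data, and a thin controller that owns the HTTP response. A generic sketch of the shape, with the cache and model fetch stubbed (not LibreChat's real implementations):

```js
const cache = new Map();

// Stand-in for loadDefaultModels/loadConfigModels.
async function fetchModelsConfig() {
  return { openAI: ['gpt-4', 'gpt-3.5-turbo'] };
}

// Loader: returns the config and manages the cache; it never touches `res`.
async function loadModels(req) {
  const cached = cache.get('MODELS_CONFIG');
  if (cached) {
    return cached;
  }
  const modelConfig = await fetchModelsConfig(req);
  cache.set('MODELS_CONFIG', modelConfig);
  return modelConfig;
}

// Controller: the only piece that knows about Express.
async function modelController(req, res) {
  res.send(await loadModels(req));
}
```

Because the loader never touches `res`, other callers, such as the `validateModel` middleware below, can reuse it outside a response cycle.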
api/server/middleware/validateModel.js
```diff
@@ -1,4 +1,5 @@
 const { EModelEndpoint, CacheKeys, ViolationTypes } = require('librechat-data-provider');
+const { loadModels } = require('~/server/controllers/ModelController');
 const { logViolation, getLogStores } = require('~/cache');
 const { handleError } = require('~/server/utils');

@@ -17,7 +18,11 @@ const validateModel = async (req, res, next) => {
   }

   const cache = getLogStores(CacheKeys.CONFIG_STORE);
-  const modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+  let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+
+  if (!modelsConfig) {
+    modelsConfig = await loadModels(req);
+  }

   if (!modelsConfig) {
     return handleError(res, { text: 'Models not loaded' });
   }
```
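With that import in place, the middleware can self-heal on a cold cache instead of rejecting the request. An illustrative version, reusing the stubbed `cache` and `loadModels` from the sketch above:

```js
// Guard order: ensure the models config exists (loading it on a cache miss),
// and fail the request only if it still cannot be built.
async function validateModel(req, res, next) {
  let modelsConfig = cache.get('MODELS_CONFIG');
  if (!modelsConfig) {
    modelsConfig = await loadModels(req); // cold cache: build and cache it now
  }
  if (!modelsConfig) {
    return res.status(500).send({ text: 'Models not loaded' });
  }
  // ...the model-membership check against modelsConfig follows here
  next();
}
```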
api/server/routes/models.js
```diff
@@ -1,8 +1,8 @@
 const express = require('express');
-const router = express.Router();
-const controller = require('../controllers/ModelController');
-const { requireJwtAuth } = require('../middleware/');
+const { modelController } = require('~/server/controllers/ModelController');
+const { requireJwtAuth } = require('~/server/middleware/');

-router.get('/', requireJwtAuth, controller);
+const router = express.Router();
+router.get('/', requireJwtAuth, modelController);

 module.exports = router;
```
api/server/services/Endpoints/custom/initializeClient.js
```diff
@@ -41,7 +41,7 @@ const initializeClient = async ({ req, res, endpointOption }) => {

   const cache = getLogStores(CacheKeys.TOKEN_CONFIG);
   let endpointTokenConfig = await cache.get(endpoint);
-  if (!endpointTokenConfig) {
+  if (endpointConfig && endpointConfig.models.fetch && !endpointTokenConfig) {
     await fetchModels({ apiKey: CUSTOM_API_KEY, baseURL: CUSTOM_BASE_URL, name: endpoint });
     endpointTokenConfig = await cache.get(endpoint);
   }
```
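The re-read after `fetchModels` works because the models service populates the cache as a side effect, and the new `models.fetch` guard means custom endpoints that pin a static model list in `librechat.yaml` skip the network call entirely. A stubbed sketch of that flow (function and parameter names here are illustrative, not LibreChat's exact API):

```js
// Stub standing in for the real models service, which fetches the endpoint's
// model list and caches the derived per-model token config as a side effect.
async function fetchModels({ apiKey, baseURL, name }) {
  /* ...fetch `${baseURL}/models` and populate the cache... */
}

async function getEndpointTokenConfig({ endpoint, endpointConfig, cache, apiKey, baseURL }) {
  let endpointTokenConfig = await cache.get(endpoint);
  // Only fetch when the endpoint opts in via `models.fetch` and nothing is cached.
  if (endpointConfig && endpointConfig.models.fetch && !endpointTokenConfig) {
    await fetchModels({ apiKey, baseURL, name: endpoint });
    endpointTokenConfig = await cache.get(endpoint); // populated by fetchModels
  }
  return endpointTokenConfig;
}
```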
api/typedefs.js
```diff
@@ -32,6 +32,12 @@
  * @memberof typedefs
  */

+/**
+ * @exports TModelsConfig
+ * @typedef {import('librechat-data-provider').TModelsConfig} TModelsConfig
+ * @memberof typedefs
+ */
+
 /**
  * @exports TPlugin
  * @typedef {import('librechat-data-provider').TPlugin} TPlugin
```
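This is the usual JSDoc pattern for importing a package's TypeScript type into plain JavaScript. A generic illustration, assuming `TModelsConfig` is roughly a map from endpoint name to model list (the helper below is hypothetical):

```js
/**
 * @typedef {import('librechat-data-provider').TModelsConfig} TModelsConfig
 */

/**
 * Counts models across all endpoints (illustrative helper, not in the commit).
 * @param {TModelsConfig} modelsConfig
 * @returns {number}
 */
function countModels(modelsConfig) {
  return Object.values(modelsConfig).reduce((sum, models) => sum + (models?.length ?? 0), 0);
}
```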
docs/features/mod_system.md
````diff
@@ -31,11 +31,14 @@ The project's current rate limiters are as follows (see below under setup for details):
 - Login and registration rate limiting
 - [optional] Concurrent Message limiting (only X messages at a time per user)
 - [optional] Message limiting (how often a user can send a message, configurable by IP and User)
+- [optional] File Upload limiting: configurable through [`librechat.yaml` config file](https://docs.librechat.ai/install/configuration/custom_config.html#rate-limiting).

 ### Setup

 The following are all of the related env variables to make use of and configure the mod system. Note this is also found in the [/.env.example](https://github.com/danny-avila/LibreChat/blob/main/.env.example) file, to be set in your own `.env` file.

+**Note:** currently, most of these values are configured through the .env file, but they may soon migrate to be exclusively configured from the [`librechat.yaml` config file](https://docs.librechat.ai/install/configuration/custom_config.html#rate-limiting).
+
 ```bash
 BAN_VIOLATIONS=true # Whether or not to enable banning users for violations (they will still be logged)
 BAN_DURATION=1000 * 60 * 60 * 2 # how long the user and associated IP are banned for
````
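A small aside on the example values: `BAN_DURATION` is a duration in milliseconds written as a product:

```js
// 1000 ms * 60 s * 60 min * 2 = 7,200,000 ms, i.e. a 2-hour ban
const BAN_DURATION = 1000 * 60 * 60 * 2;
```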