🎉 feat: Optimizations and Anthropic Title Generation (#2184)

* feat: add claude-3-haiku-20240307 to default anthropic list

* refactor: optimize `saveMessage` calls mid-stream via throttling

* chore: remove addMetadata operations and consolidate in BaseClient

* fix(listAssistantsForAzure): map each assistant to its correct configured model as accurately as possible (#2177)

* refactor(client): update the last conversation setup with the current assistant model; call newConvo again once assistants load, so the initial load stays fast and the assistant model is always the default rather than the last selected model

* refactor(cache): explicitly add TTL of 2 minutes when setting titleCache and add default TTL of 10 minutes to abortKeys cache

* feat(AnthropicClient): conversation titling using Anthropic Function Calling

* chore: remove extraneous token usage logging

* fix(convos): unhandled edge case for conversation grouping (undefined conversation)

* style: improve Search Bar styling after the recent UI update

* chore: remove unused code, content part helpers

* feat: always show code option
Danny Avila, 2024-03-23 20:21:40 -04:00 (committed by GitHub)
parent 8e7816468d
commit 1f0fb497f8
31 changed files with 426 additions and 188 deletions

View file

@@ -1,3 +1,4 @@
+const throttle = require('lodash/throttle');
 const { getResponseSender, Constants } = require('librechat-data-provider');
 const { createAbortController, handleAbortError } = require('~/server/middleware');
 const { sendMessage, createOnProgress } = require('~/server/utils');
@@ -16,13 +17,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
   logger.debug('[AskController]', { text, conversationId, ...endpointOption });

-  let metadata;
   let userMessage;
   let promptTokens;
   let userMessageId;
   let responseMessageId;
-  let lastSavedTimestamp = 0;
-  let saveDelay = 100;
   const sender = getResponseSender({
     ...endpointOption,
     model: endpointOption.modelOptions.model,
@@ -31,8 +29,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
   const newConvo = !conversationId;
   const user = req.user.id;

-  const addMetadata = (data) => (metadata = data);
-
   const getReqData = (data = {}) => {
     for (let key in data) {
       if (key === 'userMessage') {
@@ -54,11 +50,8 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
     const { client } = await initializeClient({ req, res, endpointOption });

     const { onProgress: progressCallback, getPartialText } = createOnProgress({
-      onProgress: ({ text: partialText }) => {
-        const currentTimestamp = Date.now();
-        if (currentTimestamp - lastSavedTimestamp > saveDelay) {
-          lastSavedTimestamp = currentTimestamp;
+      onProgress: throttle(
+        ({ text: partialText }) => {
           saveMessage({
             messageId: responseMessageId,
             sender,
@@ -70,12 +63,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
             error: false,
             user,
           });
-        }
-        if (saveDelay < 500) {
-          saveDelay = 500;
-        }
-      },
+        },
+        3000,
+        { trailing: false },
+      ),
     });

     getText = getPartialText;
@@ -113,7 +104,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
       overrideParentMessageId,
       getReqData,
       onStart,
-      addMetadata,
       abortController,
       onProgress: progressCallback.call(null, {
         res,
@@ -128,10 +118,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
       response.parentMessageId = overrideParentMessageId;
     }

-    if (metadata) {
-      response = { ...response, ...metadata };
-    }
-
     response.endpoint = endpointOption.endpoint;
     const conversation = await getConvo(user, conversationId);

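For context on the change above: lodash's throttle with { trailing: false } invokes the wrapped function at most once per 3000 ms window, drops any calls made during the cooldown, and schedules no trailing call when the window closes, replacing the manual lastSavedTimestamp/saveDelay bookkeeping. The completed message is still persisted by the normal end-of-stream save. A minimal standalone sketch of these semantics (the save function below is a hypothetical stand-in, not the repo's model call):

const throttle = require('lodash/throttle');

// Hypothetical stand-in for the real saveMessage model call.
const save = (text) => console.log(`saved ${text.length} chars`);

// The leading call fires immediately; later calls within 3000 ms are dropped,
// and `trailing: false` prevents a deferred call when the window closes.
const throttledSave = throttle(save, 3000, { trailing: false });

// Simulate a token stream: only the first call in each 3 s window persists.
let text = '';
const timer = setInterval(() => {
  text += 'token ';
  throttledSave(text);
  if (text.length >= 120) {
    clearInterval(timer);
    save(text); // the final state is saved unconditionally, as in the controllers
  }
}, 100);

The gptPlugins routes later in this diff apply the same idea in a slightly different shape, creating a single throttledSaveMessage = throttle(saveMessage, 3000, { trailing: false }) up front and calling it from their onProgress handlers.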
View file

@@ -1,3 +1,4 @@
+const throttle = require('lodash/throttle');
 const { getResponseSender } = require('librechat-data-provider');
 const { createAbortController, handleAbortError } = require('~/server/middleware');
 const { sendMessage, createOnProgress } = require('~/server/utils');
@@ -25,11 +26,8 @@ const EditController = async (req, res, next, initializeClient) => {
     ...endpointOption,
   });

-  let metadata;
   let userMessage;
   let promptTokens;
-  let lastSavedTimestamp = 0;
-  let saveDelay = 100;
   const sender = getResponseSender({
     ...endpointOption,
     model: endpointOption.modelOptions.model,
@@ -38,7 +36,6 @@ const EditController = async (req, res, next, initializeClient) => {
   const userMessageId = parentMessageId;
   const user = req.user.id;

-  const addMetadata = (data) => (metadata = data);
   const getReqData = (data = {}) => {
     for (let key in data) {
       if (key === 'userMessage') {
@@ -53,11 +50,8 @@ const EditController = async (req, res, next, initializeClient) => {
     const { onProgress: progressCallback, getPartialText } = createOnProgress({
       generation,
-      onProgress: ({ text: partialText }) => {
-        const currentTimestamp = Date.now();
-        if (currentTimestamp - lastSavedTimestamp > saveDelay) {
-          lastSavedTimestamp = currentTimestamp;
+      onProgress: throttle(
+        ({ text: partialText }) => {
           saveMessage({
             messageId: responseMessageId,
             sender,
@@ -70,12 +64,10 @@ const EditController = async (req, res, next, initializeClient) => {
             error: false,
             user,
           });
-        }
-        if (saveDelay < 500) {
-          saveDelay = 500;
-        }
-      },
+        },
+        3000,
+        { trailing: false },
+      ),
     });

     const getAbortData = () => ({
@@ -118,7 +110,6 @@ const EditController = async (req, res, next, initializeClient) => {
       overrideParentMessageId,
       getReqData,
       onStart,
-      addMetadata,
       abortController,
       onProgress: progressCallback.call(null, {
         res,
@@ -127,10 +118,6 @@ const EditController = async (req, res, next, initializeClient) => {
       }),
     });

-    if (metadata) {
-      response = { ...response, ...metadata };
-    }
-
     const conversation = await getConvo(user, conversationId);
     conversation.title =
       conversation && !conversation.title ? null : conversation?.title || 'New Chat';

View file

@@ -1,6 +1,6 @@
 const express = require('express');
 const AskController = require('~/server/controllers/AskController');
-const { initializeClient } = require('~/server/services/Endpoints/anthropic');
+const { addTitle, initializeClient } = require('~/server/services/Endpoints/anthropic');
 const {
   setHeaders,
   handleAbort,
@@ -20,7 +20,7 @@ router.post(
   buildEndpointOption,
   setHeaders,
   async (req, res, next) => {
-    await AskController(req, res, next, initializeClient);
+    await AskController(req, res, next, initializeClient, addTitle);
   },
 );

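The controller half of this wiring is not shown here; based on addTitle's signature in the new anthropic service module later in this diff, the AskController call site presumably looks roughly like the sketch below (the guard conditions are assumptions drawn from the equivalent OpenAI flow, not verbatim controller code):

// Sketch only: title a brand-new conversation whose first message has no
// parent, and fire without await so titling never blocks the response.
if (addTitle && parentMessageId === Constants.NO_PARENT && newConvo) {
  addTitle(req, {
    text,
    response,
    client,
  });
}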
View file

@@ -1,11 +1,10 @@
 const express = require('express');
-const router = express.Router();
+const throttle = require('lodash/throttle');
 const { getResponseSender, Constants } = require('librechat-data-provider');
-const { validateTools } = require('~/app');
-const { addTitle } = require('~/server/services/Endpoints/openAI');
 const { initializeClient } = require('~/server/services/Endpoints/gptPlugins');
 const { saveMessage, getConvoTitle, getConvo } = require('~/models');
 const { sendMessage, createOnProgress } = require('~/server/utils');
+const { addTitle } = require('~/server/services/Endpoints/openAI');
 const {
   handleAbort,
   createAbortController,
@@ -16,8 +15,11 @@ const {
   buildEndpointOption,
   moderateText,
 } = require('~/server/middleware');
+const { validateTools } = require('~/app');
 const { logger } = require('~/config');

+const router = express.Router();
+
 router.use(moderateText);
 router.post('/abort', handleAbort());
@@ -35,14 +37,13 @@
       parentMessageId = null,
       overrideParentMessageId = null,
     } = req.body;

     logger.debug('[/ask/gptPlugins]', { text, conversationId, ...endpointOption });
-    let metadata;
     let userMessage;
     let promptTokens;
     let userMessageId;
     let responseMessageId;
-    let lastSavedTimestamp = 0;
-    let saveDelay = 100;
     const sender = getResponseSender({
       ...endpointOption,
       model: endpointOption.modelOptions.model,
@@ -52,7 +53,6 @@
     const plugins = [];

-    const addMetadata = (data) => (metadata = data);
     const getReqData = (data = {}) => {
       for (let key in data) {
         if (key === 'userMessage') {
@@ -68,6 +68,7 @@
       }
     };

+    const throttledSaveMessage = throttle(saveMessage, 3000, { trailing: false });
     let streaming = null;
     let timer = null;
@@ -77,31 +78,22 @@
       getPartialText,
     } = createOnProgress({
       onProgress: ({ text: partialText }) => {
-        const currentTimestamp = Date.now();
-
         if (timer) {
           clearTimeout(timer);
         }

-        if (currentTimestamp - lastSavedTimestamp > saveDelay) {
-          lastSavedTimestamp = currentTimestamp;
-          saveMessage({
-            messageId: responseMessageId,
-            sender,
-            conversationId,
-            parentMessageId: overrideParentMessageId || userMessageId,
-            text: partialText,
-            model: endpointOption.modelOptions.model,
-            unfinished: true,
-            error: false,
-            plugins,
-            user,
-          });
-        }
-
-        if (saveDelay < 500) {
-          saveDelay = 500;
-        }
+        throttledSaveMessage({
+          messageId: responseMessageId,
+          sender,
+          conversationId,
+          parentMessageId: overrideParentMessageId || userMessageId,
+          text: partialText,
+          model: endpointOption.modelOptions.model,
+          unfinished: true,
+          error: false,
+          plugins,
+          user,
+        });

         streaming = new Promise((resolve) => {
           timer = setTimeout(() => {
@@ -180,7 +172,6 @@
         onToolStart,
         onToolEnd,
         onStart,
-        addMetadata,
         getPartialText,
         ...endpointOption,
         onProgress: progressCallback.call(null, {
@@ -196,10 +187,6 @@
         response.parentMessageId = overrideParentMessageId;
       }

-      if (metadata) {
-        response = { ...response, ...metadata };
-      }
-
       logger.debug('[/ask/gptPlugins]', response);
       response.plugins = plugins.map((p) => ({ ...p, loading: false }));

View file

@@ -1,10 +1,6 @@
 const express = require('express');
-const router = express.Router();
-const { validateTools } = require('~/app');
+const throttle = require('lodash/throttle');
 const { getResponseSender } = require('librechat-data-provider');
-const { saveMessage, getConvoTitle, getConvo } = require('~/models');
-const { initializeClient } = require('~/server/services/Endpoints/gptPlugins');
-const { sendMessage, createOnProgress, formatSteps, formatAction } = require('~/server/utils');
 const {
   handleAbort,
   createAbortController,
@@ -15,8 +11,14 @@ const {
   buildEndpointOption,
   moderateText,
 } = require('~/server/middleware');
+const { sendMessage, createOnProgress, formatSteps, formatAction } = require('~/server/utils');
+const { initializeClient } = require('~/server/services/Endpoints/gptPlugins');
+const { saveMessage, getConvoTitle, getConvo } = require('~/models');
+const { validateTools } = require('~/app');
 const { logger } = require('~/config');

+const router = express.Router();
+
 router.use(moderateText);
 router.post('/abort', handleAbort());
@@ -45,11 +47,9 @@
       conversationId,
       ...endpointOption,
     });

-    let metadata;
     let userMessage;
     let promptTokens;
-    let lastSavedTimestamp = 0;
-    let saveDelay = 100;
     const sender = getResponseSender({
       ...endpointOption,
       model: endpointOption.modelOptions.model,
@@ -64,7 +64,6 @@
       outputs: null,
     };

-    const addMetadata = (data) => (metadata = data);
     const getReqData = (data = {}) => {
       for (let key in data) {
         if (key === 'userMessage') {
@@ -77,6 +76,7 @@
       }
     };

+    const throttledSaveMessage = throttle(saveMessage, 3000, { trailing: false });
     const {
       onProgress: progressCallback,
       sendIntermediateMessage,
@@ -84,31 +84,22 @@
     } = createOnProgress({
       generation,
       onProgress: ({ text: partialText }) => {
-        const currentTimestamp = Date.now();
-
         if (plugin.loading === true) {
           plugin.loading = false;
         }

-        if (currentTimestamp - lastSavedTimestamp > saveDelay) {
-          lastSavedTimestamp = currentTimestamp;
-          saveMessage({
-            messageId: responseMessageId,
-            sender,
-            conversationId,
-            parentMessageId: overrideParentMessageId || userMessageId,
-            text: partialText,
-            model: endpointOption.modelOptions.model,
-            unfinished: true,
-            isEdited: true,
-            error: false,
-            user,
-          });
-        }
-
-        if (saveDelay < 500) {
-          saveDelay = 500;
-        }
+        throttledSaveMessage({
+          messageId: responseMessageId,
+          sender,
+          conversationId,
+          parentMessageId: overrideParentMessageId || userMessageId,
+          text: partialText,
+          model: endpointOption.modelOptions.model,
+          unfinished: true,
+          isEdited: true,
+          error: false,
+          user,
+        });
       },
     });
@@ -161,7 +152,6 @@
       onAgentAction,
       onChainEnd,
       onStart,
-      addMetadata,
       ...endpointOption,
       onProgress: progressCallback.call(null, {
         res,
@@ -176,10 +166,6 @@
       response.parentMessageId = overrideParentMessageId;
     }

-    if (metadata) {
-      response = { ...response, ...metadata };
-    }
-
     logger.debug('[/edit/gptPlugins] CLIENT RESPONSE', response);
     response.plugin = { ...plugin, loading: false };
     await saveMessage({ ...response, user });

View file

@@ -0,0 +1,32 @@
+const { CacheKeys } = require('librechat-data-provider');
+const getLogStores = require('~/cache/getLogStores');
+const { isEnabled } = require('~/server/utils');
+const { saveConvo } = require('~/models');
+
+const addTitle = async (req, { text, response, client }) => {
+  const { TITLE_CONVO = 'true' } = process.env ?? {};
+  if (!isEnabled(TITLE_CONVO)) {
+    return;
+  }
+
+  if (client.options.titleConvo === false) {
+    return;
+  }
+
+  // If the request was aborted, don't generate the title.
+  if (client.abortController.signal.aborted) {
+    return;
+  }
+
+  const titleCache = getLogStores(CacheKeys.GEN_TITLE);
+  const key = `${req.user.id}-${response.conversationId}`;
+  const title = await client.titleConvo({ text, responseText: response?.text });
+  await titleCache.set(key, title, 120000);
+
+  await saveConvo(req.user.id, {
+    conversationId: response.conversationId,
+    title,
+  });
+};
+
+module.exports = addTitle;

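The AnthropicClient.titleConvo implementation itself is not part of the hunks shown here. As a hedged illustration of what "conversation titling using Anthropic Function Calling" can look like with the official @anthropic-ai/sdk, forcing the model to answer through a tool's input_schema so the title comes back as structured output rather than prose (the tool name, schema, and prompt are invented for this example, not the repo's actual code):

const Anthropic = require('@anthropic-ai/sdk');

// Illustrative sketch only; the model matches the commit's new default list.
async function titleConvo({ text, responseText }) {
  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

  const result = await client.messages.create({
    model: 'claude-3-haiku-20240307',
    max_tokens: 256,
    tools: [
      {
        name: 'submit_title',
        description: 'Submit a brief title for the conversation.',
        input_schema: {
          type: 'object',
          properties: {
            title: { type: 'string', description: '5 words or fewer, no quotation marks' },
          },
          required: ['title'],
        },
      },
    ],
    // Force the model to respond via the tool so the output is structured.
    tool_choice: { type: 'tool', name: 'submit_title' },
    messages: [
      {
        role: 'user',
        content: `Title this conversation:\n\nUser: ${text}\n\nAssistant: ${responseText}`,
      },
    ],
  });

  // The title arrives as the input of a tool_use content block.
  const toolUse = result.content.find((block) => block.type === 'tool_use');
  return toolUse?.input?.title ?? 'New Chat';
}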
View file

@@ -1,8 +1,9 @@
+const addTitle = require('./addTitle');
 const buildOptions = require('./buildOptions');
 const initializeClient = require('./initializeClient');

 module.exports = {
-  // addTitle, // todo
+  addTitle,
   buildOptions,
   initializeClient,
 };

View file

@@ -17,7 +17,7 @@ const addTitle = async (req, { text, responseText, conversationId, client }) =>
   const key = `${req.user.id}-${conversationId}`;
   const title = await client.titleConvo({ text, conversationId, responseText });
-  await titleCache.set(key, title);
+  await titleCache.set(key, title, 120000);

   await saveConvo(req.user.id, {
     conversationId,

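The only change here and in the openAI counterpart below is the explicit TTL argument. Assuming the Keyv-style set(key, value, ttl) signature these cache stores expose, the TTL is in milliseconds, so 120000 is the "TTL of 2 minutes" from the commit message:

// Sketch: naming the constant makes the unit explicit (inside an async function).
const TWO_MINUTES_MS = 2 * 60 * 1000; // 120000
await titleCache.set(key, title, TWO_MINUTES_MS);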
View file

@@ -35,23 +35,46 @@ const listAssistants = async ({ req, res, query }) => {
  * @returns {Promise<AssistantListResponse>} A promise that resolves to an array of assistant data merged with their respective model information.
  */
 const listAssistantsForAzure = async ({ req, res, azureConfig = {}, query }) => {
+  /** @type {Array<[string, TAzureModelConfig]>} */
+  const groupModelTuples = [];
   const promises = [];
-  const models = [];
+  /** @type {Array<TAzureGroup>} */
+  const groups = [];

   const { groupMap, assistantGroups } = azureConfig;
   for (const groupName of assistantGroups) {
     const group = groupMap[groupName];
-    req.body.model = Object.keys(group?.models)[0];
-    models.push(req.body.model);
+    groups.push(group);
+
+    const currentModelTuples = Object.entries(group?.models);
+    groupModelTuples.push(currentModelTuples);
+
+    /* The specified model is only necessary to
+       fetch assistants for the shared instance */
+    req.body.model = currentModelTuples[0][0];
     promises.push(listAssistants({ req, res, query }));
   }

   const resolvedQueries = await Promise.all(promises);
   const data = resolvedQueries.flatMap((res, i) =>
     res.data.map((assistant) => {
-      const model = models[i];
-      return { ...assistant, model } ?? {};
+      const deploymentName = assistant.model;
+      const currentGroup = groups[i];
+      const currentModelTuples = groupModelTuples[i];
+      const firstModel = currentModelTuples[0][0];
+
+      if (currentGroup.deploymentName === deploymentName) {
+        return { ...assistant, model: firstModel };
+      }
+
+      for (const [model, modelConfig] of currentModelTuples) {
+        if (modelConfig.deploymentName === deploymentName) {
+          return { ...assistant, model };
+        }
+      }
+
+      return { ...assistant, model: firstModel };
     }),
   );

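In effect, Azure returns the deployment name in assistant.model, and the new code translates it back to a configured model key, falling back to the group's first model when neither the shared instance nor any per-model deployment matches. A self-contained sketch of that lookup with invented config values (the group and deployment names are illustrative only):

// Invented example config; real values come from azureConfig.groupMap.
const group = {
  deploymentName: 'shared-instance',
  models: {
    'gpt-4-turbo': { deploymentName: 'gpt-4-turbo-deploy' },
    'gpt-3.5-turbo': { deploymentName: 'gpt-35-deploy' },
  },
};

const modelTuples = Object.entries(group.models);

function resolveModel(assistant) {
  const deploymentName = assistant.model; // Azure reports the deployment name
  const firstModel = modelTuples[0][0];

  if (group.deploymentName === deploymentName) {
    return firstModel;
  }

  for (const [model, modelConfig] of modelTuples) {
    if (modelConfig.deploymentName === deploymentName) {
      return model;
    }
  }

  return firstModel; // fallback when no deployment matches
}

console.log(resolveModel({ model: 'gpt-35-deploy' })); // -> 'gpt-3.5-turbo'
console.log(resolveModel({ model: 'unknown-deploy' })); // -> 'gpt-4-turbo'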
View file

@@ -22,7 +22,7 @@ const addTitle = async (req, { text, response, client }) => {
   const key = `${req.user.id}-${response.conversationId}`;
   const title = await client.titleConvo({ text, responseText: response?.text });
-  await titleCache.set(key, title);
+  await titleCache.set(key, title, 120000);

   await saveConvo(req.user.id, {
     conversationId: response.conversationId,
     title,
title,