🧠 feat: Cohere support as Custom Endpoint (#2328)

* chore: bump cohere-ai, fix firebase vulnerabilities by going down versions

* feat: cohere rates and context windows

* feat(createCoherePayload): transform openai payload for cohere compatibility

* feat: cohere backend support

* refactor(UnknownIcon): optimize icon render and add cohere

* docs: add cohere to Compatible AI Endpoints

* Update ai_endpoints.md
This commit is contained in:
Danny Avila 2024-04-05 15:19:41 -04:00 committed by GitHub
parent daa5f43ac6
commit cd7f3a51e1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1007 additions and 622 deletions

View file

@ -23,7 +23,7 @@ class BaseClient {
throw new Error('Method \'setOptions\' must be implemented.');
}
getCompletion() {
async getCompletion() {
throw new Error('Method \'getCompletion\' must be implemented.');
}

View file

@ -3,10 +3,13 @@ const crypto = require('crypto');
const {
EModelEndpoint,
resolveHeaders,
CohereConstants,
mapModelToAzureConfig,
} = require('librechat-data-provider');
const { CohereClient } = require('cohere-ai');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
const { createCoherePayload } = require('./llm');
const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
@ -147,7 +150,8 @@ class ChatGPTClient extends BaseClient {
return tokenizer;
}
async getCompletion(input, onProgress, abortController = null) {
/** @type {getCompletion} */
async getCompletion(input, onProgress, onTokenProgress, abortController = null) {
if (!abortController) {
abortController = new AbortController();
}
@ -305,6 +309,11 @@ class ChatGPTClient extends BaseClient {
});
}
if (baseURL.startsWith(CohereConstants.API_URL)) {
const payload = createCoherePayload({ modelOptions });
return await this.cohereChatCompletion({ payload, onTokenProgress });
}
if (baseURL.includes('v1') && !baseURL.includes('/completions') && !this.isChatCompletion) {
baseURL = baseURL.split('v1')[0] + 'v1/completions';
} else if (
@ -408,6 +417,35 @@ class ChatGPTClient extends BaseClient {
return response.json();
}
/** @type {cohereChatCompletion} */
async cohereChatCompletion({ payload, onTokenProgress }) {
const cohere = new CohereClient({
token: this.apiKey,
environment: this.completionsUrl,
});
if (!payload.stream) {
const chatResponse = await cohere.chat(payload);
return chatResponse.text;
}
const chatStream = await cohere.chatStream(payload);
let reply = '';
for await (const message of chatStream) {
if (!message) {
continue;
}
if (message.eventType === 'text-generation' && message.text) {
onTokenProgress(message.text);
} else if (message.eventType === 'stream-end' && message.response) {
reply = message.response.text;
}
}
return reply;
}
async generateTitle(userMessage, botMessage) {
const instructionsPayload = {
role: 'system',

View file

@ -5,6 +5,7 @@ const {
EModelEndpoint,
resolveHeaders,
ImageDetailCost,
CohereConstants,
getResponseSender,
validateVisionModel,
mapModelToAzureConfig,
@ -16,7 +17,13 @@ const {
getModelMaxTokens,
genAzureChatCompletion,
} = require('~/utils');
const { truncateText, formatMessage, createContextHandlers, CUT_OFF_PROMPT } = require('./prompts');
const {
truncateText,
formatMessage,
createContextHandlers,
CUT_OFF_PROMPT,
titleInstruction,
} = require('./prompts');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { handleOpenAIErrors } = require('./tools/util');
const spendTokens = require('~/models/spendTokens');
@ -39,7 +46,10 @@ class OpenAIClient extends BaseClient {
super(apiKey, options);
this.ChatGPTClient = new ChatGPTClient();
this.buildPrompt = this.ChatGPTClient.buildPrompt.bind(this);
/** @type {getCompletion} */
this.getCompletion = this.ChatGPTClient.getCompletion.bind(this);
/** @type {cohereChatCompletion} */
this.cohereChatCompletion = this.ChatGPTClient.cohereChatCompletion.bind(this);
this.contextStrategy = options.contextStrategy
? options.contextStrategy.toLowerCase()
: 'discard';
@ -48,6 +58,9 @@ class OpenAIClient extends BaseClient {
this.azure = options.azure || false;
this.setOptions(options);
this.metadata = {};
/** @type {string | undefined} - The API Completions URL */
this.completionsUrl;
}
// TODO: PluginsClient calls this 3x, unneeded
@ -533,6 +546,7 @@ class OpenAIClient extends BaseClient {
return result;
}
/** @type {sendCompletion} */
async sendCompletion(payload, opts = {}) {
let reply = '';
let result = null;
@ -541,7 +555,7 @@ class OpenAIClient extends BaseClient {
const invalidBaseUrl = this.completionsUrl && extractBaseURL(this.completionsUrl) === null;
const useOldMethod = !!(invalidBaseUrl || !this.isChatCompletion || typeof Bun !== 'undefined');
if (typeof opts.onProgress === 'function' && useOldMethod) {
await this.getCompletion(
const completionResult = await this.getCompletion(
payload,
(progressMessage) => {
if (progressMessage === '[DONE]') {
@ -574,8 +588,13 @@ class OpenAIClient extends BaseClient {
opts.onProgress(token);
reply += token;
},
opts.onProgress,
opts.abortController || new AbortController(),
);
if (completionResult && typeof completionResult === 'string') {
reply = completionResult;
}
} else if (typeof opts.onProgress === 'function' || this.options.useChatCompletion) {
reply = await this.chatCompletion({
payload,
@ -586,9 +605,14 @@ class OpenAIClient extends BaseClient {
result = await this.getCompletion(
payload,
null,
opts.onProgress,
opts.abortController || new AbortController(),
);
if (result && typeof result === 'string') {
return result.trim();
}
logger.debug('[OpenAIClient] sendCompletion: result', result);
if (this.isChatCompletion) {
@ -760,8 +784,7 @@ class OpenAIClient extends BaseClient {
const instructionsPayload = [
{
role: 'system',
content: `Detect user language and write in the same language an extremely concise title for this conversation, which you must accurately detect.
Write in the detected language. Title in 5 Words or Less. No Punctuation or Quotation. Do not mention the language. All first letters of every word should be capitalized and write the title in User Language only.
content: `Please generate ${titleInstruction}
${convo}
@ -770,8 +793,12 @@ ${convo}
];
try {
let useChatCompletion = true;
if (CohereConstants.API_URL) {
useChatCompletion = false;
}
title = (
await this.sendPayload(instructionsPayload, { modelOptions, useChatCompletion: true })
await this.sendPayload(instructionsPayload, { modelOptions, useChatCompletion })
).replaceAll('"', '');
} catch (e) {
logger.error(

View file

@ -0,0 +1,85 @@
const { CohereConstants } = require('librechat-data-provider');
const { titleInstruction } = require('../prompts/titlePrompts');
// Mapping OpenAI roles to Cohere roles
const roleMap = {
user: CohereConstants.ROLE_USER,
assistant: CohereConstants.ROLE_CHATBOT,
system: CohereConstants.ROLE_SYSTEM, // Recognize and map the system role explicitly
};
/**
* Adjusts an OpenAI ChatCompletionPayload to conform with Cohere's expected chat payload format.
* Now includes handling for "system" roles explicitly mentioned.
*
* @param {Object} options - Object containing the model options.
* @param {ChatCompletionPayload} options.modelOptions - The OpenAI model payload options.
* @returns {CohereChatStreamRequest} Cohere-compatible chat API payload.
*/
function createCoherePayload({ modelOptions }) {
/** @type {string | undefined} */
let preamble;
let latestUserMessageContent = '';
const {
stream,
stop,
top_p,
temperature,
frequency_penalty,
presence_penalty,
max_tokens,
messages,
model,
...rest
} = modelOptions;
// Filter out the latest user message and transform remaining messages to Cohere's chat_history format
let chatHistory = messages.reduce((acc, message, index, arr) => {
const isLastUserMessage = index === arr.length - 1 && message.role === 'user';
const messageContent =
typeof message.content === 'string'
? message.content
: message.content.map((part) => (part.type === 'text' ? part.text : '')).join(' ');
if (isLastUserMessage) {
latestUserMessageContent = messageContent;
} else {
acc.push({
role: roleMap[message.role] || CohereConstants.ROLE_USER,
message: messageContent,
});
}
return acc;
}, []);
if (
chatHistory.length === 1 &&
chatHistory[0].role === CohereConstants.ROLE_SYSTEM &&
!latestUserMessageContent.length
) {
const message = chatHistory[0].message;
latestUserMessageContent = message.includes(titleInstruction)
? CohereConstants.TITLE_MESSAGE
: '.';
preamble = message;
}
return {
message: latestUserMessageContent,
model: model,
chat_history: chatHistory,
stream: stream ?? false,
temperature: temperature,
frequency_penalty: frequency_penalty,
presence_penalty: presence_penalty,
max_tokens: max_tokens,
stop_sequences: stop,
preamble,
p: top_p,
...rest,
};
}
module.exports = createCoherePayload;

View file

@ -1,7 +1,9 @@
const createLLM = require('./createLLM');
const RunManager = require('./RunManager');
const createCoherePayload = require('./createCoherePayload');
module.exports = {
createLLM,
RunManager,
createCoherePayload,
};

View file

@ -27,6 +27,8 @@ ${convo}`,
return titlePrompt;
};
const titleInstruction =
'a concise, 5-word-or-less title for the conversation, using its same language, with no punctuation. Apply title case conventions appropriate for the language. For English, use AP Stylebook Title Case. Never directly mention the language name or the word "title"';
const titleFunctionPrompt = `In this environment you have access to a set of tools you can use to generate the conversation title.
You may call them like this:
@ -51,7 +53,7 @@ Submit a brief title in the conversation's language, following the parameter des
<parameter>
<name>title</name>
<type>string</type>
<description>A concise, 5-word-or-less title for the conversation, using its same language, with no punctuation. Apply title case conventions appropriate for the language. For English, use AP Stylebook Title Case. Never directly mention the language name or the word "title"</description>
<description>${titleInstruction}</description>
</parameter>
</parameters>
</tool_description>
@ -80,6 +82,7 @@ function parseTitleFromPrompt(prompt) {
module.exports = {
langPrompt,
titleInstruction,
createTitlePrompt,
titleFunctionPrompt,
parseTitleFromPrompt,