⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey

* refactor: avoid saving messages while streaming, save partial text to cache instead

* fix(ci): processChunks

* chore: log aborted requests for debugging

* feat: set stream rate for token processing

* chore: specify default stream rate

* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment

* refactor: abstract the error handler

* feat: streamRate for assistants; refactor: update default rate for token processing

* refactor: update error handling in assistants/errors.js

Danny Avila, 2024-07-17 10:47:17 -04:00, committed via GitHub
commit 5d40d0a37a, parent 1c282d1517
29 changed files with 661 additions and 309 deletions
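The mechanism behind most of these diffs is small: after forwarding each streamed chunk, the client sleeps for a configurable number of milliseconds. A minimal sketch of the pattern, assuming a promise-based sleep helper like the one imported from ~/server/utils and an illustrative default rate:

// Sketch of the pacing pattern; `sleep` and the default of 1 ms are
// stand-ins for the repo's `~/server/utils` helper and
// `Constants.DEFAULT_STREAM_RATE`, assumed here for illustration.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function streamWithRate(chunks, onProgress, streamRate = 1) {
  for await (const chunk of chunks) {
    onProgress(chunk); // hand the token/chunk to the UI stream
    await sleep(streamRate); // throttle delivery to the configured rate
  }
}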

AnthropicClient.js

@@ -2,8 +2,9 @@ const Anthropic = require('@anthropic-ai/sdk');
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const {
-  getResponseSender,
+  Constants,
   EModelEndpoint,
+  getResponseSender,
   validateVisionModel,
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images/encode');
@@ -16,6 +17,7 @@ const {
 } = require('./prompts');
 const spendTokens = require('~/models/spendTokens');
 const { getModelMaxTokens } = require('~/utils');
+const { sleep } = require('~/server/utils');
 const BaseClient = require('./BaseClient');
 const { logger } = require('~/config');
@@ -605,6 +607,7 @@ class AnthropicClient extends BaseClient {
     };
     const maxRetries = 3;
+    const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     async function processResponse() {
       let attempts = 0;
@@ -627,6 +630,8 @@
         } else if (completion.completion) {
          handleChunk(completion.completion);
         }
+        await sleep(streamRate);
       }
       // Successful processing, exit loop

BaseClient.js

@@ -1,10 +1,11 @@
 const crypto = require('crypto');
 const fetch = require('node-fetch');
-const { supportsBalanceCheck, Constants } = require('librechat-data-provider');
+const { supportsBalanceCheck, Constants, CacheKeys, Time } = require('librechat-data-provider');
 const { getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
 const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
 const checkBalance = require('~/models/checkBalance');
 const { getFiles } = require('~/models/File');
+const { getLogStores } = require('~/cache');
 const TextStream = require('./TextStream');
 const { logger } = require('~/config');
@@ -540,6 +541,15 @@
       await this.recordTokenUsage({ promptTokens, completionTokens });
     }
     this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
+    const messageCache = getLogStores(CacheKeys.MESSAGES);
+    messageCache.set(
+      responseMessageId,
+      {
+        text: responseMessage.text,
+        complete: true,
+      },
+      Time.FIVE_MINUTES,
+    );
     delete responseMessage.tokenCount;
     return responseMessage;
   }
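With mid-stream database saves removed, the finished text is parked in a short-lived message cache instead. A hypothetical read path for such an entry — the function name and its fallback logic are assumptions for illustration, not the PR's actual handler:

// Hypothetical consumer of the cache entry written above; only the
// imports mirror the diff, the rest is an illustrative assumption.
const { CacheKeys } = require('librechat-data-provider');
const { getLogStores } = require('~/cache');

async function getLatestText(messageId, fetchFromDb) {
  const messageCache = getLogStores(CacheKeys.MESSAGES);
  const cached = await messageCache.get(messageId);
  if (cached) {
    // `complete` distinguishes a finished response from partial stream text
    return { text: cached.text, complete: cached.complete === true };
  }
  // Fall back to the persisted message once streaming has finished
  const message = await fetchFromDb(messageId);
  return { text: message?.text ?? '', complete: true };
}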

GoogleClient.js

@@ -13,10 +13,12 @@ const {
   endpointSettings,
   EModelEndpoint,
   VisionModes,
+  Constants,
   AuthKeys,
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const { getModelMaxTokens } = require('~/utils');
+const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');
 const {
   formatMessage,
@@ -620,8 +622,9 @@ class GoogleClient extends BaseClient {
   }
   async getCompletion(_payload, options = {}) {
-    const { onProgress, abortController } = options;
     const { parameters, instances } = _payload;
+    const { onProgress, abortController } = options;
+    const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
     let examples;
@@ -701,6 +704,7 @@
         delay,
       });
       reply += chunkText;
+      await sleep(streamRate);
     }
     return reply;
   }
@@ -712,10 +716,17 @@
       safetySettings: safetySettings,
     });
-    let delay = this.isGenerativeModel ? 12 : 8;
-    if (modelName.includes('flash')) {
-      delay = 5;
+    let delay = this.options.streamRate || 8;
+    if (!this.options.streamRate) {
+      if (this.isGenerativeModel) {
+        delay = 12;
+      }
+      if (modelName.includes('flash')) {
+        delay = 5;
+      }
     }
     for await (const chunk of stream) {
       const chunkText = chunk?.content ?? chunk;
       await this.generateTextStream(chunkText, onProgress, {
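Restated as a standalone function, the new delay selection reads: an explicitly configured streamRate always wins, and only otherwise do the legacy per-model heuristics apply. This is a restatement of the hunk above, not code from the PR:

// Illustrative pure-function form of the delay selection in the diff.
function resolveGoogleDelay({ streamRate, isGenerativeModel, modelName = '' }) {
  if (streamRate) {
    return streamRate; // user-configured rate takes precedence
  }
  let delay = 8; // base delay in ms
  if (isGenerativeModel) {
    delay = 12;
  }
  if (modelName.includes('flash')) {
    delay = 5; // flash models stream faster by default
  }
  return delay;
}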

OllamaClient.js

@@ -1,7 +1,9 @@
 const { z } = require('zod');
 const axios = require('axios');
 const { Ollama } = require('ollama');
+const { Constants } = require('librechat-data-provider');
 const { deriveBaseURL } = require('~/utils');
+const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');
 const ollamaPayloadSchema = z.object({
@@ -40,6 +42,7 @@ const getValidBase64 = (imageUrl) => {
 class OllamaClient {
   constructor(options = {}) {
     const host = deriveBaseURL(options.baseURL ?? 'http://localhost:11434');
+    this.streamRate = options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     /** @type {Ollama} */
     this.client = new Ollama({ host });
   }
@@ -136,6 +139,8 @@
         stream.controller.abort();
         break;
       }
+      await sleep(this.streamRate);
     }
   }
   // TODO: regular completion
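Callers can now thread the rate through the constructor, as the OpenAIClient hunk below does. A usage sketch with illustrative values:

// Usage sketch: construct the client with a pacing rate.
const client = new OllamaClient({
  baseURL: 'http://localhost:11434', // default host from the constructor above
  streamRate: 25, // ms between forwarded chunks; example value
});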

OpenAIClient.js

@@ -1182,8 +1182,10 @@ ${convo}
       });
     }
+    const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     if (this.message_file_map && this.isOllama) {
-      const ollamaClient = new OllamaClient({ baseURL });
+      const ollamaClient = new OllamaClient({ baseURL, streamRate });
       return await ollamaClient.chatCompletion({
         payload: modelOptions,
         onProgress,
@@ -1221,8 +1223,6 @@ ${convo}
       }
     });
-    const azureDelay = this.modelOptions.model?.includes('gpt-4') ? 30 : 17;
     for await (const chunk of stream) {
       const token = chunk.choices[0]?.delta?.content || '';
       intermediateReply += token;
@@ -1232,9 +1232,7 @@ ${convo}
         break;
       }
-      if (this.azure) {
-        await sleep(azureDelay);
-      }
+      await sleep(streamRate);
     }
     if (!UnexpectedRoleError) {
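Design note: the Azure-only throttle (30 ms for gpt-4-class models, 17 ms otherwise) is folded into the single streamRate, so Azure and non-Azure streams now pace identically unless an endpoint configures its own rate.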

PluginsClient.js

@@ -1,5 +1,6 @@
 const OpenAIClient = require('./OpenAIClient');
 const { CallbackManager } = require('langchain/callbacks');
+const { CacheKeys, Time } = require('librechat-data-provider');
 const { BufferMemory, ChatMessageHistory } = require('langchain/memory');
 const { initializeCustomAgent, initializeFunctionsAgent } = require('./agents');
 const { addImages, buildErrorInput, buildPromptPrefix } = require('./output_parsers');
@@ -11,6 +12,7 @@ const { SelfReflectionTool } = require('./tools');
 const { isEnabled } = require('~/server/utils');
 const { extractBaseURL } = require('~/utils');
 const { loadTools } = require('./tools/util');
+const { getLogStores } = require('~/cache');
 const { logger } = require('~/config');
 class PluginsClient extends OpenAIClient {
@@ -220,6 +222,13 @@
     }
   }
+  /**
+   *
+   * @param {TMessage} responseMessage
+   * @param {Partial<TMessage>} saveOptions
+   * @param {string} user
+   * @returns
+   */
   async handleResponseMessage(responseMessage, saveOptions, user) {
     const { output, errorMessage, ...result } = this.result;
     logger.debug('[PluginsClient][handleResponseMessage] Output:', {
@@ -239,6 +248,15 @@
     }
     this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
+    const messageCache = getLogStores(CacheKeys.MESSAGES);
+    messageCache.set(
+      responseMessage.messageId,
+      {
+        text: responseMessage.text,
+        complete: true,
+      },
+      Time.FIVE_MINUTES,
+    );
     delete responseMessage.tokenCount;
     return { ...responseMessage, ...result };
   }